summaryrefslogtreecommitdiff
path: root/src/mscorlib/shared/System/Text
diff options
context:
space:
mode:
Diffstat (limited to 'src/mscorlib/shared/System/Text')
-rw-r--r--src/mscorlib/shared/System/Text/ASCIIEncoding.cs973
-rw-r--r--src/mscorlib/shared/System/Text/Decoder.cs339
-rw-r--r--src/mscorlib/shared/System/Text/Encoder.cs333
-rw-r--r--src/mscorlib/shared/System/Text/EncodingInfo.cs72
-rw-r--r--src/mscorlib/shared/System/Text/EncodingNLS.cs322
-rw-r--r--src/mscorlib/shared/System/Text/EncodingProvider.cs136
-rw-r--r--src/mscorlib/shared/System/Text/Normalization.cs29
-rw-r--r--src/mscorlib/shared/System/Text/StringBuilder.cs2409
-rw-r--r--src/mscorlib/shared/System/Text/UTF32Encoding.cs1234
-rw-r--r--src/mscorlib/shared/System/Text/UTF8Encoding.cs2668
-rw-r--r--src/mscorlib/shared/System/Text/UnicodeEncoding.cs2058
11 files changed, 10573 insertions, 0 deletions
diff --git a/src/mscorlib/shared/System/Text/ASCIIEncoding.cs b/src/mscorlib/shared/System/Text/ASCIIEncoding.cs
new file mode 100644
index 0000000000..e5c1194849
--- /dev/null
+++ b/src/mscorlib/shared/System/Text/ASCIIEncoding.cs
@@ -0,0 +1,973 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+using System;
+using System.Diagnostics;
+using System.Diagnostics.Contracts;
+using System.Runtime.Serialization;
+
+namespace System.Text
+{
+ // ASCIIEncoding
+ //
+ // Note that ASCIIEncoding is optimized with no best fit and ? for fallback.
+ // It doesn't come in other flavors.
+ //
+ // Note: ASCIIEncoding is the only encoding that doesn't do best fit (windows has best fit).
+ //
+ // Note: IsAlwaysNormalized remains false because 1/2 the code points are unassigned, so they'd
+ // use fallbacks, and we cannot guarantee that fallbacks are normalized.
+
+ [Serializable]
+ public class ASCIIEncoding : Encoding
+ {
+ // Allow for devirtualization (see https://github.com/dotnet/coreclr/pull/9230)
+ [Serializable]
+ internal sealed class ASCIIEncodingSealed : ASCIIEncoding { }
+
+ // Used by Encoding.ASCII for lazy initialization
+ // The initialization code will not be run until a static member of the class is referenced
+ internal static readonly ASCIIEncodingSealed s_default = new ASCIIEncodingSealed();
+
+ public ASCIIEncoding() : base(Encoding.CodePageASCII)
+ {
+ }
+
+ internal override void SetDefaultFallbacks()
+ {
+ // For ASCIIEncoding we just use default replacement fallback
+ this.encoderFallback = EncoderFallback.ReplacementFallback;
+ this.decoderFallback = DecoderFallback.ReplacementFallback;
+ }
+
+ // WARNING: GetByteCount(string chars), GetBytes(string chars,...), and GetString(byte[] byteIndex...)
+ // WARNING: have different variable names than EncodingNLS.cs, so this can't just be cut & pasted,
+ // WARNING: or it'll break VB's way of calling these.
+ //
+ // The following methods are copied from EncodingNLS.cs.
+ // Unfortunately EncodingNLS.cs is internal and we're public, so we have to re-implement them here.
+ // These should be kept in sync for the following classes:
+ // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
+
+ // Returns the number of bytes required to encode a range of characters in
+ // a character array.
+ //
+ // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
+ // So if you fix this, fix the others. Currently those include:
+ // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
+ // parent method is safe
+
+ public override unsafe int GetByteCount(char[] chars, int index, int count)
+ {
+ // Validate input parameters
+ if (chars == null)
+ throw new ArgumentNullException("chars", SR.ArgumentNull_Array);
+
+ if (index < 0 || count < 0)
+ throw new ArgumentOutOfRangeException((index < 0 ? "index" : "count"), SR.ArgumentOutOfRange_NeedNonNegNum);
+
+ if (chars.Length - index < count)
+ throw new ArgumentOutOfRangeException("chars", SR.ArgumentOutOfRange_IndexCountBuffer);
+ Contract.EndContractBlock();
+
+ // If no input, return 0, avoid fixed empty array problem
+ if (count == 0)
+ return 0;
+
+ // Just call the pointer version
+ fixed (char* pChars = chars)
+ return GetByteCount(pChars + index, count, null);
+ }
+
+ // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
+ // So if you fix this, fix the others. Currently those include:
+ // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
+ // parent method is safe
+
+ public override unsafe int GetByteCount(String chars)
+ {
+ // Validate input
+ if (chars==null)
+ throw new ArgumentNullException("chars");
+ Contract.EndContractBlock();
+
+ fixed (char* pChars = chars)
+ return GetByteCount(pChars, chars.Length, null);
+ }
+
+ // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
+ // So if you fix this, fix the others. Currently those include:
+ // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
+
+ [CLSCompliant(false)]
+ public override unsafe int GetByteCount(char* chars, int count)
+ {
+ // Validate Parameters
+ if (chars == null)
+ throw new ArgumentNullException("chars", SR.ArgumentNull_Array);
+
+ if (count < 0)
+ throw new ArgumentOutOfRangeException("count", SR.ArgumentOutOfRange_NeedNonNegNum);
+ Contract.EndContractBlock();
+
+ // Call it with empty encoder
+ return GetByteCount(chars, count, null);
+ }
+
+ // Parent method is safe.
+ // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
+ // So if you fix this, fix the others. Currently those include:
+ // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
+
+ public override unsafe int GetBytes(String chars, int charIndex, int charCount,
+ byte[] bytes, int byteIndex)
+ {
+ if (chars == null || bytes == null)
+ throw new ArgumentNullException((chars == null ? "chars" : "bytes"), SR.ArgumentNull_Array);
+
+ if (charIndex < 0 || charCount < 0)
+ throw new ArgumentOutOfRangeException((charIndex < 0 ? "charIndex" : "charCount"), SR.ArgumentOutOfRange_NeedNonNegNum);
+
+ if (chars.Length - charIndex < charCount)
+ throw new ArgumentOutOfRangeException("chars", SR.ArgumentOutOfRange_IndexCount);
+
+ if (byteIndex < 0 || byteIndex > bytes.Length)
+ throw new ArgumentOutOfRangeException("byteIndex", SR.ArgumentOutOfRange_Index);
+ Contract.EndContractBlock();
+
+ int byteCount = bytes.Length - byteIndex;
+
+ // Fixed doesn't like empty byte arrays
+ if (bytes.Length == 0)
+ bytes = new byte[1];
+
+ fixed (char* pChars = chars) fixed (byte* pBytes = &bytes[0])
+ return GetBytes(pChars + charIndex, charCount, pBytes + byteIndex, byteCount, null);
+ }
+
+ // Encodes a range of characters in a character array into a range of bytes
+ // in a byte array. An exception occurs if the byte array is not large
+ // enough to hold the complete encoding of the characters. The
+ // GetByteCount method can be used to determine the exact number of
+ // bytes that will be produced for a given range of characters.
+ // Alternatively, the GetMaxByteCount method can be used to
+ // determine the maximum number of bytes that will be produced for a given
+ // number of characters, regardless of the actual character values.
+ //
+ // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
+ // So if you fix this, fix the others. Currently those include:
+ // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
+ // parent method is safe
+
+ public override unsafe int GetBytes(char[] chars, int charIndex, int charCount,
+ byte[] bytes, int byteIndex)
+ {
+ // Validate parameters
+ if (chars == null || bytes == null)
+ throw new ArgumentNullException((chars == null ? "chars" : "bytes"), SR.ArgumentNull_Array);
+
+ if (charIndex < 0 || charCount < 0)
+ throw new ArgumentOutOfRangeException((charIndex < 0 ? "charIndex" : "charCount"), SR.ArgumentOutOfRange_NeedNonNegNum);
+
+ if (chars.Length - charIndex < charCount)
+ throw new ArgumentOutOfRangeException("chars", SR.ArgumentOutOfRange_IndexCountBuffer);
+
+ if (byteIndex < 0 || byteIndex > bytes.Length)
+ throw new ArgumentOutOfRangeException("byteIndex", SR.ArgumentOutOfRange_Index);
+ Contract.EndContractBlock();
+
+ // If nothing to encode return 0, avoid fixed problem
+ if (charCount == 0)
+ return 0;
+
+ // Just call pointer version
+ int byteCount = bytes.Length - byteIndex;
+
+ // Fixed doesn't like empty byte arrays
+ if (bytes.Length == 0)
+ bytes = new byte[1];
+
+ fixed (char* pChars = chars) fixed (byte* pBytes = &bytes[0])
+ // Remember that byteCount is # to decode, not size of array.
+ return GetBytes(pChars + charIndex, charCount, pBytes + byteIndex, byteCount, null);
+ }
+
+ // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
+ // So if you fix this, fix the others. Currently those include:
+ // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
+
+ [CLSCompliant(false)]
+ public override unsafe int GetBytes(char* chars, int charCount, byte* bytes, int byteCount)
+ {
+ // Validate Parameters
+ if (bytes == null || chars == null)
+ throw new ArgumentNullException(bytes == null ? "bytes" : "chars", SR.ArgumentNull_Array);
+
+ if (charCount < 0 || byteCount < 0)
+ throw new ArgumentOutOfRangeException((charCount < 0 ? "charCount" : "byteCount"), SR.ArgumentOutOfRange_NeedNonNegNum);
+ Contract.EndContractBlock();
+
+ return GetBytes(chars, charCount, bytes, byteCount, null);
+ }
+
+ // Returns the number of characters produced by decoding a range of bytes
+ // in a byte array.
+ //
+ // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
+ // So if you fix this, fix the others. Currently those include:
+ // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
+ // parent method is safe
+
+ public override unsafe int GetCharCount(byte[] bytes, int index, int count)
+ {
+ // Validate Parameters
+ if (bytes == null)
+ throw new ArgumentNullException("bytes", SR.ArgumentNull_Array);
+
+ if (index < 0 || count < 0)
+ throw new ArgumentOutOfRangeException((index < 0 ? "index" : "count"), SR.ArgumentOutOfRange_NeedNonNegNum);
+
+ if (bytes.Length - index < count)
+ throw new ArgumentOutOfRangeException("bytes", SR.ArgumentOutOfRange_IndexCountBuffer);
+ Contract.EndContractBlock();
+
+ // If no input just return 0, fixed doesn't like 0 length arrays
+ if (count == 0)
+ return 0;
+
+ // Just call pointer version
+ fixed (byte* pBytes = bytes)
+ return GetCharCount(pBytes + index, count, null);
+ }
+
+ // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
+ // So if you fix this, fix the others. Currently those include:
+ // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
+
+ [CLSCompliant(false)]
+ public override unsafe int GetCharCount(byte* bytes, int count)
+ {
+ // Validate Parameters
+ if (bytes == null)
+ throw new ArgumentNullException("bytes", SR.ArgumentNull_Array);
+
+ if (count < 0)
+ throw new ArgumentOutOfRangeException("count", SR.ArgumentOutOfRange_NeedNonNegNum);
+ Contract.EndContractBlock();
+
+ return GetCharCount(bytes, count, null);
+ }
+
+ // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
+ // So if you fix this, fix the others. Currently those include:
+ // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
+ // parent method is safe
+
+ public override unsafe int GetChars(byte[] bytes, int byteIndex, int byteCount,
+ char[] chars, int charIndex)
+ {
+ // Validate Parameters
+ if (bytes == null || chars == null)
+ throw new ArgumentNullException(bytes == null ? "bytes" : "chars", SR.ArgumentNull_Array);
+
+ if (byteIndex < 0 || byteCount < 0)
+ throw new ArgumentOutOfRangeException((byteIndex < 0 ? "byteIndex" : "byteCount"), SR.ArgumentOutOfRange_NeedNonNegNum);
+
+ if ( bytes.Length - byteIndex < byteCount)
+ throw new ArgumentOutOfRangeException("bytes", SR.ArgumentOutOfRange_IndexCountBuffer);
+
+ if (charIndex < 0 || charIndex > chars.Length)
+ throw new ArgumentOutOfRangeException("charIndex", SR.ArgumentOutOfRange_Index);
+ Contract.EndContractBlock();
+
+ // If no input, return 0 & avoid fixed problem
+ if (byteCount == 0)
+ return 0;
+
+ // Just call pointer version
+ int charCount = chars.Length - charIndex;
+
+ // Fixed doesn't like empty char arrays
+ if (chars.Length == 0)
+ chars = new char[1];
+
+ fixed (byte* pBytes = bytes) fixed (char* pChars = &chars[0])
+ // Remember that charCount is # to decode, not size of array
+ return GetChars(pBytes + byteIndex, byteCount, pChars + charIndex, charCount, null);
+ }
+
+ // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
+ // So if you fix this, fix the others. Currently those include:
+ // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
+
+ [CLSCompliant(false)]
+ public unsafe override int GetChars(byte* bytes, int byteCount, char* chars, int charCount)
+ {
+ // Validate Parameters
+ if (bytes == null || chars == null)
+ throw new ArgumentNullException(bytes == null ? "bytes" : "chars", SR.ArgumentNull_Array);
+
+ if (charCount < 0 || byteCount < 0)
+ throw new ArgumentOutOfRangeException((charCount < 0 ? "charCount" : "byteCount"), SR.ArgumentOutOfRange_NeedNonNegNum);
+ Contract.EndContractBlock();
+
+ return GetChars(bytes, byteCount, chars, charCount, null);
+ }
+
+ // Returns a string containing the decoded representation of a range of
+ // bytes in a byte array.
+ //
+ // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
+ // So if you fix this, fix the others. Currently those include:
+ // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
+ // parent method is safe
+
+ public override unsafe String GetString(byte[] bytes, int byteIndex, int byteCount)
+ {
+ // Validate Parameters
+ if (bytes == null)
+ throw new ArgumentNullException("bytes", SR.ArgumentNull_Array);
+
+ if (byteIndex < 0 || byteCount < 0)
+ throw new ArgumentOutOfRangeException((byteIndex < 0 ? "byteIndex" : "byteCount"), SR.ArgumentOutOfRange_NeedNonNegNum);
+
+
+ if (bytes.Length - byteIndex < byteCount)
+ throw new ArgumentOutOfRangeException("bytes", SR.ArgumentOutOfRange_IndexCountBuffer);
+ Contract.EndContractBlock();
+
+ // Avoid problems with empty input buffer
+ if (byteCount == 0) return String.Empty;
+
+ fixed (byte* pBytes = bytes)
+ return String.CreateStringFromEncoding(
+ pBytes + byteIndex, byteCount, this);
+ }
+
+ //
+ // End of standard methods copied from EncodingNLS.cs
+ //
+
+ // GetByteCount
+ // Note: We start by assuming that the output will be the same as count. Having
+ // an encoder or fallback may change that assumption
+ internal override unsafe int GetByteCount(char* chars, int charCount, EncoderNLS encoder)
+ {
+ // Just need to ASSERT, this is called by something else internal that checked parameters already
+ Debug.Assert(charCount >= 0, "[ASCIIEncoding.GetByteCount]count is negative");
+ Debug.Assert(chars != null, "[ASCIIEncoding.GetByteCount]chars is null");
+
+ // Assert because we shouldn't be able to have a null encoder.
+ Debug.Assert(encoderFallback != null, "[ASCIIEncoding.GetByteCount]Attempting to use null fallback encoder");
+
+ char charLeftOver = (char)0;
+ EncoderReplacementFallback fallback = null;
+
+ // Start by assuming default count, then +/- for fallback characters
+ char* charEnd = chars + charCount;
+
+ // For fallback we may need a fallback buffer, we know we aren't default fallback.
+ EncoderFallbackBuffer fallbackBuffer = null;
+ char* charsForFallback;
+
+ if (encoder != null)
+ {
+ charLeftOver = encoder.charLeftOver;
+ Debug.Assert(charLeftOver == 0 || Char.IsHighSurrogate(charLeftOver),
+ "[ASCIIEncoding.GetByteCount]leftover character should be high surrogate");
+
+ fallback = encoder.Fallback as EncoderReplacementFallback;
+
+ // We mustn't have left over fallback data when counting
+ if (encoder.InternalHasFallbackBuffer)
+ {
+ // We always need the fallback buffer in get bytes so we can flush any remaining ones if necessary
+ fallbackBuffer = encoder.FallbackBuffer;
+ if (fallbackBuffer.Remaining > 0 && encoder.m_throwOnOverflow)
+ throw new ArgumentException(SR.Format(SR.Argument_EncoderFallbackNotEmpty, this.EncodingName, encoder.Fallback.GetType()));
+
+ // Set our internal fallback interesting things.
+ fallbackBuffer.InternalInitialize(chars, charEnd, encoder, false);
+ }
+
+ // Verify that we have no fallbackbuffer, for ASCII its always empty, so just assert
+ Debug.Assert(!encoder.m_throwOnOverflow || !encoder.InternalHasFallbackBuffer ||
+ encoder.FallbackBuffer.Remaining == 0,
+ "[ASCIICodePageEncoding.GetByteCount]Expected empty fallback buffer");
+ }
+ else
+ {
+ fallback = this.EncoderFallback as EncoderReplacementFallback;
+ }
+
+ // If we have an encoder AND we aren't using default fallback,
+ // then we may have a complicated count.
+ if (fallback != null && fallback.MaxCharCount == 1)
+ {
+ // Replacement fallback encodes surrogate pairs as two ?? (or two whatever), so return size is always
+ // same as input size.
+ // Note that no existing SBCS code pages map code points to supplimentary characters, so this is easy.
+
+ // We could however have 1 extra byte if the last call had an encoder and a funky fallback and
+ // if we don't use the funky fallback this time.
+
+ // Do we have an extra char left over from last time?
+ if (charLeftOver > 0)
+ charCount++;
+
+ return (charCount);
+ }
+
+ // Count is more complicated if you have a funky fallback
+ // For fallback we may need a fallback buffer, we know we're not default fallback
+ int byteCount = 0;
+
+ // We may have a left over character from last time, try and process it.
+ if (charLeftOver > 0)
+ {
+ Debug.Assert(Char.IsHighSurrogate(charLeftOver), "[ASCIIEncoding.GetByteCount]leftover character should be high surrogate");
+ Debug.Assert(encoder != null, "[ASCIIEncoding.GetByteCount]Expected encoder");
+
+ // Since left over char was a surrogate, it'll have to be fallen back.
+ // Get Fallback
+ fallbackBuffer = encoder.FallbackBuffer;
+ fallbackBuffer.InternalInitialize(chars, charEnd, encoder, false);
+
+ // This will fallback a pair if *chars is a low surrogate
+ charsForFallback = chars; // Avoid passing chars by reference to allow it to be enregistered
+ fallbackBuffer.InternalFallback(charLeftOver, ref charsForFallback);
+ chars = charsForFallback;
+ }
+
+ // Now we may have fallback char[] already from the encoder
+
+ // Go ahead and do it, including the fallback.
+ char ch;
+ while ((ch = (fallbackBuffer == null) ? '\0' : fallbackBuffer.InternalGetNextChar()) != 0 ||
+ chars < charEnd)
+ {
+ // First unwind any fallback
+ if (ch == 0)
+ {
+ // No fallback, just get next char
+ ch = *chars;
+ chars++;
+ }
+
+ // Check for fallback, this'll catch surrogate pairs too.
+ // no chars >= 0x80 are allowed.
+ if (ch > 0x7f)
+ {
+ if (fallbackBuffer == null)
+ {
+ // Initialize the buffer
+ if (encoder == null)
+ fallbackBuffer = this.encoderFallback.CreateFallbackBuffer();
+ else
+ fallbackBuffer = encoder.FallbackBuffer;
+ fallbackBuffer.InternalInitialize(charEnd - charCount, charEnd, encoder, false);
+ }
+
+ // Get Fallback
+ charsForFallback = chars; // Avoid passing chars by reference to allow it to be enregistered
+ fallbackBuffer.InternalFallback(ch, ref charsForFallback);
+ chars = charsForFallback;
+ continue;
+ }
+
+ // We'll use this one
+ byteCount++;
+ }
+
+ Debug.Assert(fallbackBuffer == null || fallbackBuffer.Remaining == 0,
+ "[ASCIIEncoding.GetByteCount]Expected Empty fallback buffer");
+
+ return byteCount;
+ }
+
+ internal override unsafe int GetBytes(char* chars, int charCount,
+ byte* bytes, int byteCount, EncoderNLS encoder)
+ {
+ // Just need to ASSERT, this is called by something else internal that checked parameters already
+ Debug.Assert(bytes != null, "[ASCIIEncoding.GetBytes]bytes is null");
+ Debug.Assert(byteCount >= 0, "[ASCIIEncoding.GetBytes]byteCount is negative");
+ Debug.Assert(chars != null, "[ASCIIEncoding.GetBytes]chars is null");
+ Debug.Assert(charCount >= 0, "[ASCIIEncoding.GetBytes]charCount is negative");
+
+ // Assert because we shouldn't be able to have a null encoder.
+ Debug.Assert(encoderFallback != null, "[ASCIIEncoding.GetBytes]Attempting to use null encoder fallback");
+
+ // Get any left over characters
+ char charLeftOver = (char)0;
+ EncoderReplacementFallback fallback = null;
+
+ // For fallback we may need a fallback buffer, we know we aren't default fallback.
+ EncoderFallbackBuffer fallbackBuffer = null;
+ char* charsForFallback;
+
+ // prepare our end
+ char* charEnd = chars + charCount;
+ byte* byteStart = bytes;
+ char* charStart = chars;
+
+ if (encoder != null)
+ {
+ charLeftOver = encoder.charLeftOver;
+ fallback = encoder.Fallback as EncoderReplacementFallback;
+
+ // We mustn't have left over fallback data when counting
+ if (encoder.InternalHasFallbackBuffer)
+ {
+ // We always need the fallback buffer in get bytes so we can flush any remaining ones if necessary
+ fallbackBuffer = encoder.FallbackBuffer;
+ if (fallbackBuffer.Remaining > 0 && encoder.m_throwOnOverflow)
+ throw new ArgumentException(SR.Format(SR.Argument_EncoderFallbackNotEmpty, this.EncodingName, encoder.Fallback.GetType()));
+
+ // Set our internal fallback interesting things.
+ fallbackBuffer.InternalInitialize(charStart, charEnd, encoder, true);
+ }
+
+ Debug.Assert(charLeftOver == 0 || Char.IsHighSurrogate(charLeftOver),
+ "[ASCIIEncoding.GetBytes]leftover character should be high surrogate");
+
+ // Verify that we have no fallbackbuffer, for ASCII its always empty, so just assert
+ Debug.Assert(!encoder.m_throwOnOverflow || !encoder.InternalHasFallbackBuffer ||
+ encoder.FallbackBuffer.Remaining == 0,
+ "[ASCIICodePageEncoding.GetBytes]Expected empty fallback buffer");
+ }
+ else
+ {
+ fallback = this.EncoderFallback as EncoderReplacementFallback;
+ }
+
+
+ // See if we do the fast default or slightly slower fallback
+ if (fallback != null && fallback.MaxCharCount == 1)
+ {
+ // Fast version
+ char cReplacement = fallback.DefaultString[0];
+
+ // Check for replacements in range, otherwise fall back to slow version.
+ if (cReplacement <= (char)0x7f)
+ {
+ // We should have exactly as many output bytes as input bytes, unless there's a left
+ // over character, in which case we may need one more.
+ // If we had a left over character will have to add a ? (This happens if they had a funky
+ // fallback last time, but not this time.) (We can't spit any out though
+ // because with fallback encoder each surrogate is treated as a seperate code point)
+ if (charLeftOver > 0)
+ {
+ // Have to have room
+ // Throw even if doing no throw version because this is just 1 char,
+ // so buffer will never be big enough
+ if (byteCount == 0)
+ ThrowBytesOverflow(encoder, true);
+
+ // This'll make sure we still have more room and also make sure our return value is correct.
+ *(bytes++) = (byte)cReplacement;
+ byteCount--; // We used one of the ones we were counting.
+ }
+
+ // This keeps us from overrunning our output buffer
+ if (byteCount < charCount)
+ {
+ // Throw or make buffer smaller?
+ ThrowBytesOverflow(encoder, byteCount < 1);
+
+ // Just use what we can
+ charEnd = chars + byteCount;
+ }
+
+ // We just do a quick copy
+ while (chars < charEnd)
+ {
+ char ch2 = *(chars++);
+ if (ch2 >= 0x0080) *(bytes++) = (byte)cReplacement;
+ else *(bytes++) = unchecked((byte)(ch2));
+ }
+
+ // Clear encoder
+ if (encoder != null)
+ {
+ encoder.charLeftOver = (char)0;
+ encoder.m_charsUsed = (int)(chars - charStart);
+ }
+
+ return (int)(bytes - byteStart);
+ }
+ }
+
+ // Slower version, have to do real fallback.
+
+ // prepare our end
+ byte* byteEnd = bytes + byteCount;
+
+ // We may have a left over character from last time, try and process it.
+ if (charLeftOver > 0)
+ {
+ // Initialize the buffer
+ Debug.Assert(encoder != null,
+ "[ASCIIEncoding.GetBytes]Expected non null encoder if we have surrogate left over");
+ fallbackBuffer = encoder.FallbackBuffer;
+ fallbackBuffer.InternalInitialize(chars, charEnd, encoder, true);
+
+ // Since left over char was a surrogate, it'll have to be fallen back.
+ // Get Fallback
+ // This will fallback a pair if *chars is a low surrogate
+ charsForFallback = chars; // Avoid passing chars by reference to allow it to be enregistered
+ fallbackBuffer.InternalFallback(charLeftOver, ref charsForFallback);
+ chars = charsForFallback;
+ }
+
+ // Now we may have fallback char[] already from the encoder
+
+ // Go ahead and do it, including the fallback.
+ char ch;
+ while ((ch = (fallbackBuffer == null) ? '\0' : fallbackBuffer.InternalGetNextChar()) != 0 ||
+ chars < charEnd)
+ {
+ // First unwind any fallback
+ if (ch == 0)
+ {
+ // No fallback, just get next char
+ ch = *chars;
+ chars++;
+ }
+
+ // Check for fallback, this'll catch surrogate pairs too.
+ // All characters >= 0x80 must fall back.
+ if (ch > 0x7f)
+ {
+ // Initialize the buffer
+ if (fallbackBuffer == null)
+ {
+ if (encoder == null)
+ fallbackBuffer = this.encoderFallback.CreateFallbackBuffer();
+ else
+ fallbackBuffer = encoder.FallbackBuffer;
+ fallbackBuffer.InternalInitialize(charEnd - charCount, charEnd, encoder, true);
+ }
+
+ // Get Fallback
+ charsForFallback = chars; // Avoid passing chars by reference to allow it to be enregistered
+ fallbackBuffer.InternalFallback(ch, ref charsForFallback);
+ chars = charsForFallback;
+
+ // Go ahead & continue (& do the fallback)
+ continue;
+ }
+
+ // We'll use this one
+ // Bounds check
+ if (bytes >= byteEnd)
+ {
+ // didn't use this char, we'll throw or use buffer
+ if (fallbackBuffer == null || fallbackBuffer.bFallingBack == false)
+ {
+ Debug.Assert(chars > charStart || bytes == byteStart,
+ "[ASCIIEncoding.GetBytes]Expected chars to have advanced already.");
+ chars--; // don't use last char
+ }
+ else
+ fallbackBuffer.MovePrevious();
+
+ // Are we throwing or using buffer?
+ ThrowBytesOverflow(encoder, bytes == byteStart); // throw?
+ break; // don't throw, stop
+ }
+
+ // Go ahead and add it
+ *bytes = unchecked((byte)ch);
+ bytes++;
+ }
+
+ // Need to do encoder stuff
+ if (encoder != null)
+ {
+ // Fallback stuck it in encoder if necessary, but we have to clear MustFlush cases
+ if (fallbackBuffer != null && !fallbackBuffer.bUsedEncoder)
+ // Clear it in case of MustFlush
+ encoder.charLeftOver = (char)0;
+
+ // Set our chars used count
+ encoder.m_charsUsed = (int)(chars - charStart);
+ }
+
+ Debug.Assert(fallbackBuffer == null || fallbackBuffer.Remaining == 0 ||
+ (encoder != null && !encoder.m_throwOnOverflow),
+ "[ASCIIEncoding.GetBytes]Expected Empty fallback buffer at end");
+
+ return (int)(bytes - byteStart);
+ }
+
+ // This is internal and called by something else,
+ internal override unsafe int GetCharCount(byte* bytes, int count, DecoderNLS decoder)
+ {
+ // Just assert, we're called internally so these should be safe, checked already
+ Debug.Assert(bytes != null, "[ASCIIEncoding.GetCharCount]bytes is null");
+ Debug.Assert(count >= 0, "[ASCIIEncoding.GetCharCount]byteCount is negative");
+
+ // ASCII doesn't do best fit, so don't have to check for it, find out which decoder fallback we're using
+ DecoderReplacementFallback fallback = null;
+
+ if (decoder == null)
+ fallback = this.DecoderFallback as DecoderReplacementFallback;
+ else
+ {
+ fallback = decoder.Fallback as DecoderReplacementFallback;
+ Debug.Assert(!decoder.m_throwOnOverflow || !decoder.InternalHasFallbackBuffer ||
+ decoder.FallbackBuffer.Remaining == 0,
+ "[ASCIICodePageEncoding.GetCharCount]Expected empty fallback buffer");
+ }
+
+ if (fallback != null && fallback.MaxCharCount == 1)
+ {
+ // Just return length, SBCS stay the same length because they don't map to surrogate
+ // pairs and we don't have a decoder fallback.
+
+ return count;
+ }
+
+ // Only need decoder fallback buffer if not using default replacement fallback, no best fit for ASCII
+ DecoderFallbackBuffer fallbackBuffer = null;
+
+ // Have to do it the hard way.
+ // Assume charCount will be == count
+ int charCount = count;
+ byte[] byteBuffer = new byte[1];
+
+ // Do it our fast way
+ byte* byteEnd = bytes + count;
+
+ // Quick loop
+ while (bytes < byteEnd)
+ {
+ // Faster if don't use *bytes++;
+ byte b = *bytes;
+ bytes++;
+
+ // If unknown we have to do fallback count
+ if (b >= 0x80)
+ {
+ if (fallbackBuffer == null)
+ {
+ if (decoder == null)
+ fallbackBuffer = this.DecoderFallback.CreateFallbackBuffer();
+ else
+ fallbackBuffer = decoder.FallbackBuffer;
+ fallbackBuffer.InternalInitialize(byteEnd - count, null);
+ }
+
+ // Use fallback buffer
+ byteBuffer[0] = b;
+ charCount--; // Have to unreserve the one we already allocated for b
+ charCount += fallbackBuffer.InternalFallback(byteBuffer, bytes);
+ }
+ }
+
+ // Fallback buffer must be empty
+ Debug.Assert(fallbackBuffer == null || fallbackBuffer.Remaining == 0,
+ "[ASCIIEncoding.GetCharCount]Expected Empty fallback buffer");
+
+ // Converted sequence is same length as input
+ return charCount;
+ }
+
+ internal override unsafe int GetChars(byte* bytes, int byteCount,
+ char* chars, int charCount, DecoderNLS decoder)
+ {
+ // Just need to ASSERT, this is called by something else internal that checked parameters already
+ Debug.Assert(bytes != null, "[ASCIIEncoding.GetChars]bytes is null");
+ Debug.Assert(byteCount >= 0, "[ASCIIEncoding.GetChars]byteCount is negative");
+ Debug.Assert(chars != null, "[ASCIIEncoding.GetChars]chars is null");
+ Debug.Assert(charCount >= 0, "[ASCIIEncoding.GetChars]charCount is negative");
+
+ // Do it fast way if using ? replacement fallback
+ byte* byteEnd = bytes + byteCount;
+ byte* byteStart = bytes;
+ char* charStart = chars;
+
+ // Note: ASCII doesn't do best fit, but we have to fallback if they use something > 0x7f
+ // Only need decoder fallback buffer if not using ? fallback.
+ // ASCII doesn't do best fit, so don't have to check for it, find out which decoder fallback we're using
+ DecoderReplacementFallback fallback = null;
+ char* charsForFallback;
+
+ if (decoder == null)
+ fallback = this.DecoderFallback as DecoderReplacementFallback;
+ else
+ {
+ fallback = decoder.Fallback as DecoderReplacementFallback;
+ Debug.Assert(!decoder.m_throwOnOverflow || !decoder.InternalHasFallbackBuffer ||
+ decoder.FallbackBuffer.Remaining == 0,
+ "[ASCIICodePageEncoding.GetChars]Expected empty fallback buffer");
+ }
+
+ if (fallback != null && fallback.MaxCharCount == 1)
+ {
+ // Try it the fast way
+ char replacementChar = fallback.DefaultString[0];
+
+ // Need byteCount chars, otherwise too small buffer
+ if (charCount < byteCount)
+ {
+ // Need at least 1 output byte, throw if must throw
+ ThrowCharsOverflow(decoder, charCount < 1);
+
+ // Not throwing, use what we can
+ byteEnd = bytes + charCount;
+ }
+
+ // Quick loop, just do '?' replacement because we don't have fallbacks for decodings.
+ while (bytes < byteEnd)
+ {
+ byte b = *(bytes++);
+ if (b >= 0x80)
+ // This is an invalid byte in the ASCII encoding.
+ *(chars++) = replacementChar;
+ else
+ *(chars++) = unchecked((char)b);
+ }
+
+ // bytes & chars used are the same
+ if (decoder != null)
+ decoder.m_bytesUsed = (int)(bytes - byteStart);
+ return (int)(chars - charStart);
+ }
+
+ // Slower way's going to need a fallback buffer
+ DecoderFallbackBuffer fallbackBuffer = null;
+ byte[] byteBuffer = new byte[1];
+ char* charEnd = chars + charCount;
+
+ // Not quite so fast loop
+ while (bytes < byteEnd)
+ {
+ // Faster if don't use *bytes++;
+ byte b = *(bytes);
+ bytes++;
+
+ if (b >= 0x80)
+ {
+ // This is an invalid byte in the ASCII encoding.
+ if (fallbackBuffer == null)
+ {
+ if (decoder == null)
+ fallbackBuffer = this.DecoderFallback.CreateFallbackBuffer();
+ else
+ fallbackBuffer = decoder.FallbackBuffer;
+ fallbackBuffer.InternalInitialize(byteEnd - byteCount, charEnd);
+ }
+
+ // Use fallback buffer
+ byteBuffer[0] = b;
+
+ // Note that chars won't get updated unless this succeeds
+ charsForFallback = chars; // Avoid passing chars by reference to allow it to be enregistered
+ bool fallbackResult = fallbackBuffer.InternalFallback(byteBuffer, bytes, ref charsForFallback);
+ chars = charsForFallback;
+
+ if (!fallbackResult)
+ {
+ // May or may not throw, but we didn't get this byte
+ Debug.Assert(bytes > byteStart || chars == charStart,
+ "[ASCIIEncoding.GetChars]Expected bytes to have advanced already (fallback case)");
+ bytes--; // unused byte
+ fallbackBuffer.InternalReset(); // Didn't fall this back
+ ThrowCharsOverflow(decoder, chars == charStart); // throw?
+ break; // don't throw, but stop loop
+ }
+ }
+ else
+ {
+ // Make sure we have buffer space
+ if (chars >= charEnd)
+ {
+ Debug.Assert(bytes > byteStart || chars == charStart,
+ "[ASCIIEncoding.GetChars]Expected bytes to have advanced already (normal case)");
+ bytes--; // unused byte
+ ThrowCharsOverflow(decoder, chars == charStart); // throw?
+ break; // don't throw, but stop loop
+ }
+
+ *(chars) = unchecked((char)b);
+ chars++;
+ }
+ }
+
+ // Might have had decoder fallback stuff.
+ if (decoder != null)
+ decoder.m_bytesUsed = (int)(bytes - byteStart);
+
+ // Expect Empty fallback buffer for GetChars
+ Debug.Assert(fallbackBuffer == null || fallbackBuffer.Remaining == 0,
+ "[ASCIIEncoding.GetChars]Expected Empty fallback buffer");
+
+ return (int)(chars - charStart);
+ }
+
+
+ public override int GetMaxByteCount(int charCount)
+ {
+ if (charCount < 0)
+ throw new ArgumentOutOfRangeException(nameof(charCount),
+ SR.ArgumentOutOfRange_NeedNonNegNum);
+ Contract.EndContractBlock();
+
+ // Characters would be # of characters + 1 in case high surrogate is ? * max fallback
+ long byteCount = (long)charCount + 1;
+
+ if (EncoderFallback.MaxCharCount > 1)
+ byteCount *= EncoderFallback.MaxCharCount;
+
+ // 1 to 1 for most characters. Only surrogates with fallbacks have less.
+
+ if (byteCount > 0x7fffffff)
+ throw new ArgumentOutOfRangeException(nameof(charCount), SR.ArgumentOutOfRange_GetByteCountOverflow);
+ return (int)byteCount;
+ }
+
+
+ public override int GetMaxCharCount(int byteCount)
+ {
+ if (byteCount < 0)
+ throw new ArgumentOutOfRangeException(nameof(byteCount),
+ SR.ArgumentOutOfRange_NeedNonNegNum);
+ Contract.EndContractBlock();
+
+ // Just return length, SBCS stay the same length because they don't map to surrogate
+ long charCount = (long)byteCount;
+
+ // 1 to 1 for most characters. Only surrogates with fallbacks have less, unknown fallbacks could be longer.
+ if (DecoderFallback.MaxCharCount > 1)
+ charCount *= DecoderFallback.MaxCharCount;
+
+ if (charCount > 0x7fffffff)
+ throw new ArgumentOutOfRangeException(nameof(byteCount), SR.ArgumentOutOfRange_GetCharCountOverflow);
+
+ return (int)charCount;
+ }
+
+ // True if and only if the encoding only uses single byte code points. (Ie, ASCII, 1252, etc)
+
+ public override bool IsSingleByte
+ {
+ get
+ {
+ return true;
+ }
+ }
+
+ public override Decoder GetDecoder()
+ {
+ return new DecoderNLS(this);
+ }
+
+
+ public override Encoder GetEncoder()
+ {
+ return new EncoderNLS(this);
+ }
+ }
+}
diff --git a/src/mscorlib/shared/System/Text/Decoder.cs b/src/mscorlib/shared/System/Text/Decoder.cs
new file mode 100644
index 0000000000..b2a003037b
--- /dev/null
+++ b/src/mscorlib/shared/System/Text/Decoder.cs
@@ -0,0 +1,339 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+using System.Runtime.Serialization;
+using System.Text;
+using System;
+using System.Diagnostics;
+using System.Diagnostics.Contracts;
+
+namespace System.Text
+{
+ // A Decoder is used to decode a sequence of blocks of bytes into a
+ // sequence of blocks of characters. Following instantiation of a decoder,
+ // sequential blocks of bytes are converted into blocks of characters through
+ // calls to the GetChars method. The decoder maintains state between the
+ // conversions, allowing it to correctly decode byte sequences that span
+ // adjacent blocks.
+ //
+ // Instances of specific implementations of the Decoder abstract base
+ // class are typically obtained through calls to the GetDecoder method
+ // of Encoding objects.
+ //
+ [Serializable]
+ public abstract class Decoder
+ {
+ internal DecoderFallback m_fallback = null;
+
+ [NonSerialized]
+ internal DecoderFallbackBuffer m_fallbackBuffer = null;
+
+ internal void SerializeDecoder(SerializationInfo info)
+ {
+ info.AddValue("m_fallback", this.m_fallback);
+ }
+
+ protected Decoder()
+ {
+ // We don't call default reset because default reset probably isn't good if we aren't initialized.
+ }
+
+ public DecoderFallback Fallback
+ {
+ get
+ {
+ return m_fallback;
+ }
+
+ set
+ {
+ if (value == null)
+ throw new ArgumentNullException(nameof(value));
+ Contract.EndContractBlock();
+
+ // Can't change fallback if buffer is wrong
+ if (m_fallbackBuffer != null && m_fallbackBuffer.Remaining > 0)
+ throw new ArgumentException(
+ SR.Argument_FallbackBufferNotEmpty, nameof(value));
+
+ m_fallback = value;
+ m_fallbackBuffer = null;
+ }
+ }
+
+ // Note: we don't test for threading here because async access to Encoders and Decoders
+ // doesn't work anyway.
+ public DecoderFallbackBuffer FallbackBuffer
+ {
+ get
+ {
+ if (m_fallbackBuffer == null)
+ {
+ if (m_fallback != null)
+ m_fallbackBuffer = m_fallback.CreateFallbackBuffer();
+ else
+ m_fallbackBuffer = DecoderFallback.ReplacementFallback.CreateFallbackBuffer();
+ }
+
+ return m_fallbackBuffer;
+ }
+ }
+
+ internal bool InternalHasFallbackBuffer
+ {
+ get
+ {
+ return m_fallbackBuffer != null;
+ }
+ }
+
+ // Reset the Decoder
+ //
+ // Normally if we call GetChars() and an error is thrown we don't change the state of the Decoder. This
+ // would allow the caller to correct the error condition and try again (such as if they need a bigger buffer.)
+ //
+ // If the caller doesn't want to try again after GetChars() throws an error, then they need to call Reset().
+ //
+ // Virtual implementation has to call GetChars with flush and a big enough buffer to clear a 0 byte string
+ // We avoid GetMaxCharCount() because a) we can't call the base encoder and b) it might be really big.
+ public virtual void Reset()
+ {
+ byte[] byteTemp = Array.Empty<byte>();
+ char[] charTemp = new char[GetCharCount(byteTemp, 0, 0, true)];
+ GetChars(byteTemp, 0, 0, charTemp, 0, true);
+ if (m_fallbackBuffer != null)
+ m_fallbackBuffer.Reset();
+ }
+
+ // Returns the number of characters the next call to GetChars will
+ // produce if presented with the given range of bytes. The returned value
+ // takes into account the state in which the decoder was left following the
+ // last call to GetChars. The state of the decoder is not affected
+ // by a call to this method.
+ //
+ public abstract int GetCharCount(byte[] bytes, int index, int count);
+
+ public virtual int GetCharCount(byte[] bytes, int index, int count, bool flush)
+ {
+ return GetCharCount(bytes, index, count);
+ }
+
+ // We expect this to be the workhorse for NLS Encodings, but for existing
+ // ones we need a working (if slow) default implementation)
+ [CLSCompliant(false)]
+ public virtual unsafe int GetCharCount(byte* bytes, int count, bool flush)
+ {
+ // Validate input parameters
+ if (bytes == null)
+ throw new ArgumentNullException(nameof(bytes),
+ SR.ArgumentNull_Array);
+
+ if (count < 0)
+ throw new ArgumentOutOfRangeException(nameof(count),
+ SR.ArgumentOutOfRange_NeedNonNegNum);
+ Contract.EndContractBlock();
+
+ byte[] arrbyte = new byte[count];
+ int index;
+
+ for (index = 0; index < count; index++)
+ arrbyte[index] = bytes[index];
+
+ return GetCharCount(arrbyte, 0, count);
+ }
+
+ // Decodes a range of bytes in a byte array into a range of characters
+ // in a character array. The method decodes byteCount bytes from
+ // bytes starting at index byteIndex, storing the resulting
+ // characters in chars starting at index charIndex. The
+ // decoding takes into account the state in which the decoder was left
+ // following the last call to this method.
+ //
+ // An exception occurs if the character array is not large enough to
+ // hold the complete decoding of the bytes. The GetCharCount method
+ // can be used to determine the exact number of characters that will be
+ // produced for a given range of bytes. Alternatively, the
+ // GetMaxCharCount method of the Encoding that produced this
+ // decoder can be used to determine the maximum number of characters that
+ // will be produced for a given number of bytes, regardless of the actual
+ // byte values.
+ //
+ public abstract int GetChars(byte[] bytes, int byteIndex, int byteCount,
+ char[] chars, int charIndex);
+
+ public virtual int GetChars(byte[] bytes, int byteIndex, int byteCount,
+ char[] chars, int charIndex, bool flush)
+ {
+ return GetChars(bytes, byteIndex, byteCount, chars, charIndex);
+ }
+
+ // We expect this to be the workhorse for NLS Encodings, but for existing
+ // ones we need a working (if slow) default implementation)
+ //
+ // WARNING WARNING WARNING
+ //
+ // WARNING: If this breaks it could be a security threat. Obviously we
+ // call this internally, so you need to make sure that your pointers, counts
+ // and indexes are correct when you call this method.
+ //
+ // In addition, we have internal code, which will be marked as "safe" calling
+ // this code. However this code is dependent upon the implementation of an
+ // external GetChars() method, which could be overridden by a third party and
+ // the results of which cannot be guaranteed. We use that result to copy
+ // the char[] to our char* output buffer. If the result count was wrong, we
+ // could easily overflow our output buffer. Therefore we do an extra test
+ // when we copy the buffer so that we don't overflow charCount either.
+ [CLSCompliant(false)]
+ public virtual unsafe int GetChars(byte* bytes, int byteCount,
+ char* chars, int charCount, bool flush)
+ {
+ // Validate input parameters
+ if (chars == null || bytes == null)
+ throw new ArgumentNullException(chars == null ? nameof(chars) : nameof(bytes),
+ SR.ArgumentNull_Array);
+
+ if (byteCount < 0 || charCount < 0)
+ throw new ArgumentOutOfRangeException((byteCount < 0 ? nameof(byteCount) : nameof(charCount)),
+ SR.ArgumentOutOfRange_NeedNonNegNum);
+ Contract.EndContractBlock();
+
+ // Get the byte array to convert
+ byte[] arrByte = new byte[byteCount];
+
+ int index;
+ for (index = 0; index < byteCount; index++)
+ arrByte[index] = bytes[index];
+
+ // Get the char array to fill
+ char[] arrChar = new char[charCount];
+
+ // Do the work
+ int result = GetChars(arrByte, 0, byteCount, arrChar, 0, flush);
+
+ Debug.Assert(result <= charCount, "Returned more chars than we have space for");
+
+ // Copy the char array
+ // WARNING: We MUST make sure that we don't copy too many chars. We can't
+ // rely on result because it could be a 3rd party implementation. We need
+ // to make sure we never copy more than charCount chars no matter the value
+ // of result
+ if (result < charCount)
+ charCount = result;
+
+ // We check both result and charCount so that we don't accidentally overrun
+ // our pointer buffer just because of an issue in GetChars
+ for (index = 0; index < charCount; index++)
+ chars[index] = arrChar[index];
+
+ return charCount;
+ }
+
+ // This method is used when the output buffer might not be large enough.
+ // It will decode until it runs out of bytes, and then it will return
+ // true if it the entire input was converted. In either case it
+ // will also return the number of converted bytes and output characters used.
+ // It will only throw a buffer overflow exception if the entire lenght of chars[] is
+ // too small to store the next char. (like 0 or maybe 1 or 4 for some encodings)
+ // We're done processing this buffer only if completed returns true.
+ //
+ // Might consider checking Max...Count to avoid the extra counting step.
+ //
+ // Note that if all of the input bytes are not consumed, then we'll do a /2, which means
+ // that its likely that we didn't consume as many bytes as we could have. For some
+ // applications this could be slow. (Like trying to exactly fill an output buffer from a bigger stream)
+ public virtual void Convert(byte[] bytes, int byteIndex, int byteCount,
+ char[] chars, int charIndex, int charCount, bool flush,
+ out int bytesUsed, out int charsUsed, out bool completed)
+ {
+ // Validate parameters
+ if (bytes == null || chars == null)
+ throw new ArgumentNullException((bytes == null ? nameof(bytes) : nameof(chars)),
+ SR.ArgumentNull_Array);
+
+ if (byteIndex < 0 || byteCount < 0)
+ throw new ArgumentOutOfRangeException((byteIndex < 0 ? nameof(byteIndex) : nameof(byteCount)),
+ SR.ArgumentOutOfRange_NeedNonNegNum);
+
+ if (charIndex < 0 || charCount < 0)
+ throw new ArgumentOutOfRangeException((charIndex < 0 ? nameof(charIndex) : nameof(charCount)),
+ SR.ArgumentOutOfRange_NeedNonNegNum);
+
+ if (bytes.Length - byteIndex < byteCount)
+ throw new ArgumentOutOfRangeException(nameof(bytes),
+ SR.ArgumentOutOfRange_IndexCountBuffer);
+
+ if (chars.Length - charIndex < charCount)
+ throw new ArgumentOutOfRangeException(nameof(chars),
+ SR.ArgumentOutOfRange_IndexCountBuffer);
+ Contract.EndContractBlock();
+
+ bytesUsed = byteCount;
+
+ // Its easy to do if it won't overrun our buffer.
+ while (bytesUsed > 0)
+ {
+ if (GetCharCount(bytes, byteIndex, bytesUsed, flush) <= charCount)
+ {
+ charsUsed = GetChars(bytes, byteIndex, bytesUsed, chars, charIndex, flush);
+ completed = (bytesUsed == byteCount &&
+ (m_fallbackBuffer == null || m_fallbackBuffer.Remaining == 0));
+ return;
+ }
+
+ // Try again with 1/2 the count, won't flush then 'cause won't read it all
+ flush = false;
+ bytesUsed /= 2;
+ }
+
+ // Oops, we didn't have anything, we'll have to throw an overflow
+ throw new ArgumentException(SR.Argument_ConversionOverflow);
+ }
+
+ // This is the version that uses *.
+ // We're done processing this buffer only if completed returns true.
+ //
+ // Might consider checking Max...Count to avoid the extra counting step.
+ //
+ // Note that if all of the input bytes are not consumed, then we'll do a /2, which means
+ // that its likely that we didn't consume as many bytes as we could have. For some
+ // applications this could be slow. (Like trying to exactly fill an output buffer from a bigger stream)
+ [CLSCompliant(false)]
+ public virtual unsafe void Convert(byte* bytes, int byteCount,
+ char* chars, int charCount, bool flush,
+ out int bytesUsed, out int charsUsed, out bool completed)
+ {
+ // Validate input parameters
+ if (chars == null || bytes == null)
+ throw new ArgumentNullException(chars == null ? nameof(chars) : nameof(bytes),
+ SR.ArgumentNull_Array);
+
+ if (byteCount < 0 || charCount < 0)
+ throw new ArgumentOutOfRangeException((byteCount < 0 ? nameof(byteCount) : nameof(charCount)),
+ SR.ArgumentOutOfRange_NeedNonNegNum);
+ Contract.EndContractBlock();
+
+ // Get ready to do it
+ bytesUsed = byteCount;
+
+ // Its easy to do if it won't overrun our buffer.
+ while (bytesUsed > 0)
+ {
+ if (GetCharCount(bytes, bytesUsed, flush) <= charCount)
+ {
+ charsUsed = GetChars(bytes, bytesUsed, chars, charCount, flush);
+ completed = (bytesUsed == byteCount &&
+ (m_fallbackBuffer == null || m_fallbackBuffer.Remaining == 0));
+ return;
+ }
+
+ // Try again with 1/2 the count, won't flush then 'cause won't read it all
+ flush = false;
+ bytesUsed /= 2;
+ }
+
+ // Oops, we didn't have anything, we'll have to throw an overflow
+ throw new ArgumentException(SR.Argument_ConversionOverflow);
+ }
+ }
+}
diff --git a/src/mscorlib/shared/System/Text/Encoder.cs b/src/mscorlib/shared/System/Text/Encoder.cs
new file mode 100644
index 0000000000..e4e91765e1
--- /dev/null
+++ b/src/mscorlib/shared/System/Text/Encoder.cs
@@ -0,0 +1,333 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+using System.Runtime.Serialization;
+using System.Text;
+using System;
+using System.Diagnostics;
+using System.Diagnostics.Contracts;
+
+namespace System.Text
+{
+ // An Encoder is used to encode a sequence of blocks of characters into
+ // a sequence of blocks of bytes. Following instantiation of an encoder,
+ // sequential blocks of characters are converted into blocks of bytes through
+ // calls to the GetBytes method. The encoder maintains state between the
+ // conversions, allowing it to correctly encode character sequences that span
+ // adjacent blocks.
+ //
+ // Instances of specific implementations of the Encoder abstract base
+ // class are typically obtained through calls to the GetEncoder method
+ // of Encoding objects.
+ //
+ [Serializable]
+ public abstract class Encoder
+ {
+ internal EncoderFallback m_fallback = null;
+
+ [NonSerialized]
+ internal EncoderFallbackBuffer m_fallbackBuffer = null;
+
+ internal void SerializeEncoder(SerializationInfo info)
+ {
+ info.AddValue("m_fallback", this.m_fallback);
+ }
+
+ protected Encoder()
+ {
+ // We don't call default reset because default reset probably isn't good if we aren't initialized.
+ }
+
+ public EncoderFallback Fallback
+ {
+ get
+ {
+ return m_fallback;
+ }
+
+ set
+ {
+ if (value == null)
+ throw new ArgumentNullException(nameof(value));
+ Contract.EndContractBlock();
+
+ // Can't change fallback if buffer is wrong
+ if (m_fallbackBuffer != null && m_fallbackBuffer.Remaining > 0)
+ throw new ArgumentException(
+ SR.Argument_FallbackBufferNotEmpty, nameof(value));
+
+ m_fallback = value;
+ m_fallbackBuffer = null;
+ }
+ }
+
+ // Note: we don't test for threading here because async access to Encoders and Decoders
+ // doesn't work anyway.
+ public EncoderFallbackBuffer FallbackBuffer
+ {
+ get
+ {
+ if (m_fallbackBuffer == null)
+ {
+ if (m_fallback != null)
+ m_fallbackBuffer = m_fallback.CreateFallbackBuffer();
+ else
+ m_fallbackBuffer = EncoderFallback.ReplacementFallback.CreateFallbackBuffer();
+ }
+
+ return m_fallbackBuffer;
+ }
+ }
+
+ internal bool InternalHasFallbackBuffer
+ {
+ get
+ {
+ return m_fallbackBuffer != null;
+ }
+ }
+
+ // Reset the Encoder
+ //
+ // Normally if we call GetBytes() and an error is thrown we don't change the state of the encoder. This
+ // would allow the caller to correct the error condition and try again (such as if they need a bigger buffer.)
+ //
+ // If the caller doesn't want to try again after GetBytes() throws an error, then they need to call Reset().
+ //
+ // Virtual implementation has to call GetBytes with flush and a big enough buffer to clear a 0 char string
+ // We avoid GetMaxByteCount() because a) we can't call the base encoder and b) it might be really big.
+ public virtual void Reset()
+ {
+ char[] charTemp = { };
+ byte[] byteTemp = new byte[GetByteCount(charTemp, 0, 0, true)];
+ GetBytes(charTemp, 0, 0, byteTemp, 0, true);
+ if (m_fallbackBuffer != null)
+ m_fallbackBuffer.Reset();
+ }
+
+ // Returns the number of bytes the next call to GetBytes will
+ // produce if presented with the given range of characters and the given
+ // value of the flush parameter. The returned value takes into
+ // account the state in which the encoder was left following the last call
+ // to GetBytes. The state of the encoder is not affected by a call
+ // to this method.
+ //
+ public abstract int GetByteCount(char[] chars, int index, int count, bool flush);
+
+ // We expect this to be the workhorse for NLS encodings
+ // unfortunately for existing overrides, it has to call the [] version,
+ // which is really slow, so avoid this method if you might be calling external encodings.
+ [CLSCompliant(false)]
+ public virtual unsafe int GetByteCount(char* chars, int count, bool flush)
+ {
+ // Validate input parameters
+ if (chars == null)
+ throw new ArgumentNullException(nameof(chars),
+ SR.ArgumentNull_Array);
+
+ if (count < 0)
+ throw new ArgumentOutOfRangeException(nameof(count),
+ SR.ArgumentOutOfRange_NeedNonNegNum);
+ Contract.EndContractBlock();
+
+ char[] arrChar = new char[count];
+ int index;
+
+ for (index = 0; index < count; index++)
+ arrChar[index] = chars[index];
+
+ return GetByteCount(arrChar, 0, count, flush);
+ }
+
+ // Encodes a range of characters in a character array into a range of bytes
+ // in a byte array. The method encodes charCount characters from
+ // chars starting at index charIndex, storing the resulting
+ // bytes in bytes starting at index byteIndex. The encoding
+ // takes into account the state in which the encoder was left following the
+ // last call to this method. The flush parameter indicates whether
+ // the encoder should flush any shift-states and partial characters at the
+ // end of the conversion. To ensure correct termination of a sequence of
+ // blocks of encoded bytes, the last call to GetBytes should specify
+ // a value of true for the flush parameter.
+ //
+ // An exception occurs if the byte array is not large enough to hold the
+ // complete encoding of the characters. The GetByteCount method can
+ // be used to determine the exact number of bytes that will be produced for
+ // a given range of characters. Alternatively, the GetMaxByteCount
+ // method of the Encoding that produced this encoder can be used to
+ // determine the maximum number of bytes that will be produced for a given
+ // number of characters, regardless of the actual character values.
+ //
+ public abstract int GetBytes(char[] chars, int charIndex, int charCount,
+ byte[] bytes, int byteIndex, bool flush);
+
+ // We expect this to be the workhorse for NLS Encodings, but for existing
+ // ones we need a working (if slow) default implementation)
+ //
+ // WARNING WARNING WARNING
+ //
+ // WARNING: If this breaks it could be a security threat. Obviously we
+ // call this internally, so you need to make sure that your pointers, counts
+ // and indexes are correct when you call this method.
+ //
+ // In addition, we have internal code, which will be marked as "safe" calling
+ // this code. However this code is dependent upon the implementation of an
+ // external GetBytes() method, which could be overridden by a third party and
+ // the results of which cannot be guaranteed. We use that result to copy
+ // the byte[] to our byte* output buffer. If the result count was wrong, we
+ // could easily overflow our output buffer. Therefore we do an extra test
+ // when we copy the buffer so that we don't overflow byteCount either.
+ [CLSCompliant(false)]
+ public virtual unsafe int GetBytes(char* chars, int charCount,
+ byte* bytes, int byteCount, bool flush)
+ {
+ // Validate input parameters
+ if (bytes == null || chars == null)
+ throw new ArgumentNullException(bytes == null ? nameof(bytes) : nameof(chars),
+ SR.ArgumentNull_Array);
+
+ if (charCount < 0 || byteCount < 0)
+ throw new ArgumentOutOfRangeException((charCount < 0 ? nameof(charCount) : nameof(byteCount)),
+ SR.ArgumentOutOfRange_NeedNonNegNum);
+ Contract.EndContractBlock();
+
+ // Get the char array to convert
+ char[] arrChar = new char[charCount];
+
+ int index;
+ for (index = 0; index < charCount; index++)
+ arrChar[index] = chars[index];
+
+ // Get the byte array to fill
+ byte[] arrByte = new byte[byteCount];
+
+ // Do the work
+ int result = GetBytes(arrChar, 0, charCount, arrByte, 0, flush);
+
+ Debug.Assert(result <= byteCount, "Returned more bytes than we have space for");
+
+ // Copy the byte array
+ // WARNING: We MUST make sure that we don't copy too many bytes. We can't
+ // rely on result because it could be a 3rd party implementation. We need
+ // to make sure we never copy more than byteCount bytes no matter the value
+ // of result
+ if (result < byteCount)
+ byteCount = result;
+
+ // Don't copy too many bytes!
+ for (index = 0; index < byteCount; index++)
+ bytes[index] = arrByte[index];
+
+ return byteCount;
+ }
+
+ // This method is used to avoid running out of output buffer space.
+ // It will encode until it runs out of chars, and then it will return
+ // true if it the entire input was converted. In either case it
+ // will also return the number of converted chars and output bytes used.
+ // It will only throw a buffer overflow exception if the entire lenght of bytes[] is
+ // too small to store the next byte. (like 0 or maybe 1 or 4 for some encodings)
+ // We're done processing this buffer only if completed returns true.
+ //
+ // Might consider checking Max...Count to avoid the extra counting step.
+ //
+ // Note that if all of the input chars are not consumed, then we'll do a /2, which means
+ // that its likely that we didn't consume as many chars as we could have. For some
+ // applications this could be slow. (Like trying to exactly fill an output buffer from a bigger stream)
+ public virtual void Convert(char[] chars, int charIndex, int charCount,
+ byte[] bytes, int byteIndex, int byteCount, bool flush,
+ out int charsUsed, out int bytesUsed, out bool completed)
+ {
+ // Validate parameters
+ if (chars == null || bytes == null)
+ throw new ArgumentNullException((chars == null ? nameof(chars) : nameof(bytes)),
+ SR.ArgumentNull_Array);
+
+ if (charIndex < 0 || charCount < 0)
+ throw new ArgumentOutOfRangeException((charIndex < 0 ? nameof(charIndex) : nameof(charCount)),
+ SR.ArgumentOutOfRange_NeedNonNegNum);
+
+ if (byteIndex < 0 || byteCount < 0)
+ throw new ArgumentOutOfRangeException((byteIndex < 0 ? nameof(byteIndex) : nameof(byteCount)),
+ SR.ArgumentOutOfRange_NeedNonNegNum);
+
+ if (chars.Length - charIndex < charCount)
+ throw new ArgumentOutOfRangeException(nameof(chars),
+ SR.ArgumentOutOfRange_IndexCountBuffer);
+
+ if (bytes.Length - byteIndex < byteCount)
+ throw new ArgumentOutOfRangeException(nameof(bytes),
+ SR.ArgumentOutOfRange_IndexCountBuffer);
+ Contract.EndContractBlock();
+
+ charsUsed = charCount;
+
+ // Its easy to do if it won't overrun our buffer.
+ // Note: We don't want to call unsafe version because that might be an untrusted version
+ // which could be really unsafe and we don't want to mix it up.
+ while (charsUsed > 0)
+ {
+ if (GetByteCount(chars, charIndex, charsUsed, flush) <= byteCount)
+ {
+ bytesUsed = GetBytes(chars, charIndex, charsUsed, bytes, byteIndex, flush);
+ completed = (charsUsed == charCount &&
+ (m_fallbackBuffer == null || m_fallbackBuffer.Remaining == 0));
+ return;
+ }
+
+ // Try again with 1/2 the count, won't flush then 'cause won't read it all
+ flush = false;
+ charsUsed /= 2;
+ }
+
+ // Oops, we didn't have anything, we'll have to throw an overflow
+ throw new ArgumentException(SR.Argument_ConversionOverflow);
+ }
+
+ // Same thing, but using pointers
+ //
+ // Might consider checking Max...Count to avoid the extra counting step.
+ //
+ // Note that if all of the input chars are not consumed, then we'll do a /2, which means
+ // that its likely that we didn't consume as many chars as we could have. For some
+ // applications this could be slow. (Like trying to exactly fill an output buffer from a bigger stream)
+ [CLSCompliant(false)]
+ public virtual unsafe void Convert(char* chars, int charCount,
+ byte* bytes, int byteCount, bool flush,
+ out int charsUsed, out int bytesUsed, out bool completed)
+ {
+ // Validate input parameters
+ if (bytes == null || chars == null)
+ throw new ArgumentNullException(bytes == null ? nameof(bytes) : nameof(chars),
+ SR.ArgumentNull_Array);
+ if (charCount < 0 || byteCount < 0)
+ throw new ArgumentOutOfRangeException((charCount < 0 ? nameof(charCount) : nameof(byteCount)),
+ SR.ArgumentOutOfRange_NeedNonNegNum);
+ Contract.EndContractBlock();
+
+ // Get ready to do it
+ charsUsed = charCount;
+
+ // Its easy to do if it won't overrun our buffer.
+ while (charsUsed > 0)
+ {
+ if (GetByteCount(chars, charsUsed, flush) <= byteCount)
+ {
+ bytesUsed = GetBytes(chars, charsUsed, bytes, byteCount, flush);
+ completed = (charsUsed == charCount &&
+ (m_fallbackBuffer == null || m_fallbackBuffer.Remaining == 0));
+ return;
+ }
+
+ // Try again with 1/2 the count, won't flush then 'cause won't read it all
+ flush = false;
+ charsUsed /= 2;
+ }
+
+ // Oops, we didn't have anything, we'll have to throw an overflow
+ throw new ArgumentException(SR.Argument_ConversionOverflow);
+ }
+ }
+}
+
diff --git a/src/mscorlib/shared/System/Text/EncodingInfo.cs b/src/mscorlib/shared/System/Text/EncodingInfo.cs
new file mode 100644
index 0000000000..360dd7f638
--- /dev/null
+++ b/src/mscorlib/shared/System/Text/EncodingInfo.cs
@@ -0,0 +1,72 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+using System;
+using System.Text;
+
+namespace System.Text
+{
+ [Serializable]
+ public sealed class EncodingInfo
+ {
+ private int iCodePage; // Code Page #
+ private string strEncodingName; // Short name (web name)
+ private string strDisplayName; // Full localized name
+
+ internal EncodingInfo(int codePage, string name, string displayName)
+ {
+ iCodePage = codePage;
+ strEncodingName = name;
+ strDisplayName = displayName;
+ }
+
+
+ public int CodePage
+ {
+ get
+ {
+ return iCodePage;
+ }
+ }
+
+
+ public string Name
+ {
+ get
+ {
+ return strEncodingName;
+ }
+ }
+
+
+ public string DisplayName
+ {
+ get
+ {
+ return strDisplayName;
+ }
+ }
+
+
+ public Encoding GetEncoding()
+ {
+ return Encoding.GetEncoding(iCodePage);
+ }
+
+ public override bool Equals(Object value)
+ {
+ EncodingInfo that = value as EncodingInfo;
+ if (that != null)
+ {
+ return (this.CodePage == that.CodePage);
+ }
+ return (false);
+ }
+
+ public override int GetHashCode()
+ {
+ return this.CodePage;
+ }
+ }
+}
diff --git a/src/mscorlib/shared/System/Text/EncodingNLS.cs b/src/mscorlib/shared/System/Text/EncodingNLS.cs
new file mode 100644
index 0000000000..205ae26902
--- /dev/null
+++ b/src/mscorlib/shared/System/Text/EncodingNLS.cs
@@ -0,0 +1,322 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+using System;
+using System.Diagnostics.Contracts;
+using System.Collections;
+using System.Globalization;
+using System.Threading;
+
+namespace System.Text
+{
+ // This class overrides Encoding with the things we need for our NLS Encodings
+ //
+ // All of the GetBytes/Chars GetByte/CharCount methods are just wrappers for the pointer
+ // plus decoder/encoder method that is our real workhorse. Note that this is an internal
+ // class, so our public classes cannot derive from this class. Because of this, all of the
+ // GetBytes/Chars GetByte/CharCount wrapper methods are duplicated in all of our public
+ // encodings, which currently include:
+ //
+ // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, & UnicodeEncoding
+ //
+ // So if you change the wrappers in this class, you must change the wrappers in the other classes
+ // as well because they should have the same behavior.
+
+ [Serializable]
+ internal abstract class EncodingNLS : Encoding
+ {
+ protected EncodingNLS(int codePage) : base(codePage)
+ {
+ }
+
+ // Returns the number of bytes required to encode a range of characters in
+ // a character array.
+ //
+ // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
+ // So if you fix this, fix the others. Currently those include:
+ // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
+ // parent method is safe
+ public override unsafe int GetByteCount(char[] chars, int index, int count)
+ {
+ // Validate input parameters
+ if (chars == null)
+ throw new ArgumentNullException("chars", SR.ArgumentNull_Array);
+
+ if (index < 0 || count < 0)
+ throw new ArgumentOutOfRangeException((index < 0 ? "index" : "count"), SR.ArgumentOutOfRange_NeedNonNegNum);
+
+ if (chars.Length - index < count)
+ throw new ArgumentOutOfRangeException("chars", SR.ArgumentOutOfRange_IndexCountBuffer);
+ Contract.EndContractBlock();
+
+ // If no input, return 0, avoid fixed empty array problem
+ if (count == 0)
+ return 0;
+
+ // Just call the pointer version
+ fixed (char* pChars = chars)
+ return GetByteCount(pChars + index, count, null);
+ }
+
+ // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
+ // So if you fix this, fix the others. Currently those include:
+ // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
+ // parent method is safe
+ public override unsafe int GetByteCount(String s)
+ {
+ // Validate input
+ if (s==null)
+ throw new ArgumentNullException("s");
+ Contract.EndContractBlock();
+
+ fixed (char* pChars = s)
+ return GetByteCount(pChars, s.Length, null);
+ }
+
+ // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
+ // So if you fix this, fix the others. Currently those include:
+ // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
+ public override unsafe int GetByteCount(char* chars, int count)
+ {
+ // Validate Parameters
+ if (chars == null)
+ throw new ArgumentNullException("chars", SR.ArgumentNull_Array);
+
+ if (count < 0)
+ throw new ArgumentOutOfRangeException("count", SR.ArgumentOutOfRange_NeedNonNegNum);
+ Contract.EndContractBlock();
+
+ // Call it with empty encoder
+ return GetByteCount(chars, count, null);
+ }
+
+ // Parent method is safe.
+ // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
+ // So if you fix this, fix the others. Currently those include:
+ // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
+ public override unsafe int GetBytes(String s, int charIndex, int charCount,
+ byte[] bytes, int byteIndex)
+ {
+ if (s == null || bytes == null)
+ throw new ArgumentNullException((s == null ? "s" : "bytes"), SR.ArgumentNull_Array);
+
+ if (charIndex < 0 || charCount < 0)
+ throw new ArgumentOutOfRangeException((charIndex < 0 ? "charIndex" : "charCount"), SR.ArgumentOutOfRange_NeedNonNegNum);
+
+ if (s.Length - charIndex < charCount)
+ throw new ArgumentOutOfRangeException("s", SR.ArgumentOutOfRange_IndexCount);
+
+ if (byteIndex < 0 || byteIndex > bytes.Length)
+ throw new ArgumentOutOfRangeException("byteIndex", SR.ArgumentOutOfRange_Index);
+ Contract.EndContractBlock();
+
+ int byteCount = bytes.Length - byteIndex;
+
+ // Fixed doesn't like empty arrays
+ if (bytes.Length == 0)
+ bytes = new byte[1];
+
+ fixed (char* pChars = s) fixed (byte* pBytes = &bytes[0])
+ return GetBytes(pChars + charIndex, charCount, pBytes + byteIndex, byteCount, null);
+ }
+
+ // Encodes a range of characters in a character array into a range of bytes
+ // in a byte array. An exception occurs if the byte array is not large
+ // enough to hold the complete encoding of the characters. The
+ // GetByteCount method can be used to determine the exact number of
+ // bytes that will be produced for a given range of characters.
+ // Alternatively, the GetMaxByteCount method can be used to
+ // determine the maximum number of bytes that will be produced for a given
+ // number of characters, regardless of the actual character values.
+ //
+ // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
+ // So if you fix this, fix the others. Currently those include:
+ // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
+ // parent method is safe
+ public override unsafe int GetBytes(char[] chars, int charIndex, int charCount,
+ byte[] bytes, int byteIndex)
+ {
+ // Validate parameters
+ if (chars == null || bytes == null)
+ throw new ArgumentNullException((chars == null ? "chars" : "bytes"), SR.ArgumentNull_Array);
+
+ if (charIndex < 0 || charCount < 0)
+ throw new ArgumentOutOfRangeException((charIndex < 0 ? "charIndex" : "charCount"), SR.ArgumentOutOfRange_NeedNonNegNum);
+
+ if (chars.Length - charIndex < charCount)
+ throw new ArgumentOutOfRangeException("chars", SR.ArgumentOutOfRange_IndexCountBuffer);
+
+ if (byteIndex < 0 || byteIndex > bytes.Length)
+ throw new ArgumentOutOfRangeException("byteIndex", SR.ArgumentOutOfRange_Index);
+ Contract.EndContractBlock();
+
+ // If nothing to encode return 0, avoid fixed problem
+ if (charCount == 0)
+ return 0;
+
+ // Just call pointer version
+ int byteCount = bytes.Length - byteIndex;
+
+ // Fixed doesn't like empty arrays
+ if (bytes.Length == 0)
+ bytes = new byte[1];
+
+ fixed (char* pChars = chars) fixed (byte* pBytes = &bytes[0])
+ // Remember that byteCount is # to decode, not size of array.
+ return GetBytes(pChars + charIndex, charCount, pBytes + byteIndex, byteCount, null);
+ }
+
+ // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
+ // So if you fix this, fix the others. Currently those include:
+ // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
+ public override unsafe int GetBytes(char* chars, int charCount, byte* bytes, int byteCount)
+ {
+ // Validate Parameters
+ if (bytes == null || chars == null)
+ throw new ArgumentNullException(bytes == null ? "bytes" : "chars", SR.ArgumentNull_Array);
+
+ if (charCount < 0 || byteCount < 0)
+ throw new ArgumentOutOfRangeException((charCount < 0 ? "charCount" : "byteCount"), SR.ArgumentOutOfRange_NeedNonNegNum);
+ Contract.EndContractBlock();
+
+ return GetBytes(chars, charCount, bytes, byteCount, null);
+ }
+
+ // Returns the number of characters produced by decoding a range of bytes
+ // in a byte array.
+ //
+ // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
+ // So if you fix this, fix the others. Currently those include:
+ // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
+ // parent method is safe
+ public override unsafe int GetCharCount(byte[] bytes, int index, int count)
+ {
+ // Validate Parameters
+ if (bytes == null)
+ throw new ArgumentNullException("bytes", SR.ArgumentNull_Array);
+
+ if (index < 0 || count < 0)
+ throw new ArgumentOutOfRangeException((index < 0 ? "index" : "count"), SR.ArgumentOutOfRange_NeedNonNegNum);
+
+ if (bytes.Length - index < count)
+ throw new ArgumentOutOfRangeException("bytes", SR.ArgumentOutOfRange_IndexCountBuffer);
+ Contract.EndContractBlock();
+
+ // If no input just return 0, fixed doesn't like 0 length arrays
+ if (count == 0)
+ return 0;
+
+ // Just call pointer version
+ fixed (byte* pBytes = bytes)
+ return GetCharCount(pBytes + index, count, null);
+ }
+
+ // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
+ // So if you fix this, fix the others. Currently those include:
+ // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
+ public override unsafe int GetCharCount(byte* bytes, int count)
+ {
+ // Validate Parameters
+ if (bytes == null)
+ throw new ArgumentNullException("bytes", SR.ArgumentNull_Array);
+
+ if (count < 0)
+ throw new ArgumentOutOfRangeException("count", SR.ArgumentOutOfRange_NeedNonNegNum);
+ Contract.EndContractBlock();
+
+ return GetCharCount(bytes, count, null);
+ }
+
+ // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
+ // So if you fix this, fix the others. Currently those include:
+ // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
+ // parent method is safe
+ public override unsafe int GetChars(byte[] bytes, int byteIndex, int byteCount,
+ char[] chars, int charIndex)
+ {
+ // Validate Parameters
+ if (bytes == null || chars == null)
+ throw new ArgumentNullException(bytes == null ? "bytes" : "chars", SR.ArgumentNull_Array);
+
+ if (byteIndex < 0 || byteCount < 0)
+ throw new ArgumentOutOfRangeException((byteIndex < 0 ? "byteIndex" : "byteCount"), SR.ArgumentOutOfRange_NeedNonNegNum);
+
+ if ( bytes.Length - byteIndex < byteCount)
+ throw new ArgumentOutOfRangeException("bytes", SR.ArgumentOutOfRange_IndexCountBuffer);
+
+ if (charIndex < 0 || charIndex > chars.Length)
+ throw new ArgumentOutOfRangeException("charIndex", SR.ArgumentOutOfRange_Index);
+ Contract.EndContractBlock();
+
+ // If no input, return 0 & avoid fixed problem
+ if (byteCount == 0)
+ return 0;
+
+ // Just call pointer version
+ int charCount = chars.Length - charIndex;
+
+ // Fixed doesn't like empty arrays
+ if (chars.Length == 0)
+ chars = new char[1];
+
+ fixed (byte* pBytes = bytes) fixed (char* pChars = &chars[0])
+ // Remember that charCount is # to decode, not size of array
+ return GetChars(pBytes + byteIndex, byteCount, pChars + charIndex, charCount, null);
+ }
+
+ // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
+ // So if you fix this, fix the others. Currently those include:
+ // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
+ public unsafe override int GetChars(byte* bytes, int byteCount, char* chars, int charCount)
+ {
+ // Validate Parameters
+ if (bytes == null || chars == null)
+ throw new ArgumentNullException(bytes == null ? "bytes" : "chars", SR.ArgumentNull_Array);
+
+ if (charCount < 0 || byteCount < 0)
+ throw new ArgumentOutOfRangeException((charCount < 0 ? "charCount" : "byteCount"), SR.ArgumentOutOfRange_NeedNonNegNum);
+ Contract.EndContractBlock();
+
+ return GetChars(bytes, byteCount, chars, charCount, null);
+ }
+
+ // Returns a string containing the decoded representation of a range of
+ // bytes in a byte array.
+ //
+ // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
+ // So if you fix this, fix the others. Currently those include:
+ // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
+ // parent method is safe
+ public override unsafe String GetString(byte[] bytes, int index, int count)
+ {
+ // Validate Parameters
+ if (bytes == null)
+ throw new ArgumentNullException("bytes", SR.ArgumentNull_Array);
+
+ if (index < 0 || count < 0)
+ throw new ArgumentOutOfRangeException((index < 0 ? "index" : "count"), SR.ArgumentOutOfRange_NeedNonNegNum);
+
+ if (bytes.Length - index < count)
+ throw new ArgumentOutOfRangeException("bytes", SR.ArgumentOutOfRange_IndexCountBuffer);
+ Contract.EndContractBlock();
+
+ // Avoid problems with empty input buffer
+ if (count == 0) return String.Empty;
+
+ fixed (byte* pBytes = bytes)
+ return String.CreateStringFromEncoding(
+ pBytes + index, count, this);
+ }
+
+ public override Decoder GetDecoder()
+ {
+ return new DecoderNLS(this);
+ }
+
+ public override Encoder GetEncoder()
+ {
+ return new EncoderNLS(this);
+ }
+ }
+}
diff --git a/src/mscorlib/shared/System/Text/EncodingProvider.cs b/src/mscorlib/shared/System/Text/EncodingProvider.cs
new file mode 100644
index 0000000000..ce8c3e0208
--- /dev/null
+++ b/src/mscorlib/shared/System/Text/EncodingProvider.cs
@@ -0,0 +1,136 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+using System;
+using System.Collections;
+using System.Collections.Generic;
+
+namespace System.Text
+{
+ public abstract class EncodingProvider
+ {
+ public EncodingProvider() { }
+ public abstract Encoding GetEncoding(string name);
+ public abstract Encoding GetEncoding(int codepage);
+
+ // GetEncoding should return either valid encoding or null. shouldn't throw any exception except on null name
+ public virtual Encoding GetEncoding(string name, EncoderFallback encoderFallback, DecoderFallback decoderFallback)
+ {
+ Encoding enc = GetEncoding(name);
+ if (enc != null)
+ {
+ enc = (Encoding)GetEncoding(name).Clone();
+ enc.EncoderFallback = encoderFallback;
+ enc.DecoderFallback = decoderFallback;
+ }
+
+ return enc;
+ }
+
+ public virtual Encoding GetEncoding(int codepage, EncoderFallback encoderFallback, DecoderFallback decoderFallback)
+ {
+ Encoding enc = GetEncoding(codepage);
+ if (enc != null)
+ {
+ enc = (Encoding)GetEncoding(codepage).Clone();
+ enc.EncoderFallback = encoderFallback;
+ enc.DecoderFallback = decoderFallback;
+ }
+
+ return enc;
+ }
+
+ internal static void AddProvider(EncodingProvider provider)
+ {
+ if (provider == null)
+ throw new ArgumentNullException(nameof(provider));
+
+ lock (s_InternalSyncObject)
+ {
+ if (s_providers == null)
+ {
+ s_providers = new EncodingProvider[1] { provider };
+ return;
+ }
+
+ if (Array.IndexOf(s_providers, provider) >= 0)
+ {
+ return;
+ }
+
+ EncodingProvider[] providers = new EncodingProvider[s_providers.Length + 1];
+ Array.Copy(s_providers, providers, s_providers.Length);
+ providers[providers.Length - 1] = provider;
+ s_providers = providers;
+ }
+ }
+
+ internal static Encoding GetEncodingFromProvider(int codepage)
+ {
+ if (s_providers == null)
+ return null;
+
+ EncodingProvider[] providers = s_providers;
+ foreach (EncodingProvider provider in providers)
+ {
+ Encoding enc = provider.GetEncoding(codepage);
+ if (enc != null)
+ return enc;
+ }
+
+ return null;
+ }
+
+ internal static Encoding GetEncodingFromProvider(string encodingName)
+ {
+ if (s_providers == null)
+ return null;
+
+ EncodingProvider[] providers = s_providers;
+ foreach (EncodingProvider provider in providers)
+ {
+ Encoding enc = provider.GetEncoding(encodingName);
+ if (enc != null)
+ return enc;
+ }
+
+ return null;
+ }
+
+ internal static Encoding GetEncodingFromProvider(int codepage, EncoderFallback enc, DecoderFallback dec)
+ {
+ if (s_providers == null)
+ return null;
+
+ EncodingProvider[] providers = s_providers;
+ foreach (EncodingProvider provider in providers)
+ {
+ Encoding encing = provider.GetEncoding(codepage, enc, dec);
+ if (encing != null)
+ return encing;
+ }
+
+ return null;
+ }
+
+ internal static Encoding GetEncodingFromProvider(string encodingName, EncoderFallback enc, DecoderFallback dec)
+ {
+ if (s_providers == null)
+ return null;
+
+ EncodingProvider[] providers = s_providers;
+ foreach (EncodingProvider provider in providers)
+ {
+ Encoding encoding = provider.GetEncoding(encodingName, enc, dec);
+ if (encoding != null)
+ return encoding;
+ }
+
+ return null;
+ }
+
+ private static Object s_InternalSyncObject = new Object();
+ private static volatile EncodingProvider[] s_providers;
+ }
+}
diff --git a/src/mscorlib/shared/System/Text/Normalization.cs b/src/mscorlib/shared/System/Text/Normalization.cs
new file mode 100644
index 0000000000..dc8bc2af71
--- /dev/null
+++ b/src/mscorlib/shared/System/Text/Normalization.cs
@@ -0,0 +1,29 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+namespace System.Text
+{
+ // This is the enumeration for Normalization Forms
+ public enum NormalizationForm
+ {
+ FormC = 1,
+ FormD = 2,
+ FormKC = 5,
+ FormKD = 6
+ }
+
+ internal enum ExtendedNormalizationForms
+ {
+ FormC = 1,
+ FormD = 2,
+ FormKC = 5,
+ FormKD = 6,
+ FormIdna = 0xd,
+ FormCDisallowUnassigned = 0x101,
+ FormDDisallowUnassigned = 0x102,
+ FormKCDisallowUnassigned = 0x105,
+ FormKDDisallowUnassigned = 0x106,
+ FormIdnaDisallowUnassigned = 0x10d
+ }
+}
diff --git a/src/mscorlib/shared/System/Text/StringBuilder.cs b/src/mscorlib/shared/System/Text/StringBuilder.cs
new file mode 100644
index 0000000000..df1a889823
--- /dev/null
+++ b/src/mscorlib/shared/System/Text/StringBuilder.cs
@@ -0,0 +1,2409 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+using System.Text;
+using System.Runtime;
+using System.Runtime.Serialization;
+using System;
+using System.Runtime.CompilerServices;
+using System.Runtime.Versioning;
+using System.Security;
+using System.Threading;
+using System.Globalization;
+using System.Diagnostics;
+using System.Diagnostics.Contracts;
+using System.Collections.Generic;
+
+namespace System.Text
+{
+ // This class represents a mutable string. It is convenient for situations in
+ // which it is desirable to modify a string, perhaps by removing, replacing, or
+ // inserting characters, without creating a new String subsequent to
+ // each modification.
+ //
+ // The methods contained within this class do not return a new StringBuilder
+ // object unless specified otherwise. This class may be used in conjunction with the String
+ // class to carry out modifications upon strings.
+ //
+ // When passing null into a constructor in VJ and VC, the null
+ // should be explicitly type cast.
+ // For Example:
+ // StringBuilder sb1 = new StringBuilder((StringBuilder)null);
+ // StringBuilder sb2 = new StringBuilder((String)null);
+ // Console.WriteLine(sb1);
+ // Console.WriteLine(sb2);
+ //
+ [Serializable]
+ public sealed partial class StringBuilder : ISerializable
+ {
+ // A StringBuilder is internally represented as a linked list of blocks each of which holds
+ // a chunk of the string. It turns out string as a whole can also be represented as just a chunk,
+ // so that is what we do.
+
+ //
+ //
+ // CLASS VARIABLES
+ //
+ //
+ internal char[] m_ChunkChars; // The characters in this block
+ internal StringBuilder m_ChunkPrevious; // Link to the block logically before this block
+ internal int m_ChunkLength; // The index in m_ChunkChars that represent the end of the block
+ internal int m_ChunkOffset; // The logical offset (sum of all characters in previous blocks)
+ internal int m_MaxCapacity = 0;
+
+ //
+ //
+ // STATIC CONSTANTS
+ //
+ //
+ internal const int DefaultCapacity = 16;
+ private const String CapacityField = "Capacity";
+ private const String MaxCapacityField = "m_MaxCapacity";
+ private const String StringValueField = "m_StringValue";
+ private const String ThreadIDField = "m_currentThread";
+ // We want to keep chunk arrays out of large object heap (< 85K bytes ~ 40K chars) to be sure.
+ // Making the maximum chunk size big means less allocation code called, but also more waste
+ // in unused characters and slower inserts / replaces (since you do need to slide characters over
+ // within a buffer).
+ internal const int MaxChunkSize = 8000;
+
+ //
+ //
+ //CONSTRUCTORS
+ //
+ //
+
+ // Creates a new empty string builder (i.e., it represents String.Empty)
+ // with the default capacity (16 characters).
+ public StringBuilder()
+ {
+ m_MaxCapacity = int.MaxValue;
+ m_ChunkChars = new char[DefaultCapacity];
+ }
+
+ // Create a new empty string builder (i.e., it represents String.Empty)
+ // with the specified capacity.
+ public StringBuilder(int capacity)
+ : this(capacity, int.MaxValue)
+ {
+ }
+
+ // Creates a new string builder from the specified string. If value
+ // is a null String (i.e., if it represents String.NullString)
+ // then the new string builder will also be null (i.e., it will also represent
+ // String.NullString).
+ //
+ public StringBuilder(String value)
+ : this(value, DefaultCapacity)
+ {
+ }
+
+ // Creates a new string builder from the specified string with the specified
+ // capacity. If value is a null String (i.e., if it represents
+ // String.NullString) then the new string builder will also be null
+ // (i.e., it will also represent String.NullString).
+ // The maximum number of characters this string may contain is set by capacity.
+ //
+ public StringBuilder(String value, int capacity)
+ : this(value, 0, ((value != null) ? value.Length : 0), capacity)
+ {
+ }
+
+ // Creates a new string builder from the specifed substring with the specified
+ // capacity. The maximum number of characters is set by capacity.
+ //
+ public StringBuilder(String value, int startIndex, int length, int capacity)
+ {
+ if (capacity < 0)
+ {
+ throw new ArgumentOutOfRangeException(nameof(capacity),
+ SR.Format(SR.ArgumentOutOfRange_MustBePositive, nameof(capacity)));
+ }
+ if (length < 0)
+ {
+ throw new ArgumentOutOfRangeException(nameof(length),
+ SR.Format(SR.ArgumentOutOfRange_MustBeNonNegNum, nameof(length)));
+ }
+ if (startIndex < 0)
+ {
+ throw new ArgumentOutOfRangeException(nameof(startIndex), SR.ArgumentOutOfRange_StartIndex);
+ }
+ Contract.EndContractBlock();
+
+ if (value == null)
+ {
+ value = String.Empty;
+ }
+ if (startIndex > value.Length - length)
+ {
+ throw new ArgumentOutOfRangeException(nameof(length), SR.ArgumentOutOfRange_IndexLength);
+ }
+ m_MaxCapacity = Int32.MaxValue;
+ if (capacity == 0)
+ {
+ capacity = DefaultCapacity;
+ }
+ if (capacity < length)
+ capacity = length;
+
+ m_ChunkChars = new char[capacity];
+ m_ChunkLength = length;
+
+ unsafe
+ {
+ fixed (char* sourcePtr = value)
+ ThreadSafeCopy(sourcePtr + startIndex, m_ChunkChars, 0, length);
+ }
+ }
+
+ // Creates an empty StringBuilder with a minimum capacity of capacity
+ // and a maximum capacity of maxCapacity.
+ public StringBuilder(int capacity, int maxCapacity)
+ {
+ if (capacity > maxCapacity)
+ {
+ throw new ArgumentOutOfRangeException(nameof(capacity), SR.ArgumentOutOfRange_Capacity);
+ }
+ if (maxCapacity < 1)
+ {
+ throw new ArgumentOutOfRangeException(nameof(maxCapacity), SR.ArgumentOutOfRange_SmallMaxCapacity);
+ }
+ if (capacity < 0)
+ {
+ throw new ArgumentOutOfRangeException(nameof(capacity),
+ SR.Format(SR.ArgumentOutOfRange_MustBePositive, nameof(capacity)));
+ }
+ Contract.EndContractBlock();
+
+ if (capacity == 0)
+ {
+ capacity = Math.Min(DefaultCapacity, maxCapacity);
+ }
+
+ m_MaxCapacity = maxCapacity;
+ m_ChunkChars = new char[capacity];
+ }
+
+ private StringBuilder(SerializationInfo info, StreamingContext context)
+ {
+ if (info == null)
+ throw new ArgumentNullException(nameof(info));
+ Contract.EndContractBlock();
+
+ int persistedCapacity = 0;
+ string persistedString = null;
+ int persistedMaxCapacity = Int32.MaxValue;
+ bool capacityPresent = false;
+
+ // Get the data
+ SerializationInfoEnumerator enumerator = info.GetEnumerator();
+ while (enumerator.MoveNext())
+ {
+ switch (enumerator.Name)
+ {
+ case MaxCapacityField:
+ persistedMaxCapacity = info.GetInt32(MaxCapacityField);
+ break;
+ case StringValueField:
+ persistedString = info.GetString(StringValueField);
+ break;
+ case CapacityField:
+ persistedCapacity = info.GetInt32(CapacityField);
+ capacityPresent = true;
+ break;
+ default:
+ // Ignore other fields for forward compatibility.
+ break;
+ }
+ }
+
+ // Check values and set defaults
+ if (persistedString == null)
+ {
+ persistedString = String.Empty;
+ }
+ if (persistedMaxCapacity < 1 || persistedString.Length > persistedMaxCapacity)
+ {
+ throw new SerializationException(SR.Serialization_StringBuilderMaxCapacity);
+ }
+
+ if (!capacityPresent)
+ {
+ // StringBuilder in V1.X did not persist the Capacity, so this is a valid legacy code path.
+ persistedCapacity = DefaultCapacity;
+ if (persistedCapacity < persistedString.Length)
+ {
+ persistedCapacity = persistedString.Length;
+ }
+ if (persistedCapacity > persistedMaxCapacity)
+ {
+ persistedCapacity = persistedMaxCapacity;
+ }
+ }
+ if (persistedCapacity < 0 || persistedCapacity < persistedString.Length || persistedCapacity > persistedMaxCapacity)
+ {
+ throw new SerializationException(SR.Serialization_StringBuilderCapacity);
+ }
+
+ // Assign
+ m_MaxCapacity = persistedMaxCapacity;
+ m_ChunkChars = new char[persistedCapacity];
+ persistedString.CopyTo(0, m_ChunkChars, 0, persistedString.Length);
+ m_ChunkLength = persistedString.Length;
+ m_ChunkPrevious = null;
+ VerifyClassInvariant();
+ }
+
+ void ISerializable.GetObjectData(SerializationInfo info, StreamingContext context)
+ {
+ if (info == null)
+ {
+ throw new ArgumentNullException(nameof(info));
+ }
+ Contract.EndContractBlock();
+
+ VerifyClassInvariant();
+ info.AddValue(MaxCapacityField, m_MaxCapacity);
+ info.AddValue(CapacityField, Capacity);
+ info.AddValue(StringValueField, ToString());
+ // Note: persist "m_currentThread" to be compatible with old versions
+ info.AddValue(ThreadIDField, 0);
+ }
+
+ [System.Diagnostics.Conditional("_DEBUG")]
+ private void VerifyClassInvariant()
+ {
+ Debug.Assert((uint)(m_ChunkOffset + m_ChunkChars.Length) >= m_ChunkOffset, "Integer Overflow");
+ StringBuilder currentBlock = this;
+ int maxCapacity = this.m_MaxCapacity;
+ for (;;)
+ {
+ // All blocks have copy of the maxCapacity.
+ Debug.Assert(currentBlock.m_MaxCapacity == maxCapacity, "Bad maxCapacity");
+ Debug.Assert(currentBlock.m_ChunkChars != null, "Empty Buffer");
+
+ Debug.Assert(currentBlock.m_ChunkLength <= currentBlock.m_ChunkChars.Length, "Out of range length");
+ Debug.Assert(currentBlock.m_ChunkLength >= 0, "Negative length");
+ Debug.Assert(currentBlock.m_ChunkOffset >= 0, "Negative offset");
+
+ StringBuilder prevBlock = currentBlock.m_ChunkPrevious;
+ if (prevBlock == null)
+ {
+ Debug.Assert(currentBlock.m_ChunkOffset == 0, "First chunk's offset is not 0");
+ break;
+ }
+ // There are no gaps in the blocks.
+ Debug.Assert(currentBlock.m_ChunkOffset == prevBlock.m_ChunkOffset + prevBlock.m_ChunkLength, "There is a gap between chunks!");
+ currentBlock = prevBlock;
+ }
+ }
+
+ public int Capacity
+ {
+ get { return m_ChunkChars.Length + m_ChunkOffset; }
+ set
+ {
+ if (value < 0)
+ {
+ throw new ArgumentOutOfRangeException(nameof(value), SR.ArgumentOutOfRange_NegativeCapacity);
+ }
+ if (value > MaxCapacity)
+ {
+ throw new ArgumentOutOfRangeException(nameof(value), SR.ArgumentOutOfRange_Capacity);
+ }
+ if (value < Length)
+ {
+ throw new ArgumentOutOfRangeException(nameof(value), SR.ArgumentOutOfRange_SmallCapacity);
+ }
+ Contract.EndContractBlock();
+
+ if (Capacity != value)
+ {
+ int newLen = value - m_ChunkOffset;
+ char[] newArray = new char[newLen];
+ Array.Copy(m_ChunkChars, 0, newArray, 0, m_ChunkLength);
+ m_ChunkChars = newArray;
+ }
+ }
+ }
+
+ public int MaxCapacity
+ {
+ get { return m_MaxCapacity; }
+ }
+
+ // Ensures that the capacity of this string builder is at least the specified value.
+ // If capacity is greater than the capacity of this string builder, then the capacity
+ // is set to capacity; otherwise the capacity is unchanged.
+ //
+ public int EnsureCapacity(int capacity)
+ {
+ if (capacity < 0)
+ {
+ throw new ArgumentOutOfRangeException(nameof(capacity), SR.ArgumentOutOfRange_NegativeCapacity);
+ }
+ Contract.EndContractBlock();
+
+ if (Capacity < capacity)
+ Capacity = capacity;
+ return Capacity;
+ }
+
+ public override String ToString()
+ {
+ Contract.Ensures(Contract.Result<String>() != null);
+
+ VerifyClassInvariant();
+
+ if (Length == 0)
+ return String.Empty;
+
+ string ret = string.FastAllocateString(Length);
+ StringBuilder chunk = this;
+ unsafe
+ {
+ fixed (char* destinationPtr = ret)
+ {
+ do
+ {
+ if (chunk.m_ChunkLength > 0)
+ {
+ // Copy these into local variables so that they are stable even in the presence of race conditions
+ char[] sourceArray = chunk.m_ChunkChars;
+ int chunkOffset = chunk.m_ChunkOffset;
+ int chunkLength = chunk.m_ChunkLength;
+
+ // Check that we will not overrun our boundaries.
+ if ((uint)(chunkLength + chunkOffset) <= (uint)ret.Length && (uint)chunkLength <= (uint)sourceArray.Length)
+ {
+ fixed (char* sourcePtr = &sourceArray[0])
+ string.wstrcpy(destinationPtr + chunkOffset, sourcePtr, chunkLength);
+ }
+ else
+ {
+ throw new ArgumentOutOfRangeException(nameof(chunkLength), SR.ArgumentOutOfRange_Index);
+ }
+ }
+ chunk = chunk.m_ChunkPrevious;
+ } while (chunk != null);
+
+ return ret;
+ }
+ }
+ }
+
+
+ // Converts a substring of this string builder to a String.
+ public String ToString(int startIndex, int length)
+ {
+ Contract.Ensures(Contract.Result<String>() != null);
+
+ int currentLength = this.Length;
+ if (startIndex < 0)
+ {
+ throw new ArgumentOutOfRangeException(nameof(startIndex), SR.ArgumentOutOfRange_StartIndex);
+ }
+ if (startIndex > currentLength)
+ {
+ throw new ArgumentOutOfRangeException(nameof(startIndex), SR.ArgumentOutOfRange_StartIndexLargerThanLength);
+ }
+ if (length < 0)
+ {
+ throw new ArgumentOutOfRangeException(nameof(length), SR.ArgumentOutOfRange_NegativeLength);
+ }
+ if (startIndex > (currentLength - length))
+ {
+ throw new ArgumentOutOfRangeException(nameof(length), SR.ArgumentOutOfRange_IndexLength);
+ }
+
+ VerifyClassInvariant();
+
+ StringBuilder chunk = this;
+ int sourceEndIndex = startIndex + length;
+
+ string ret = string.FastAllocateString(length);
+ int curDestIndex = length;
+ unsafe
+ {
+ fixed (char* destinationPtr = ret)
+ {
+ while (curDestIndex > 0)
+ {
+ int chunkEndIndex = sourceEndIndex - chunk.m_ChunkOffset;
+ if (chunkEndIndex >= 0)
+ {
+ if (chunkEndIndex > chunk.m_ChunkLength)
+ chunkEndIndex = chunk.m_ChunkLength;
+
+ int countLeft = curDestIndex;
+ int chunkCount = countLeft;
+ int chunkStartIndex = chunkEndIndex - countLeft;
+ if (chunkStartIndex < 0)
+ {
+ chunkCount += chunkStartIndex;
+ chunkStartIndex = 0;
+ }
+ curDestIndex -= chunkCount;
+
+ if (chunkCount > 0)
+ {
+ // work off of local variables so that they are stable even in the presence of race conditions
+ char[] sourceArray = chunk.m_ChunkChars;
+
+ // Check that we will not overrun our boundaries.
+ if ((uint)(chunkCount + curDestIndex) <= (uint)length && (uint)(chunkCount + chunkStartIndex) <= (uint)sourceArray.Length)
+ {
+ fixed (char* sourcePtr = &sourceArray[chunkStartIndex])
+ string.wstrcpy(destinationPtr + curDestIndex, sourcePtr, chunkCount);
+ }
+ else
+ {
+ throw new ArgumentOutOfRangeException(nameof(chunkCount), SR.ArgumentOutOfRange_Index);
+ }
+ }
+ }
+ chunk = chunk.m_ChunkPrevious;
+ }
+
+ return ret;
+ }
+ }
+ }
+
+ // Convenience method for sb.Length=0;
+ public StringBuilder Clear()
+ {
+ this.Length = 0;
+ return this;
+ }
+
+ // Sets the length of the String in this buffer. If length is less than the current
+ // instance, the StringBuilder is truncated. If length is greater than the current
+ // instance, nulls are appended. The capacity is adjusted to be the same as the length.
+
+ public int Length
+ {
+ get
+ {
+ Contract.Ensures(Contract.Result<int>() >= 0);
+ return m_ChunkOffset + m_ChunkLength;
+ }
+ set
+ {
+ //If the new length is less than 0 or greater than our Maximum capacity, bail.
+ if (value < 0)
+ {
+ throw new ArgumentOutOfRangeException(nameof(value), SR.ArgumentOutOfRange_NegativeLength);
+ }
+
+ if (value > MaxCapacity)
+ {
+ throw new ArgumentOutOfRangeException(nameof(value), SR.ArgumentOutOfRange_SmallCapacity);
+ }
+ Contract.EndContractBlock();
+
+ int originalCapacity = Capacity;
+
+ if (value == 0 && m_ChunkPrevious == null)
+ {
+ m_ChunkLength = 0;
+ m_ChunkOffset = 0;
+ Debug.Assert(Capacity >= originalCapacity, "setting the Length should never decrease the Capacity");
+ return;
+ }
+
+ int delta = value - Length;
+ // if the specified length is greater than the current length
+ if (delta > 0)
+ {
+ // the end of the string value of the current StringBuilder object is padded with the Unicode NULL character
+ Append('\0', delta); // We could improve on this, but who does this anyway?
+ }
+ // if the specified length is less than or equal to the current length
+ else
+ {
+ StringBuilder chunk = FindChunkForIndex(value);
+ if (chunk != this)
+ {
+ // we crossed a chunk boundary when reducing the Length, we must replace this middle-chunk with a new
+ // larger chunk to ensure the original capacity is preserved
+ int newLen = originalCapacity - chunk.m_ChunkOffset;
+ char[] newArray = new char[newLen];
+
+ Debug.Assert(newLen > chunk.m_ChunkChars.Length, "the new chunk should be larger than the one it is replacing");
+ Array.Copy(chunk.m_ChunkChars, 0, newArray, 0, chunk.m_ChunkLength);
+
+ m_ChunkChars = newArray;
+ m_ChunkPrevious = chunk.m_ChunkPrevious;
+ m_ChunkOffset = chunk.m_ChunkOffset;
+ }
+ m_ChunkLength = value - chunk.m_ChunkOffset;
+ VerifyClassInvariant();
+ }
+ Debug.Assert(Capacity >= originalCapacity, "setting the Length should never decrease the Capacity");
+ }
+ }
+
+ [System.Runtime.CompilerServices.IndexerName("Chars")]
+ public char this[int index]
+ {
+ get
+ {
+ StringBuilder chunk = this;
+ for (;;)
+ {
+ int indexInBlock = index - chunk.m_ChunkOffset;
+ if (indexInBlock >= 0)
+ {
+ if (indexInBlock >= chunk.m_ChunkLength)
+ throw new IndexOutOfRangeException();
+ return chunk.m_ChunkChars[indexInBlock];
+ }
+ chunk = chunk.m_ChunkPrevious;
+ if (chunk == null)
+ throw new IndexOutOfRangeException();
+ }
+ }
+ set
+ {
+ StringBuilder chunk = this;
+ for (;;)
+ {
+ int indexInBlock = index - chunk.m_ChunkOffset;
+ if (indexInBlock >= 0)
+ {
+ if (indexInBlock >= chunk.m_ChunkLength)
+ throw new ArgumentOutOfRangeException(nameof(index), SR.ArgumentOutOfRange_Index);
+ chunk.m_ChunkChars[indexInBlock] = value;
+ return;
+ }
+ chunk = chunk.m_ChunkPrevious;
+ if (chunk == null)
+ throw new ArgumentOutOfRangeException(nameof(index), SR.ArgumentOutOfRange_Index);
+ }
+ }
+ }
+
+ // Appends a character at the end of this string builder. The capacity is adjusted as needed.
+ public StringBuilder Append(char value, int repeatCount)
+ {
+ if (repeatCount < 0)
+ {
+ throw new ArgumentOutOfRangeException(nameof(repeatCount), SR.ArgumentOutOfRange_NegativeCount);
+ }
+ Contract.Ensures(Contract.Result<StringBuilder>() != null);
+ Contract.EndContractBlock();
+
+ if (repeatCount == 0)
+ {
+ return this;
+ }
+
+ // this is where we can check if the repeatCount will put us over m_MaxCapacity
+ // We are doing the check here to prevent the corruption of the StringBuilder.
+ int newLength = Length + repeatCount;
+ if (newLength > m_MaxCapacity || newLength < repeatCount)
+ {
+ throw new ArgumentOutOfRangeException(nameof(repeatCount), SR.ArgumentOutOfRange_LengthGreaterThanCapacity);
+ }
+
+ int idx = m_ChunkLength;
+ while (repeatCount > 0)
+ {
+ if (idx < m_ChunkChars.Length)
+ {
+ m_ChunkChars[idx++] = value;
+ --repeatCount;
+ }
+ else
+ {
+ m_ChunkLength = idx;
+ ExpandByABlock(repeatCount);
+ Debug.Assert(m_ChunkLength == 0, "Expand should create a new block");
+ idx = 0;
+ }
+ }
+ m_ChunkLength = idx;
+ VerifyClassInvariant();
+ return this;
+ }
+
+ // Appends an array of characters at the end of this string builder. The capacity is adjusted as needed.
+ public StringBuilder Append(char[] value, int startIndex, int charCount)
+ {
+ if (startIndex < 0)
+ {
+ throw new ArgumentOutOfRangeException(nameof(startIndex), SR.ArgumentOutOfRange_GenericPositive);
+ }
+ if (charCount < 0)
+ {
+ throw new ArgumentOutOfRangeException(nameof(charCount), SR.ArgumentOutOfRange_GenericPositive);
+ }
+ Contract.Ensures(Contract.Result<StringBuilder>() != null);
+ Contract.EndContractBlock();
+
+ if (value == null)
+ {
+ if (startIndex == 0 && charCount == 0)
+ {
+ return this;
+ }
+ throw new ArgumentNullException(nameof(value));
+ }
+ if (charCount > value.Length - startIndex)
+ {
+ throw new ArgumentOutOfRangeException(nameof(charCount), SR.ArgumentOutOfRange_Index);
+ }
+
+ if (charCount == 0)
+ {
+ return this;
+ }
+ unsafe
+ {
+ fixed (char* valueChars = &value[startIndex])
+ {
+ Append(valueChars, charCount);
+
+ return this;
+ }
+ }
+ }
+
+
+ // Appends a copy of this string at the end of this string builder.
+ public StringBuilder Append(String value)
+ {
+ Contract.Ensures(Contract.Result<StringBuilder>() != null);
+
+ if (value != null)
+ {
+ // This is a hand specialization of the 'AppendHelper' code below.
+ // We could have just called AppendHelper.
+ char[] chunkChars = m_ChunkChars;
+ int chunkLength = m_ChunkLength;
+ int valueLen = value.Length;
+ int newCurrentIndex = chunkLength + valueLen;
+ if (newCurrentIndex < chunkChars.Length) // Use strictly < to avoid issue if count == 0, newIndex == length
+ {
+ if (valueLen <= 2)
+ {
+ if (valueLen > 0)
+ chunkChars[chunkLength] = value[0];
+ if (valueLen > 1)
+ chunkChars[chunkLength + 1] = value[1];
+ }
+ else
+ {
+ unsafe
+ {
+ fixed (char* valuePtr = value)
+ fixed (char* destPtr = &chunkChars[chunkLength])
+ string.wstrcpy(destPtr, valuePtr, valueLen);
+ }
+ }
+ m_ChunkLength = newCurrentIndex;
+ }
+ else
+ AppendHelper(value);
+ }
+ return this;
+ }
+
+
+ // We put this fixed in its own helper to avoid the cost zero initing valueChars in the
+ // case we don't actually use it.
+ private void AppendHelper(string value)
+ {
+ unsafe
+ {
+ fixed (char* valueChars = value)
+ Append(valueChars, value.Length);
+ }
+ }
+
+ // Appends a copy of the characters in value from startIndex to startIndex +
+ // count at the end of this string builder.
+ public StringBuilder Append(String value, int startIndex, int count)
+ {
+ if (startIndex < 0)
+ {
+ throw new ArgumentOutOfRangeException(nameof(startIndex), SR.ArgumentOutOfRange_Index);
+ }
+
+ if (count < 0)
+ {
+ throw new ArgumentOutOfRangeException(nameof(count), SR.ArgumentOutOfRange_GenericPositive);
+ }
+ Contract.Ensures(Contract.Result<StringBuilder>() != null);
+
+ //If the value being added is null, eat the null
+ //and return.
+ if (value == null)
+ {
+ if (startIndex == 0 && count == 0)
+ {
+ return this;
+ }
+ throw new ArgumentNullException(nameof(value));
+ }
+
+ if (count == 0)
+ {
+ return this;
+ }
+
+ if (startIndex > value.Length - count)
+ {
+ throw new ArgumentOutOfRangeException(nameof(startIndex), SR.ArgumentOutOfRange_Index);
+ }
+
+ unsafe
+ {
+ fixed (char* valueChars = value)
+ {
+ Append(valueChars + startIndex, count);
+
+ return this;
+ }
+ }
+ }
+
+ public StringBuilder AppendLine()
+ {
+ Contract.Ensures(Contract.Result<StringBuilder>() != null);
+ return Append(Environment.NewLine);
+ }
+
+ public StringBuilder AppendLine(string value)
+ {
+ Contract.Ensures(Contract.Result<StringBuilder>() != null);
+ Append(value);
+ return Append(Environment.NewLine);
+ }
+
+ public void CopyTo(int sourceIndex, char[] destination, int destinationIndex, int count)
+ {
+ if (destination == null)
+ {
+ throw new ArgumentNullException(nameof(destination));
+ }
+
+ if (count < 0)
+ {
+ throw new ArgumentOutOfRangeException(nameof(count), SR.Arg_NegativeArgCount);
+ }
+
+ if (destinationIndex < 0)
+ {
+ throw new ArgumentOutOfRangeException(nameof(destinationIndex),
+ SR.Format(SR.ArgumentOutOfRange_MustBeNonNegNum, nameof(destinationIndex)));
+ }
+
+ if (destinationIndex > destination.Length - count)
+ {
+ throw new ArgumentException(SR.ArgumentOutOfRange_OffsetOut);
+ }
+
+ if ((uint)sourceIndex > (uint)Length)
+ {
+ throw new ArgumentOutOfRangeException(nameof(sourceIndex), SR.ArgumentOutOfRange_Index);
+ }
+
+ if (sourceIndex > Length - count)
+ {
+ throw new ArgumentException(SR.Arg_LongerThanSrcString);
+ }
+ Contract.EndContractBlock();
+
+ VerifyClassInvariant();
+
+ StringBuilder chunk = this;
+ int sourceEndIndex = sourceIndex + count;
+ int curDestIndex = destinationIndex + count;
+ while (count > 0)
+ {
+ int chunkEndIndex = sourceEndIndex - chunk.m_ChunkOffset;
+ if (chunkEndIndex >= 0)
+ {
+ if (chunkEndIndex > chunk.m_ChunkLength)
+ chunkEndIndex = chunk.m_ChunkLength;
+
+ int chunkCount = count;
+ int chunkStartIndex = chunkEndIndex - count;
+ if (chunkStartIndex < 0)
+ {
+ chunkCount += chunkStartIndex;
+ chunkStartIndex = 0;
+ }
+ curDestIndex -= chunkCount;
+ count -= chunkCount;
+
+ // SafeCritical: we ensure that chunkStartIndex + chunkCount are within range of m_chunkChars
+ // as well as ensuring that curDestIndex + chunkCount are within range of destination
+ ThreadSafeCopy(chunk.m_ChunkChars, chunkStartIndex, destination, curDestIndex, chunkCount);
+ }
+ chunk = chunk.m_ChunkPrevious;
+ }
+ }
+
+ // Inserts multiple copies of a string into this string builder at the specified position.
+ // Existing characters are shifted to make room for the new text.
+ // The capacity is adjusted as needed. If value equals String.Empty, this
+ // string builder is not changed.
+ //
+ public StringBuilder Insert(int index, String value, int count)
+ {
+ if (count < 0)
+ {
+ throw new ArgumentOutOfRangeException(nameof(count), SR.ArgumentOutOfRange_NeedNonNegNum);
+ }
+ Contract.Ensures(Contract.Result<StringBuilder>() != null);
+ Contract.EndContractBlock();
+
+ //Range check the index.
+ int currentLength = Length;
+ if ((uint)index > (uint)currentLength)
+ {
+ throw new ArgumentOutOfRangeException(nameof(index), SR.ArgumentOutOfRange_Index);
+ }
+
+ //If value is null, empty or count is 0, do nothing. This is ECMA standard.
+ if (value == null || value.Length == 0 || count == 0)
+ {
+ return this;
+ }
+
+ //Ensure we don't insert more chars than we can hold, and we don't
+ //have any integer overflow in our inserted characters.
+ long insertingChars = (long)value.Length * count;
+ if (insertingChars > MaxCapacity - this.Length)
+ {
+ throw new OutOfMemoryException();
+ }
+ Debug.Assert(insertingChars + this.Length < Int32.MaxValue);
+
+ StringBuilder chunk;
+ int indexInChunk;
+ MakeRoom(index, (int)insertingChars, out chunk, out indexInChunk, false);
+ unsafe
+ {
+ fixed (char* valuePtr = value)
+ {
+ while (count > 0)
+ {
+ ReplaceInPlaceAtChunk(ref chunk, ref indexInChunk, valuePtr, value.Length);
+ --count;
+ }
+
+ return this;
+ }
+ }
+ }
+
+ // Removes the specified characters from this string builder.
+ // The length of this string builder is reduced by
+ // length, but the capacity is unaffected.
+ //
+ public StringBuilder Remove(int startIndex, int length)
+ {
+ if (length < 0)
+ {
+ throw new ArgumentOutOfRangeException(nameof(length), SR.ArgumentOutOfRange_NegativeLength);
+ }
+
+ if (startIndex < 0)
+ {
+ throw new ArgumentOutOfRangeException(nameof(startIndex), SR.ArgumentOutOfRange_StartIndex);
+ }
+
+ if (length > Length - startIndex)
+ {
+ throw new ArgumentOutOfRangeException(nameof(length), SR.ArgumentOutOfRange_Index);
+ }
+ Contract.Ensures(Contract.Result<StringBuilder>() != null);
+ Contract.EndContractBlock();
+
+ if (Length == length && startIndex == 0)
+ {
+ // Optimization. If we are deleting everything
+ Length = 0;
+ return this;
+ }
+
+ if (length > 0)
+ {
+ StringBuilder chunk;
+ int indexInChunk;
+ Remove(startIndex, length, out chunk, out indexInChunk);
+ }
+ return this;
+ }
+
+ // Appends a boolean to the end of this string builder.
+ // The capacity is adjusted as needed.
+ public StringBuilder Append(bool value)
+ {
+ Contract.Ensures(Contract.Result<StringBuilder>() != null);
+ return Append(value.ToString());
+ }
+
+ // Appends an sbyte to this string builder.
+ // The capacity is adjusted as needed.
+ [CLSCompliant(false)]
+ public StringBuilder Append(sbyte value)
+ {
+ Contract.Ensures(Contract.Result<StringBuilder>() != null);
+ return Append(value.ToString());
+ }
+
+ // Appends a ubyte to this string builder.
+ // The capacity is adjusted as needed.
+ public StringBuilder Append(byte value)
+ {
+ Contract.Ensures(Contract.Result<StringBuilder>() != null);
+ return Append(value.ToString());
+ }
+
+ // Appends a character at the end of this string builder. The capacity is adjusted as needed.
+ public StringBuilder Append(char value)
+ {
+ Contract.Ensures(Contract.Result<StringBuilder>() != null);
+
+ if (m_ChunkLength < m_ChunkChars.Length)
+ m_ChunkChars[m_ChunkLength++] = value;
+ else
+ Append(value, 1);
+ return this;
+ }
+
+ // Appends a short to this string builder.
+ // The capacity is adjusted as needed.
+ public StringBuilder Append(short value)
+ {
+ Contract.Ensures(Contract.Result<StringBuilder>() != null);
+ return Append(value.ToString());
+ }
+
+ // Appends an int to this string builder.
+ // The capacity is adjusted as needed.
+ public StringBuilder Append(int value)
+ {
+ Contract.Ensures(Contract.Result<StringBuilder>() != null);
+ return Append(value.ToString());
+ }
+
+ // Appends a long to this string builder.
+ // The capacity is adjusted as needed.
+ public StringBuilder Append(long value)
+ {
+ Contract.Ensures(Contract.Result<StringBuilder>() != null);
+ return Append(value.ToString());
+ }
+
+ // Appends a float to this string builder.
+ // The capacity is adjusted as needed.
+ public StringBuilder Append(float value)
+ {
+ Contract.Ensures(Contract.Result<StringBuilder>() != null);
+ return Append(value.ToString());
+ }
+
+ // Appends a double to this string builder.
+ // The capacity is adjusted as needed.
+ public StringBuilder Append(double value)
+ {
+ Contract.Ensures(Contract.Result<StringBuilder>() != null);
+ return Append(value.ToString());
+ }
+
+ public StringBuilder Append(decimal value)
+ {
+ Contract.Ensures(Contract.Result<StringBuilder>() != null);
+ return Append(value.ToString());
+ }
+
+ // Appends an ushort to this string builder.
+ // The capacity is adjusted as needed.
+ [CLSCompliant(false)]
+ public StringBuilder Append(ushort value)
+ {
+ Contract.Ensures(Contract.Result<StringBuilder>() != null);
+ return Append(value.ToString());
+ }
+
+ // Appends an uint to this string builder.
+ // The capacity is adjusted as needed.
+ [CLSCompliant(false)]
+ public StringBuilder Append(uint value)
+ {
+ Contract.Ensures(Contract.Result<StringBuilder>() != null);
+ return Append(value.ToString());
+ }
+
+ // Appends an unsigned long to this string builder.
+ // The capacity is adjusted as needed.
+ [CLSCompliant(false)]
+ public StringBuilder Append(ulong value)
+ {
+ Contract.Ensures(Contract.Result<StringBuilder>() != null);
+ return Append(value.ToString());
+ }
+
+ // Appends an Object to this string builder.
+ // The capacity is adjusted as needed.
+ public StringBuilder Append(Object value)
+ {
+ Contract.Ensures(Contract.Result<StringBuilder>() != null);
+
+ if (null == value)
+ {
+ //Appending null is now a no-op.
+ return this;
+ }
+ return Append(value.ToString());
+ }
+
+ // Appends all of the characters in value to the current instance.
+ public StringBuilder Append(char[] value)
+ {
+ Contract.Ensures(Contract.Result<StringBuilder>() != null);
+
+ if (null != value && value.Length > 0)
+ {
+ unsafe
+ {
+ fixed (char* valueChars = &value[0])
+ Append(valueChars, value.Length);
+ }
+ }
+ return this;
+ }
+
+ // Append joined values with a separator between each value.
+ public unsafe StringBuilder AppendJoin<T>(char separator, params T[] values)
+ {
+ // Defer argument validation to the internal function
+ return AppendJoinCore(&separator, 1, values);
+ }
+
+ public unsafe StringBuilder AppendJoin<T>(string separator, params T[] values)
+ {
+ separator = separator ?? string.Empty;
+ fixed (char* pSeparator = separator)
+ {
+ // Defer argument validation to the internal function
+ return AppendJoinCore(pSeparator, separator.Length, values);
+ }
+ }
+
+ public unsafe StringBuilder AppendJoin<T>(char separator, IEnumerable<T> values)
+ {
+ // Defer argument validation to the internal function
+ return AppendJoinCore(&separator, 1, values);
+ }
+
+ public unsafe StringBuilder AppendJoin<T>(string separator, IEnumerable<T> values)
+ {
+ separator = separator ?? string.Empty;
+ fixed (char* pSeparator = separator)
+ {
+ // Defer argument validation to the internal function
+ return AppendJoinCore(pSeparator, separator.Length, values);
+ }
+ }
+
+ private unsafe StringBuilder AppendJoinCore<T>(char* separator, int separatorLength, params T[] values)
+ {
+ if (values == null)
+ throw new ArgumentNullException(nameof(values));
+ Contract.Ensures(Contract.Result<StringBuilder>() != null);
+
+ if (values.Length == 0)
+ return this;
+
+ var value = values[0];
+ if (value != null)
+ Append(value.ToString());
+
+ for (var i = 1; i < values.Length; i++)
+ {
+ Append(separator, separatorLength);
+ value = values[i];
+ if (value != null)
+ Append(value.ToString());
+ }
+ return this;
+ }
+
+ private unsafe StringBuilder AppendJoinCore<T>(char* separator, int separatorLength, IEnumerable<T> values)
+ {
+ if (values == null)
+ throw new ArgumentNullException(nameof(values));
+ Contract.Ensures(Contract.Result<StringBuilder>() != null);
+
+ using (var en = values.GetEnumerator())
+ {
+ if (!en.MoveNext())
+ return this;
+
+ var value = en.Current;
+ if (value != null)
+ Append(value.ToString());
+
+ while (en.MoveNext())
+ {
+ Append(separator, separatorLength);
+ value = en.Current;
+ if (value != null)
+ Append(value.ToString());
+ }
+ }
+ return this;
+ }
+
+ /*====================================Insert====================================
+ **
+ ==============================================================================*/
+
+ // Returns a reference to the StringBuilder with ; value inserted into
+ // the buffer at index. Existing characters are shifted to make room for the new text.
+ // The capacity is adjusted as needed. If value equals String.Empty, the
+ // StringBuilder is not changed.
+ //
+ public StringBuilder Insert(int index, String value)
+ {
+ if ((uint)index > (uint)Length)
+ {
+ throw new ArgumentOutOfRangeException(nameof(index), SR.ArgumentOutOfRange_Index);
+ }
+ Contract.Ensures(Contract.Result<StringBuilder>() != null);
+ Contract.EndContractBlock();
+
+ if (value != null)
+ {
+ unsafe
+ {
+ fixed (char* sourcePtr = value)
+ Insert(index, sourcePtr, value.Length);
+ }
+ }
+ return this;
+ }
+
+ // Returns a reference to the StringBuilder with ; value inserted into
+ // the buffer at index. Existing characters are shifted to make room for the new text.
+ // The capacity is adjusted as needed. If value equals String.Empty, the
+ // StringBuilder is not changed.
+ //
+ public StringBuilder Insert(int index, bool value)
+ {
+ Contract.Ensures(Contract.Result<StringBuilder>() != null);
+ return Insert(index, value.ToString(), 1);
+ }
+
+ // Returns a reference to the StringBuilder with ; value inserted into
+ // the buffer at index. Existing characters are shifted to make room for the new text.
+ // The capacity is adjusted as needed. If value equals String.Empty, the
+ // StringBuilder is not changed.
+ //
+ [CLSCompliant(false)]
+ public StringBuilder Insert(int index, sbyte value)
+ {
+ Contract.Ensures(Contract.Result<StringBuilder>() != null);
+ return Insert(index, value.ToString(), 1);
+ }
+
+ // Returns a reference to the StringBuilder with ; value inserted into
+ // the buffer at index. Existing characters are shifted to make room for the new text.
+ // The capacity is adjusted as needed. If value equals String.Empty, the
+ // StringBuilder is not changed.
+ //
+ public StringBuilder Insert(int index, byte value)
+ {
+ Contract.Ensures(Contract.Result<StringBuilder>() != null);
+ return Insert(index, value.ToString(), 1);
+ }
+
+ // Returns a reference to the StringBuilder with ; value inserted into
+ // the buffer at index. Existing characters are shifted to make room for the new text.
+ // The capacity is adjusted as needed. If value equals String.Empty, the
+ // StringBuilder is not changed.
+ //
+ public StringBuilder Insert(int index, short value)
+ {
+ Contract.Ensures(Contract.Result<StringBuilder>() != null);
+ return Insert(index, value.ToString(), 1);
+ }
+
+ // Returns a reference to the StringBuilder with ; value inserted into
+ // the buffer at index. Existing characters are shifted to make room for the new text.
+ // The capacity is adjusted as needed. If value equals String.Empty, the
+ // StringBuilder is not changed.
+ public StringBuilder Insert(int index, char value)
+ {
+ Contract.Ensures(Contract.Result<StringBuilder>() != null);
+
+ unsafe
+ {
+ Insert(index, &value, 1);
+ }
+ return this;
+ }
+
+ // Returns a reference to the StringBuilder with ; value inserted into
+ // the buffer at index. Existing characters are shifted to make room for the new text.
+ // The capacity is adjusted as needed. If value equals String.Empty, the
+ // StringBuilder is not changed.
+ //
+ public StringBuilder Insert(int index, char[] value)
+ {
+ if ((uint)index > (uint)Length)
+ {
+ throw new ArgumentOutOfRangeException(nameof(index), SR.ArgumentOutOfRange_Index);
+ }
+ Contract.Ensures(Contract.Result<StringBuilder>() != null);
+ Contract.EndContractBlock();
+
+ if (value != null)
+ Insert(index, value, 0, value.Length);
+ return this;
+ }
+
+ // Returns a reference to the StringBuilder with charCount characters from
+ // value inserted into the buffer at index. Existing characters are shifted
+ // to make room for the new text and capacity is adjusted as required. If value is null, the StringBuilder
+ // is unchanged. Characters are taken from value starting at position startIndex.
+ public StringBuilder Insert(int index, char[] value, int startIndex, int charCount)
+ {
+ Contract.Ensures(Contract.Result<StringBuilder>() != null);
+
+ int currentLength = Length;
+ if ((uint)index > (uint)currentLength)
+ {
+ throw new ArgumentOutOfRangeException(nameof(index), SR.ArgumentOutOfRange_Index);
+ }
+
+ //If they passed in a null char array, just jump out quickly.
+ if (value == null)
+ {
+ if (startIndex == 0 && charCount == 0)
+ {
+ return this;
+ }
+ throw new ArgumentNullException(nameof(value), SR.ArgumentNull_String);
+ }
+
+ //Range check the array.
+ if (startIndex < 0)
+ {
+ throw new ArgumentOutOfRangeException(nameof(startIndex), SR.ArgumentOutOfRange_StartIndex);
+ }
+
+ if (charCount < 0)
+ {
+ throw new ArgumentOutOfRangeException(nameof(charCount), SR.ArgumentOutOfRange_GenericPositive);
+ }
+
+ if (startIndex > value.Length - charCount)
+ {
+ throw new ArgumentOutOfRangeException(nameof(startIndex), SR.ArgumentOutOfRange_Index);
+ }
+
+ if (charCount > 0)
+ {
+ unsafe
+ {
+ fixed (char* sourcePtr = &value[startIndex])
+ Insert(index, sourcePtr, charCount);
+ }
+ }
+ return this;
+ }
+
+ // Returns a reference to the StringBuilder with ; value inserted into
+ // the buffer at index. Existing characters are shifted to make room for the new text.
+ // The capacity is adjusted as needed. If value equals String.Empty, the
+ // StringBuilder is not changed.
+ //
+ public StringBuilder Insert(int index, int value)
+ {
+ Contract.Ensures(Contract.Result<StringBuilder>() != null);
+ return Insert(index, value.ToString(), 1);
+ }
+
+ // Returns a reference to the StringBuilder with ; value inserted into
+ // the buffer at index. Existing characters are shifted to make room for the new text.
+ // The capacity is adjusted as needed. If value equals String.Empty, the
+ // StringBuilder is not changed.
+ //
+ public StringBuilder Insert(int index, long value)
+ {
+ Contract.Ensures(Contract.Result<StringBuilder>() != null);
+ return Insert(index, value.ToString(), 1);
+ }
+
+ // Returns a reference to the StringBuilder with ; value inserted into
+ // the buffer at index. Existing characters are shifted to make room for the new text.
+ // The capacity is adjusted as needed. If value equals String.Empty, the
+ // StringBuilder is not changed.
+ //
+ public StringBuilder Insert(int index, float value)
+ {
+ Contract.Ensures(Contract.Result<StringBuilder>() != null);
+ return Insert(index, value.ToString(), 1);
+ }
+
+ // Returns a reference to the StringBuilder with ; value inserted into
+ // the buffer at index. Existing characters are shifted to make room for the new text.
+ // The capacity is adjusted as needed. If value equals String.Empty, the
+ // StringBuilder is not changed.
+ //
+ public StringBuilder Insert(int index, double value)
+ {
+ Contract.Ensures(Contract.Result<StringBuilder>() != null);
+ return Insert(index, value.ToString(), 1);
+ }
+
+ public StringBuilder Insert(int index, decimal value)
+ {
+ Contract.Ensures(Contract.Result<StringBuilder>() != null);
+ return Insert(index, value.ToString(), 1);
+ }
+
+ // Returns a reference to the StringBuilder with value inserted into
+ // the buffer at index. Existing characters are shifted to make room for the new text.
+ // The capacity is adjusted as needed.
+ //
+ [CLSCompliant(false)]
+ public StringBuilder Insert(int index, ushort value)
+ {
+ Contract.Ensures(Contract.Result<StringBuilder>() != null);
+ return Insert(index, value.ToString(), 1);
+ }
+
+ // Returns a reference to the StringBuilder with value inserted into
+ // the buffer at index. Existing characters are shifted to make room for the new text.
+ // The capacity is adjusted as needed.
+ //
+ [CLSCompliant(false)]
+ public StringBuilder Insert(int index, uint value)
+ {
+ Contract.Ensures(Contract.Result<StringBuilder>() != null);
+ return Insert(index, value.ToString(), 1);
+ }
+
+ // Returns a reference to the StringBuilder with value inserted into
+ // the buffer at index. Existing characters are shifted to make room for the new text.
+ // The capacity is adjusted as needed.
+ //
+ [CLSCompliant(false)]
+ public StringBuilder Insert(int index, ulong value)
+ {
+ Contract.Ensures(Contract.Result<StringBuilder>() != null);
+ return Insert(index, value.ToString(), 1);
+ }
+
+ // Returns a reference to this string builder with value inserted into
+ // the buffer at index. Existing characters are shifted to make room for the
+ // new text. The capacity is adjusted as needed. If value equals String.Empty, the
+ // StringBuilder is not changed. No changes are made if value is null.
+ //
+ public StringBuilder Insert(int index, Object value)
+ {
+ Contract.Ensures(Contract.Result<StringBuilder>() != null);
+ if (null == value)
+ {
+ return this;
+ }
+ return Insert(index, value.ToString(), 1);
+ }
+
+ public StringBuilder AppendFormat(String format, Object arg0)
+ {
+ Contract.Ensures(Contract.Result<StringBuilder>() != null);
+ return AppendFormatHelper(null, format, new ParamsArray(arg0));
+ }
+
+ public StringBuilder AppendFormat(String format, Object arg0, Object arg1)
+ {
+ Contract.Ensures(Contract.Result<StringBuilder>() != null);
+ return AppendFormatHelper(null, format, new ParamsArray(arg0, arg1));
+ }
+
+ public StringBuilder AppendFormat(String format, Object arg0, Object arg1, Object arg2)
+ {
+ Contract.Ensures(Contract.Result<StringBuilder>() != null);
+ return AppendFormatHelper(null, format, new ParamsArray(arg0, arg1, arg2));
+ }
+
+ public StringBuilder AppendFormat(String format, params Object[] args)
+ {
+ if (args == null)
+ {
+ // To preserve the original exception behavior, throw an exception about format if both
+ // args and format are null. The actual null check for format is in AppendFormatHelper.
+ throw new ArgumentNullException((format == null) ? nameof(format) : nameof(args));
+ }
+ Contract.Ensures(Contract.Result<String>() != null);
+ Contract.EndContractBlock();
+
+ return AppendFormatHelper(null, format, new ParamsArray(args));
+ }
+
+ public StringBuilder AppendFormat(IFormatProvider provider, String format, Object arg0)
+ {
+ Contract.Ensures(Contract.Result<StringBuilder>() != null);
+ return AppendFormatHelper(provider, format, new ParamsArray(arg0));
+ }
+
+ public StringBuilder AppendFormat(IFormatProvider provider, String format, Object arg0, Object arg1)
+ {
+ Contract.Ensures(Contract.Result<StringBuilder>() != null);
+ return AppendFormatHelper(provider, format, new ParamsArray(arg0, arg1));
+ }
+
+ public StringBuilder AppendFormat(IFormatProvider provider, String format, Object arg0, Object arg1, Object arg2)
+ {
+ Contract.Ensures(Contract.Result<StringBuilder>() != null);
+ return AppendFormatHelper(provider, format, new ParamsArray(arg0, arg1, arg2));
+ }
+
+ public StringBuilder AppendFormat(IFormatProvider provider, String format, params Object[] args)
+ {
+ if (args == null)
+ {
+ // To preserve the original exception behavior, throw an exception about format if both
+ // args and format are null. The actual null check for format is in AppendFormatHelper.
+ throw new ArgumentNullException((format == null) ? nameof(format) : nameof(args));
+ }
+ Contract.Ensures(Contract.Result<String>() != null);
+ Contract.EndContractBlock();
+
+ return AppendFormatHelper(provider, format, new ParamsArray(args));
+ }
+
+ private static void FormatError()
+ {
+ throw new FormatException(SR.Format_InvalidString);
+ }
+
+ // undocumented exclusive limits on the range for Argument Hole Index and Argument Hole Alignment.
+ private const int Index_Limit = 1000000; // Note: 0 <= ArgIndex < Index_Limit
+ private const int Width_Limit = 1000000; // Note: -Width_Limit < ArgAlign < Width_Limit
+
+ internal StringBuilder AppendFormatHelper(IFormatProvider provider, String format, ParamsArray args)
+ {
+ if (format == null)
+ {
+ throw new ArgumentNullException(nameof(format));
+ }
+ Contract.Ensures(Contract.Result<StringBuilder>() != null);
+ Contract.EndContractBlock();
+
+ int pos = 0;
+ int len = format.Length;
+ char ch = '\x0';
+ StringBuilder unescapedItemFormat = null;
+
+ ICustomFormatter cf = null;
+ if (provider != null)
+ {
+ cf = (ICustomFormatter)provider.GetFormat(typeof(ICustomFormatter));
+ }
+
+ while (true)
+ {
+ while (pos < len)
+ {
+ ch = format[pos];
+
+ pos++;
+ // Is it a closing brace?
+ if (ch == '}')
+ {
+ // Check next character (if there is one) to see if it is escaped. eg }}
+ if (pos < len && format[pos] == '}')
+ pos++;
+ else
+ // Otherwise treat it as an error (Mismatched closing brace)
+ FormatError();
+ }
+ // Is it a opening brace?
+ if (ch == '{')
+ {
+ // Check next character (if there is one) to see if it is escaped. eg {{
+ if (pos < len && format[pos] == '{')
+ pos++;
+ else
+ {
+ // Otherwise treat it as the opening brace of an Argument Hole.
+ pos--;
+ break;
+ }
+ }
+ // If it neither then treat the character as just text.
+ Append(ch);
+ }
+
+ //
+ // Start of parsing of Argument Hole.
+ // Argument Hole ::= { Index (, WS* Alignment WS*)? (: Formatting)? }
+ //
+ if (pos == len) break;
+
+ //
+ // Start of parsing required Index parameter.
+ // Index ::= ('0'-'9')+ WS*
+ //
+ pos++;
+ // If reached end of text then error (Unexpected end of text)
+ // or character is not a digit then error (Unexpected Character)
+ if (pos == len || (ch = format[pos]) < '0' || ch > '9') FormatError();
+ int index = 0;
+ do
+ {
+ index = index * 10 + ch - '0';
+ pos++;
+ // If reached end of text then error (Unexpected end of text)
+ if (pos == len) FormatError();
+ ch = format[pos];
+ // so long as character is digit and value of the index is less than 1000000 ( index limit )
+ } while (ch >= '0' && ch <= '9' && index < Index_Limit);
+
+ // If value of index is not within the range of the arguments passed in then error (Index out of range)
+ if (index >= args.Length) throw new FormatException(SR.Format_IndexOutOfRange);
+
+ // Consume optional whitespace.
+ while (pos < len && (ch = format[pos]) == ' ') pos++;
+ // End of parsing index parameter.
+
+ //
+ // Start of parsing of optional Alignment
+ // Alignment ::= comma WS* minus? ('0'-'9')+ WS*
+ //
+ bool leftJustify = false;
+ int width = 0;
+ // Is the character a comma, which indicates the start of alignment parameter.
+ if (ch == ',')
+ {
+ pos++;
+
+ // Consume Optional whitespace
+ while (pos < len && format[pos] == ' ') pos++;
+
+ // If reached the end of the text then error (Unexpected end of text)
+ if (pos == len) FormatError();
+
+ // Is there a minus sign?
+ ch = format[pos];
+ if (ch == '-')
+ {
+ // Yes, then alignment is left justified.
+ leftJustify = true;
+ pos++;
+ // If reached end of text then error (Unexpected end of text)
+ if (pos == len) FormatError();
+ ch = format[pos];
+ }
+
+ // If current character is not a digit then error (Unexpected character)
+ if (ch < '0' || ch > '9') FormatError();
+ // Parse alignment digits.
+ do
+ {
+ width = width * 10 + ch - '0';
+ pos++;
+ // If reached end of text then error. (Unexpected end of text)
+ if (pos == len) FormatError();
+ ch = format[pos];
+ // So long a current character is a digit and the value of width is less than 100000 ( width limit )
+ } while (ch >= '0' && ch <= '9' && width < Width_Limit);
+ // end of parsing Argument Alignment
+ }
+
+ // Consume optional whitespace
+ while (pos < len && (ch = format[pos]) == ' ') pos++;
+
+ //
+ // Start of parsing of optional formatting parameter.
+ //
+ Object arg = args[index];
+ String itemFormat = null;
+ // Is current character a colon? which indicates start of formatting parameter.
+ if (ch == ':')
+ {
+ pos++;
+ int startPos = pos;
+
+ while (true)
+ {
+ // If reached end of text then error. (Unexpected end of text)
+ if (pos == len) FormatError();
+ ch = format[pos];
+ pos++;
+
+ // Is character a opening or closing brace?
+ if (ch == '}' || ch == '{')
+ {
+ if (ch == '{')
+ {
+ // Yes, is next character also a opening brace, then treat as escaped. eg {{
+ if (pos < len && format[pos] == '{')
+ pos++;
+ else
+ // Error Argument Holes can not be nested.
+ FormatError();
+ }
+ else
+ {
+ // Yes, is next character also a closing brace, then treat as escaped. eg }}
+ if (pos < len && format[pos] == '}')
+ pos++;
+ else
+ {
+ // No, then treat it as the closing brace of an Arg Hole.
+ pos--;
+ break;
+ }
+ }
+
+ // Reaching here means the brace has been escaped
+ // so we need to build up the format string in segments
+ if (unescapedItemFormat == null)
+ {
+ unescapedItemFormat = new StringBuilder();
+ }
+ unescapedItemFormat.Append(format, startPos, pos - startPos - 1);
+ startPos = pos;
+ }
+ }
+
+ if (unescapedItemFormat == null || unescapedItemFormat.Length == 0)
+ {
+ if (startPos != pos)
+ {
+ // There was no brace escaping, extract the item format as a single string
+ itemFormat = format.Substring(startPos, pos - startPos);
+ }
+ }
+ else
+ {
+ unescapedItemFormat.Append(format, startPos, pos - startPos);
+ itemFormat = unescapedItemFormat.ToString();
+ unescapedItemFormat.Clear();
+ }
+ }
+ // If current character is not a closing brace then error. (Unexpected Character)
+ if (ch != '}') FormatError();
+ // Construct the output for this arg hole.
+ pos++;
+ String s = null;
+ if (cf != null)
+ {
+ s = cf.Format(itemFormat, arg, provider);
+ }
+
+ if (s == null)
+ {
+ IFormattable formattableArg = arg as IFormattable;
+
+ if (formattableArg != null)
+ {
+ s = formattableArg.ToString(itemFormat, provider);
+ }
+ else if (arg != null)
+ {
+ s = arg.ToString();
+ }
+ }
+ // Append it to the final output of the Format String.
+ if (s == null) s = String.Empty;
+ int pad = width - s.Length;
+ if (!leftJustify && pad > 0) Append(' ', pad);
+ Append(s);
+ if (leftJustify && pad > 0) Append(' ', pad);
+ // Continue to parse other characters.
+ }
+ return this;
+ }
+
+ // Returns a reference to the current StringBuilder with all instances of oldString
+ // replaced with newString. If startIndex and count are specified,
+ // we only replace strings completely contained in the range of startIndex to startIndex +
+ // count. The strings to be replaced are checked on an ordinal basis (e.g. not culture aware). If
+ // newValue is null, instances of oldValue are removed (e.g. replaced with nothing.).
+ //
+ public StringBuilder Replace(String oldValue, String newValue)
+ {
+ Contract.Ensures(Contract.Result<StringBuilder>() != null);
+ return Replace(oldValue, newValue, 0, Length);
+ }
+
+ public bool Equals(StringBuilder sb)
+ {
+ if (sb == null)
+ return false;
+ if (Capacity != sb.Capacity || MaxCapacity != sb.MaxCapacity || Length != sb.Length)
+ return false;
+ if (sb == this)
+ return true;
+
+ StringBuilder thisChunk = this;
+ int thisChunkIndex = thisChunk.m_ChunkLength;
+ StringBuilder sbChunk = sb;
+ int sbChunkIndex = sbChunk.m_ChunkLength;
+ for (;;)
+ {
+ // Decrement the pointer to the 'this' StringBuilder
+ --thisChunkIndex;
+ --sbChunkIndex;
+
+ while (thisChunkIndex < 0)
+ {
+ thisChunk = thisChunk.m_ChunkPrevious;
+ if (thisChunk == null)
+ break;
+ thisChunkIndex = thisChunk.m_ChunkLength + thisChunkIndex;
+ }
+
+ // Decrement the pointer to the 'this' StringBuilder
+ while (sbChunkIndex < 0)
+ {
+ sbChunk = sbChunk.m_ChunkPrevious;
+ if (sbChunk == null)
+ break;
+ sbChunkIndex = sbChunk.m_ChunkLength + sbChunkIndex;
+ }
+
+ if (thisChunkIndex < 0)
+ return sbChunkIndex < 0;
+ if (sbChunkIndex < 0)
+ return false;
+ if (thisChunk.m_ChunkChars[thisChunkIndex] != sbChunk.m_ChunkChars[sbChunkIndex])
+ return false;
+ }
+ }
+
+ public StringBuilder Replace(String oldValue, String newValue, int startIndex, int count)
+ {
+ Contract.Ensures(Contract.Result<StringBuilder>() != null);
+
+ int currentLength = Length;
+ if ((uint)startIndex > (uint)currentLength)
+ {
+ throw new ArgumentOutOfRangeException(nameof(startIndex), SR.ArgumentOutOfRange_Index);
+ }
+ if (count < 0 || startIndex > currentLength - count)
+ {
+ throw new ArgumentOutOfRangeException(nameof(count), SR.ArgumentOutOfRange_Index);
+ }
+ if (oldValue == null)
+ {
+ throw new ArgumentNullException(nameof(oldValue));
+ }
+ if (oldValue.Length == 0)
+ {
+ throw new ArgumentException(SR.Argument_EmptyName, nameof(oldValue));
+ }
+
+ if (newValue == null)
+ newValue = "";
+
+ int deltaLength = newValue.Length - oldValue.Length;
+
+ int[] replacements = null; // A list of replacement positions in a chunk to apply
+ int replacementsCount = 0;
+
+ // Find the chunk, indexInChunk for the starting point
+ StringBuilder chunk = FindChunkForIndex(startIndex);
+ int indexInChunk = startIndex - chunk.m_ChunkOffset;
+ while (count > 0)
+ {
+ // Look for a match in the chunk,indexInChunk pointer
+ if (StartsWith(chunk, indexInChunk, count, oldValue))
+ {
+ // Push it on my replacements array (with growth), we will do all replacements in a
+ // given chunk in one operation below (see ReplaceAllInChunk) so we don't have to slide
+ // many times.
+ if (replacements == null)
+ replacements = new int[5];
+ else if (replacementsCount >= replacements.Length)
+ {
+ Array.Resize(ref replacements, replacements.Length * 3 / 2 + 4); // grow by 1.5X but more in the beginning
+ }
+ replacements[replacementsCount++] = indexInChunk;
+ indexInChunk += oldValue.Length;
+ count -= oldValue.Length;
+ }
+ else
+ {
+ indexInChunk++;
+ --count;
+ }
+
+ if (indexInChunk >= chunk.m_ChunkLength || count == 0) // Have we moved out of the current chunk
+ {
+ // Replacing mutates the blocks, so we need to convert to logical index and back afterward.
+ int index = indexInChunk + chunk.m_ChunkOffset;
+ int indexBeforeAdjustment = index;
+
+ // See if we accumulated any replacements, if so apply them
+ ReplaceAllInChunk(replacements, replacementsCount, chunk, oldValue.Length, newValue);
+ // The replacement has affected the logical index. Adjust it.
+ index += ((newValue.Length - oldValue.Length) * replacementsCount);
+ replacementsCount = 0;
+
+ chunk = FindChunkForIndex(index);
+ indexInChunk = index - chunk.m_ChunkOffset;
+ Debug.Assert(chunk != null || count == 0, "Chunks ended prematurely");
+ }
+ }
+ VerifyClassInvariant();
+ return this;
+ }
+
+ // Returns a StringBuilder with all instances of oldChar replaced with
+ // newChar. The size of the StringBuilder is unchanged because we're only
+ // replacing characters. If startIndex and count are specified, we
+ // only replace characters in the range from startIndex to startIndex+count
+ //
+ public StringBuilder Replace(char oldChar, char newChar)
+ {
+ return Replace(oldChar, newChar, 0, Length);
+ }
+ public StringBuilder Replace(char oldChar, char newChar, int startIndex, int count)
+ {
+ Contract.Ensures(Contract.Result<StringBuilder>() != null);
+
+ int currentLength = Length;
+ if ((uint)startIndex > (uint)currentLength)
+ {
+ throw new ArgumentOutOfRangeException(nameof(startIndex), SR.ArgumentOutOfRange_Index);
+ }
+
+ if (count < 0 || startIndex > currentLength - count)
+ {
+ throw new ArgumentOutOfRangeException(nameof(count), SR.ArgumentOutOfRange_Index);
+ }
+
+ int endIndex = startIndex + count;
+ StringBuilder chunk = this;
+ for (;;)
+ {
+ int endIndexInChunk = endIndex - chunk.m_ChunkOffset;
+ int startIndexInChunk = startIndex - chunk.m_ChunkOffset;
+ if (endIndexInChunk >= 0)
+ {
+ int curInChunk = Math.Max(startIndexInChunk, 0);
+ int endInChunk = Math.Min(chunk.m_ChunkLength, endIndexInChunk);
+ while (curInChunk < endInChunk)
+ {
+ if (chunk.m_ChunkChars[curInChunk] == oldChar)
+ chunk.m_ChunkChars[curInChunk] = newChar;
+ curInChunk++;
+ }
+ }
+ if (startIndexInChunk >= 0)
+ break;
+ chunk = chunk.m_ChunkPrevious;
+ }
+ return this;
+ }
+
+ /// <summary>
+ /// Appends 'value' of length 'count' to the stringBuilder.
+ /// </summary>
+ [CLSCompliant(false)]
+ public unsafe StringBuilder Append(char* value, int valueCount)
+ {
+ // We don't check null value as this case will throw null reference exception anyway
+ if (valueCount < 0)
+ {
+ throw new ArgumentOutOfRangeException(nameof(valueCount), SR.ArgumentOutOfRange_NegativeCount);
+ }
+
+ // this is where we can check if the valueCount will put us over m_MaxCapacity
+ // We are doing the check here to prevent the corruption of the StringBuilder.
+ int newLength = Length + valueCount;
+ if (newLength > m_MaxCapacity || newLength < valueCount)
+ {
+ throw new ArgumentOutOfRangeException(nameof(valueCount), SR.ArgumentOutOfRange_LengthGreaterThanCapacity);
+ }
+
+ // This case is so common we want to optimize for it heavily.
+ int newIndex = valueCount + m_ChunkLength;
+ if (newIndex <= m_ChunkChars.Length)
+ {
+ ThreadSafeCopy(value, m_ChunkChars, m_ChunkLength, valueCount);
+ m_ChunkLength = newIndex;
+ }
+ else
+ {
+ // Copy the first chunk
+ int firstLength = m_ChunkChars.Length - m_ChunkLength;
+ if (firstLength > 0)
+ {
+ ThreadSafeCopy(value, m_ChunkChars, m_ChunkLength, firstLength);
+ m_ChunkLength = m_ChunkChars.Length;
+ }
+
+ // Expand the builder to add another chunk.
+ int restLength = valueCount - firstLength;
+ ExpandByABlock(restLength);
+ Debug.Assert(m_ChunkLength == 0, "Expand did not make a new block");
+
+ // Copy the second chunk
+ ThreadSafeCopy(value + firstLength, m_ChunkChars, 0, restLength);
+ m_ChunkLength = restLength;
+ }
+ VerifyClassInvariant();
+ return this;
+ }
+
+ /// <summary>
+ /// Inserts 'value' of length 'cou
+ /// </summary>
+ unsafe private void Insert(int index, char* value, int valueCount)
+ {
+ if ((uint)index > (uint)Length)
+ {
+ throw new ArgumentOutOfRangeException(nameof(index), SR.ArgumentOutOfRange_Index);
+ }
+
+ if (valueCount > 0)
+ {
+ StringBuilder chunk;
+ int indexInChunk;
+ MakeRoom(index, valueCount, out chunk, out indexInChunk, false);
+ ReplaceInPlaceAtChunk(ref chunk, ref indexInChunk, value, valueCount);
+ }
+ }
+
+ /// <summary>
+ /// 'replacements' is a list of index (relative to the begining of the 'chunk' to remove
+ /// 'removeCount' characters and replace them with 'value'. This routine does all those
+ /// replacements in bulk (and therefore very efficiently.
+ /// with the string 'value'.
+ /// </summary>
+ private void ReplaceAllInChunk(int[] replacements, int replacementsCount, StringBuilder sourceChunk, int removeCount, string value)
+ {
+ if (replacementsCount <= 0)
+ return;
+
+ unsafe
+ {
+ fixed (char* valuePtr = value)
+ {
+ // calculate the total amount of extra space or space needed for all the replacements.
+ int delta = (value.Length - removeCount) * replacementsCount;
+
+ StringBuilder targetChunk = sourceChunk; // the target as we copy chars down
+ int targetIndexInChunk = replacements[0];
+
+ // Make the room needed for all the new characters if needed.
+ if (delta > 0)
+ MakeRoom(targetChunk.m_ChunkOffset + targetIndexInChunk, delta, out targetChunk, out targetIndexInChunk, true);
+ // We made certain that characters after the insertion point are not moved,
+ int i = 0;
+ for (;;)
+ {
+ // Copy in the new string for the ith replacement
+ ReplaceInPlaceAtChunk(ref targetChunk, ref targetIndexInChunk, valuePtr, value.Length);
+ int gapStart = replacements[i] + removeCount;
+ i++;
+ if (i >= replacementsCount)
+ break;
+
+ int gapEnd = replacements[i];
+ Debug.Assert(gapStart < sourceChunk.m_ChunkChars.Length, "gap starts at end of buffer. Should not happen");
+ Debug.Assert(gapStart <= gapEnd, "negative gap size");
+ Debug.Assert(gapEnd <= sourceChunk.m_ChunkLength, "gap too big");
+ if (delta != 0) // can skip the sliding of gaps if source an target string are the same size.
+ {
+ // Copy the gap data between the current replacement and the the next replacement
+ fixed (char* sourcePtr = &sourceChunk.m_ChunkChars[gapStart])
+ ReplaceInPlaceAtChunk(ref targetChunk, ref targetIndexInChunk, sourcePtr, gapEnd - gapStart);
+ }
+ else
+ {
+ targetIndexInChunk += gapEnd - gapStart;
+ Debug.Assert(targetIndexInChunk <= targetChunk.m_ChunkLength, "gap not in chunk");
+ }
+ }
+
+ // Remove extra space if necessary.
+ if (delta < 0)
+ Remove(targetChunk.m_ChunkOffset + targetIndexInChunk, -delta, out targetChunk, out targetIndexInChunk);
+ }
+ }
+ }
+
+ /// <summary>
+ /// Returns true if the string that is starts at 'chunk' and 'indexInChunk, and has a logical
+ /// length of 'count' starts with the string 'value'.
+ /// </summary>
+ private bool StartsWith(StringBuilder chunk, int indexInChunk, int count, string value)
+ {
+ for (int i = 0; i < value.Length; i++)
+ {
+ if (count == 0)
+ return false;
+ if (indexInChunk >= chunk.m_ChunkLength)
+ {
+ chunk = Next(chunk);
+ if (chunk == null)
+ return false;
+ indexInChunk = 0;
+ }
+
+ // See if there no match, break out of the inner for loop
+ if (value[i] != chunk.m_ChunkChars[indexInChunk])
+ return false;
+
+ indexInChunk++;
+ --count;
+ }
+ return true;
+ }
+
+ /// <summary>
+ /// ReplaceInPlaceAtChunk is the logical equivalent of 'memcpy'. Given a chunk and ann index in
+ /// that chunk, it copies in 'count' characters from 'value' and updates 'chunk, and indexInChunk to
+ /// point at the end of the characters just copyied (thus you can splice in strings from multiple
+ /// places by calling this mulitple times.
+ /// </summary>
+ unsafe private void ReplaceInPlaceAtChunk(ref StringBuilder chunk, ref int indexInChunk, char* value, int count)
+ {
+ if (count != 0)
+ {
+ for (;;)
+ {
+ int lengthInChunk = chunk.m_ChunkLength - indexInChunk;
+ Debug.Assert(lengthInChunk >= 0, "index not in chunk");
+
+ int lengthToCopy = Math.Min(lengthInChunk, count);
+ ThreadSafeCopy(value, chunk.m_ChunkChars, indexInChunk, lengthToCopy);
+
+ // Advance the index.
+ indexInChunk += lengthToCopy;
+ if (indexInChunk >= chunk.m_ChunkLength)
+ {
+ chunk = Next(chunk);
+ indexInChunk = 0;
+ }
+ count -= lengthToCopy;
+ if (count == 0)
+ break;
+ value += lengthToCopy;
+ }
+ }
+ }
+
+ /// <summary>
+ /// We have to prevent modification off the end of an array.
+ /// The only way to do this is to copy all interesting variables out of the heap and then do the
+ /// bounds check. This is what we do here.
+ /// </summary>
+ private static unsafe void ThreadSafeCopy(char* sourcePtr, char[] destination, int destinationIndex, int count)
+ {
+ if (count > 0)
+ {
+ if ((uint)destinationIndex <= (uint)destination.Length && (destinationIndex + count) <= destination.Length)
+ {
+ fixed (char* destinationPtr = &destination[destinationIndex])
+ string.wstrcpy(destinationPtr, sourcePtr, count);
+ }
+ else
+ {
+ throw new ArgumentOutOfRangeException(nameof(destinationIndex), SR.ArgumentOutOfRange_Index);
+ }
+ }
+ }
+
+ private static void ThreadSafeCopy(char[] source, int sourceIndex, char[] destination, int destinationIndex, int count)
+ {
+ if (count > 0)
+ {
+ if ((uint)sourceIndex <= (uint)source.Length && (sourceIndex + count) <= source.Length)
+ {
+ unsafe
+ {
+ fixed (char* sourcePtr = &source[sourceIndex])
+ ThreadSafeCopy(sourcePtr, destination, destinationIndex, count);
+ }
+ }
+ else
+ {
+ throw new ArgumentOutOfRangeException(nameof(sourceIndex), SR.ArgumentOutOfRange_Index);
+ }
+ }
+ }
+
+ /// <summary>
+ /// Finds the chunk for the logical index (number of characters in the whole stringbuilder) 'index'
+ /// YOu can then get the offset in this chunk by subtracting the m_BlockOffset field from 'index'
+ /// </summary>
+ /// <param name="index"></param>
+ /// <returns></returns>
+ private StringBuilder FindChunkForIndex(int index)
+ {
+ Debug.Assert(0 <= index && index <= Length, "index not in string");
+
+ StringBuilder ret = this;
+ while (ret.m_ChunkOffset > index)
+ ret = ret.m_ChunkPrevious;
+
+ Debug.Assert(ret != null, "index not in string");
+ return ret;
+ }
+
+ /// <summary>
+ /// Finds the chunk for the logical byte index 'byteIndex'
+ /// </summary>
+ /// <param name="index"></param>
+ /// <returns></returns>
+ private StringBuilder FindChunkForByte(int byteIndex)
+ {
+ Debug.Assert(0 <= byteIndex && byteIndex <= Length * sizeof(char), "Byte Index not in string");
+
+ StringBuilder ret = this;
+ while (ret.m_ChunkOffset * sizeof(char) > byteIndex)
+ ret = ret.m_ChunkPrevious;
+
+ Debug.Assert(ret != null, "Byte Index not in string");
+ return ret;
+ }
+
+ /// <summary>
+ /// Finds the chunk that logically follows the 'chunk' chunk. Chunks only persist the pointer to
+ /// the chunk that is logically before it, so this routine has to start at the this pointer (which
+ /// is a assumed to point at the chunk representing the whole stringbuilder) and search
+ /// until it finds the current chunk (thus is O(n)). So it is more expensive than a field fetch!
+ /// </summary>
+ private StringBuilder Next(StringBuilder chunk)
+ {
+ if (chunk == this)
+ return null;
+ return FindChunkForIndex(chunk.m_ChunkOffset + chunk.m_ChunkLength);
+ }
+
+ /// <summary>
+ /// Assumes that 'this' is the last chunk in the list and that it is full. Upon return the 'this'
+ /// block is updated so that it is a new block that has at least 'minBlockCharCount' characters.
+ /// that can be used to copy characters into it.
+ /// </summary>
+ private void ExpandByABlock(int minBlockCharCount)
+ {
+ Contract.Requires(Capacity == Length, "Expand expect to be called only when there is no space left"); // We are currently full
+ Contract.Requires(minBlockCharCount > 0, "Expansion request must be positive");
+
+ VerifyClassInvariant();
+
+ if ((minBlockCharCount + Length) > m_MaxCapacity || minBlockCharCount + Length < minBlockCharCount)
+ throw new ArgumentOutOfRangeException("requiredLength", SR.ArgumentOutOfRange_SmallCapacity);
+
+ // Compute the length of the new block we need
+ // We make the new chunk at least big enough for the current need (minBlockCharCount)
+ // But also as big as the current length (thus doubling capacity), up to a maximum
+ // (so we stay in the small object heap, and never allocate really big chunks even if
+ // the string gets really big.
+ int newBlockLength = Math.Max(minBlockCharCount, Math.Min(Length, MaxChunkSize));
+
+ // Copy the current block to the new block, and initialize this to point at the new buffer.
+ m_ChunkPrevious = new StringBuilder(this);
+ m_ChunkOffset += m_ChunkLength;
+ m_ChunkLength = 0;
+
+ // Check for integer overflow (logical buffer size > int.MaxInt)
+ if (m_ChunkOffset + newBlockLength < newBlockLength)
+ {
+ m_ChunkChars = null;
+ throw new OutOfMemoryException();
+ }
+ m_ChunkChars = new char[newBlockLength];
+
+ VerifyClassInvariant();
+ }
+
+ /// <summary>
+ /// Used by ExpandByABlock to create a new chunk. The new chunk is a copied from 'from'
+ /// In particular the buffer is shared. It is expected that 'from' chunk (which represents
+ /// the whole list, is then updated to point to point to this new chunk.
+ /// </summary>
+ private StringBuilder(StringBuilder from)
+ {
+ m_ChunkLength = from.m_ChunkLength;
+ m_ChunkOffset = from.m_ChunkOffset;
+ m_ChunkChars = from.m_ChunkChars;
+ m_ChunkPrevious = from.m_ChunkPrevious;
+ m_MaxCapacity = from.m_MaxCapacity;
+ VerifyClassInvariant();
+ }
+
+ /// <summary>
+ /// Creates a gap of size 'count' at the logical offset (count of characters in the whole string
+ /// builder) 'index'. It returns the 'chunk' and 'indexInChunk' which represents a pointer to
+ /// this gap that was just created. You can then use 'ReplaceInPlaceAtChunk' to fill in the
+ /// chunk
+ ///
+ /// ReplaceAllChunks relies on the fact that indexes above 'index' are NOT moved outside 'chunk'
+ /// by this process (because we make the space by creating the cap BEFORE the chunk). If we
+ /// change this ReplaceAllChunks needs to be updated.
+ ///
+ /// If dontMoveFollowingChars is true, then the room must be made by inserting a chunk BEFORE the
+ /// current chunk (this is what it does most of the time anyway)
+ /// </summary>
+ private void MakeRoom(int index, int count, out StringBuilder chunk, out int indexInChunk, bool doneMoveFollowingChars)
+ {
+ VerifyClassInvariant();
+ Debug.Assert(count > 0, "Count must be strictly positive");
+ Debug.Assert(index >= 0, "Index can't be negative");
+ if (count + Length > m_MaxCapacity || count + Length < count)
+ throw new ArgumentOutOfRangeException("requiredLength", SR.ArgumentOutOfRange_SmallCapacity);
+
+ chunk = this;
+ while (chunk.m_ChunkOffset > index)
+ {
+ chunk.m_ChunkOffset += count;
+ chunk = chunk.m_ChunkPrevious;
+ }
+ indexInChunk = index - chunk.m_ChunkOffset;
+
+ // Cool, we have some space in this block, and you don't have to copy much to get it, go ahead
+ // and use it. This happens typically when you repeatedly insert small strings at a spot
+ // (typically the absolute front) of the buffer.
+ if (!doneMoveFollowingChars && chunk.m_ChunkLength <= DefaultCapacity * 2 && chunk.m_ChunkChars.Length - chunk.m_ChunkLength >= count)
+ {
+ for (int i = chunk.m_ChunkLength; i > indexInChunk;)
+ {
+ --i;
+ chunk.m_ChunkChars[i + count] = chunk.m_ChunkChars[i];
+ }
+ chunk.m_ChunkLength += count;
+ return;
+ }
+
+ // Allocate space for the new chunk (will go before this one)
+ StringBuilder newChunk = new StringBuilder(Math.Max(count, DefaultCapacity), chunk.m_MaxCapacity, chunk.m_ChunkPrevious);
+ newChunk.m_ChunkLength = count;
+
+ // Copy the head of the buffer to the new buffer.
+ int copyCount1 = Math.Min(count, indexInChunk);
+ if (copyCount1 > 0)
+ {
+ unsafe
+ {
+ fixed (char* chunkCharsPtr = &chunk.m_ChunkChars[0])
+ {
+ ThreadSafeCopy(chunkCharsPtr, newChunk.m_ChunkChars, 0, copyCount1);
+
+ // Slide characters in the current buffer over to make room.
+ int copyCount2 = indexInChunk - copyCount1;
+ if (copyCount2 >= 0)
+ {
+ ThreadSafeCopy(chunkCharsPtr + copyCount1, chunk.m_ChunkChars, 0, copyCount2);
+ indexInChunk = copyCount2;
+ }
+ }
+ }
+ }
+
+ chunk.m_ChunkPrevious = newChunk; // Wire in the new chunk
+ chunk.m_ChunkOffset += count;
+ if (copyCount1 < count)
+ {
+ chunk = newChunk;
+ indexInChunk = copyCount1;
+ }
+
+ VerifyClassInvariant();
+ }
+
+ /// <summary>
+ /// Used by MakeRoom to allocate another chunk.
+ /// </summary>
+ private StringBuilder(int size, int maxCapacity, StringBuilder previousBlock)
+ {
+ Debug.Assert(size > 0, "size not positive");
+ Debug.Assert(maxCapacity > 0, "maxCapacity not positive");
+ m_ChunkChars = new char[size];
+ m_MaxCapacity = maxCapacity;
+ m_ChunkPrevious = previousBlock;
+ if (previousBlock != null)
+ m_ChunkOffset = previousBlock.m_ChunkOffset + previousBlock.m_ChunkLength;
+ VerifyClassInvariant();
+ }
+
+ /// <summary>
+ /// Removes 'count' characters from the logical index 'startIndex' and returns the chunk and
+ /// index in the chunk of that logical index in the out parameters.
+ /// </summary>
+ private void Remove(int startIndex, int count, out StringBuilder chunk, out int indexInChunk)
+ {
+ VerifyClassInvariant();
+ Debug.Assert(startIndex >= 0 && startIndex < Length, "startIndex not in string");
+
+ int endIndex = startIndex + count;
+
+ // Find the chunks for the start and end of the block to delete.
+ chunk = this;
+ StringBuilder endChunk = null;
+ int endIndexInChunk = 0;
+ for (;;)
+ {
+ if (endIndex - chunk.m_ChunkOffset >= 0)
+ {
+ if (endChunk == null)
+ {
+ endChunk = chunk;
+ endIndexInChunk = endIndex - endChunk.m_ChunkOffset;
+ }
+ if (startIndex - chunk.m_ChunkOffset >= 0)
+ {
+ indexInChunk = startIndex - chunk.m_ChunkOffset;
+ break;
+ }
+ }
+ else
+ {
+ chunk.m_ChunkOffset -= count;
+ }
+ chunk = chunk.m_ChunkPrevious;
+ }
+ Debug.Assert(chunk != null, "fell off beginning of string!");
+
+ int copyTargetIndexInChunk = indexInChunk;
+ int copyCount = endChunk.m_ChunkLength - endIndexInChunk;
+ if (endChunk != chunk)
+ {
+ copyTargetIndexInChunk = 0;
+ // Remove the characters after startIndex to end of the chunk
+ chunk.m_ChunkLength = indexInChunk;
+
+ // Remove the characters in chunks between start and end chunk
+ endChunk.m_ChunkPrevious = chunk;
+ endChunk.m_ChunkOffset = chunk.m_ChunkOffset + chunk.m_ChunkLength;
+
+ // If the start is 0 then we can throw away the whole start chunk
+ if (indexInChunk == 0)
+ {
+ endChunk.m_ChunkPrevious = chunk.m_ChunkPrevious;
+ chunk = endChunk;
+ }
+ }
+ endChunk.m_ChunkLength -= (endIndexInChunk - copyTargetIndexInChunk);
+
+ // SafeCritical: We ensure that endIndexInChunk + copyCount is within range of m_ChunkChars and
+ // also ensure that copyTargetIndexInChunk + copyCount is within the chunk
+ //
+ // Remove any characters in the end chunk, by sliding the characters down.
+ if (copyTargetIndexInChunk != endIndexInChunk) // Sometimes no move is necessary
+ ThreadSafeCopy(endChunk.m_ChunkChars, endIndexInChunk, endChunk.m_ChunkChars, copyTargetIndexInChunk, copyCount);
+
+ Debug.Assert(chunk != null, "fell off beginning of string!");
+ VerifyClassInvariant();
+ }
+ }
+}
diff --git a/src/mscorlib/shared/System/Text/UTF32Encoding.cs b/src/mscorlib/shared/System/Text/UTF32Encoding.cs
new file mode 100644
index 0000000000..e4cd6c960e
--- /dev/null
+++ b/src/mscorlib/shared/System/Text/UTF32Encoding.cs
@@ -0,0 +1,1234 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+//
+// Don't override IsAlwaysNormalized because it is just a Unicode Transformation and could be confused.
+//
+
+using System;
+using System.Diagnostics;
+using System.Diagnostics.Contracts;
+using System.Globalization;
+
+namespace System.Text
+{
+ // Encodes text into and out of UTF-32. UTF-32 is a way of writing
+ // Unicode characters with a single storage unit (32 bits) per character,
+ //
+ // The UTF-32 byte order mark is simply the Unicode byte order mark
+ // (0x00FEFF) written in UTF-32 (0x0000FEFF or 0xFFFE0000). The byte order
+ // mark is used mostly to distinguish UTF-32 text from other encodings, and doesn't
+ // switch the byte orderings.
+
+ [Serializable]
+ public sealed class UTF32Encoding : Encoding
+ {
+ /*
+ words bits UTF-32 representation
+ ----- ---- -----------------------------------
+ 1 16 00000000 00000000 xxxxxxxx xxxxxxxx
+ 2 21 00000000 000xxxxx hhhhhhll llllllll
+ ----- ---- -----------------------------------
+
+ Surrogate:
+ Real Unicode value = (HighSurrogate - 0xD800) * 0x400 + (LowSurrogate - 0xDC00) + 0x10000
+ */
+
+ // Used by Encoding.UTF32/BigEndianUTF32 for lazy initialization
+ // The initialization code will not be run until a static member of the class is referenced
+ internal static readonly UTF32Encoding s_default = new UTF32Encoding(bigEndian: false, byteOrderMark: true);
+ internal static readonly UTF32Encoding s_bigEndianDefault = new UTF32Encoding(bigEndian: true, byteOrderMark: true);
+
+ private bool _emitUTF32ByteOrderMark = false;
+ private bool _isThrowException = false;
+ private bool _bigEndian = false;
+
+
+ public UTF32Encoding() : this(false, true, false)
+ {
+ }
+
+
+ public UTF32Encoding(bool bigEndian, bool byteOrderMark) :
+ this(bigEndian, byteOrderMark, false)
+ {
+ }
+
+
+ public UTF32Encoding(bool bigEndian, bool byteOrderMark, bool throwOnInvalidCharacters) :
+ base(bigEndian ? 12001 : 12000)
+ {
+ _bigEndian = bigEndian;
+ _emitUTF32ByteOrderMark = byteOrderMark;
+ _isThrowException = throwOnInvalidCharacters;
+
+ // Encoding constructor already did this, but it'll be wrong if we're throwing exceptions
+ if (_isThrowException)
+ SetDefaultFallbacks();
+ }
+
+ internal override void SetDefaultFallbacks()
+ {
+ // For UTF-X encodings, we use a replacement fallback with an empty string
+ if (_isThrowException)
+ {
+ this.encoderFallback = EncoderFallback.ExceptionFallback;
+ this.decoderFallback = DecoderFallback.ExceptionFallback;
+ }
+ else
+ {
+ this.encoderFallback = new EncoderReplacementFallback("\xFFFD");
+ this.decoderFallback = new DecoderReplacementFallback("\xFFFD");
+ }
+ }
+
+
+ // The following methods are copied from EncodingNLS.cs.
+ // Unfortunately EncodingNLS.cs is internal and we're public, so we have to reimpliment them here.
+ // These should be kept in sync for the following classes:
+ // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
+
+ // Returns the number of bytes required to encode a range of characters in
+ // a character array.
+ //
+ // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
+ // So if you fix this, fix the others. Currently those include:
+ // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
+ // parent method is safe
+
+ public override unsafe int GetByteCount(char[] chars, int index, int count)
+ {
+ // Validate input parameters
+ if (chars == null)
+ throw new ArgumentNullException("chars", SR.ArgumentNull_Array);
+
+ if (index < 0 || count < 0)
+ throw new ArgumentOutOfRangeException((index < 0 ? "index" : "count"), SR.ArgumentOutOfRange_NeedNonNegNum);
+
+ if (chars.Length - index < count)
+ throw new ArgumentOutOfRangeException("chars", SR.ArgumentOutOfRange_IndexCountBuffer);
+ Contract.EndContractBlock();
+
+ // If no input, return 0, avoid fixed empty array problem
+ if (count == 0)
+ return 0;
+
+ // Just call the pointer version
+ fixed (char* pChars = chars)
+ return GetByteCount(pChars + index, count, null);
+ }
+
+ // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
+ // So if you fix this, fix the others. Currently those include:
+ // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
+ // parent method is safe
+
+ public override unsafe int GetByteCount(String s)
+ {
+ // Validate input
+ if (s==null)
+ throw new ArgumentNullException("s");
+ Contract.EndContractBlock();
+
+ fixed (char* pChars = s)
+ return GetByteCount(pChars, s.Length, null);
+ }
+
+ // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
+ // So if you fix this, fix the others. Currently those include:
+ // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
+
+ [CLSCompliant(false)]
+ public override unsafe int GetByteCount(char* chars, int count)
+ {
+ // Validate Parameters
+ if (chars == null)
+ throw new ArgumentNullException("chars", SR.ArgumentNull_Array);
+
+ if (count < 0)
+ throw new ArgumentOutOfRangeException("count", SR.ArgumentOutOfRange_NeedNonNegNum);
+ Contract.EndContractBlock();
+
+ // Call it with empty encoder
+ return GetByteCount(chars, count, null);
+ }
+
+ // Parent method is safe.
+ // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
+ // So if you fix this, fix the others. Currently those include:
+ // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
+
+ public override unsafe int GetBytes(String s, int charIndex, int charCount,
+ byte[] bytes, int byteIndex)
+ {
+ if (s == null || bytes == null)
+ throw new ArgumentNullException((s == null ? "s" : "bytes"), SR.ArgumentNull_Array);
+
+ if (charIndex < 0 || charCount < 0)
+ throw new ArgumentOutOfRangeException((charIndex < 0 ? "charIndex" : "charCount"), SR.ArgumentOutOfRange_NeedNonNegNum);
+
+ if (s.Length - charIndex < charCount)
+ throw new ArgumentOutOfRangeException("s", SR.ArgumentOutOfRange_IndexCount);
+
+ if (byteIndex < 0 || byteIndex > bytes.Length)
+ throw new ArgumentOutOfRangeException("byteIndex", SR.ArgumentOutOfRange_Index);
+ Contract.EndContractBlock();
+
+ int byteCount = bytes.Length - byteIndex;
+
+ // Fix our input array if 0 length because fixed doesn't like 0 length arrays
+ if (bytes.Length == 0)
+ bytes = new byte[1];
+
+ fixed (char* pChars = s) fixed (byte* pBytes = &bytes[0])
+ return GetBytes(pChars + charIndex, charCount, pBytes + byteIndex, byteCount, null);
+ }
+
+ // Encodes a range of characters in a character array into a range of bytes
+ // in a byte array. An exception occurs if the byte array is not large
+ // enough to hold the complete encoding of the characters. The
+ // GetByteCount method can be used to determine the exact number of
+ // bytes that will be produced for a given range of characters.
+ // Alternatively, the GetMaxByteCount method can be used to
+ // determine the maximum number of bytes that will be produced for a given
+ // number of characters, regardless of the actual character values.
+ //
+ // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
+ // So if you fix this, fix the others. Currently those include:
+ // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
+ // parent method is safe
+
+ public override unsafe int GetBytes(char[] chars, int charIndex, int charCount,
+ byte[] bytes, int byteIndex)
+ {
+ // Validate parameters
+ if (chars == null || bytes == null)
+ throw new ArgumentNullException((chars == null ? "chars" : "bytes"), SR.ArgumentNull_Array);
+
+ if (charIndex < 0 || charCount < 0)
+ throw new ArgumentOutOfRangeException((charIndex < 0 ? "charIndex" : "charCount"), SR.ArgumentOutOfRange_NeedNonNegNum);
+
+ if (chars.Length - charIndex < charCount)
+ throw new ArgumentOutOfRangeException("chars", SR.ArgumentOutOfRange_IndexCountBuffer);
+
+ if (byteIndex < 0 || byteIndex > bytes.Length)
+ throw new ArgumentOutOfRangeException("byteIndex", SR.ArgumentOutOfRange_Index);
+ Contract.EndContractBlock();
+
+ // If nothing to encode return 0, avoid fixed problem
+ if (charCount == 0)
+ return 0;
+
+ // Just call pointer version
+ int byteCount = bytes.Length - byteIndex;
+
+ // Fix our input array if 0 length because fixed doesn't like 0 length arrays
+ if (bytes.Length == 0)
+ bytes = new byte[1];
+
+ fixed (char* pChars = chars) fixed (byte* pBytes = &bytes[0])
+ // Remember that byteCount is # to decode, not size of array.
+ return GetBytes(pChars + charIndex, charCount, pBytes + byteIndex, byteCount, null);
+ }
+
+ // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
+ // So if you fix this, fix the others. Currently those include:
+ // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
+
+ [CLSCompliant(false)]
+ public override unsafe int GetBytes(char* chars, int charCount, byte* bytes, int byteCount)
+ {
+ // Validate Parameters
+ if (bytes == null || chars == null)
+ throw new ArgumentNullException(bytes == null ? "bytes" : "chars", SR.ArgumentNull_Array);
+
+ if (charCount < 0 || byteCount < 0)
+ throw new ArgumentOutOfRangeException((charCount < 0 ? "charCount" : "byteCount"), SR.ArgumentOutOfRange_NeedNonNegNum);
+ Contract.EndContractBlock();
+
+ return GetBytes(chars, charCount, bytes, byteCount, null);
+ }
+
+ // Returns the number of characters produced by decoding a range of bytes
+ // in a byte array.
+ //
+ // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
+ // So if you fix this, fix the others. Currently those include:
+ // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
+ // parent method is safe
+
+ public override unsafe int GetCharCount(byte[] bytes, int index, int count)
+ {
+ // Validate Parameters
+ if (bytes == null)
+ throw new ArgumentNullException("bytes", SR.ArgumentNull_Array);
+
+ if (index < 0 || count < 0)
+ throw new ArgumentOutOfRangeException((index < 0 ? "index" : "count"), SR.ArgumentOutOfRange_NeedNonNegNum);
+
+ if (bytes.Length - index < count)
+ throw new ArgumentOutOfRangeException("bytes", SR.ArgumentOutOfRange_IndexCountBuffer);
+ Contract.EndContractBlock();
+
+ // If no input just return 0, fixed doesn't like 0 length arrays.
+ if (count == 0)
+ return 0;
+
+ // Just call pointer version
+ fixed (byte* pBytes = bytes)
+ return GetCharCount(pBytes + index, count, null);
+ }
+
+ // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
+ // So if you fix this, fix the others. Currently those include:
+ // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
+
+ [CLSCompliant(false)]
+ public override unsafe int GetCharCount(byte* bytes, int count)
+ {
+ // Validate Parameters
+ if (bytes == null)
+ throw new ArgumentNullException("bytes", SR.ArgumentNull_Array);
+
+ if (count < 0)
+ throw new ArgumentOutOfRangeException("count", SR.ArgumentOutOfRange_NeedNonNegNum);
+ Contract.EndContractBlock();
+
+ return GetCharCount(bytes, count, null);
+ }
+
+ // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
+ // So if you fix this, fix the others. Currently those include:
+ // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
+ // parent method is safe
+
+ public override unsafe int GetChars(byte[] bytes, int byteIndex, int byteCount,
+ char[] chars, int charIndex)
+ {
+ // Validate Parameters
+ if (bytes == null || chars == null)
+ throw new ArgumentNullException(bytes == null ? "bytes" : "chars", SR.ArgumentNull_Array);
+
+ if (byteIndex < 0 || byteCount < 0)
+ throw new ArgumentOutOfRangeException((byteIndex < 0 ? "byteIndex" : "byteCount"), SR.ArgumentOutOfRange_NeedNonNegNum);
+
+ if ( bytes.Length - byteIndex < byteCount)
+ throw new ArgumentOutOfRangeException("bytes", SR.ArgumentOutOfRange_IndexCountBuffer);
+
+ if (charIndex < 0 || charIndex > chars.Length)
+ throw new ArgumentOutOfRangeException("charIndex", SR.ArgumentOutOfRange_Index);
+ Contract.EndContractBlock();
+
+ // If no input, return 0 & avoid fixed problem
+ if (byteCount == 0)
+ return 0;
+
+ // Just call pointer version
+ int charCount = chars.Length - charIndex;
+
+ // Fix our input array if 0 length because fixed doesn't like 0 length arrays
+ if (chars.Length == 0)
+ chars = new char[1];
+
+ fixed (byte* pBytes = bytes) fixed (char* pChars = &chars[0])
+ // Remember that charCount is # to decode, not size of array
+ return GetChars(pBytes + byteIndex, byteCount, pChars + charIndex, charCount, null);
+ }
+
+ // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
+ // So if you fix this, fix the others. Currently those include:
+ // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
+
+ [CLSCompliant(false)]
+ public unsafe override int GetChars(byte* bytes, int byteCount, char* chars, int charCount)
+ {
+ // Validate Parameters
+ if (bytes == null || chars == null)
+ throw new ArgumentNullException(bytes == null ? "bytes" : "chars", SR.ArgumentNull_Array);
+
+ if (charCount < 0 || byteCount < 0)
+ throw new ArgumentOutOfRangeException((charCount < 0 ? "charCount" : "byteCount"), SR.ArgumentOutOfRange_NeedNonNegNum);
+ Contract.EndContractBlock();
+
+ return GetChars(bytes, byteCount, chars, charCount, null);
+ }
+
+ // Returns a string containing the decoded representation of a range of
+ // bytes in a byte array.
+ //
+ // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
+ // So if you fix this, fix the others. Currently those include:
+ // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
+ // parent method is safe
+
+ public override unsafe String GetString(byte[] bytes, int index, int count)
+ {
+ // Validate Parameters
+ if (bytes == null)
+ throw new ArgumentNullException("bytes", SR.ArgumentNull_Array);
+
+ if (index < 0 || count < 0)
+ throw new ArgumentOutOfRangeException((index < 0 ? "index" : "count"), SR.ArgumentOutOfRange_NeedNonNegNum);
+
+ if (bytes.Length - index < count)
+ throw new ArgumentOutOfRangeException("bytes", SR.ArgumentOutOfRange_IndexCountBuffer);
+ Contract.EndContractBlock();
+
+ // Avoid problems with empty input buffer
+ if (count == 0) return String.Empty;
+
+ fixed (byte* pBytes = bytes)
+ return String.CreateStringFromEncoding(
+ pBytes + index, count, this);
+ }
+
+ //
+ // End of standard methods copied from EncodingNLS.cs
+ //
+
+ internal override unsafe int GetByteCount(char* chars, int count, EncoderNLS encoder)
+ {
+ Debug.Assert(chars != null, "[UTF32Encoding.GetByteCount]chars!=null");
+ Debug.Assert(count >= 0, "[UTF32Encoding.GetByteCount]count >=0");
+
+ char* end = chars + count;
+ char* charStart = chars;
+ int byteCount = 0;
+
+ char highSurrogate = '\0';
+
+ // For fallback we may need a fallback buffer
+ EncoderFallbackBuffer fallbackBuffer = null;
+ char* charsForFallback;
+
+ if (encoder != null)
+ {
+ highSurrogate = encoder.charLeftOver;
+ fallbackBuffer = encoder.FallbackBuffer;
+
+ // We mustn't have left over fallback data when counting
+ if (fallbackBuffer.Remaining > 0)
+ throw new ArgumentException(SR.Format(SR.Argument_EncoderFallbackNotEmpty, this.EncodingName, encoder.Fallback.GetType()));
+ }
+ else
+ {
+ fallbackBuffer = this.encoderFallback.CreateFallbackBuffer();
+ }
+
+ // Set our internal fallback interesting things.
+ fallbackBuffer.InternalInitialize(charStart, end, encoder, false);
+
+ char ch;
+ TryAgain:
+
+ while (((ch = fallbackBuffer.InternalGetNextChar()) != 0) || chars < end)
+ {
+ // First unwind any fallback
+ if (ch == 0)
+ {
+ // No fallback, just get next char
+ ch = *chars;
+ chars++;
+ }
+
+ // Do we need a low surrogate?
+ if (highSurrogate != '\0')
+ {
+ //
+ // In previous char, we encounter a high surrogate, so we are expecting a low surrogate here.
+ //
+ if (Char.IsLowSurrogate(ch))
+ {
+ // They're all legal
+ highSurrogate = '\0';
+
+ //
+ // One surrogate pair will be translated into 4 bytes UTF32.
+ //
+
+ byteCount += 4;
+ continue;
+ }
+
+ // We are missing our low surrogate, decrement chars and fallback the high surrogate
+ // The high surrogate may have come from the encoder, but nothing else did.
+ Debug.Assert(chars > charStart,
+ "[UTF32Encoding.GetByteCount]Expected chars to have advanced if no low surrogate");
+ chars--;
+
+ // Do the fallback
+ charsForFallback = chars;
+ fallbackBuffer.InternalFallback(highSurrogate, ref charsForFallback);
+ chars = charsForFallback;
+
+ // We're going to fallback the old high surrogate.
+ highSurrogate = '\0';
+ continue;
+ }
+
+ // Do we have another high surrogate?
+ if (Char.IsHighSurrogate(ch))
+ {
+ //
+ // We'll have a high surrogate to check next time.
+ //
+ highSurrogate = ch;
+ continue;
+ }
+
+ // Check for illegal characters
+ if (Char.IsLowSurrogate(ch))
+ {
+ // We have a leading low surrogate, do the fallback
+ charsForFallback = chars;
+ fallbackBuffer.InternalFallback(ch, ref charsForFallback);
+ chars = charsForFallback;
+
+ // Try again with fallback buffer
+ continue;
+ }
+
+ // We get to add the character (4 bytes UTF32)
+ byteCount += 4;
+ }
+
+ // May have to do our last surrogate
+ if ((encoder == null || encoder.MustFlush) && highSurrogate > 0)
+ {
+ // We have to do the fallback for the lonely high surrogate
+ charsForFallback = chars;
+ fallbackBuffer.InternalFallback(highSurrogate, ref charsForFallback);
+ chars = charsForFallback;
+
+ highSurrogate = (char)0;
+ goto TryAgain;
+ }
+
+ // Check for overflows.
+ if (byteCount < 0)
+ throw new ArgumentOutOfRangeException(nameof(count), SR.ArgumentOutOfRange_GetByteCountOverflow);
+
+ // Shouldn't have anything in fallback buffer for GetByteCount
+ // (don't have to check m_throwOnOverflow for count)
+ Debug.Assert(fallbackBuffer.Remaining == 0,
+ "[UTF32Encoding.GetByteCount]Expected empty fallback buffer at end");
+
+ // Return our count
+ return byteCount;
+ }
+
+ internal override unsafe int GetBytes(char* chars, int charCount,
+ byte* bytes, int byteCount, EncoderNLS encoder)
+ {
+ Debug.Assert(chars != null, "[UTF32Encoding.GetBytes]chars!=null");
+ Debug.Assert(bytes != null, "[UTF32Encoding.GetBytes]bytes!=null");
+ Debug.Assert(byteCount >= 0, "[UTF32Encoding.GetBytes]byteCount >=0");
+ Debug.Assert(charCount >= 0, "[UTF32Encoding.GetBytes]charCount >=0");
+
+ char* charStart = chars;
+ char* charEnd = chars + charCount;
+ byte* byteStart = bytes;
+ byte* byteEnd = bytes + byteCount;
+
+ char highSurrogate = '\0';
+
+ // For fallback we may need a fallback buffer
+ EncoderFallbackBuffer fallbackBuffer = null;
+ char* charsForFallback;
+
+ if (encoder != null)
+ {
+ highSurrogate = encoder.charLeftOver;
+ fallbackBuffer = encoder.FallbackBuffer;
+
+ // We mustn't have left over fallback data when not converting
+ if (encoder.m_throwOnOverflow && fallbackBuffer.Remaining > 0)
+ throw new ArgumentException(SR.Format(SR.Argument_EncoderFallbackNotEmpty, this.EncodingName, encoder.Fallback.GetType()));
+ }
+ else
+ {
+ fallbackBuffer = this.encoderFallback.CreateFallbackBuffer();
+ }
+
+ // Set our internal fallback interesting things.
+ fallbackBuffer.InternalInitialize(charStart, charEnd, encoder, true);
+
+ char ch;
+ TryAgain:
+
+ while (((ch = fallbackBuffer.InternalGetNextChar()) != 0) || chars < charEnd)
+ {
+ // First unwind any fallback
+ if (ch == 0)
+ {
+ // No fallback, just get next char
+ ch = *chars;
+ chars++;
+ }
+
+ // Do we need a low surrogate?
+ if (highSurrogate != '\0')
+ {
+ //
+ // In previous char, we encountered a high surrogate, so we are expecting a low surrogate here.
+ //
+ if (Char.IsLowSurrogate(ch))
+ {
+ // Is it a legal one?
+ uint iTemp = GetSurrogate(highSurrogate, ch);
+ highSurrogate = '\0';
+
+ //
+ // One surrogate pair will be translated into 4 bytes UTF32.
+ //
+ if (bytes + 3 >= byteEnd)
+ {
+ // Don't have 4 bytes
+ if (fallbackBuffer.bFallingBack)
+ {
+ fallbackBuffer.MovePrevious(); // Aren't using these 2 fallback chars
+ fallbackBuffer.MovePrevious();
+ }
+ else
+ {
+ // If we don't have enough room, then either we should've advanced a while
+ // or we should have bytes==byteStart and throw below
+ Debug.Assert(chars > charStart + 1 || bytes == byteStart,
+ "[UnicodeEncoding.GetBytes]Expected chars to have when no room to add surrogate pair");
+ chars -= 2; // Aren't using those 2 chars
+ }
+ ThrowBytesOverflow(encoder, bytes == byteStart); // Throw maybe (if no bytes written)
+ highSurrogate = (char)0; // Nothing left over (we backed up to start of pair if supplimentary)
+ break;
+ }
+
+ if (_bigEndian)
+ {
+ *(bytes++) = (byte)(0x00);
+ *(bytes++) = (byte)(iTemp >> 16); // Implies & 0xFF, which isn't needed cause high are all 0
+ *(bytes++) = (byte)(iTemp >> 8); // Implies & 0xFF
+ *(bytes++) = (byte)(iTemp); // Implies & 0xFF
+ }
+ else
+ {
+ *(bytes++) = (byte)(iTemp); // Implies & 0xFF
+ *(bytes++) = (byte)(iTemp >> 8); // Implies & 0xFF
+ *(bytes++) = (byte)(iTemp >> 16); // Implies & 0xFF, which isn't needed cause high are all 0
+ *(bytes++) = (byte)(0x00);
+ }
+ continue;
+ }
+
+ // We are missing our low surrogate, decrement chars and fallback the high surrogate
+ // The high surrogate may have come from the encoder, but nothing else did.
+ Debug.Assert(chars > charStart,
+ "[UTF32Encoding.GetBytes]Expected chars to have advanced if no low surrogate");
+ chars--;
+
+ // Do the fallback
+ charsForFallback = chars;
+ fallbackBuffer.InternalFallback(highSurrogate, ref charsForFallback);
+ chars = charsForFallback;
+
+ // We're going to fallback the old high surrogate.
+ highSurrogate = '\0';
+ continue;
+ }
+
+ // Do we have another high surrogate?, if so remember it
+ if (Char.IsHighSurrogate(ch))
+ {
+ //
+ // We'll have a high surrogate to check next time.
+ //
+ highSurrogate = ch;
+ continue;
+ }
+
+ // Check for illegal characters (low surrogate)
+ if (Char.IsLowSurrogate(ch))
+ {
+ // We have a leading low surrogate, do the fallback
+ charsForFallback = chars;
+ fallbackBuffer.InternalFallback(ch, ref charsForFallback);
+ chars = charsForFallback;
+
+ // Try again with fallback buffer
+ continue;
+ }
+
+ // We get to add the character, yippee.
+ if (bytes + 3 >= byteEnd)
+ {
+ // Don't have 4 bytes
+ if (fallbackBuffer.bFallingBack)
+ fallbackBuffer.MovePrevious(); // Aren't using this fallback char
+ else
+ {
+ // Must've advanced already
+ Debug.Assert(chars > charStart,
+ "[UTF32Encoding.GetBytes]Expected chars to have advanced if normal character");
+ chars--; // Aren't using this char
+ }
+ ThrowBytesOverflow(encoder, bytes == byteStart); // Throw maybe (if no bytes written)
+ break; // Didn't throw, stop
+ }
+
+ if (_bigEndian)
+ {
+ *(bytes++) = (byte)(0x00);
+ *(bytes++) = (byte)(0x00);
+ *(bytes++) = (byte)((uint)ch >> 8); // Implies & 0xFF
+ *(bytes++) = (byte)(ch); // Implies & 0xFF
+ }
+ else
+ {
+ *(bytes++) = (byte)(ch); // Implies & 0xFF
+ *(bytes++) = (byte)((uint)ch >> 8); // Implies & 0xFF
+ *(bytes++) = (byte)(0x00);
+ *(bytes++) = (byte)(0x00);
+ }
+ }
+
+ // May have to do our last surrogate
+ if ((encoder == null || encoder.MustFlush) && highSurrogate > 0)
+ {
+ // We have to do the fallback for the lonely high surrogate
+ charsForFallback = chars;
+ fallbackBuffer.InternalFallback(highSurrogate, ref charsForFallback);
+ chars = charsForFallback;
+
+ highSurrogate = (char)0;
+ goto TryAgain;
+ }
+
+ // Fix our encoder if we have one
+ Debug.Assert(highSurrogate == 0 || (encoder != null && !encoder.MustFlush),
+ "[UTF32Encoding.GetBytes]Expected encoder to be flushed.");
+
+ if (encoder != null)
+ {
+ // Remember our left over surrogate (or 0 if flushing)
+ encoder.charLeftOver = highSurrogate;
+
+ // Need # chars used
+ encoder.m_charsUsed = (int)(chars - charStart);
+ }
+
+ // return the new length
+ return (int)(bytes - byteStart);
+ }
+
+ internal override unsafe int GetCharCount(byte* bytes, int count, DecoderNLS baseDecoder)
+ {
+ Debug.Assert(bytes != null, "[UTF32Encoding.GetCharCount]bytes!=null");
+ Debug.Assert(count >= 0, "[UTF32Encoding.GetCharCount]count >=0");
+
+ UTF32Decoder decoder = (UTF32Decoder)baseDecoder;
+
+ // None so far!
+ int charCount = 0;
+ byte* end = bytes + count;
+ byte* byteStart = bytes;
+
+ // Set up decoder
+ int readCount = 0;
+ uint iChar = 0;
+
+ // For fallback we may need a fallback buffer
+ DecoderFallbackBuffer fallbackBuffer = null;
+
+ // See if there's anything in our decoder
+ if (decoder != null)
+ {
+ readCount = decoder.readByteCount;
+ iChar = (uint)decoder.iChar;
+ fallbackBuffer = decoder.FallbackBuffer;
+
+ // Shouldn't have anything in fallback buffer for GetCharCount
+ // (don't have to check m_throwOnOverflow for chars or count)
+ Debug.Assert(fallbackBuffer.Remaining == 0,
+ "[UTF32Encoding.GetCharCount]Expected empty fallback buffer at start");
+ }
+ else
+ {
+ fallbackBuffer = this.decoderFallback.CreateFallbackBuffer();
+ }
+
+ // Set our internal fallback interesting things.
+ fallbackBuffer.InternalInitialize(byteStart, null);
+
+ // Loop through our input, 4 characters at a time!
+ while (bytes < end && charCount >= 0)
+ {
+ // Get our next character
+ if (_bigEndian)
+ {
+ // Scoot left and add it to the bottom
+ iChar <<= 8;
+ iChar += *(bytes++);
+ }
+ else
+ {
+ // Scoot right and add it to the top
+ iChar >>= 8;
+ iChar += (uint)(*(bytes++)) << 24;
+ }
+
+ readCount++;
+
+ // See if we have all the bytes yet
+ if (readCount < 4)
+ continue;
+
+ // Have the bytes
+ readCount = 0;
+
+ // See if its valid to encode
+ if (iChar > 0x10FFFF || (iChar >= 0xD800 && iChar <= 0xDFFF))
+ {
+ // Need to fall back these 4 bytes
+ byte[] fallbackBytes;
+ if (_bigEndian)
+ {
+ fallbackBytes = new byte[] {
+ unchecked((byte)(iChar>>24)), unchecked((byte)(iChar>>16)),
+ unchecked((byte)(iChar>>8)), unchecked((byte)(iChar)) };
+ }
+ else
+ {
+ fallbackBytes = new byte[] {
+ unchecked((byte)(iChar)), unchecked((byte)(iChar>>8)),
+ unchecked((byte)(iChar>>16)), unchecked((byte)(iChar>>24)) };
+ }
+
+ charCount += fallbackBuffer.InternalFallback(fallbackBytes, bytes);
+
+ // Ignore the illegal character
+ iChar = 0;
+ continue;
+ }
+
+ // Ok, we have something we can add to our output
+ if (iChar >= 0x10000)
+ {
+ // Surrogates take 2
+ charCount++;
+ }
+
+ // Add the rest of the surrogate or our normal character
+ charCount++;
+
+ // iChar is back to 0
+ iChar = 0;
+ }
+
+ // See if we have something left over that has to be decoded
+ if (readCount > 0 && (decoder == null || decoder.MustFlush))
+ {
+ // Oops, there's something left over with no place to go.
+ byte[] fallbackBytes = new byte[readCount];
+ if (_bigEndian)
+ {
+ while (readCount > 0)
+ {
+ fallbackBytes[--readCount] = unchecked((byte)iChar);
+ iChar >>= 8;
+ }
+ }
+ else
+ {
+ while (readCount > 0)
+ {
+ fallbackBytes[--readCount] = unchecked((byte)(iChar >> 24));
+ iChar <<= 8;
+ }
+ }
+
+ charCount += fallbackBuffer.InternalFallback(fallbackBytes, bytes);
+ }
+
+ // Check for overflows.
+ if (charCount < 0)
+ throw new ArgumentOutOfRangeException(nameof(count), SR.ArgumentOutOfRange_GetByteCountOverflow);
+
+ // Shouldn't have anything in fallback buffer for GetCharCount
+ // (don't have to check m_throwOnOverflow for chars or count)
+ Debug.Assert(fallbackBuffer.Remaining == 0,
+ "[UTF32Encoding.GetCharCount]Expected empty fallback buffer at end");
+
+ // Return our count
+ return charCount;
+ }
+
+ internal override unsafe int GetChars(byte* bytes, int byteCount,
+ char* chars, int charCount, DecoderNLS baseDecoder)
+ {
+ Debug.Assert(chars != null, "[UTF32Encoding.GetChars]chars!=null");
+ Debug.Assert(bytes != null, "[UTF32Encoding.GetChars]bytes!=null");
+ Debug.Assert(byteCount >= 0, "[UTF32Encoding.GetChars]byteCount >=0");
+ Debug.Assert(charCount >= 0, "[UTF32Encoding.GetChars]charCount >=0");
+
+ UTF32Decoder decoder = (UTF32Decoder)baseDecoder;
+
+ // None so far!
+ char* charStart = chars;
+ char* charEnd = chars + charCount;
+
+ byte* byteStart = bytes;
+ byte* byteEnd = bytes + byteCount;
+
+ // See if there's anything in our decoder (but don't clear it yet)
+ int readCount = 0;
+ uint iChar = 0;
+
+ // For fallback we may need a fallback buffer
+ DecoderFallbackBuffer fallbackBuffer = null;
+ char* charsForFallback;
+
+ // See if there's anything in our decoder
+ if (decoder != null)
+ {
+ readCount = decoder.readByteCount;
+ iChar = (uint)decoder.iChar;
+ fallbackBuffer = baseDecoder.FallbackBuffer;
+
+ // Shouldn't have anything in fallback buffer for GetChars
+ // (don't have to check m_throwOnOverflow for chars)
+ Debug.Assert(fallbackBuffer.Remaining == 0,
+ "[UTF32Encoding.GetChars]Expected empty fallback buffer at start");
+ }
+ else
+ {
+ fallbackBuffer = this.decoderFallback.CreateFallbackBuffer();
+ }
+
+ // Set our internal fallback interesting things.
+ fallbackBuffer.InternalInitialize(bytes, chars + charCount);
+
+ // Loop through our input, 4 characters at a time!
+ while (bytes < byteEnd)
+ {
+ // Get our next character
+ if (_bigEndian)
+ {
+ // Scoot left and add it to the bottom
+ iChar <<= 8;
+ iChar += *(bytes++);
+ }
+ else
+ {
+ // Scoot right and add it to the top
+ iChar >>= 8;
+ iChar += (uint)(*(bytes++)) << 24;
+ }
+
+ readCount++;
+
+ // See if we have all the bytes yet
+ if (readCount < 4)
+ continue;
+
+ // Have the bytes
+ readCount = 0;
+
+ // See if its valid to encode
+ if (iChar > 0x10FFFF || (iChar >= 0xD800 && iChar <= 0xDFFF))
+ {
+ // Need to fall back these 4 bytes
+ byte[] fallbackBytes;
+ if (_bigEndian)
+ {
+ fallbackBytes = new byte[] {
+ unchecked((byte)(iChar>>24)), unchecked((byte)(iChar>>16)),
+ unchecked((byte)(iChar>>8)), unchecked((byte)(iChar)) };
+ }
+ else
+ {
+ fallbackBytes = new byte[] {
+ unchecked((byte)(iChar)), unchecked((byte)(iChar>>8)),
+ unchecked((byte)(iChar>>16)), unchecked((byte)(iChar>>24)) };
+ }
+
+ // Chars won't be updated unless this works.
+ charsForFallback = chars;
+ bool fallbackResult = fallbackBuffer.InternalFallback(fallbackBytes, bytes, ref charsForFallback);
+ chars = charsForFallback;
+
+ if (!fallbackResult)
+ {
+
+ // Couldn't fallback, throw or wait til next time
+ // We either read enough bytes for bytes-=4 to work, or we're
+ // going to throw in ThrowCharsOverflow because chars == charStart
+ Debug.Assert(bytes >= byteStart + 4 || chars == charStart,
+ "[UTF32Encoding.GetChars]Expected to have consumed bytes or throw (bad surrogate)");
+ bytes -= 4; // get back to where we were
+ iChar = 0; // Remembering nothing
+ fallbackBuffer.InternalReset();
+ ThrowCharsOverflow(decoder, chars == charStart);// Might throw, if no chars output
+ break; // Stop here, didn't throw
+ }
+
+ // Ignore the illegal character
+ iChar = 0;
+ continue;
+ }
+
+
+ // Ok, we have something we can add to our output
+ if (iChar >= 0x10000)
+ {
+ // Surrogates take 2
+ if (chars >= charEnd - 1)
+ {
+ // Throwing or stopping
+ // We either read enough bytes for bytes-=4 to work, or we're
+ // going to throw in ThrowCharsOverflow because chars == charStart
+ Debug.Assert(bytes >= byteStart + 4 || chars == charStart,
+ "[UTF32Encoding.GetChars]Expected to have consumed bytes or throw (surrogate)");
+ bytes -= 4; // get back to where we were
+ iChar = 0; // Remembering nothing
+ ThrowCharsOverflow(decoder, chars == charStart);// Might throw, if no chars output
+ break; // Stop here, didn't throw
+ }
+
+ *(chars++) = GetHighSurrogate(iChar);
+ iChar = GetLowSurrogate(iChar);
+ }
+ // Bounds check for normal character
+ else if (chars >= charEnd)
+ {
+ // Throwing or stopping
+ // We either read enough bytes for bytes-=4 to work, or we're
+ // going to throw in ThrowCharsOverflow because chars == charStart
+ Debug.Assert(bytes >= byteStart + 4 || chars == charStart,
+ "[UTF32Encoding.GetChars]Expected to have consumed bytes or throw (normal char)");
+ bytes -= 4; // get back to where we were
+ iChar = 0; // Remembering nothing
+ ThrowCharsOverflow(decoder, chars == charStart);// Might throw, if no chars output
+ break; // Stop here, didn't throw
+ }
+
+ // Add the rest of the surrogate or our normal character
+ *(chars++) = (char)iChar;
+
+ // iChar is back to 0
+ iChar = 0;
+ }
+
+ // See if we have something left over that has to be decoded
+ if (readCount > 0 && (decoder == null || decoder.MustFlush))
+ {
+ // Oops, there's something left over with no place to go.
+ byte[] fallbackBytes = new byte[readCount];
+ int tempCount = readCount;
+ if (_bigEndian)
+ {
+ while (tempCount > 0)
+ {
+ fallbackBytes[--tempCount] = unchecked((byte)iChar);
+ iChar >>= 8;
+ }
+ }
+ else
+ {
+ while (tempCount > 0)
+ {
+ fallbackBytes[--tempCount] = unchecked((byte)(iChar >> 24));
+ iChar <<= 8;
+ }
+ }
+
+ charsForFallback = chars;
+ bool fallbackResult = fallbackBuffer.InternalFallback(fallbackBytes, bytes, ref charsForFallback);
+ chars = charsForFallback;
+
+ if (!fallbackResult)
+ {
+ // Couldn't fallback.
+ fallbackBuffer.InternalReset();
+ ThrowCharsOverflow(decoder, chars == charStart);// Might throw, if no chars output
+ // Stop here, didn't throw, backed up, so still nothing in buffer
+ }
+ else
+ {
+ // Don't clear our decoder unless we could fall it back.
+ // If we caught the if above, then we're a convert() and will catch this next time.
+ readCount = 0;
+ iChar = 0;
+ }
+ }
+
+ // Remember any left over stuff, clearing buffer as well for MustFlush
+ if (decoder != null)
+ {
+ decoder.iChar = (int)iChar;
+ decoder.readByteCount = readCount;
+ decoder.m_bytesUsed = (int)(bytes - byteStart);
+ }
+
+ // Shouldn't have anything in fallback buffer for GetChars
+ // (don't have to check m_throwOnOverflow for chars)
+ Debug.Assert(fallbackBuffer.Remaining == 0,
+ "[UTF32Encoding.GetChars]Expected empty fallback buffer at end");
+
+ // Return our count
+ return (int)(chars - charStart);
+ }
+
+
+ private uint GetSurrogate(char cHigh, char cLow)
+ {
+ return (((uint)cHigh - 0xD800) * 0x400) + ((uint)cLow - 0xDC00) + 0x10000;
+ }
+
+ private char GetHighSurrogate(uint iChar)
+ {
+ return (char)((iChar - 0x10000) / 0x400 + 0xD800);
+ }
+
+ private char GetLowSurrogate(uint iChar)
+ {
+ return (char)((iChar - 0x10000) % 0x400 + 0xDC00);
+ }
+
+
+ public override Decoder GetDecoder()
+ {
+ return new UTF32Decoder(this);
+ }
+
+
+ public override Encoder GetEncoder()
+ {
+ return new EncoderNLS(this);
+ }
+
+
+ public override int GetMaxByteCount(int charCount)
+ {
+ if (charCount < 0)
+ throw new ArgumentOutOfRangeException(nameof(charCount),
+ SR.ArgumentOutOfRange_NeedNonNegNum);
+ Contract.EndContractBlock();
+
+ // Characters would be # of characters + 1 in case left over high surrogate is ? * max fallback
+ long byteCount = (long)charCount + 1;
+
+ if (EncoderFallback.MaxCharCount > 1)
+ byteCount *= EncoderFallback.MaxCharCount;
+
+ // 4 bytes per char
+ byteCount *= 4;
+
+ if (byteCount > 0x7fffffff)
+ throw new ArgumentOutOfRangeException(nameof(charCount), SR.ArgumentOutOfRange_GetByteCountOverflow);
+
+ return (int)byteCount;
+ }
+
+
+ public override int GetMaxCharCount(int byteCount)
+ {
+ if (byteCount < 0)
+ throw new ArgumentOutOfRangeException(nameof(byteCount),
+ SR.ArgumentOutOfRange_NeedNonNegNum);
+ Contract.EndContractBlock();
+
+ // A supplementary character becomes 2 surrogate characters, so 4 input bytes becomes 2 chars,
+ // plus we may have 1 surrogate char left over if the decoder has 3 bytes in it already for a non-bmp char.
+ // Have to add another one because 1/2 == 0, but 3 bytes left over could be 2 char surrogate pair
+ int charCount = (byteCount / 2) + 2;
+
+ // Also consider fallback because our input bytes could be out of range of unicode.
+ // Since fallback would fallback 4 bytes at a time, we'll only fall back 1/2 of MaxCharCount.
+ if (DecoderFallback.MaxCharCount > 2)
+ {
+ // Multiply time fallback size
+ charCount *= DecoderFallback.MaxCharCount;
+
+ // We were already figuring 2 chars per 4 bytes, but fallback will be different #
+ charCount /= 2;
+ }
+
+ if (charCount > 0x7fffffff)
+ throw new ArgumentOutOfRangeException(nameof(byteCount), SR.ArgumentOutOfRange_GetCharCountOverflow);
+
+ return (int)charCount;
+ }
+
+
+ public override byte[] GetPreamble()
+ {
+ if (_emitUTF32ByteOrderMark)
+ {
+ // Allocate new array to prevent users from modifying it.
+ if (_bigEndian)
+ {
+ return new byte[4] { 0x00, 0x00, 0xFE, 0xFF };
+ }
+ else
+ {
+ return new byte[4] { 0xFF, 0xFE, 0x00, 0x00 }; // 00 00 FE FF
+ }
+ }
+ else
+ return Array.Empty<byte>();
+ }
+
+
+ public override bool Equals(Object value)
+ {
+ UTF32Encoding that = value as UTF32Encoding;
+ if (that != null)
+ {
+ return (_emitUTF32ByteOrderMark == that._emitUTF32ByteOrderMark) &&
+ (_bigEndian == that._bigEndian) &&
+ (EncoderFallback.Equals(that.EncoderFallback)) &&
+ (DecoderFallback.Equals(that.DecoderFallback));
+ }
+ return (false);
+ }
+
+
+ public override int GetHashCode()
+ {
+ //Not great distribution, but this is relatively unlikely to be used as the key in a hashtable.
+ return this.EncoderFallback.GetHashCode() + this.DecoderFallback.GetHashCode() +
+ CodePage + (_emitUTF32ByteOrderMark ? 4 : 0) + (_bigEndian ? 8 : 0);
+ }
+
+ [Serializable]
+ private sealed class UTF32Decoder : DecoderNLS
+ {
+ // Need a place to store any extra bytes we may have picked up
+ internal int iChar = 0;
+ internal int readByteCount = 0;
+
+ public UTF32Decoder(UTF32Encoding encoding) : base(encoding)
+ {
+ // base calls reset
+ }
+
+ public override void Reset()
+ {
+ this.iChar = 0;
+ this.readByteCount = 0;
+ if (m_fallbackBuffer != null)
+ m_fallbackBuffer.Reset();
+ }
+
+ // Anything left in our decoder?
+ internal override bool HasState
+ {
+ get
+ {
+ // ReadByteCount is our flag. (iChar==0 doesn't mean much).
+ return (this.readByteCount != 0);
+ }
+ }
+ }
+ }
+}
diff --git a/src/mscorlib/shared/System/Text/UTF8Encoding.cs b/src/mscorlib/shared/System/Text/UTF8Encoding.cs
new file mode 100644
index 0000000000..5cfa89018a
--- /dev/null
+++ b/src/mscorlib/shared/System/Text/UTF8Encoding.cs
@@ -0,0 +1,2668 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+// The worker functions in this file was optimized for performance. If you make changes
+// you should use care to consider all of the interesting cases.
+
+// The code of all worker functions in this file is written twice: Once as as a slow loop, and the
+// second time as a fast loop. The slow loops handles all special cases, throws exceptions, etc.
+// The fast loops attempts to blaze through as fast as possible with optimistic range checks,
+// processing multiple characters at a time, and falling back to the slow loop for all special cases.
+
+// This define can be used to turn off the fast loops. Useful for finding whether
+// the problem is fastloop-specific.
+#define FASTLOOP
+
+using System;
+using System.Runtime.Serialization;
+using System.Diagnostics;
+using System.Diagnostics.Contracts;
+using System.Globalization;
+
+namespace System.Text
+{
+ // Encodes text into and out of UTF-8. UTF-8 is a way of writing
+ // Unicode characters with variable numbers of bytes per character,
+ // optimized for the lower 127 ASCII characters. It's an efficient way
+ // of encoding US English in an internationalizable way.
+ //
+ // Don't override IsAlwaysNormalized because it is just a Unicode Transformation and could be confused.
+ //
+ // The UTF-8 byte order mark is simply the Unicode byte order mark
+ // (0xFEFF) written in UTF-8 (0xEF 0xBB 0xBF). The byte order mark is
+ // used mostly to distinguish UTF-8 text from other encodings, and doesn't
+ // switch the byte orderings.
+
+ [Serializable]
+ public class UTF8Encoding : Encoding
+ {
+ /*
+ bytes bits UTF-8 representation
+ ----- ---- -----------------------------------
+ 1 7 0vvvvvvv
+ 2 11 110vvvvv 10vvvvvv
+ 3 16 1110vvvv 10vvvvvv 10vvvvvv
+ 4 21 11110vvv 10vvvvvv 10vvvvvv 10vvvvvv
+ ----- ---- -----------------------------------
+
+ Surrogate:
+ Real Unicode value = (HighSurrogate - 0xD800) * 0x400 + (LowSurrogate - 0xDC00) + 0x10000
+ */
+
+ private const int UTF8_CODEPAGE = 65001;
+
+ // Allow for devirtualization (see https://github.com/dotnet/coreclr/pull/9230)
+ [Serializable]
+ internal sealed class UTF8EncodingSealed : UTF8Encoding
+ {
+ public UTF8EncodingSealed(bool encoderShouldEmitUTF8Identifier) : base(encoderShouldEmitUTF8Identifier) { }
+ }
+
+ // Used by Encoding.UTF8 for lazy initialization
+ // The initialization code will not be run until a static member of the class is referenced
+ internal static readonly UTF8EncodingSealed s_default = new UTF8EncodingSealed(encoderShouldEmitUTF8Identifier: true);
+
+ // Yes, the idea of emitting U+FEFF as a UTF-8 identifier has made it into
+ // the standard.
+ private bool _emitUTF8Identifier = false;
+
+ private bool _isThrowException = false;
+
+
+ public UTF8Encoding() : this(false)
+ {
+ }
+
+
+ public UTF8Encoding(bool encoderShouldEmitUTF8Identifier) :
+ this(encoderShouldEmitUTF8Identifier, false)
+ {
+ }
+
+
+ public UTF8Encoding(bool encoderShouldEmitUTF8Identifier, bool throwOnInvalidBytes) :
+ base(UTF8_CODEPAGE)
+ {
+ _emitUTF8Identifier = encoderShouldEmitUTF8Identifier;
+ _isThrowException = throwOnInvalidBytes;
+
+ // Encoding's constructor already did this, but it'll be wrong if we're throwing exceptions
+ if (_isThrowException)
+ SetDefaultFallbacks();
+ }
+
+ internal override void SetDefaultFallbacks()
+ {
+ // For UTF-X encodings, we use a replacement fallback with an empty string
+ if (_isThrowException)
+ {
+ this.encoderFallback = EncoderFallback.ExceptionFallback;
+ this.decoderFallback = DecoderFallback.ExceptionFallback;
+ }
+ else
+ {
+ this.encoderFallback = new EncoderReplacementFallback("\xFFFD");
+ this.decoderFallback = new DecoderReplacementFallback("\xFFFD");
+ }
+ }
+
+
+ // WARNING: GetByteCount(string chars)
+ // WARNING: has different variable names than EncodingNLS.cs, so this can't just be cut & pasted,
+ // WARNING: otherwise it'll break VB's way of declaring these.
+ //
+ // The following methods are copied from EncodingNLS.cs.
+ // Unfortunately EncodingNLS.cs is internal and we're public, so we have to reimpliment them here.
+ // These should be kept in sync for the following classes:
+ // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
+
+ // Returns the number of bytes required to encode a range of characters in
+ // a character array.
+ //
+ // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
+ // So if you fix this, fix the others. Currently those include:
+ // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
+ // parent method is safe
+
+ public override unsafe int GetByteCount(char[] chars, int index, int count)
+ {
+ // Validate input parameters
+ if (chars == null)
+ throw new ArgumentNullException("chars", SR.ArgumentNull_Array);
+
+ if (index < 0 || count < 0)
+ throw new ArgumentOutOfRangeException((index < 0 ? "index" : "count"), SR.ArgumentOutOfRange_NeedNonNegNum);
+
+ if (chars.Length - index < count)
+ throw new ArgumentOutOfRangeException("chars", SR.ArgumentOutOfRange_IndexCountBuffer);
+ Contract.EndContractBlock();
+
+ // If no input, return 0, avoid fixed empty array problem
+ if (count == 0)
+ return 0;
+
+ // Just call the pointer version
+ fixed (char* pChars = chars)
+ return GetByteCount(pChars + index, count, null);
+ }
+
+ // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
+ // So if you fix this, fix the others. Currently those include:
+ // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
+ // parent method is safe
+
+ public override unsafe int GetByteCount(String chars)
+ {
+ // Validate input
+ if (chars==null)
+ throw new ArgumentNullException("s");
+ Contract.EndContractBlock();
+
+ fixed (char* pChars = chars)
+ return GetByteCount(pChars, chars.Length, null);
+ }
+
+ // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
+ // So if you fix this, fix the others. Currently those include:
+ // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
+
+ [CLSCompliant(false)]
+ public override unsafe int GetByteCount(char* chars, int count)
+ {
+ // Validate Parameters
+ if (chars == null)
+ throw new ArgumentNullException("chars", SR.ArgumentNull_Array);
+
+ if (count < 0)
+ throw new ArgumentOutOfRangeException("count", SR.ArgumentOutOfRange_NeedNonNegNum);
+ Contract.EndContractBlock();
+
+ // Call it with empty encoder
+ return GetByteCount(chars, count, null);
+ }
+
+ // Parent method is safe.
+ // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
+ // So if you fix this, fix the others. Currently those include:
+ // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
+
+ public override unsafe int GetBytes(String s, int charIndex, int charCount,
+ byte[] bytes, int byteIndex)
+ {
+ if (s == null || bytes == null)
+ throw new ArgumentNullException((s == null ? "s" : "bytes"), SR.ArgumentNull_Array);
+
+ if (charIndex < 0 || charCount < 0)
+ throw new ArgumentOutOfRangeException((charIndex < 0 ? "charIndex" : "charCount"), SR.ArgumentOutOfRange_NeedNonNegNum);
+
+ if (s.Length - charIndex < charCount)
+ throw new ArgumentOutOfRangeException("s", SR.ArgumentOutOfRange_IndexCount);
+
+ if (byteIndex < 0 || byteIndex > bytes.Length)
+ throw new ArgumentOutOfRangeException("byteIndex", SR.ArgumentOutOfRange_Index);
+ Contract.EndContractBlock();
+
+ int byteCount = bytes.Length - byteIndex;
+
+ // Fixed doesn't like 0 length arrays.
+ if (bytes.Length == 0)
+ bytes = new byte[1];
+
+ fixed (char* pChars = s) fixed (byte* pBytes = &bytes[0])
+ return GetBytes(pChars + charIndex, charCount, pBytes + byteIndex, byteCount, null);
+ }
+
+ // Encodes a range of characters in a character array into a range of bytes
+ // in a byte array. An exception occurs if the byte array is not large
+ // enough to hold the complete encoding of the characters. The
+ // GetByteCount method can be used to determine the exact number of
+ // bytes that will be produced for a given range of characters.
+ // Alternatively, the GetMaxByteCount method can be used to
+ // determine the maximum number of bytes that will be produced for a given
+ // number of characters, regardless of the actual character values.
+ //
+ // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
+ // So if you fix this, fix the others. Currently those include:
+ // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
+ // parent method is safe
+
+ public override unsafe int GetBytes(char[] chars, int charIndex, int charCount,
+ byte[] bytes, int byteIndex)
+ {
+ // Validate parameters
+ if (chars == null || bytes == null)
+ throw new ArgumentNullException((chars == null ? "chars" : "bytes"), SR.ArgumentNull_Array);
+
+ if (charIndex < 0 || charCount < 0)
+ throw new ArgumentOutOfRangeException((charIndex < 0 ? "charIndex" : "charCount"), SR.ArgumentOutOfRange_NeedNonNegNum);
+
+ if (chars.Length - charIndex < charCount)
+ throw new ArgumentOutOfRangeException("chars", SR.ArgumentOutOfRange_IndexCountBuffer);
+
+ if (byteIndex < 0 || byteIndex > bytes.Length)
+ throw new ArgumentOutOfRangeException("byteIndex", SR.ArgumentOutOfRange_Index);
+ Contract.EndContractBlock();
+
+ // If nothing to encode return 0, avoid fixed problem
+ if (charCount == 0)
+ return 0;
+
+ // Just call pointer version
+ int byteCount = bytes.Length - byteIndex;
+
+ // Fixed doesn't like 0 length arrays.
+ if (bytes.Length == 0)
+ bytes = new byte[1];
+
+ fixed (char* pChars = chars) fixed (byte* pBytes = &bytes[0])
+ // Remember that byteCount is # to decode, not size of array.
+ return GetBytes(pChars + charIndex, charCount, pBytes + byteIndex, byteCount, null);
+ }
+
+ // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
+ // So if you fix this, fix the others. Currently those include:
+ // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
+
+ [CLSCompliant(false)]
+ public override unsafe int GetBytes(char* chars, int charCount, byte* bytes, int byteCount)
+ {
+ // Validate Parameters
+ if (bytes == null || chars == null)
+ throw new ArgumentNullException(bytes == null ? "bytes" : "chars", SR.ArgumentNull_Array);
+
+ if (charCount < 0 || byteCount < 0)
+ throw new ArgumentOutOfRangeException((charCount < 0 ? "charCount" : "byteCount"), SR.ArgumentOutOfRange_NeedNonNegNum);
+ Contract.EndContractBlock();
+
+ return GetBytes(chars, charCount, bytes, byteCount, null);
+ }
+
+ // Returns the number of characters produced by decoding a range of bytes
+ // in a byte array.
+ //
+ // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
+ // So if you fix this, fix the others. Currently those include:
+ // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
+ // parent method is safe
+
+ public override unsafe int GetCharCount(byte[] bytes, int index, int count)
+ {
+ // Validate Parameters
+ if (bytes == null)
+ throw new ArgumentNullException("bytes", SR.ArgumentNull_Array);
+
+ if (index < 0 || count < 0)
+ throw new ArgumentOutOfRangeException((index < 0 ? "index" : "count"), SR.ArgumentOutOfRange_NeedNonNegNum);
+
+ if (bytes.Length - index < count)
+ throw new ArgumentOutOfRangeException("bytes", SR.ArgumentOutOfRange_IndexCountBuffer);
+ Contract.EndContractBlock();
+
+ // If no input just return 0, fixed doesn't like 0 length arrays.
+ if (count == 0)
+ return 0;
+
+ // Just call pointer version
+ fixed (byte* pBytes = bytes)
+ return GetCharCount(pBytes + index, count, null);
+ }
+
+ // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
+ // So if you fix this, fix the others. Currently those include:
+ // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
+
+ [CLSCompliant(false)]
+ public override unsafe int GetCharCount(byte* bytes, int count)
+ {
+ // Validate Parameters
+ if (bytes == null)
+ throw new ArgumentNullException("bytes", SR.ArgumentNull_Array);
+
+ if (count < 0)
+ throw new ArgumentOutOfRangeException("count", SR.ArgumentOutOfRange_NeedNonNegNum);
+ Contract.EndContractBlock();
+
+ return GetCharCount(bytes, count, null);
+ }
+
+ // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
+ // So if you fix this, fix the others. Currently those include:
+ // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
+ // parent method is safe
+
+ public override unsafe int GetChars(byte[] bytes, int byteIndex, int byteCount,
+ char[] chars, int charIndex)
+ {
+ // Validate Parameters
+ if (bytes == null || chars == null)
+ throw new ArgumentNullException(bytes == null ? "bytes" : "chars", SR.ArgumentNull_Array);
+
+ if (byteIndex < 0 || byteCount < 0)
+ throw new ArgumentOutOfRangeException((byteIndex < 0 ? "byteIndex" : "byteCount"), SR.ArgumentOutOfRange_NeedNonNegNum);
+
+ if ( bytes.Length - byteIndex < byteCount)
+ throw new ArgumentOutOfRangeException("bytes", SR.ArgumentOutOfRange_IndexCountBuffer);
+
+ if (charIndex < 0 || charIndex > chars.Length)
+ throw new ArgumentOutOfRangeException("charIndex", SR.ArgumentOutOfRange_Index);
+ Contract.EndContractBlock();
+
+ // If no input, return 0 & avoid fixed problem
+ if (byteCount == 0)
+ return 0;
+
+ // Just call pointer version
+ int charCount = chars.Length - charIndex;
+
+ // Fixed doesn't like 0 length arrays.
+ if (chars.Length == 0)
+ chars = new char[1];
+
+ fixed (byte* pBytes = bytes) fixed (char* pChars = &chars[0])
+ // Remember that charCount is # to decode, not size of array
+ return GetChars(pBytes + byteIndex, byteCount, pChars + charIndex, charCount, null);
+ }
+
+ // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
+ // So if you fix this, fix the others. Currently those include:
+ // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
+
+ [CLSCompliant(false)]
+ public unsafe override int GetChars(byte* bytes, int byteCount, char* chars, int charCount)
+ {
+ // Validate Parameters
+ if (bytes == null || chars == null)
+ throw new ArgumentNullException(bytes == null ? "bytes" : "chars", SR.ArgumentNull_Array);
+
+ if (charCount < 0 || byteCount < 0)
+ throw new ArgumentOutOfRangeException((charCount < 0 ? "charCount" : "byteCount"), SR.ArgumentOutOfRange_NeedNonNegNum);
+ Contract.EndContractBlock();
+
+ return GetChars(bytes, byteCount, chars, charCount, null);
+ }
+
+ // Returns a string containing the decoded representation of a range of
+ // bytes in a byte array.
+ //
+ // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
+ // So if you fix this, fix the others. Currently those include:
+ // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
+ // parent method is safe
+
+ public override unsafe String GetString(byte[] bytes, int index, int count)
+ {
+ // Validate Parameters
+ if (bytes == null)
+ throw new ArgumentNullException("bytes", SR.ArgumentNull_Array);
+
+ if (index < 0 || count < 0)
+ throw new ArgumentOutOfRangeException((index < 0 ? "index" : "count"), SR.ArgumentOutOfRange_NeedNonNegNum);
+
+ if (bytes.Length - index < count)
+ throw new ArgumentOutOfRangeException("bytes", SR.ArgumentOutOfRange_IndexCountBuffer);
+ Contract.EndContractBlock();
+
+ // Avoid problems with empty input buffer
+ if (count == 0) return String.Empty;
+
+ fixed (byte* pBytes = bytes)
+ return String.CreateStringFromEncoding(
+ pBytes + index, count, this);
+ }
+
+ //
+ // End of standard methods copied from EncodingNLS.cs
+ //
+
+ // To simplify maintenance, the structure of GetByteCount and GetBytes should be
+ // kept the same as much as possible
+ internal override unsafe int GetByteCount(char* chars, int count, EncoderNLS baseEncoder)
+ {
+ // For fallback we may need a fallback buffer.
+ // We wait to initialize it though in case we don't have any broken input unicode
+ EncoderFallbackBuffer fallbackBuffer = null;
+ char* pSrcForFallback;
+
+ char* pSrc = chars;
+ char* pEnd = pSrc + count;
+
+ // Start by assuming we have as many as count
+ int byteCount = count;
+
+ int ch = 0;
+
+ if (baseEncoder != null)
+ {
+ UTF8Encoder encoder = (UTF8Encoder)baseEncoder;
+ ch = encoder.surrogateChar;
+
+ // We mustn't have left over fallback data when counting
+ if (encoder.InternalHasFallbackBuffer)
+ {
+ fallbackBuffer = encoder.FallbackBuffer;
+ if (fallbackBuffer.Remaining > 0)
+ throw new ArgumentException(SR.Format(SR.Argument_EncoderFallbackNotEmpty, this.EncodingName, encoder.Fallback.GetType()));
+
+ // Set our internal fallback interesting things.
+ fallbackBuffer.InternalInitialize(chars, pEnd, encoder, false);
+ }
+ }
+
+ for (;;)
+ {
+ // SLOWLOOP: does all range checks, handles all special cases, but it is slow
+ if (pSrc >= pEnd)
+ {
+ if (ch == 0)
+ {
+ // Unroll any fallback that happens at the end
+ ch = fallbackBuffer != null ? fallbackBuffer.InternalGetNextChar() : 0;
+ if (ch > 0)
+ {
+ byteCount++;
+ goto ProcessChar;
+ }
+ }
+ else
+ {
+ // Case of surrogates in the fallback.
+ if (fallbackBuffer != null && fallbackBuffer.bFallingBack)
+ {
+ Debug.Assert(ch >= 0xD800 && ch <= 0xDBFF,
+ "[UTF8Encoding.GetBytes]expected high surrogate, not 0x" + ((int)ch).ToString("X4", CultureInfo.InvariantCulture));
+
+ ch = fallbackBuffer.InternalGetNextChar();
+ byteCount++;
+
+ if (InRange(ch, CharUnicodeInfo.LOW_SURROGATE_START, CharUnicodeInfo.LOW_SURROGATE_END))
+ {
+ ch = 0xfffd;
+ byteCount++;
+ goto EncodeChar;
+ }
+ else if (ch > 0)
+ {
+ goto ProcessChar;
+ }
+ else
+ {
+ byteCount--; // ignore last one.
+ break;
+ }
+ }
+ }
+
+ if (ch <= 0)
+ {
+ break;
+ }
+ if (baseEncoder != null && !baseEncoder.MustFlush)
+ {
+ break;
+ }
+
+ // attempt to encode the partial surrogate (will fallback or ignore it), it'll also subtract 1.
+ byteCount++;
+ goto EncodeChar;
+ }
+
+ if (ch > 0)
+ {
+ Debug.Assert(ch >= 0xD800 && ch <= 0xDBFF,
+ "[UTF8Encoding.GetBytes]expected high surrogate, not 0x" + ((int)ch).ToString("X4", CultureInfo.InvariantCulture));
+
+ // use separate helper variables for local contexts so that the jit optimizations
+ // won't get confused about the variable lifetimes
+ int cha = *pSrc;
+
+ // count the pending surrogate
+ byteCount++;
+
+ // In previous byte, we encountered a high surrogate, so we are expecting a low surrogate here.
+ // if (IsLowSurrogate(cha)) {
+ if (InRange(cha, CharUnicodeInfo.LOW_SURROGATE_START, CharUnicodeInfo.LOW_SURROGATE_END))
+ {
+ // Don't need a real # because we're just counting, anything > 0x7ff ('cept surrogate) will do.
+ ch = 0xfffd;
+ // ch = cha + (ch << 10) +
+ // (0x10000
+ // - CharUnicodeInfo.LOW_SURROGATE_START
+ // - (CharUnicodeInfo.HIGH_SURROGATE_START << 10) );
+
+ // Use this next char
+ pSrc++;
+ }
+ // else ch is still high surrogate and encoding will fail (so don't add count)
+
+ // attempt to encode the surrogate or partial surrogate
+ goto EncodeChar;
+ }
+
+ // If we've used a fallback, then we have to check for it
+ if (fallbackBuffer != null)
+ {
+ ch = fallbackBuffer.InternalGetNextChar();
+ if (ch > 0)
+ {
+ // We have an extra byte we weren't expecting.
+ byteCount++;
+ goto ProcessChar;
+ }
+ }
+
+ // read next char. The JIT optimization seems to be getting confused when
+ // compiling "ch = *pSrc++;", so rather use "ch = *pSrc; pSrc++;" instead
+ ch = *pSrc;
+ pSrc++;
+
+ ProcessChar:
+ // if (IsHighSurrogate(ch)) {
+ if (InRange(ch, CharUnicodeInfo.HIGH_SURROGATE_START, CharUnicodeInfo.HIGH_SURROGATE_END))
+ {
+ // we will count this surrogate next time around
+ byteCount--;
+ continue;
+ }
+ // either good char or partial surrogate
+
+ EncodeChar:
+ // throw exception on partial surrogate if necessary
+ // if (IsLowSurrogate(ch) || IsHighSurrogate(ch))
+ if (InRange(ch, CharUnicodeInfo.HIGH_SURROGATE_START, CharUnicodeInfo.LOW_SURROGATE_END))
+ {
+ // Lone surrogates aren't allowed
+ // Have to make a fallback buffer if we don't have one
+ if (fallbackBuffer == null)
+ {
+ // wait on fallbacks if we can
+ // For fallback we may need a fallback buffer
+ if (baseEncoder == null)
+ fallbackBuffer = this.encoderFallback.CreateFallbackBuffer();
+ else
+ fallbackBuffer = baseEncoder.FallbackBuffer;
+
+ // Set our internal fallback interesting things.
+ fallbackBuffer.InternalInitialize(chars, chars + count, baseEncoder, false);
+ }
+
+ // Do our fallback. Actually we already know its a mixed up surrogate,
+ // so the ref pSrc isn't gonna do anything.
+ pSrcForFallback = pSrc; // Avoid passing pSrc by reference to allow it to be enregistered
+ fallbackBuffer.InternalFallback(unchecked((char)ch), ref pSrcForFallback);
+ pSrc = pSrcForFallback;
+
+ // Ignore it if we don't throw (we had preallocated this ch)
+ byteCount--;
+ ch = 0;
+ continue;
+ }
+
+ // Count them
+ if (ch > 0x7F)
+ {
+ if (ch > 0x7FF)
+ {
+ // the extra surrogate byte was compensated by the second surrogate character
+ // (2 surrogates make 4 bytes. We've already counted 2 bytes, 1 per char)
+ byteCount++;
+ }
+ byteCount++;
+ }
+
+#if BIT64
+ // check for overflow
+ if (byteCount < 0)
+ {
+ break;
+ }
+#endif
+
+#if FASTLOOP
+ // If still have fallback don't do fast loop
+ if (fallbackBuffer != null && (ch = fallbackBuffer.InternalGetNextChar()) != 0)
+ {
+ // We're reserving 1 byte for each char by default
+ byteCount++;
+ goto ProcessChar;
+ }
+
+ int availableChars = PtrDiff(pEnd, pSrc);
+
+ // don't fall into the fast decoding loop if we don't have enough characters
+ if (availableChars <= 13)
+ {
+ // try to get over the remainder of the ascii characters fast though
+ char* pLocalEnd = pEnd; // hint to get pLocalEnd enregistered
+ while (pSrc < pLocalEnd)
+ {
+ ch = *pSrc;
+ pSrc++;
+ if (ch > 0x7F)
+ goto ProcessChar;
+ }
+
+ // we are done
+ break;
+ }
+
+#if BIT64
+ // make sure that we won't get a silent overflow inside the fast loop
+ // (Fall out to slow loop if we have this many characters)
+ availableChars &= 0x0FFFFFFF;
+#endif
+
+ // To compute the upper bound, assume that all characters are ASCII characters at this point,
+ // the boundary will be decreased for every non-ASCII character we encounter
+ // Also, we need 3 + 4 chars reserve for the unrolled ansi decoding loop and for decoding of surrogates
+ char* pStop = pSrc + availableChars - (3 + 4);
+
+ while (pSrc < pStop)
+ {
+ ch = *pSrc;
+ pSrc++;
+
+ if (ch > 0x7F) // Not ASCII
+ {
+ if (ch > 0x7FF) // Not 2 Byte
+ {
+ if ((ch & 0xF800) == 0xD800) // See if its a Surrogate
+ goto LongCode;
+ byteCount++;
+ }
+ byteCount++;
+ }
+
+ // get pSrc aligned
+ if ((unchecked((int)pSrc) & 0x2) != 0)
+ {
+ ch = *pSrc;
+ pSrc++;
+ if (ch > 0x7F) // Not ASCII
+ {
+ if (ch > 0x7FF) // Not 2 Byte
+ {
+ if ((ch & 0xF800) == 0xD800) // See if its a Surrogate
+ goto LongCode;
+ byteCount++;
+ }
+ byteCount++;
+ }
+ }
+
+ // Run 2 * 4 characters at a time!
+ while (pSrc < pStop)
+ {
+ ch = *(int*)pSrc;
+ int chc = *(int*)(pSrc + 2);
+ if (((ch | chc) & unchecked((int)0xFF80FF80)) != 0) // See if not ASCII
+ {
+ if (((ch | chc) & unchecked((int)0xF800F800)) != 0) // See if not 2 Byte
+ {
+ goto LongCodeWithMask;
+ }
+
+
+ if ((ch & unchecked((int)0xFF800000)) != 0) // Actually 0x07800780 is all we care about (4 bits)
+ byteCount++;
+ if ((ch & unchecked((int)0xFF80)) != 0)
+ byteCount++;
+ if ((chc & unchecked((int)0xFF800000)) != 0)
+ byteCount++;
+ if ((chc & unchecked((int)0xFF80)) != 0)
+ byteCount++;
+ }
+ pSrc += 4;
+
+ ch = *(int*)pSrc;
+ chc = *(int*)(pSrc + 2);
+ if (((ch | chc) & unchecked((int)0xFF80FF80)) != 0) // See if not ASCII
+ {
+ if (((ch | chc) & unchecked((int)0xF800F800)) != 0) // See if not 2 Byte
+ {
+ goto LongCodeWithMask;
+ }
+
+ if ((ch & unchecked((int)0xFF800000)) != 0)
+ byteCount++;
+ if ((ch & unchecked((int)0xFF80)) != 0)
+ byteCount++;
+ if ((chc & unchecked((int)0xFF800000)) != 0)
+ byteCount++;
+ if ((chc & unchecked((int)0xFF80)) != 0)
+ byteCount++;
+ }
+ pSrc += 4;
+ }
+ break;
+
+ LongCodeWithMask:
+#if BIGENDIAN
+ // be careful about the sign extension
+ ch = (int)(((uint)ch) >> 16);
+#else // BIGENDIAN
+ ch = (char)ch;
+#endif // BIGENDIAN
+ pSrc++;
+
+ if (ch <= 0x7F)
+ {
+ continue;
+ }
+
+ LongCode:
+ // use separate helper variables for slow and fast loop so that the jit optimizations
+ // won't get confused about the variable lifetimes
+ if (ch > 0x7FF)
+ {
+ // if (IsLowSurrogate(ch) || IsHighSurrogate(ch))
+ if (InRange(ch, CharUnicodeInfo.HIGH_SURROGATE_START, CharUnicodeInfo.LOW_SURROGATE_END))
+ {
+ // 4 byte encoding - high surrogate + low surrogate
+
+ int chd = *pSrc;
+ if (
+ // !IsHighSurrogate(ch) // low without high -> bad
+ ch > CharUnicodeInfo.HIGH_SURROGATE_END ||
+ // !IsLowSurrogate(chd) // high not followed by low -> bad
+ !InRange(chd, CharUnicodeInfo.LOW_SURROGATE_START, CharUnicodeInfo.LOW_SURROGATE_END))
+ {
+ // Back up and drop out to slow loop to figure out error
+ pSrc--;
+ break;
+ }
+ pSrc++;
+
+ // byteCount - this byte is compensated by the second surrogate character
+ }
+ byteCount++;
+ }
+ byteCount++;
+
+ // byteCount - the last byte is already included
+ }
+#endif // FASTLOOP
+
+ // no pending char at this point
+ ch = 0;
+ }
+
+#if BIT64
+ // check for overflow
+ if (byteCount < 0)
+ {
+ throw new ArgumentException(
+ SR.Argument_ConversionOverflow);
+ }
+#endif
+
+ Debug.Assert(fallbackBuffer == null || fallbackBuffer.Remaining == 0,
+ "[UTF8Encoding.GetByteCount]Expected Empty fallback buffer");
+
+ return byteCount;
+ }
+
+ // diffs two char pointers using unsigned arithmetic. The unsigned arithmetic
+ // is good enough for us, and it tends to generate better code than the signed
+ // arithmetic generated by default
+ unsafe private static int PtrDiff(char* a, char* b)
+ {
+ return (int)(((uint)((byte*)a - (byte*)b)) >> 1);
+ }
+
+ // byte* flavor just for parity
+ unsafe private static int PtrDiff(byte* a, byte* b)
+ {
+ return (int)(a - b);
+ }
+
+ private static bool InRange(int ch, int start, int end)
+ {
+ return (uint)(ch - start) <= (uint)(end - start);
+ }
+
+ // Our workhorse
+ // Note: We ignore mismatched surrogates, unless the exception flag is set in which case we throw
+ internal override unsafe int GetBytes(char* chars, int charCount,
+ byte* bytes, int byteCount, EncoderNLS baseEncoder)
+ {
+ Debug.Assert(chars != null, "[UTF8Encoding.GetBytes]chars!=null");
+ Debug.Assert(byteCount >= 0, "[UTF8Encoding.GetBytes]byteCount >=0");
+ Debug.Assert(charCount >= 0, "[UTF8Encoding.GetBytes]charCount >=0");
+ Debug.Assert(bytes != null, "[UTF8Encoding.GetBytes]bytes!=null");
+
+ UTF8Encoder encoder = null;
+
+ // For fallback we may need a fallback buffer.
+ // We wait to initialize it though in case we don't have any broken input unicode
+ EncoderFallbackBuffer fallbackBuffer = null;
+ char* pSrcForFallback;
+
+ char* pSrc = chars;
+ byte* pTarget = bytes;
+
+ char* pEnd = pSrc + charCount;
+ byte* pAllocatedBufferEnd = pTarget + byteCount;
+
+ int ch = 0;
+
+ // assume that JIT will enregister pSrc, pTarget and ch
+
+ if (baseEncoder != null)
+ {
+ encoder = (UTF8Encoder)baseEncoder;
+ ch = encoder.surrogateChar;
+
+ // We mustn't have left over fallback data when counting
+ if (encoder.InternalHasFallbackBuffer)
+ {
+ // We always need the fallback buffer in get bytes so we can flush any remaining ones if necessary
+ fallbackBuffer = encoder.FallbackBuffer;
+ if (fallbackBuffer.Remaining > 0 && encoder.m_throwOnOverflow)
+ throw new ArgumentException(SR.Format(SR.Argument_EncoderFallbackNotEmpty, this.EncodingName, encoder.Fallback.GetType()));
+
+ // Set our internal fallback interesting things.
+ fallbackBuffer.InternalInitialize(chars, pEnd, encoder, true);
+ }
+ }
+
+ for (;;)
+ {
+ // SLOWLOOP: does all range checks, handles all special cases, but it is slow
+
+ if (pSrc >= pEnd)
+ {
+ if (ch == 0)
+ {
+ // Check if there's anthing left to get out of the fallback buffer
+ ch = fallbackBuffer != null ? fallbackBuffer.InternalGetNextChar() : 0;
+ if (ch > 0)
+ {
+ goto ProcessChar;
+ }
+ }
+ else
+ {
+ // Case of leftover surrogates in the fallback buffer
+ if (fallbackBuffer != null && fallbackBuffer.bFallingBack)
+ {
+ Debug.Assert(ch >= 0xD800 && ch <= 0xDBFF,
+ "[UTF8Encoding.GetBytes]expected high surrogate, not 0x" + ((int)ch).ToString("X4", CultureInfo.InvariantCulture));
+
+ int cha = ch;
+
+ ch = fallbackBuffer.InternalGetNextChar();
+
+ if (InRange(ch, CharUnicodeInfo.LOW_SURROGATE_START, CharUnicodeInfo.LOW_SURROGATE_END))
+ {
+ ch = ch + (cha << 10) + (0x10000 - CharUnicodeInfo.LOW_SURROGATE_START - (CharUnicodeInfo.HIGH_SURROGATE_START << 10));
+ goto EncodeChar;
+ }
+ else if (ch > 0)
+ {
+ goto ProcessChar;
+ }
+ else
+ {
+ break;
+ }
+ }
+ }
+
+ // attempt to encode the partial surrogate (will fail or ignore)
+ if (ch > 0 && (encoder == null || encoder.MustFlush))
+ goto EncodeChar;
+
+ // We're done
+ break;
+ }
+
+ if (ch > 0)
+ {
+ // We have a high surrogate left over from a previous loop.
+ Debug.Assert(ch >= 0xD800 && ch <= 0xDBFF,
+ "[UTF8Encoding.GetBytes]expected high surrogate, not 0x" + ((int)ch).ToString("X4", CultureInfo.InvariantCulture));
+
+ // use separate helper variables for local contexts so that the jit optimizations
+ // won't get confused about the variable lifetimes
+ int cha = *pSrc;
+
+ // In previous byte, we encountered a high surrogate, so we are expecting a low surrogate here.
+ // if (IsLowSurrogate(cha)) {
+ if (InRange(cha, CharUnicodeInfo.LOW_SURROGATE_START, CharUnicodeInfo.LOW_SURROGATE_END))
+ {
+ ch = cha + (ch << 10) +
+ (0x10000
+ - CharUnicodeInfo.LOW_SURROGATE_START
+ - (CharUnicodeInfo.HIGH_SURROGATE_START << 10));
+
+ pSrc++;
+ }
+ // else ch is still high surrogate and encoding will fail
+
+ // attempt to encode the surrogate or partial surrogate
+ goto EncodeChar;
+ }
+
+ // If we've used a fallback, then we have to check for it
+ if (fallbackBuffer != null)
+ {
+ ch = fallbackBuffer.InternalGetNextChar();
+ if (ch > 0) goto ProcessChar;
+ }
+
+ // read next char. The JIT optimization seems to be getting confused when
+ // compiling "ch = *pSrc++;", so rather use "ch = *pSrc; pSrc++;" instead
+ ch = *pSrc;
+ pSrc++;
+
+ ProcessChar:
+ // if (IsHighSurrogate(ch)) {
+ if (InRange(ch, CharUnicodeInfo.HIGH_SURROGATE_START, CharUnicodeInfo.HIGH_SURROGATE_END))
+ {
+ continue;
+ }
+ // either good char or partial surrogate
+
+ EncodeChar:
+ // throw exception on partial surrogate if necessary
+ // if (IsLowSurrogate(ch) || IsHighSurrogate(ch))
+ if (InRange(ch, CharUnicodeInfo.HIGH_SURROGATE_START, CharUnicodeInfo.LOW_SURROGATE_END))
+ {
+ // Lone surrogates aren't allowed, we have to do fallback for them
+ // Have to make a fallback buffer if we don't have one
+ if (fallbackBuffer == null)
+ {
+ // wait on fallbacks if we can
+ // For fallback we may need a fallback buffer
+ if (baseEncoder == null)
+ fallbackBuffer = this.encoderFallback.CreateFallbackBuffer();
+ else
+ fallbackBuffer = baseEncoder.FallbackBuffer;
+
+ // Set our internal fallback interesting things.
+ fallbackBuffer.InternalInitialize(chars, pEnd, baseEncoder, true);
+ }
+
+ // Do our fallback. Actually we already know its a mixed up surrogate,
+ // so the ref pSrc isn't gonna do anything.
+ pSrcForFallback = pSrc; // Avoid passing pSrc by reference to allow it to be enregistered
+ fallbackBuffer.InternalFallback(unchecked((char)ch), ref pSrcForFallback);
+ pSrc = pSrcForFallback;
+
+ // Ignore it if we don't throw
+ ch = 0;
+ continue;
+ }
+
+ // Count bytes needed
+ int bytesNeeded = 1;
+ if (ch > 0x7F)
+ {
+ if (ch > 0x7FF)
+ {
+ if (ch > 0xFFFF)
+ {
+ bytesNeeded++; // 4 bytes (surrogate pair)
+ }
+ bytesNeeded++; // 3 bytes (800-FFFF)
+ }
+ bytesNeeded++; // 2 bytes (80-7FF)
+ }
+
+ if (pTarget > pAllocatedBufferEnd - bytesNeeded)
+ {
+ // Left over surrogate from last time will cause pSrc == chars, so we'll throw
+ if (fallbackBuffer != null && fallbackBuffer.bFallingBack)
+ {
+ fallbackBuffer.MovePrevious(); // Didn't use this fallback char
+ if (ch > 0xFFFF)
+ fallbackBuffer.MovePrevious(); // Was surrogate, didn't use 2nd part either
+ }
+ else
+ {
+ pSrc--; // Didn't use this char
+ if (ch > 0xFFFF)
+ pSrc--; // Was surrogate, didn't use 2nd part either
+ }
+ Debug.Assert(pSrc >= chars || pTarget == bytes,
+ "[UTF8Encoding.GetBytes]Expected pSrc to be within buffer or to throw with insufficient room.");
+ ThrowBytesOverflow(encoder, pTarget == bytes); // Throw if we must
+ ch = 0; // Nothing left over (we backed up to start of pair if supplimentary)
+ break;
+ }
+
+ if (ch <= 0x7F)
+ {
+ *pTarget = (byte)ch;
+ }
+ else
+ {
+ // use separate helper variables for local contexts so that the jit optimizations
+ // won't get confused about the variable lifetimes
+ int chb;
+ if (ch <= 0x7FF)
+ {
+ // 2 byte encoding
+ chb = (byte)(unchecked((sbyte)0xC0) | (ch >> 6));
+ }
+ else
+ {
+ if (ch <= 0xFFFF)
+ {
+ chb = (byte)(unchecked((sbyte)0xE0) | (ch >> 12));
+ }
+ else
+ {
+ *pTarget = (byte)(unchecked((sbyte)0xF0) | (ch >> 18));
+ pTarget++;
+
+ chb = unchecked((sbyte)0x80) | (ch >> 12) & 0x3F;
+ }
+ *pTarget = (byte)chb;
+ pTarget++;
+
+ chb = unchecked((sbyte)0x80) | (ch >> 6) & 0x3F;
+ }
+ *pTarget = (byte)chb;
+ pTarget++;
+
+ *pTarget = (byte)(unchecked((sbyte)0x80) | ch & 0x3F);
+ }
+ pTarget++;
+
+
+#if FASTLOOP
+ // If still have fallback don't do fast loop
+ if (fallbackBuffer != null && (ch = fallbackBuffer.InternalGetNextChar()) != 0)
+ goto ProcessChar;
+
+ int availableChars = PtrDiff(pEnd, pSrc);
+ int availableBytes = PtrDiff(pAllocatedBufferEnd, pTarget);
+
+ // don't fall into the fast decoding loop if we don't have enough characters
+ // Note that if we don't have enough bytes, pStop will prevent us from entering the fast loop.
+ if (availableChars <= 13)
+ {
+ // we are hoping for 1 byte per char
+ if (availableBytes < availableChars)
+ {
+ // not enough output room. no pending bits at this point
+ ch = 0;
+ continue;
+ }
+
+ // try to get over the remainder of the ascii characters fast though
+ char* pLocalEnd = pEnd; // hint to get pLocalEnd enregistered
+ while (pSrc < pLocalEnd)
+ {
+ ch = *pSrc;
+ pSrc++;
+
+ // Not ASCII, need more than 1 byte per char
+ if (ch > 0x7F)
+ goto ProcessChar;
+
+ *pTarget = (byte)ch;
+ pTarget++;
+ }
+ // we are done, let ch be 0 to clear encoder
+ ch = 0;
+ break;
+ }
+
+ // we need at least 1 byte per character, but Convert might allow us to convert
+ // only part of the input, so try as much as we can. Reduce charCount if necessary
+ if (availableBytes < availableChars)
+ {
+ availableChars = availableBytes;
+ }
+
+ // FASTLOOP:
+ // - optimistic range checks
+ // - fallbacks to the slow loop for all special cases, exception throwing, etc.
+
+ // To compute the upper bound, assume that all characters are ASCII characters at this point,
+ // the boundary will be decreased for every non-ASCII character we encounter
+ // Also, we need 5 chars reserve for the unrolled ansi decoding loop and for decoding of surrogates
+ // If there aren't enough bytes for the output, then pStop will be <= pSrc and will bypass the loop.
+ char* pStop = pSrc + availableChars - 5;
+
+ while (pSrc < pStop)
+ {
+ ch = *pSrc;
+ pSrc++;
+
+ if (ch > 0x7F)
+ {
+ goto LongCode;
+ }
+ *pTarget = (byte)ch;
+ pTarget++;
+
+ // get pSrc aligned
+ if ((unchecked((int)pSrc) & 0x2) != 0)
+ {
+ ch = *pSrc;
+ pSrc++;
+ if (ch > 0x7F)
+ {
+ goto LongCode;
+ }
+ *pTarget = (byte)ch;
+ pTarget++;
+ }
+
+ // Run 4 characters at a time!
+ while (pSrc < pStop)
+ {
+ ch = *(int*)pSrc;
+ int chc = *(int*)(pSrc + 2);
+ if (((ch | chc) & unchecked((int)0xFF80FF80)) != 0)
+ {
+ goto LongCodeWithMask;
+ }
+
+ // Unfortunately, this is endianess sensitive
+#if BIGENDIAN
+ *pTarget = (byte)(ch>>16);
+ *(pTarget+1) = (byte)ch;
+ pSrc += 4;
+ *(pTarget+2) = (byte)(chc>>16);
+ *(pTarget+3) = (byte)chc;
+ pTarget += 4;
+#else // BIGENDIAN
+ *pTarget = (byte)ch;
+ *(pTarget + 1) = (byte)(ch >> 16);
+ pSrc += 4;
+ *(pTarget + 2) = (byte)chc;
+ *(pTarget + 3) = (byte)(chc >> 16);
+ pTarget += 4;
+#endif // BIGENDIAN
+ }
+ continue;
+
+ LongCodeWithMask:
+#if BIGENDIAN
+ // be careful about the sign extension
+ ch = (int)(((uint)ch) >> 16);
+#else // BIGENDIAN
+ ch = (char)ch;
+#endif // BIGENDIAN
+ pSrc++;
+
+ if (ch > 0x7F)
+ {
+ goto LongCode;
+ }
+ *pTarget = (byte)ch;
+ pTarget++;
+ continue;
+
+ LongCode:
+ // use separate helper variables for slow and fast loop so that the jit optimizations
+ // won't get confused about the variable lifetimes
+ int chd;
+ if (ch <= 0x7FF)
+ {
+ // 2 byte encoding
+ chd = unchecked((sbyte)0xC0) | (ch >> 6);
+ }
+ else
+ {
+ // if (!IsLowSurrogate(ch) && !IsHighSurrogate(ch))
+ if (!InRange(ch, CharUnicodeInfo.HIGH_SURROGATE_START, CharUnicodeInfo.LOW_SURROGATE_END))
+ {
+ // 3 byte encoding
+ chd = unchecked((sbyte)0xE0) | (ch >> 12);
+ }
+ else
+ {
+ // 4 byte encoding - high surrogate + low surrogate
+ // if (!IsHighSurrogate(ch))
+ if (ch > CharUnicodeInfo.HIGH_SURROGATE_END)
+ {
+ // low without high -> bad, try again in slow loop
+ pSrc -= 1;
+ break;
+ }
+
+ chd = *pSrc;
+ pSrc++;
+
+ // if (!IsLowSurrogate(chd)) {
+ if (!InRange(chd, CharUnicodeInfo.LOW_SURROGATE_START, CharUnicodeInfo.LOW_SURROGATE_END))
+ {
+ // high not followed by low -> bad, try again in slow loop
+ pSrc -= 2;
+ break;
+ }
+
+ ch = chd + (ch << 10) +
+ (0x10000
+ - CharUnicodeInfo.LOW_SURROGATE_START
+ - (CharUnicodeInfo.HIGH_SURROGATE_START << 10));
+
+ *pTarget = (byte)(unchecked((sbyte)0xF0) | (ch >> 18));
+ // pStop - this byte is compensated by the second surrogate character
+ // 2 input chars require 4 output bytes. 2 have been anticipated already
+ // and 2 more will be accounted for by the 2 pStop-- calls below.
+ pTarget++;
+
+ chd = unchecked((sbyte)0x80) | (ch >> 12) & 0x3F;
+ }
+ *pTarget = (byte)chd;
+ pStop--; // 3 byte sequence for 1 char, so need pStop-- and the one below too.
+ pTarget++;
+
+ chd = unchecked((sbyte)0x80) | (ch >> 6) & 0x3F;
+ }
+ *pTarget = (byte)chd;
+ pStop--; // 2 byte sequence for 1 char so need pStop--.
+ pTarget++;
+
+ *pTarget = (byte)(unchecked((sbyte)0x80) | ch & 0x3F);
+ // pStop - this byte is already included
+ pTarget++;
+ }
+
+ Debug.Assert(pTarget <= pAllocatedBufferEnd, "[UTF8Encoding.GetBytes]pTarget <= pAllocatedBufferEnd");
+
+#endif // FASTLOOP
+
+ // no pending char at this point
+ ch = 0;
+ }
+
+ // Do we have to set the encoder bytes?
+ if (encoder != null)
+ {
+ Debug.Assert(!encoder.MustFlush || ch == 0,
+ "[UTF8Encoding.GetBytes] Expected no mustflush or 0 leftover ch " + ch.ToString("X2", CultureInfo.InvariantCulture));
+
+ encoder.surrogateChar = ch;
+ encoder.m_charsUsed = (int)(pSrc - chars);
+ }
+
+ Debug.Assert(fallbackBuffer == null || fallbackBuffer.Remaining == 0 ||
+ baseEncoder == null || !baseEncoder.m_throwOnOverflow,
+ "[UTF8Encoding.GetBytes]Expected empty fallback buffer if not converting");
+
+ return (int)(pTarget - bytes);
+ }
+
+
+ // These are bitmasks used to maintain the state in the decoder. They occupy the higher bits
+ // while the actual character is being built in the lower bits. They are shifted together
+ // with the actual bits of the character.
+
+ // bits 30 & 31 are used for pending bits fixup
+ private const int FinalByte = 1 << 29;
+ private const int SupplimentarySeq = 1 << 28;
+ private const int ThreeByteSeq = 1 << 27;
+
+ // Note: We throw exceptions on individually encoded surrogates and other non-shortest forms.
+ // If exceptions aren't turned on, then we drop all non-shortest &individual surrogates.
+ //
+ // To simplify maintenance, the structure of GetCharCount and GetChars should be
+ // kept the same as much as possible
+ internal override unsafe int GetCharCount(byte* bytes, int count, DecoderNLS baseDecoder)
+ {
+ Debug.Assert(count >= 0, "[UTF8Encoding.GetCharCount]count >=0");
+ Debug.Assert(bytes != null, "[UTF8Encoding.GetCharCount]bytes!=null");
+
+ // Initialize stuff
+ byte* pSrc = bytes;
+ byte* pEnd = pSrc + count;
+
+ // Start by assuming we have as many as count, charCount always includes the adjustment
+ // for the character being decoded
+ int charCount = count;
+ int ch = 0;
+ DecoderFallbackBuffer fallback = null;
+
+ if (baseDecoder != null)
+ {
+ UTF8Decoder decoder = (UTF8Decoder)baseDecoder;
+ ch = decoder.bits;
+ charCount -= (ch >> 30); // Adjust char count for # of expected bytes and expected output chars.
+
+ // Shouldn't have anything in fallback buffer for GetCharCount
+ // (don't have to check m_throwOnOverflow for count)
+ Debug.Assert(!decoder.InternalHasFallbackBuffer || decoder.FallbackBuffer.Remaining == 0,
+ "[UTF8Encoding.GetCharCount]Expected empty fallback buffer at start");
+ }
+
+ for (;;)
+ {
+ // SLOWLOOP: does all range checks, handles all special cases, but it is slow
+
+ if (pSrc >= pEnd)
+ {
+ break;
+ }
+
+ if (ch == 0)
+ {
+ // no pending bits
+ goto ReadChar;
+ }
+
+ // read next byte. The JIT optimization seems to be getting confused when
+ // compiling "ch = *pSrc++;", so rather use "ch = *pSrc; pSrc++;" instead
+ int cha = *pSrc;
+ pSrc++;
+
+ // we are expecting to see trailing bytes like 10vvvvvv
+ if ((cha & unchecked((sbyte)0xC0)) != 0x80)
+ {
+ // This can be a valid starting byte for another UTF8 byte sequence, so let's put
+ // the current byte back, and try to see if this is a valid byte for another UTF8 byte sequence
+ pSrc--;
+ charCount += (ch >> 30);
+ goto InvalidByteSequence;
+ }
+
+ // fold in the new byte
+ ch = (ch << 6) | (cha & 0x3F);
+
+ if ((ch & FinalByte) == 0)
+ {
+ Debug.Assert((ch & (SupplimentarySeq | ThreeByteSeq)) != 0,
+ "[UTF8Encoding.GetChars]Invariant volation");
+
+ if ((ch & SupplimentarySeq) != 0)
+ {
+ if ((ch & (FinalByte >> 6)) != 0)
+ {
+ // this is 3rd byte (of 4 byte supplimentary) - nothing to do
+ continue;
+ }
+
+ // 2nd byte, check for non-shortest form of supplimentary char and the valid
+ // supplimentary characters in range 0x010000 - 0x10FFFF at the same time
+ if (!InRange(ch & 0x1F0, 0x10, 0x100))
+ {
+ goto InvalidByteSequence;
+ }
+ }
+ else
+ {
+ // Must be 2nd byte of a 3-byte sequence
+ // check for non-shortest form of 3 byte seq
+ if ((ch & (0x1F << 5)) == 0 || // non-shortest form
+ (ch & (0xF800 >> 6)) == (0xD800 >> 6)) // illegal individually encoded surrogate
+ {
+ goto InvalidByteSequence;
+ }
+ }
+ continue;
+ }
+
+ // ready to punch
+
+ // adjust for surrogates in non-shortest form
+ if ((ch & (SupplimentarySeq | 0x1F0000)) == SupplimentarySeq)
+ {
+ charCount--;
+ }
+ goto EncodeChar;
+
+ InvalidByteSequence:
+ // this code fragment should be close to the gotos referencing it
+ // Have to do fallback for invalid bytes
+ if (fallback == null)
+ {
+ if (baseDecoder == null)
+ fallback = this.decoderFallback.CreateFallbackBuffer();
+ else
+ fallback = baseDecoder.FallbackBuffer;
+ fallback.InternalInitialize(bytes, null);
+ }
+ charCount += FallbackInvalidByteSequence(pSrc, ch, fallback);
+
+ ch = 0;
+ continue;
+
+ ReadChar:
+ ch = *pSrc;
+ pSrc++;
+
+ ProcessChar:
+ if (ch > 0x7F)
+ {
+ // If its > 0x7F, its start of a new multi-byte sequence
+
+ // Long sequence, so unreserve our char.
+ charCount--;
+
+ // bit 6 has to be non-zero for start of multibyte chars.
+ if ((ch & 0x40) == 0)
+ {
+ // Unexpected trail byte
+ goto InvalidByteSequence;
+ }
+
+ // start a new long code
+ if ((ch & 0x20) != 0)
+ {
+ if ((ch & 0x10) != 0)
+ {
+ // 4 byte encoding - supplimentary character (2 surrogates)
+
+ ch &= 0x0F;
+
+ // check that bit 4 is zero and the valid supplimentary character
+ // range 0x000000 - 0x10FFFF at the same time
+ if (ch > 0x04)
+ {
+ ch |= 0xf0;
+ goto InvalidByteSequence;
+ }
+
+ // Add bit flags so that when we check new characters & rotate we'll be flagged correctly.
+ // Final byte flag, count fix if we don't make final byte & supplimentary sequence flag.
+ ch |= (FinalByte >> 3 * 6) | // Final byte is 3 more bytes from now
+ (1 << 30) | // If it dies on next byte we'll need an extra char
+ (3 << (30 - 2 * 6)) | // If it dies on last byte we'll need to subtract a char
+ (SupplimentarySeq) | (SupplimentarySeq >> 6) |
+ (SupplimentarySeq >> 2 * 6) | (SupplimentarySeq >> 3 * 6);
+
+ // Our character count will be 2 characters for these 4 bytes, so subtract another char
+ charCount--;
+ }
+ else
+ {
+ // 3 byte encoding
+ // Add bit flags so that when we check new characters & rotate we'll be flagged correctly.
+ ch = (ch & 0x0F) | ((FinalByte >> 2 * 6) | (1 << 30) |
+ (ThreeByteSeq) | (ThreeByteSeq >> 6) | (ThreeByteSeq >> 2 * 6));
+
+ // We'll expect 1 character for these 3 bytes, so subtract another char.
+ charCount--;
+ }
+ }
+ else
+ {
+ // 2 byte encoding
+
+ ch &= 0x1F;
+
+ // check for non-shortest form
+ if (ch <= 1)
+ {
+ ch |= 0xc0;
+ goto InvalidByteSequence;
+ }
+
+ // Add bit flags so we'll be flagged correctly
+ ch |= (FinalByte >> 6);
+ }
+ continue;
+ }
+
+ EncodeChar:
+
+#if FASTLOOP
+ int availableBytes = PtrDiff(pEnd, pSrc);
+
+ // don't fall into the fast decoding loop if we don't have enough bytes
+ if (availableBytes <= 13)
+ {
+ // try to get over the remainder of the ascii characters fast though
+ byte* pLocalEnd = pEnd; // hint to get pLocalEnd enregistered
+ while (pSrc < pLocalEnd)
+ {
+ ch = *pSrc;
+ pSrc++;
+
+ if (ch > 0x7F)
+ goto ProcessChar;
+ }
+ // we are done
+ ch = 0;
+ break;
+ }
+
+ // To compute the upper bound, assume that all characters are ASCII characters at this point,
+ // the boundary will be decreased for every non-ASCII character we encounter
+ // Also, we need 7 chars reserve for the unrolled ansi decoding loop and for decoding of multibyte sequences
+ byte* pStop = pSrc + availableBytes - 7;
+
+ while (pSrc < pStop)
+ {
+ ch = *pSrc;
+ pSrc++;
+
+ if (ch > 0x7F)
+ {
+ goto LongCode;
+ }
+
+ // get pSrc 2-byte aligned
+ if ((unchecked((int)pSrc) & 0x1) != 0)
+ {
+ ch = *pSrc;
+ pSrc++;
+ if (ch > 0x7F)
+ {
+ goto LongCode;
+ }
+ }
+
+ // get pSrc 4-byte aligned
+ if ((unchecked((int)pSrc) & 0x2) != 0)
+ {
+ ch = *(ushort*)pSrc;
+ if ((ch & 0x8080) != 0)
+ {
+ goto LongCodeWithMask16;
+ }
+ pSrc += 2;
+ }
+
+ // Run 8 + 8 characters at a time!
+ while (pSrc < pStop)
+ {
+ ch = *(int*)pSrc;
+ int chb = *(int*)(pSrc + 4);
+ if (((ch | chb) & unchecked((int)0x80808080)) != 0)
+ {
+ goto LongCodeWithMask32;
+ }
+ pSrc += 8;
+
+ // This is a really small loop - unroll it
+ if (pSrc >= pStop)
+ break;
+
+ ch = *(int*)pSrc;
+ chb = *(int*)(pSrc + 4);
+ if (((ch | chb) & unchecked((int)0x80808080)) != 0)
+ {
+ goto LongCodeWithMask32;
+ }
+ pSrc += 8;
+ }
+ break;
+
+#if BIGENDIAN
+ LongCodeWithMask32:
+ // be careful about the sign extension
+ ch = (int)(((uint)ch) >> 16);
+ LongCodeWithMask16:
+ ch = (int)(((uint)ch) >> 8);
+#else // BIGENDIAN
+ LongCodeWithMask32:
+ LongCodeWithMask16:
+ ch &= 0xFF;
+#endif // BIGENDIAN
+ pSrc++;
+ if (ch <= 0x7F)
+ {
+ continue;
+ }
+
+ LongCode:
+ int chc = *pSrc;
+ pSrc++;
+
+ if (
+ // bit 6 has to be zero
+ (ch & 0x40) == 0 ||
+ // we are expecting to see trailing bytes like 10vvvvvv
+ (chc & unchecked((sbyte)0xC0)) != 0x80)
+ {
+ goto BadLongCode;
+ }
+
+ chc &= 0x3F;
+
+ // start a new long code
+ if ((ch & 0x20) != 0)
+ {
+ // fold the first two bytes together
+ chc |= (ch & 0x0F) << 6;
+
+ if ((ch & 0x10) != 0)
+ {
+ // 4 byte encoding - surrogate
+ ch = *pSrc;
+ if (
+ // check that bit 4 is zero, the non-shortest form of surrogate
+ // and the valid surrogate range 0x000000 - 0x10FFFF at the same time
+ !InRange(chc >> 4, 0x01, 0x10) ||
+ // we are expecting to see trailing bytes like 10vvvvvv
+ (ch & unchecked((sbyte)0xC0)) != 0x80)
+ {
+ goto BadLongCode;
+ }
+
+ chc = (chc << 6) | (ch & 0x3F);
+
+ ch = *(pSrc + 1);
+ // we are expecting to see trailing bytes like 10vvvvvv
+ if ((ch & unchecked((sbyte)0xC0)) != 0x80)
+ {
+ goto BadLongCode;
+ }
+ pSrc += 2;
+
+ // extra byte
+ charCount--;
+ }
+ else
+ {
+ // 3 byte encoding
+ ch = *pSrc;
+ if (
+ // check for non-shortest form of 3 byte seq
+ (chc & (0x1F << 5)) == 0 ||
+ // Can't have surrogates here.
+ (chc & (0xF800 >> 6)) == (0xD800 >> 6) ||
+ // we are expecting to see trailing bytes like 10vvvvvv
+ (ch & unchecked((sbyte)0xC0)) != 0x80)
+ {
+ goto BadLongCode;
+ }
+ pSrc++;
+
+ // extra byte
+ charCount--;
+ }
+ }
+ else
+ {
+ // 2 byte encoding
+
+ // check for non-shortest form
+ if ((ch & 0x1E) == 0)
+ {
+ goto BadLongCode;
+ }
+ }
+
+ // extra byte
+ charCount--;
+ }
+#endif // FASTLOOP
+
+ // no pending bits at this point
+ ch = 0;
+ continue;
+
+ BadLongCode:
+ pSrc -= 2;
+ ch = 0;
+ continue;
+ }
+
+ // May have a problem if we have to flush
+ if (ch != 0)
+ {
+ // We were already adjusting for these, so need to unadjust
+ charCount += (ch >> 30);
+ if (baseDecoder == null || baseDecoder.MustFlush)
+ {
+ // Have to do fallback for invalid bytes
+ if (fallback == null)
+ {
+ if (baseDecoder == null)
+ fallback = this.decoderFallback.CreateFallbackBuffer();
+ else
+ fallback = baseDecoder.FallbackBuffer;
+ fallback.InternalInitialize(bytes, null);
+ }
+ charCount += FallbackInvalidByteSequence(pSrc, ch, fallback);
+ }
+ }
+
+ // Shouldn't have anything in fallback buffer for GetCharCount
+ // (don't have to check m_throwOnOverflow for count)
+ Debug.Assert(fallback == null || fallback.Remaining == 0,
+ "[UTF8Encoding.GetCharCount]Expected empty fallback buffer at end");
+
+ return charCount;
+ }
+
+ // WARNING: If we throw an error, then System.Resources.ResourceReader calls this method.
+ // So if we're really broken, then that could also throw an error... recursively.
+ // So try to make sure GetChars can at least process all uses by
+ // System.Resources.ResourceReader!
+ //
+ // Note: We throw exceptions on individually encoded surrogates and other non-shortest forms.
+ // If exceptions aren't turned on, then we drop all non-shortest &individual surrogates.
+ //
+ // To simplify maintenance, the structure of GetCharCount and GetChars should be
+ // kept the same as much as possible
+ internal override unsafe int GetChars(byte* bytes, int byteCount,
+ char* chars, int charCount, DecoderNLS baseDecoder)
+ {
+ Debug.Assert(chars != null, "[UTF8Encoding.GetChars]chars!=null");
+ Debug.Assert(byteCount >= 0, "[UTF8Encoding.GetChars]count >=0");
+ Debug.Assert(charCount >= 0, "[UTF8Encoding.GetChars]charCount >=0");
+ Debug.Assert(bytes != null, "[UTF8Encoding.GetChars]bytes!=null");
+
+ byte* pSrc = bytes;
+ char* pTarget = chars;
+
+ byte* pEnd = pSrc + byteCount;
+ char* pAllocatedBufferEnd = pTarget + charCount;
+
+ int ch = 0;
+
+ DecoderFallbackBuffer fallback = null;
+ byte* pSrcForFallback;
+ char* pTargetForFallback;
+ if (baseDecoder != null)
+ {
+ UTF8Decoder decoder = (UTF8Decoder)baseDecoder;
+ ch = decoder.bits;
+
+ // Shouldn't have anything in fallback buffer for GetChars
+ // (don't have to check m_throwOnOverflow for chars, we always use all or none so always should be empty)
+ Debug.Assert(!decoder.InternalHasFallbackBuffer || decoder.FallbackBuffer.Remaining == 0,
+ "[UTF8Encoding.GetChars]Expected empty fallback buffer at start");
+ }
+
+ for (;;)
+ {
+ // SLOWLOOP: does all range checks, handles all special cases, but it is slow
+
+ if (pSrc >= pEnd)
+ {
+ break;
+ }
+
+ if (ch == 0)
+ {
+ // no pending bits
+ goto ReadChar;
+ }
+
+ // read next byte. The JIT optimization seems to be getting confused when
+ // compiling "ch = *pSrc++;", so rather use "ch = *pSrc; pSrc++;" instead
+ int cha = *pSrc;
+ pSrc++;
+
+ // we are expecting to see trailing bytes like 10vvvvvv
+ if ((cha & unchecked((sbyte)0xC0)) != 0x80)
+ {
+ // This can be a valid starting byte for another UTF8 byte sequence, so let's put
+ // the current byte back, and try to see if this is a valid byte for another UTF8 byte sequence
+ pSrc--;
+ goto InvalidByteSequence;
+ }
+
+ // fold in the new byte
+ ch = (ch << 6) | (cha & 0x3F);
+
+ if ((ch & FinalByte) == 0)
+ {
+ // Not at last byte yet
+ Debug.Assert((ch & (SupplimentarySeq | ThreeByteSeq)) != 0,
+ "[UTF8Encoding.GetChars]Invariant volation");
+
+ if ((ch & SupplimentarySeq) != 0)
+ {
+ // Its a 4-byte supplimentary sequence
+ if ((ch & (FinalByte >> 6)) != 0)
+ {
+ // this is 3rd byte of 4 byte sequence - nothing to do
+ continue;
+ }
+
+ // 2nd byte of 4 bytes
+ // check for non-shortest form of surrogate and the valid surrogate
+ // range 0x000000 - 0x10FFFF at the same time
+ if (!InRange(ch & 0x1F0, 0x10, 0x100))
+ {
+ goto InvalidByteSequence;
+ }
+ }
+ else
+ {
+ // Must be 2nd byte of a 3-byte sequence
+ // check for non-shortest form of 3 byte seq
+ if ((ch & (0x1F << 5)) == 0 || // non-shortest form
+ (ch & (0xF800 >> 6)) == (0xD800 >> 6)) // illegal individually encoded surrogate
+ {
+ goto InvalidByteSequence;
+ }
+ }
+ continue;
+ }
+
+ // ready to punch
+
+ // surrogate in shortest form?
+ // Might be possible to get rid of this? Already did non-shortest check for 4-byte sequence when reading 2nd byte?
+ if ((ch & (SupplimentarySeq | 0x1F0000)) > SupplimentarySeq)
+ {
+ // let the range check for the second char throw the exception
+ if (pTarget < pAllocatedBufferEnd)
+ {
+ *pTarget = (char)(((ch >> 10) & 0x7FF) +
+ unchecked((short)((CharUnicodeInfo.HIGH_SURROGATE_START - (0x10000 >> 10)))));
+ pTarget++;
+
+ ch = (ch & 0x3FF) +
+ unchecked((int)(CharUnicodeInfo.LOW_SURROGATE_START));
+ }
+ }
+
+ goto EncodeChar;
+
+ InvalidByteSequence:
+ // this code fragment should be close to the gotos referencing it
+ // Have to do fallback for invalid bytes
+ if (fallback == null)
+ {
+ if (baseDecoder == null)
+ fallback = this.decoderFallback.CreateFallbackBuffer();
+ else
+ fallback = baseDecoder.FallbackBuffer;
+ fallback.InternalInitialize(bytes, pAllocatedBufferEnd);
+ }
+ // This'll back us up the appropriate # of bytes if we didn't get anywhere
+ pSrcForFallback = pSrc; // Avoid passing pSrc by reference to allow it to be enregistered
+ pTargetForFallback = pTarget; // Avoid passing pTarget by reference to allow it to be enregistered
+ bool fallbackResult = FallbackInvalidByteSequence(ref pSrcForFallback, ch, fallback, ref pTargetForFallback);
+ pSrc = pSrcForFallback;
+ pTarget = pTargetForFallback;
+
+ if (!fallbackResult)
+ {
+ // Ran out of buffer space
+ // Need to throw an exception?
+ Debug.Assert(pSrc >= bytes || pTarget == chars,
+ "[UTF8Encoding.GetChars]Expected to throw or remain in byte buffer after fallback");
+ fallback.InternalReset();
+ ThrowCharsOverflow(baseDecoder, pTarget == chars);
+ ch = 0;
+ break;
+ }
+ Debug.Assert(pSrc >= bytes,
+ "[UTF8Encoding.GetChars]Expected invalid byte sequence to have remained within the byte array");
+ ch = 0;
+ continue;
+
+ ReadChar:
+ ch = *pSrc;
+ pSrc++;
+
+ ProcessChar:
+ if (ch > 0x7F)
+ {
+ // If its > 0x7F, its start of a new multi-byte sequence
+
+ // bit 6 has to be non-zero
+ if ((ch & 0x40) == 0)
+ {
+ goto InvalidByteSequence;
+ }
+
+ // start a new long code
+ if ((ch & 0x20) != 0)
+ {
+ if ((ch & 0x10) != 0)
+ {
+ // 4 byte encoding - supplimentary character (2 surrogates)
+
+ ch &= 0x0F;
+
+ // check that bit 4 is zero and the valid supplimentary character
+ // range 0x000000 - 0x10FFFF at the same time
+ if (ch > 0x04)
+ {
+ ch |= 0xf0;
+ goto InvalidByteSequence;
+ }
+
+ ch |= (FinalByte >> 3 * 6) | (1 << 30) | (3 << (30 - 2 * 6)) |
+ (SupplimentarySeq) | (SupplimentarySeq >> 6) |
+ (SupplimentarySeq >> 2 * 6) | (SupplimentarySeq >> 3 * 6);
+ }
+ else
+ {
+ // 3 byte encoding
+ ch = (ch & 0x0F) | ((FinalByte >> 2 * 6) | (1 << 30) |
+ (ThreeByteSeq) | (ThreeByteSeq >> 6) | (ThreeByteSeq >> 2 * 6));
+ }
+ }
+ else
+ {
+ // 2 byte encoding
+
+ ch &= 0x1F;
+
+ // check for non-shortest form
+ if (ch <= 1)
+ {
+ ch |= 0xc0;
+ goto InvalidByteSequence;
+ }
+
+ ch |= (FinalByte >> 6);
+ }
+ continue;
+ }
+
+ EncodeChar:
+ // write the pending character
+ if (pTarget >= pAllocatedBufferEnd)
+ {
+ // Fix chars so we make sure to throw if we didn't output anything
+ ch &= 0x1fffff;
+ if (ch > 0x7f)
+ {
+ if (ch > 0x7ff)
+ {
+ if (ch >= CharUnicodeInfo.LOW_SURROGATE_START &&
+ ch <= CharUnicodeInfo.LOW_SURROGATE_END)
+ {
+ pSrc--; // It was 4 bytes
+ pTarget--; // 1 was stored already, but we can't remember 1/2, so back up
+ }
+ else if (ch > 0xffff)
+ {
+ pSrc--; // It was 4 bytes, nothing was stored
+ }
+ pSrc--; // It was at least 3 bytes
+ }
+ pSrc--; // It was at least 2 bytes
+ }
+ pSrc--;
+
+ // Throw that we don't have enough room (pSrc could be < chars if we had started to process
+ // a 4 byte sequence alredy)
+ Debug.Assert(pSrc >= bytes || pTarget == chars,
+ "[UTF8Encoding.GetChars]Expected pSrc to be within input buffer or throw due to no output]");
+ ThrowCharsOverflow(baseDecoder, pTarget == chars);
+
+ // Don't store ch in decoder, we already backed up to its start
+ ch = 0;
+
+ // Didn't throw, just use this buffer size.
+ break;
+ }
+ *pTarget = (char)ch;
+ pTarget++;
+
+#if FASTLOOP
+ int availableChars = PtrDiff(pAllocatedBufferEnd, pTarget);
+ int availableBytes = PtrDiff(pEnd, pSrc);
+
+ // don't fall into the fast decoding loop if we don't have enough bytes
+ // Test for availableChars is done because pStop would be <= pTarget.
+ if (availableBytes <= 13)
+ {
+ // we may need as many as 1 character per byte
+ if (availableChars < availableBytes)
+ {
+ // not enough output room. no pending bits at this point
+ ch = 0;
+ continue;
+ }
+
+ // try to get over the remainder of the ascii characters fast though
+ byte* pLocalEnd = pEnd; // hint to get pLocalEnd enregistered
+ while (pSrc < pLocalEnd)
+ {
+ ch = *pSrc;
+ pSrc++;
+
+ if (ch > 0x7F)
+ goto ProcessChar;
+
+ *pTarget = (char)ch;
+ pTarget++;
+ }
+ // we are done
+ ch = 0;
+ break;
+ }
+
+ // we may need as many as 1 character per byte, so reduce the byte count if necessary.
+ // If availableChars is too small, pStop will be before pTarget and we won't do fast loop.
+ if (availableChars < availableBytes)
+ {
+ availableBytes = availableChars;
+ }
+
+ // To compute the upper bound, assume that all characters are ASCII characters at this point,
+ // the boundary will be decreased for every non-ASCII character we encounter
+ // Also, we need 7 chars reserve for the unrolled ansi decoding loop and for decoding of multibyte sequences
+ char* pStop = pTarget + availableBytes - 7;
+
+ while (pTarget < pStop)
+ {
+ ch = *pSrc;
+ pSrc++;
+
+ if (ch > 0x7F)
+ {
+ goto LongCode;
+ }
+ *pTarget = (char)ch;
+ pTarget++;
+
+ // get pSrc to be 2-byte aligned
+ if ((unchecked((int)pSrc) & 0x1) != 0)
+ {
+ ch = *pSrc;
+ pSrc++;
+ if (ch > 0x7F)
+ {
+ goto LongCode;
+ }
+ *pTarget = (char)ch;
+ pTarget++;
+ }
+
+ // get pSrc to be 4-byte aligned
+ if ((unchecked((int)pSrc) & 0x2) != 0)
+ {
+ ch = *(ushort*)pSrc;
+ if ((ch & 0x8080) != 0)
+ {
+ goto LongCodeWithMask16;
+ }
+
+ // Unfortunately, this is endianess sensitive
+#if BIGENDIAN
+ *pTarget = (char)((ch >> 8) & 0x7F);
+ pSrc += 2;
+ *(pTarget+1) = (char)(ch & 0x7F);
+ pTarget += 2;
+#else // BIGENDIAN
+ *pTarget = (char)(ch & 0x7F);
+ pSrc += 2;
+ *(pTarget + 1) = (char)((ch >> 8) & 0x7F);
+ pTarget += 2;
+#endif // BIGENDIAN
+ }
+
+ // Run 8 characters at a time!
+ while (pTarget < pStop)
+ {
+ ch = *(int*)pSrc;
+ int chb = *(int*)(pSrc + 4);
+ if (((ch | chb) & unchecked((int)0x80808080)) != 0)
+ {
+ goto LongCodeWithMask32;
+ }
+
+ // Unfortunately, this is endianess sensitive
+#if BIGENDIAN
+ *pTarget = (char)((ch >> 24) & 0x7F);
+ *(pTarget+1) = (char)((ch >> 16) & 0x7F);
+ *(pTarget+2) = (char)((ch >> 8) & 0x7F);
+ *(pTarget+3) = (char)(ch & 0x7F);
+ pSrc += 8;
+ *(pTarget+4) = (char)((chb >> 24) & 0x7F);
+ *(pTarget+5) = (char)((chb >> 16) & 0x7F);
+ *(pTarget+6) = (char)((chb >> 8) & 0x7F);
+ *(pTarget+7) = (char)(chb & 0x7F);
+ pTarget += 8;
+#else // BIGENDIAN
+ *pTarget = (char)(ch & 0x7F);
+ *(pTarget + 1) = (char)((ch >> 8) & 0x7F);
+ *(pTarget + 2) = (char)((ch >> 16) & 0x7F);
+ *(pTarget + 3) = (char)((ch >> 24) & 0x7F);
+ pSrc += 8;
+ *(pTarget + 4) = (char)(chb & 0x7F);
+ *(pTarget + 5) = (char)((chb >> 8) & 0x7F);
+ *(pTarget + 6) = (char)((chb >> 16) & 0x7F);
+ *(pTarget + 7) = (char)((chb >> 24) & 0x7F);
+ pTarget += 8;
+#endif // BIGENDIAN
+ }
+ break;
+
+#if BIGENDIAN
+ LongCodeWithMask32:
+ // be careful about the sign extension
+ ch = (int)(((uint)ch) >> 16);
+ LongCodeWithMask16:
+ ch = (int)(((uint)ch) >> 8);
+#else // BIGENDIAN
+ LongCodeWithMask32:
+ LongCodeWithMask16:
+ ch &= 0xFF;
+#endif // BIGENDIAN
+ pSrc++;
+ if (ch <= 0x7F)
+ {
+ *pTarget = (char)ch;
+ pTarget++;
+ continue;
+ }
+
+ LongCode:
+ int chc = *pSrc;
+ pSrc++;
+
+ if (
+ // bit 6 has to be zero
+ (ch & 0x40) == 0 ||
+ // we are expecting to see trailing bytes like 10vvvvvv
+ (chc & unchecked((sbyte)0xC0)) != 0x80)
+ {
+ goto BadLongCode;
+ }
+
+ chc &= 0x3F;
+
+ // start a new long code
+ if ((ch & 0x20) != 0)
+ {
+ // fold the first two bytes together
+ chc |= (ch & 0x0F) << 6;
+
+ if ((ch & 0x10) != 0)
+ {
+ // 4 byte encoding - surrogate
+ ch = *pSrc;
+ if (
+ // check that bit 4 is zero, the non-shortest form of surrogate
+ // and the valid surrogate range 0x000000 - 0x10FFFF at the same time
+ !InRange(chc >> 4, 0x01, 0x10) ||
+ // we are expecting to see trailing bytes like 10vvvvvv
+ (ch & unchecked((sbyte)0xC0)) != 0x80)
+ {
+ goto BadLongCode;
+ }
+
+ chc = (chc << 6) | (ch & 0x3F);
+
+ ch = *(pSrc + 1);
+ // we are expecting to see trailing bytes like 10vvvvvv
+ if ((ch & unchecked((sbyte)0xC0)) != 0x80)
+ {
+ goto BadLongCode;
+ }
+ pSrc += 2;
+
+ ch = (chc << 6) | (ch & 0x3F);
+
+ *pTarget = (char)(((ch >> 10) & 0x7FF) +
+ unchecked((short)(CharUnicodeInfo.HIGH_SURROGATE_START - (0x10000 >> 10))));
+ pTarget++;
+
+ ch = (ch & 0x3FF) +
+ unchecked((short)(CharUnicodeInfo.LOW_SURROGATE_START));
+
+ // extra byte, we're already planning 2 chars for 2 of these bytes,
+ // but the big loop is testing the target against pStop, so we need
+ // to subtract 2 more or we risk overrunning the input. Subtract
+ // one here and one below.
+ pStop--;
+ }
+ else
+ {
+ // 3 byte encoding
+ ch = *pSrc;
+ if (
+ // check for non-shortest form of 3 byte seq
+ (chc & (0x1F << 5)) == 0 ||
+ // Can't have surrogates here.
+ (chc & (0xF800 >> 6)) == (0xD800 >> 6) ||
+ // we are expecting to see trailing bytes like 10vvvvvv
+ (ch & unchecked((sbyte)0xC0)) != 0x80)
+ {
+ goto BadLongCode;
+ }
+ pSrc++;
+
+ ch = (chc << 6) | (ch & 0x3F);
+
+ // extra byte, we're only expecting 1 char for each of these 3 bytes,
+ // but the loop is testing the target (not source) against pStop, so
+ // we need to subtract 2 more or we risk overrunning the input.
+ // Subtract 1 here and one more below
+ pStop--;
+ }
+ }
+ else
+ {
+ // 2 byte encoding
+
+ ch &= 0x1F;
+
+ // check for non-shortest form
+ if (ch <= 1)
+ {
+ goto BadLongCode;
+ }
+ ch = (ch << 6) | chc;
+ }
+
+ *pTarget = (char)ch;
+ pTarget++;
+
+ // extra byte, we're only expecting 1 char for each of these 2 bytes,
+ // but the loop is testing the target (not source) against pStop.
+ // subtract an extra count from pStop so that we don't overrun the input.
+ pStop--;
+ }
+#endif // FASTLOOP
+
+ Debug.Assert(pTarget <= pAllocatedBufferEnd, "[UTF8Encoding.GetChars]pTarget <= pAllocatedBufferEnd");
+
+ // no pending bits at this point
+ ch = 0;
+ continue;
+
+ BadLongCode:
+ pSrc -= 2;
+ ch = 0;
+ continue;
+ }
+
+ if (ch != 0 && (baseDecoder == null || baseDecoder.MustFlush))
+ {
+ // Have to do fallback for invalid bytes
+ if (fallback == null)
+ {
+ if (baseDecoder == null)
+ fallback = this.decoderFallback.CreateFallbackBuffer();
+ else
+ fallback = baseDecoder.FallbackBuffer;
+ fallback.InternalInitialize(bytes, pAllocatedBufferEnd);
+ }
+
+ // This'll back us up the appropriate # of bytes if we didn't get anywhere
+ pSrcForFallback = pSrc; // Avoid passing pSrc by reference to allow it to be enregistered
+ pTargetForFallback = pTarget; // Avoid passing pTarget by reference to allow it to be enregistered
+ bool fallbackResult = FallbackInvalidByteSequence(ref pSrcForFallback, ch, fallback, ref pTargetForFallback);
+ pSrc = pSrcForFallback;
+ pTarget = pTargetForFallback;
+
+ if (!fallbackResult)
+ {
+ Debug.Assert(pSrc >= bytes || pTarget == chars,
+ "[UTF8Encoding.GetChars]Expected to throw or remain in byte buffer while flushing");
+
+ // Ran out of buffer space
+ // Need to throw an exception?
+ fallback.InternalReset();
+ ThrowCharsOverflow(baseDecoder, pTarget == chars);
+ }
+ Debug.Assert(pSrc >= bytes,
+ "[UTF8Encoding.GetChars]Expected flushing invalid byte sequence to have remained within the byte array");
+ ch = 0;
+ }
+
+ if (baseDecoder != null)
+ {
+ UTF8Decoder decoder = (UTF8Decoder)baseDecoder;
+
+ // If we're storing flush data we expect all bits to be used or else
+ // we're stuck in the middle of a conversion
+ Debug.Assert(!baseDecoder.MustFlush || ch == 0 || !baseDecoder.m_throwOnOverflow,
+ "[UTF8Encoding.GetChars]Expected no must flush or no left over bits or no throw on overflow.");
+
+ // Remember our leftover bits.
+ decoder.bits = ch;
+
+ baseDecoder.m_bytesUsed = (int)(pSrc - bytes);
+ }
+
+ // Shouldn't have anything in fallback buffer for GetChars
+ // (don't have to check m_throwOnOverflow for chars)
+ Debug.Assert(fallback == null || fallback.Remaining == 0,
+ "[UTF8Encoding.GetChars]Expected empty fallback buffer at end");
+
+ return PtrDiff(pTarget, chars);
+ }
+
+ // During GetChars we had an invalid byte sequence
+ // pSrc is backed up to the start of the bad sequence if we didn't have room to
+ // fall it back. Otherwise pSrc remains wher it is.
+ private unsafe bool FallbackInvalidByteSequence(
+ ref byte* pSrc, int ch, DecoderFallbackBuffer fallback, ref char* pTarget)
+ {
+ // Get our byte[]
+ byte* pStart = pSrc;
+ byte[] bytesUnknown = GetBytesUnknown(ref pStart, ch);
+
+ // Do the actual fallback
+ if (!fallback.InternalFallback(bytesUnknown, pSrc, ref pTarget))
+ {
+ // Oops, it failed, back up to pStart
+ pSrc = pStart;
+ return false;
+ }
+
+ // It worked
+ return true;
+ }
+
+ // During GetCharCount we had an invalid byte sequence
+ // pSrc is used to find the index that points to the invalid bytes,
+ // however the byte[] contains the fallback bytes (in case the index is -1)
+ private unsafe int FallbackInvalidByteSequence(
+ byte* pSrc, int ch, DecoderFallbackBuffer fallback)
+ {
+ // Get our byte[]
+ byte[] bytesUnknown = GetBytesUnknown(ref pSrc, ch);
+
+ // Do the actual fallback
+ int count = fallback.InternalFallback(bytesUnknown, pSrc);
+
+ // # of fallback chars expected.
+ // Note that we only get here for "long" sequences, and have already unreserved
+ // the count that we prereserved for the input bytes
+ return count;
+ }
+
+ // Note that some of these bytes may have come from a previous fallback, so we cannot
+ // just decrement the pointer and use the values we read. In those cases we have
+ // to regenerate the original values.
+ private unsafe byte[] GetBytesUnknown(ref byte* pSrc, int ch)
+ {
+ // Get our byte[]
+ byte[] bytesUnknown = null;
+
+ // See if it was a plain char
+ // (have to check >= 0 because we have all sorts of wierd bit flags)
+ if (ch < 0x100 && ch >= 0)
+ {
+ pSrc--;
+ bytesUnknown = new byte[] { unchecked((byte)ch) };
+ }
+ // See if its an unfinished 2 byte sequence
+ else if ((ch & (SupplimentarySeq | ThreeByteSeq)) == 0)
+ {
+ pSrc--;
+ bytesUnknown = new byte[] { unchecked((byte)((ch & 0x1F) | 0xc0)) };
+ }
+ // So now we're either 2nd byte of 3 or 4 byte sequence or
+ // we hit a non-trail byte or we ran out of space for 3rd byte of 4 byte sequence
+ // 1st check if its a 4 byte sequence
+ else if ((ch & SupplimentarySeq) != 0)
+ {
+ // 3rd byte of 4 byte sequence?
+ if ((ch & (FinalByte >> 6)) != 0)
+ {
+ // 3rd byte of 4 byte sequence
+ pSrc -= 3;
+ bytesUnknown = new byte[] {
+ unchecked((byte)(((ch >> 12) & 0x07) | 0xF0)),
+ unchecked((byte)(((ch >> 6) & 0x3F) | 0x80)),
+ unchecked((byte)(((ch) & 0x3F) | 0x80)) };
+ }
+ else if ((ch & (FinalByte >> 12)) != 0)
+ {
+ // 2nd byte of a 4 byte sequence
+ pSrc -= 2;
+ bytesUnknown = new byte[] {
+ unchecked((byte)(((ch >> 6) & 0x07) | 0xF0)),
+ unchecked((byte)(((ch) & 0x3F) | 0x80)) };
+ }
+ else
+ {
+ // 4th byte of a 4 byte sequence
+ pSrc--;
+ bytesUnknown = new byte[] { unchecked((byte)(((ch) & 0x07) | 0xF0)) };
+ }
+ }
+ else
+ {
+ // 2nd byte of 3 byte sequence?
+ if ((ch & (FinalByte >> 6)) != 0)
+ {
+ // So its 2nd byte of a 3 byte sequence
+ pSrc -= 2;
+ bytesUnknown = new byte[] {
+ unchecked((byte)(((ch >> 6) & 0x0F) | 0xE0)), unchecked ((byte)(((ch) & 0x3F) | 0x80)) };
+ }
+ else
+ {
+ // 1st byte of a 3 byte sequence
+ pSrc--;
+ bytesUnknown = new byte[] { unchecked((byte)(((ch) & 0x0F) | 0xE0)) };
+ }
+ }
+
+ return bytesUnknown;
+ }
+
+
+ public override Decoder GetDecoder()
+ {
+ return new UTF8Decoder(this);
+ }
+
+
+ public override Encoder GetEncoder()
+ {
+ return new UTF8Encoder(this);
+ }
+
+
+ public override int GetMaxByteCount(int charCount)
+ {
+ if (charCount < 0)
+ throw new ArgumentOutOfRangeException(nameof(charCount),
+ SR.ArgumentOutOfRange_NeedNonNegNum);
+ Contract.EndContractBlock();
+
+ // Characters would be # of characters + 1 in case left over high surrogate is ? * max fallback
+ long byteCount = (long)charCount + 1;
+
+ if (EncoderFallback.MaxCharCount > 1)
+ byteCount *= EncoderFallback.MaxCharCount;
+
+ // Max 3 bytes per char. (4 bytes per 2 chars for surrogates)
+ byteCount *= 3;
+
+ if (byteCount > 0x7fffffff)
+ throw new ArgumentOutOfRangeException(nameof(charCount), SR.ArgumentOutOfRange_GetByteCountOverflow);
+
+ return (int)byteCount;
+ }
+
+
+ public override int GetMaxCharCount(int byteCount)
+ {
+ if (byteCount < 0)
+ throw new ArgumentOutOfRangeException(nameof(byteCount),
+ SR.ArgumentOutOfRange_NeedNonNegNum);
+ Contract.EndContractBlock();
+
+ // Figure out our length, 1 char per input byte + 1 char if 1st byte is last byte of 4 byte surrogate pair
+ long charCount = ((long)byteCount + 1);
+
+ // Non-shortest form would fall back, so get max count from fallback.
+ // So would 11... followed by 11..., so you could fall back every byte
+ if (DecoderFallback.MaxCharCount > 1)
+ {
+ charCount *= DecoderFallback.MaxCharCount;
+ }
+
+ if (charCount > 0x7fffffff)
+ throw new ArgumentOutOfRangeException(nameof(byteCount), SR.ArgumentOutOfRange_GetCharCountOverflow);
+
+ return (int)charCount;
+ }
+
+
+ public override byte[] GetPreamble()
+ {
+ if (_emitUTF8Identifier)
+ {
+ // Allocate new array to prevent users from modifying it.
+ return new byte[3] { 0xEF, 0xBB, 0xBF };
+ }
+ else
+ return Array.Empty<byte>();
+ }
+
+
+ public override bool Equals(Object value)
+ {
+ UTF8Encoding that = value as UTF8Encoding;
+ if (that != null)
+ {
+ return (_emitUTF8Identifier == that._emitUTF8Identifier) &&
+ (EncoderFallback.Equals(that.EncoderFallback)) &&
+ (DecoderFallback.Equals(that.DecoderFallback));
+ }
+ return (false);
+ }
+
+
+ public override int GetHashCode()
+ {
+ //Not great distribution, but this is relatively unlikely to be used as the key in a hashtable.
+ return this.EncoderFallback.GetHashCode() + this.DecoderFallback.GetHashCode() +
+ UTF8_CODEPAGE + (_emitUTF8Identifier ? 1 : 0);
+ }
+
+ [Serializable]
+ private sealed class UTF8Encoder : EncoderNLS, ISerializable
+ {
+ // We must save a high surrogate value until the next call, looking
+ // for a low surrogate value.
+ internal int surrogateChar;
+
+ public UTF8Encoder(UTF8Encoding encoding) : base(encoding)
+ {
+ // base calls reset
+ }
+
+ // Constructor called by serialization, have to handle deserializing from Everett
+ internal UTF8Encoder(SerializationInfo info, StreamingContext context)
+ {
+ // Any info?
+ if (info == null) throw new ArgumentNullException(nameof(info));
+ Contract.EndContractBlock();
+
+ // Get common info
+ this.m_encoding = (Encoding)info.GetValue("encoding", typeof(Encoding));
+
+ // SurrogateChar happens to mean the same thing
+ this.surrogateChar = (int)info.GetValue("surrogateChar", typeof(int));
+
+ try
+ {
+ this.m_fallback = (EncoderFallback)info.GetValue("m_fallback", typeof(EncoderFallback));
+ }
+ catch (SerializationException)
+ {
+ this.m_fallback = null;
+ }
+ }
+
+ // ISerializable implementation, get data for this object
+ void ISerializable.GetObjectData(SerializationInfo info, StreamingContext context)
+ {
+ // Any info?
+ if (info == null) throw new ArgumentNullException(nameof(info));
+ Contract.EndContractBlock();
+
+ // Save Whidbey data
+ // Just need Everett maxCharSize (BaseCodePageEncoding) or m_maxByteSize (MLangBaseCodePageEncoding)
+ info.AddValue("encoding", this.m_encoding);
+ info.AddValue("surrogateChar", this.surrogateChar);
+
+ info.AddValue("m_fallback", this.m_fallback);
+
+ // Extra stuff for Everett that Whidbey doesn't use
+ info.AddValue("storedSurrogate", this.surrogateChar > 0 ? true : false);
+ info.AddValue("mustFlush", false); // Everett doesn't actually use this either, but it accidently serialized it!
+ }
+
+ public override void Reset()
+
+ {
+ this.surrogateChar = 0;
+ if (m_fallbackBuffer != null)
+ m_fallbackBuffer.Reset();
+ }
+
+ // Anything left in our encoder?
+ internal override bool HasState
+ {
+ get
+ {
+ return (this.surrogateChar != 0);
+ }
+ }
+ }
+
+ [Serializable]
+ private sealed class UTF8Decoder : DecoderNLS, ISerializable
+ {
+ // We'll need to remember the previous information. See the comments around definition
+ // of FinalByte for details.
+ internal int bits;
+
+ public UTF8Decoder(UTF8Encoding encoding) : base(encoding)
+ {
+ // base calls reset
+ }
+
+ // Constructor called by serialization, have to handle deserializing from Everett
+ internal UTF8Decoder(SerializationInfo info, StreamingContext context)
+ {
+ // Any info?
+ if (info == null) throw new ArgumentNullException(nameof(info));
+ Contract.EndContractBlock();
+
+ // Get common info
+ this.m_encoding = (Encoding)info.GetValue("encoding", typeof(Encoding));
+
+ try
+ {
+ // Get whidbey version of bits
+ this.bits = (int)info.GetValue("wbits", typeof(int));
+ this.m_fallback = (DecoderFallback)info.GetValue("m_fallback", typeof(DecoderFallback));
+ }
+ catch (SerializationException)
+ {
+ // Everett calls bits bits instead of wbits, so this is Everett
+ this.bits = 0;
+ this.m_fallback = null;
+ }
+ }
+
+ // ISerializable implementation, get data for this object
+ void ISerializable.GetObjectData(SerializationInfo info, StreamingContext context)
+ {
+ // Any info?
+ if (info == null) throw new ArgumentNullException(nameof(info));
+ Contract.EndContractBlock();
+
+ // Save new Whidbey data
+ info.AddValue("encoding", this.m_encoding);
+ info.AddValue("wbits", this.bits); // Special whidbey bits name
+ info.AddValue("m_fallback", this.m_fallback);
+
+ // Everett has extra stuff, we set it all to 0 in case this deserializes in Everett
+ info.AddValue("bits", (int)0);
+ info.AddValue("trailCount", (int)0);
+ info.AddValue("isSurrogate", false);
+ info.AddValue("byteSequence", (int)0);
+ }
+
+ public override void Reset()
+ {
+ this.bits = 0;
+ if (m_fallbackBuffer != null)
+ m_fallbackBuffer.Reset();
+ }
+
+ // Anything left in our decoder?
+ internal override bool HasState
+ {
+ get
+ {
+ return (this.bits != 0);
+ }
+ }
+ }
+ }
+}
diff --git a/src/mscorlib/shared/System/Text/UnicodeEncoding.cs b/src/mscorlib/shared/System/Text/UnicodeEncoding.cs
new file mode 100644
index 0000000000..0e4db9aaad
--- /dev/null
+++ b/src/mscorlib/shared/System/Text/UnicodeEncoding.cs
@@ -0,0 +1,2058 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+//
+// Don't override IsAlwaysNormalized because it is just a Unicode Transformation and could be confused.
+//
+
+using System;
+using System.Globalization;
+using System.Runtime.Serialization;
+using System.Diagnostics;
+using System.Diagnostics.Contracts;
+
+namespace System.Text
+{
+ [Serializable]
+ public class UnicodeEncoding : Encoding
+ {
+ // Used by Encoding.BigEndianUnicode/Unicode for lazy initialization
+ // The initialization code will not be run until a static member of the class is referenced
+ internal static readonly UnicodeEncoding s_bigEndianDefault = new UnicodeEncoding(bigEndian: true, byteOrderMark: true);
+ internal static readonly UnicodeEncoding s_littleEndianDefault = new UnicodeEncoding(bigEndian: false, byteOrderMark: true);
+
+ [OptionalField(VersionAdded = 2)]
+ internal bool isThrowException = false;
+
+ internal bool bigEndian = false;
+ internal bool byteOrderMark = true;
+
+ // Unicode version 2.0 character size in bytes
+ public const int CharSize = 2;
+
+
+ public UnicodeEncoding()
+ : this(false, true)
+ {
+ }
+
+
+ public UnicodeEncoding(bool bigEndian, bool byteOrderMark)
+ : this(bigEndian, byteOrderMark, false)
+ {
+ }
+
+
+ public UnicodeEncoding(bool bigEndian, bool byteOrderMark, bool throwOnInvalidBytes)
+ : base(bigEndian ? 1201 : 1200) //Set the data item.
+ {
+ this.isThrowException = throwOnInvalidBytes;
+ this.bigEndian = bigEndian;
+ this.byteOrderMark = byteOrderMark;
+
+ // Encoding constructor already did this, but it'll be wrong if we're throwing exceptions
+ if (this.isThrowException)
+ SetDefaultFallbacks();
+ }
+
+ #region Serialization
+ [OnDeserializing]
+ private void OnDeserializing(StreamingContext ctx)
+ {
+ // In Everett it is false. Whidbey will overwrite this value.
+ isThrowException = false;
+ }
+ #endregion Serialization
+
+ internal override void SetDefaultFallbacks()
+ {
+ // For UTF-X encodings, we use a replacement fallback with an empty string
+ if (this.isThrowException)
+ {
+ this.encoderFallback = EncoderFallback.ExceptionFallback;
+ this.decoderFallback = DecoderFallback.ExceptionFallback;
+ }
+ else
+ {
+ this.encoderFallback = new EncoderReplacementFallback("\xFFFD");
+ this.decoderFallback = new DecoderReplacementFallback("\xFFFD");
+ }
+ }
+
+ // The following methods are copied from EncodingNLS.cs.
+ // Unfortunately EncodingNLS.cs is internal and we're public, so we have to reimpliment them here.
+ // These should be kept in sync for the following classes:
+ // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
+ //
+
+ // Returns the number of bytes required to encode a range of characters in
+ // a character array.
+ //
+ // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
+ // So if you fix this, fix the others. Currently those include:
+ // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
+ // parent method is safe
+
+ public override unsafe int GetByteCount(char[] chars, int index, int count)
+ {
+ // Validate input parameters
+ if (chars == null)
+ throw new ArgumentNullException("chars", SR.ArgumentNull_Array);
+
+ if (index < 0 || count < 0)
+ throw new ArgumentOutOfRangeException((index < 0 ? "index" : "count"), SR.ArgumentOutOfRange_NeedNonNegNum);
+
+ if (chars.Length - index < count)
+ throw new ArgumentOutOfRangeException("chars", SR.ArgumentOutOfRange_IndexCountBuffer);
+ Contract.EndContractBlock();
+
+ // If no input, return 0, avoid fixed empty array problem
+ if (count == 0)
+ return 0;
+
+ // Just call the pointer version
+ fixed (char* pChars = chars)
+ return GetByteCount(pChars + index, count, null);
+ }
+
+ // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
+ // So if you fix this, fix the others. Currently those include:
+ // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
+ // parent method is safe
+
+ public override unsafe int GetByteCount(String s)
+ {
+ // Validate input
+ if (s==null)
+ throw new ArgumentNullException("s");
+ Contract.EndContractBlock();
+
+ fixed (char* pChars = s)
+ return GetByteCount(pChars, s.Length, null);
+ }
+
+ // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
+ // So if you fix this, fix the others. Currently those include:
+ // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
+
+ [CLSCompliant(false)]
+ public override unsafe int GetByteCount(char* chars, int count)
+ {
+ // Validate Parameters
+ if (chars == null)
+ throw new ArgumentNullException("chars", SR.ArgumentNull_Array);
+
+ if (count < 0)
+ throw new ArgumentOutOfRangeException("count", SR.ArgumentOutOfRange_NeedNonNegNum);
+ Contract.EndContractBlock();
+
+ // Call it with empty encoder
+ return GetByteCount(chars, count, null);
+ }
+
+ // Parent method is safe.
+ // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
+ // So if you fix this, fix the others. Currently those include:
+ // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
+
+ public override unsafe int GetBytes(String s, int charIndex, int charCount,
+ byte[] bytes, int byteIndex)
+ {
+ if (s == null || bytes == null)
+ throw new ArgumentNullException((s == null ? "s" : "bytes"), SR.ArgumentNull_Array);
+
+ if (charIndex < 0 || charCount < 0)
+ throw new ArgumentOutOfRangeException((charIndex < 0 ? "charIndex" : "charCount"), SR.ArgumentOutOfRange_NeedNonNegNum);
+
+ if (s.Length - charIndex < charCount)
+ throw new ArgumentOutOfRangeException("s", SR.ArgumentOutOfRange_IndexCount);
+
+ if (byteIndex < 0 || byteIndex > bytes.Length)
+ throw new ArgumentOutOfRangeException("byteIndex", SR.ArgumentOutOfRange_Index);
+ Contract.EndContractBlock();
+
+ int byteCount = bytes.Length - byteIndex;
+
+ // Fixed doesn't like 0 length arrays.
+ if (bytes.Length == 0)
+ bytes = new byte[1];
+
+ fixed (char* pChars = s) fixed (byte* pBytes = &bytes[0])
+ return GetBytes(pChars + charIndex, charCount, pBytes + byteIndex, byteCount, null);
+ }
+
+ // Encodes a range of characters in a character array into a range of bytes
+ // in a byte array. An exception occurs if the byte array is not large
+ // enough to hold the complete encoding of the characters. The
+ // GetByteCount method can be used to determine the exact number of
+ // bytes that will be produced for a given range of characters.
+ // Alternatively, the GetMaxByteCount method can be used to
+ // determine the maximum number of bytes that will be produced for a given
+ // number of characters, regardless of the actual character values.
+ //
+ // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
+ // So if you fix this, fix the others. Currently those include:
+ // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
+ // parent method is safe
+
+ public override unsafe int GetBytes(char[] chars, int charIndex, int charCount,
+ byte[] bytes, int byteIndex)
+ {
+ // Validate parameters
+ if (chars == null || bytes == null)
+ throw new ArgumentNullException((chars == null ? "chars" : "bytes"), SR.ArgumentNull_Array);
+
+ if (charIndex < 0 || charCount < 0)
+ throw new ArgumentOutOfRangeException((charIndex < 0 ? "charIndex" : "charCount"), SR.ArgumentOutOfRange_NeedNonNegNum);
+
+ if (chars.Length - charIndex < charCount)
+ throw new ArgumentOutOfRangeException("chars", SR.ArgumentOutOfRange_IndexCountBuffer);
+
+ if (byteIndex < 0 || byteIndex > bytes.Length)
+ throw new ArgumentOutOfRangeException("byteIndex", SR.ArgumentOutOfRange_Index);
+ Contract.EndContractBlock();
+
+ // If nothing to encode return 0, avoid fixed problem
+ if (charCount == 0)
+ return 0;
+
+ // Just call pointer version
+ int byteCount = bytes.Length - byteIndex;
+
+ // Fixed doesn't like 0 length arrays.
+ if (bytes.Length == 0)
+ bytes = new byte[1];
+
+ fixed (char* pChars = chars) fixed (byte* pBytes = &bytes[0])
+ // Remember that byteCount is # to decode, not size of array.
+ return GetBytes(pChars + charIndex, charCount, pBytes + byteIndex, byteCount, null);
+ }
+
+ // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
+ // So if you fix this, fix the others. Currently those include:
+ // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
+
+ [CLSCompliant(false)]
+ public override unsafe int GetBytes(char* chars, int charCount, byte* bytes, int byteCount)
+ {
+ // Validate Parameters
+ if (bytes == null || chars == null)
+ throw new ArgumentNullException(bytes == null ? "bytes" : "chars", SR.ArgumentNull_Array);
+
+ if (charCount < 0 || byteCount < 0)
+ throw new ArgumentOutOfRangeException((charCount < 0 ? "charCount" : "byteCount"), SR.ArgumentOutOfRange_NeedNonNegNum);
+ Contract.EndContractBlock();
+
+ return GetBytes(chars, charCount, bytes, byteCount, null);
+ }
+
+ // Returns the number of characters produced by decoding a range of bytes
+ // in a byte array.
+ //
+ // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
+ // So if you fix this, fix the others. Currently those include:
+ // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
+ // parent method is safe
+
+ public override unsafe int GetCharCount(byte[] bytes, int index, int count)
+ {
+ // Validate Parameters
+ if (bytes == null)
+ throw new ArgumentNullException("bytes", SR.ArgumentNull_Array);
+
+ if (index < 0 || count < 0)
+ throw new ArgumentOutOfRangeException((index < 0 ? "index" : "count"), SR.ArgumentOutOfRange_NeedNonNegNum);
+
+ if (bytes.Length - index < count)
+ throw new ArgumentOutOfRangeException("bytes", SR.ArgumentOutOfRange_IndexCountBuffer);
+ Contract.EndContractBlock();
+
+ // If no input just return 0, fixed doesn't like 0 length arrays
+ if (count == 0)
+ return 0;
+
+ // Just call pointer version
+ fixed (byte* pBytes = bytes)
+ return GetCharCount(pBytes + index, count, null);
+ }
+
+ // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
+ // So if you fix this, fix the others. Currently those include:
+ // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
+
+ [CLSCompliant(false)]
+ public override unsafe int GetCharCount(byte* bytes, int count)
+ {
+ // Validate Parameters
+ if (bytes == null)
+ throw new ArgumentNullException("bytes", SR.ArgumentNull_Array);
+
+ if (count < 0)
+ throw new ArgumentOutOfRangeException("count", SR.ArgumentOutOfRange_NeedNonNegNum);
+ Contract.EndContractBlock();
+
+ return GetCharCount(bytes, count, null);
+ }
+
+ // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
+ // So if you fix this, fix the others. Currently those include:
+ // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
+ // parent method is safe
+
+ public override unsafe int GetChars(byte[] bytes, int byteIndex, int byteCount,
+ char[] chars, int charIndex)
+ {
+ // Validate Parameters
+ if (bytes == null || chars == null)
+ throw new ArgumentNullException(bytes == null ? "bytes" : "chars", SR.ArgumentNull_Array);
+
+ if (byteIndex < 0 || byteCount < 0)
+ throw new ArgumentOutOfRangeException((byteIndex < 0 ? "byteIndex" : "byteCount"), SR.ArgumentOutOfRange_NeedNonNegNum);
+
+ if ( bytes.Length - byteIndex < byteCount)
+ throw new ArgumentOutOfRangeException("bytes", SR.ArgumentOutOfRange_IndexCountBuffer);
+
+ if (charIndex < 0 || charIndex > chars.Length)
+ throw new ArgumentOutOfRangeException("charIndex", SR.ArgumentOutOfRange_Index);
+ Contract.EndContractBlock();
+
+ // If no input, return 0 & avoid fixed problem
+ if (byteCount == 0)
+ return 0;
+
+ // Just call pointer version
+ int charCount = chars.Length - charIndex;
+
+ // Fixed doesn't like 0 length arrays.
+ if (chars.Length == 0)
+ chars = new char[1];
+
+ fixed (byte* pBytes = bytes) fixed (char* pChars = &chars[0])
+ // Remember that charCount is # to decode, not size of array
+ return GetChars(pBytes + byteIndex, byteCount, pChars + charIndex, charCount, null);
+ }
+
+ // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
+ // So if you fix this, fix the others. Currently those include:
+ // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
+
+ [CLSCompliant(false)]
+ public unsafe override int GetChars(byte* bytes, int byteCount, char* chars, int charCount)
+ {
+ // Validate Parameters
+ if (bytes == null || chars == null)
+ throw new ArgumentNullException(bytes == null ? "bytes" : "chars", SR.ArgumentNull_Array);
+
+ if (charCount < 0 || byteCount < 0)
+ throw new ArgumentOutOfRangeException((charCount < 0 ? "charCount" : "byteCount"), SR.ArgumentOutOfRange_NeedNonNegNum);
+ Contract.EndContractBlock();
+
+ return GetChars(bytes, byteCount, chars, charCount, null);
+ }
+
+ // Returns a string containing the decoded representation of a range of
+ // bytes in a byte array.
+ //
+ // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
+ // So if you fix this, fix the others. Currently those include:
+ // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
+ // parent method is safe
+
+ public override unsafe string GetString(byte[] bytes, int index, int count)
+ {
+ // Validate Parameters
+ if (bytes == null)
+ throw new ArgumentNullException("bytes", SR.ArgumentNull_Array);
+
+ if (index < 0 || count < 0)
+ throw new ArgumentOutOfRangeException((index < 0 ? "index" : "count"), SR.ArgumentOutOfRange_NeedNonNegNum);
+
+ if (bytes.Length - index < count)
+ throw new ArgumentOutOfRangeException("bytes", SR.ArgumentOutOfRange_IndexCountBuffer);
+ Contract.EndContractBlock();
+
+ // Avoid problems with empty input buffer
+ if (count == 0) return String.Empty;
+
+ fixed (byte* pBytes = bytes)
+ return String.CreateStringFromEncoding(
+ pBytes + index, count, this);
+ }
+
+ //
+ // End of standard methods copied from EncodingNLS.cs
+ //
+
+ internal override unsafe int GetByteCount(char* chars, int count, EncoderNLS encoder)
+ {
+ Debug.Assert(chars != null, "[UnicodeEncoding.GetByteCount]chars!=null");
+ Debug.Assert(count >= 0, "[UnicodeEncoding.GetByteCount]count >=0");
+
+ // Start by assuming each char gets 2 bytes
+ int byteCount = count << 1;
+
+ // Check for overflow in byteCount
+ // (If they were all invalid chars, this would actually be wrong,
+ // but that's a ridiculously large # so we're not concerned about that case)
+ if (byteCount < 0)
+ throw new ArgumentOutOfRangeException(nameof(count), SR.ArgumentOutOfRange_GetByteCountOverflow);
+
+ char* charStart = chars;
+ char* charEnd = chars + count;
+ char charLeftOver = (char)0;
+
+ bool wasHereBefore = false;
+
+ // Need -1 to check 2 at a time. If we have an even #, longChars will go
+ // from longEnd - 1/2 long to longEnd + 1/2 long. If we're odd, longChars
+ // will go from longEnd - 1 long to longEnd. (Might not get to use this)
+ ulong* longEnd = (ulong*)(charEnd - 3);
+
+ // For fallback we may need a fallback buffer
+ EncoderFallbackBuffer fallbackBuffer = null;
+ char* charsForFallback;
+
+ if (encoder != null)
+ {
+ charLeftOver = encoder.charLeftOver;
+
+ // Assume extra bytes to encode charLeftOver if it existed
+ if (charLeftOver > 0)
+ byteCount += 2;
+
+ // We mustn't have left over fallback data when counting
+ if (encoder.InternalHasFallbackBuffer)
+ {
+ fallbackBuffer = encoder.FallbackBuffer;
+ if (fallbackBuffer.Remaining > 0)
+ throw new ArgumentException(SR.Format(SR.Argument_EncoderFallbackNotEmpty, this.EncodingName, encoder.Fallback.GetType()));
+
+ // Set our internal fallback interesting things.
+ fallbackBuffer.InternalInitialize(charStart, charEnd, encoder, false);
+ }
+ }
+
+ char ch;
+ TryAgain:
+
+ while (((ch = (fallbackBuffer == null) ? (char)0 : fallbackBuffer.InternalGetNextChar()) != 0) || chars < charEnd)
+ {
+ // First unwind any fallback
+ if (ch == 0)
+ {
+ // No fallback, maybe we can do it fast
+#if !NO_FAST_UNICODE_LOOP
+#if BIGENDIAN // If endianess is backwards then each pair of bytes would be backwards.
+ if ( bigEndian &&
+#else
+ if (!bigEndian &&
+#endif // BIGENDIAN
+
+#if BIT64 // 64 bit CPU needs to be long aligned for this to work.
+ charLeftOver == 0 && (unchecked((long)chars) & 7) == 0)
+#else
+ charLeftOver == 0 && (unchecked((int)chars) & 3) == 0)
+#endif
+ {
+ // Need new char* so we can check 4 at a time
+ ulong* longChars = (ulong*)chars;
+
+ while (longChars < longEnd)
+ {
+ // See if we potentially have surrogates (0x8000 bit set)
+ // (We're either big endian on a big endian machine or little endian on
+ // a little endian machine so this'll work)
+ if ((0x8000800080008000 & *longChars) != 0)
+ {
+ // See if any of these are high or low surrogates (0xd800 - 0xdfff). If the high
+ // 5 bits looks like 11011, then its a high or low surrogate.
+ // We do the & f800 to filter the 5 bits, then ^ d800 to ensure the 0 isn't set.
+ // Note that we expect BMP characters to be more common than surrogates
+ // & each char with 11111... then ^ with 11011. Zeroes then indicate surrogates
+ ulong uTemp = (0xf800f800f800f800 & *longChars) ^ 0xd800d800d800d800;
+
+ // Check each of the 4 chars. 0 for those 16 bits means it was a surrogate
+ // but no clue if they're high or low.
+ // If each of the 4 characters are non-zero, then none are surrogates.
+ if ((uTemp & 0xFFFF000000000000) == 0 ||
+ (uTemp & 0x0000FFFF00000000) == 0 ||
+ (uTemp & 0x00000000FFFF0000) == 0 ||
+ (uTemp & 0x000000000000FFFF) == 0)
+ {
+ // It has at least 1 surrogate, but we don't know if they're high or low surrogates,
+ // or if there's 1 or 4 surrogates
+
+ // If they happen to be high/low/high/low, we may as well continue. Check the next
+ // bit to see if its set (low) or not (high) in the right pattern
+#if BIGENDIAN
+ if (((0xfc00fc00fc00fc00 & *longChars) ^ 0xd800dc00d800dc00) != 0)
+#else
+ if (((0xfc00fc00fc00fc00 & *longChars) ^ 0xdc00d800dc00d800) != 0)
+#endif
+ {
+ // Either there weren't 4 surrogates, or the 0x0400 bit was set when a high
+ // was hoped for or the 0x0400 bit wasn't set where a low was hoped for.
+
+ // Drop out to the slow loop to resolve the surrogates
+ break;
+ }
+ // else they are all surrogates in High/Low/High/Low order, so we can use them.
+ }
+ // else none are surrogates, so we can use them.
+ }
+ // else all < 0x8000 so we can use them
+
+ // We already counted these four chars, go to next long.
+ longChars++;
+ }
+
+ chars = (char*)longChars;
+
+ if (chars >= charEnd)
+ break;
+ }
+#endif // !NO_FAST_UNICODE_LOOP
+
+ // No fallback, just get next char
+ ch = *chars;
+ chars++;
+ }
+ else
+ {
+ // We weren't preallocating fallback space.
+ byteCount += 2;
+ }
+
+ // Check for high or low surrogates
+ if (ch >= 0xd800 && ch <= 0xdfff)
+ {
+ // Was it a high surrogate?
+ if (ch <= 0xdbff)
+ {
+ // Its a high surrogate, if we already had a high surrogate do its fallback
+ if (charLeftOver > 0)
+ {
+ // Unwind the current character, this should be safe because we
+ // don't have leftover data in the fallback, so chars must have
+ // advanced already.
+ Debug.Assert(chars > charStart,
+ "[UnicodeEncoding.GetByteCount]Expected chars to have advanced in unexpected high surrogate");
+ chars--;
+
+ // If previous high surrogate deallocate 2 bytes
+ byteCount -= 2;
+
+ // Fallback the previous surrogate
+ // Need to initialize fallback buffer?
+ if (fallbackBuffer == null)
+ {
+ if (encoder == null)
+ fallbackBuffer = this.encoderFallback.CreateFallbackBuffer();
+ else
+ fallbackBuffer = encoder.FallbackBuffer;
+
+ // Set our internal fallback interesting things.
+ fallbackBuffer.InternalInitialize(charStart, charEnd, encoder, false);
+ }
+
+ charsForFallback = chars; // Avoid passing chars by reference to allow it to be enregistered
+ fallbackBuffer.InternalFallback(charLeftOver, ref charsForFallback);
+ chars = charsForFallback;
+
+ // Now no high surrogate left over
+ charLeftOver = (char)0;
+ continue;
+ }
+
+ // Remember this high surrogate
+ charLeftOver = ch;
+ continue;
+ }
+
+
+ // Its a low surrogate
+ if (charLeftOver == 0)
+ {
+ // Expected a previous high surrogate.
+ // Don't count this one (we'll count its fallback if necessary)
+ byteCount -= 2;
+
+ // fallback this one
+ // Need to initialize fallback buffer?
+ if (fallbackBuffer == null)
+ {
+ if (encoder == null)
+ fallbackBuffer = this.encoderFallback.CreateFallbackBuffer();
+ else
+ fallbackBuffer = encoder.FallbackBuffer;
+
+ // Set our internal fallback interesting things.
+ fallbackBuffer.InternalInitialize(charStart, charEnd, encoder, false);
+ }
+ charsForFallback = chars; // Avoid passing chars by reference to allow it to be enregistered
+ fallbackBuffer.InternalFallback(ch, ref charsForFallback);
+ chars = charsForFallback;
+ continue;
+ }
+
+ // Valid surrogate pair, add our charLeftOver
+ charLeftOver = (char)0;
+ continue;
+ }
+ else if (charLeftOver > 0)
+ {
+ // Expected a low surrogate, but this char is normal
+
+ // Rewind the current character, fallback previous character.
+ // this should be safe because we don't have leftover data in the
+ // fallback, so chars must have advanced already.
+ Debug.Assert(chars > charStart,
+ "[UnicodeEncoding.GetByteCount]Expected chars to have advanced when expected low surrogate");
+ chars--;
+
+ // fallback previous chars
+ // Need to initialize fallback buffer?
+ if (fallbackBuffer == null)
+ {
+ if (encoder == null)
+ fallbackBuffer = this.encoderFallback.CreateFallbackBuffer();
+ else
+ fallbackBuffer = encoder.FallbackBuffer;
+
+ // Set our internal fallback interesting things.
+ fallbackBuffer.InternalInitialize(charStart, charEnd, encoder, false);
+ }
+ charsForFallback = chars; // Avoid passing chars by reference to allow it to be enregistered
+ fallbackBuffer.InternalFallback(charLeftOver, ref charsForFallback);
+ chars = charsForFallback;
+
+ // Ignore charLeftOver or throw
+ byteCount -= 2;
+ charLeftOver = (char)0;
+
+ continue;
+ }
+
+ // Ok we had something to add (already counted)
+ }
+
+ // Don't allocate space for left over char
+ if (charLeftOver > 0)
+ {
+ byteCount -= 2;
+
+ // If we have to flush, stick it in fallback and try again
+ if (encoder == null || encoder.MustFlush)
+ {
+ if (wasHereBefore)
+ {
+ // Throw it, using our complete character
+ throw new ArgumentException(
+ SR.Format(SR.Argument_RecursiveFallback, charLeftOver), nameof(chars));
+ }
+ else
+ {
+ // Need to initialize fallback buffer?
+ if (fallbackBuffer == null)
+ {
+ if (encoder == null)
+ fallbackBuffer = this.encoderFallback.CreateFallbackBuffer();
+ else
+ fallbackBuffer = encoder.FallbackBuffer;
+
+ // Set our internal fallback interesting things.
+ fallbackBuffer.InternalInitialize(charStart, charEnd, encoder, false);
+ }
+ charsForFallback = chars; // Avoid passing chars by reference to allow it to be enregistered
+ fallbackBuffer.InternalFallback(charLeftOver, ref charsForFallback);
+ chars = charsForFallback;
+ charLeftOver = (char)0;
+ wasHereBefore = true;
+ goto TryAgain;
+ }
+ }
+ }
+
+ // Shouldn't have anything in fallback buffer for GetByteCount
+ // (don't have to check m_throwOnOverflow for count)
+ Debug.Assert(fallbackBuffer == null || fallbackBuffer.Remaining == 0,
+ "[UnicodeEncoding.GetByteCount]Expected empty fallback buffer at end");
+
+ // Don't remember fallbackBuffer.encoder for counting
+ return byteCount;
+ }
+
+ internal override unsafe int GetBytes(char* chars, int charCount,
+ byte* bytes, int byteCount, EncoderNLS encoder)
+ {
+ Debug.Assert(chars != null, "[UnicodeEncoding.GetBytes]chars!=null");
+ Debug.Assert(byteCount >= 0, "[UnicodeEncoding.GetBytes]byteCount >=0");
+ Debug.Assert(charCount >= 0, "[UnicodeEncoding.GetBytes]charCount >=0");
+ Debug.Assert(bytes != null, "[UnicodeEncoding.GetBytes]bytes!=null");
+
+ char charLeftOver = (char)0;
+ char ch;
+ bool wasHereBefore = false;
+
+
+ byte* byteEnd = bytes + byteCount;
+ char* charEnd = chars + charCount;
+ byte* byteStart = bytes;
+ char* charStart = chars;
+
+ // For fallback we may need a fallback buffer
+ EncoderFallbackBuffer fallbackBuffer = null;
+ char* charsForFallback;
+
+ // Get our encoder, but don't clear it yet.
+ if (encoder != null)
+ {
+ charLeftOver = encoder.charLeftOver;
+
+ // We mustn't have left over fallback data when counting
+ if (encoder.InternalHasFallbackBuffer)
+ {
+ // We always need the fallback buffer in get bytes so we can flush any remaining ones if necessary
+ fallbackBuffer = encoder.FallbackBuffer;
+ if (fallbackBuffer.Remaining > 0 && encoder.m_throwOnOverflow)
+ throw new ArgumentException(SR.Format(SR.Argument_EncoderFallbackNotEmpty, this.EncodingName, encoder.Fallback.GetType()));
+
+ // Set our internal fallback interesting things.
+ fallbackBuffer.InternalInitialize(charStart, charEnd, encoder, false);
+ }
+ }
+
+ TryAgain:
+ while (((ch = (fallbackBuffer == null) ?
+ (char)0 : fallbackBuffer.InternalGetNextChar()) != 0) ||
+ chars < charEnd)
+ {
+ // First unwind any fallback
+ if (ch == 0)
+ {
+ // No fallback, maybe we can do it fast
+#if !NO_FAST_UNICODE_LOOP
+#if BIGENDIAN // If endianess is backwards then each pair of bytes would be backwards.
+ if ( bigEndian &&
+#else
+ if (!bigEndian &&
+#endif // BIGENDIAN
+#if BIT64 // 64 bit CPU needs to be long aligned for this to work, 32 bit CPU needs to be 32 bit aligned
+ (unchecked((long)chars) & 7) == 0 && (unchecked((long)bytes) & 7) == 0 &&
+#else
+ (unchecked((int)chars) & 3) == 0 && (unchecked((int)bytes) & 3) == 0 &&
+#endif // BIT64
+ charLeftOver == 0)
+ {
+ // Need -1 to check 2 at a time. If we have an even #, longChars will go
+ // from longEnd - 1/2 long to longEnd + 1/2 long. If we're odd, longChars
+ // will go from longEnd - 1 long to longEnd. (Might not get to use this)
+ // We can only go iCount units (limited by shorter of char or byte buffers.
+ ulong* longEnd = (ulong*)(chars - 3 +
+ (((byteEnd - bytes) >> 1 < charEnd - chars) ?
+ (byteEnd - bytes) >> 1 : charEnd - chars));
+
+ // Need new char* so we can check 4 at a time
+ ulong* longChars = (ulong*)chars;
+ ulong* longBytes = (ulong*)bytes;
+
+ while (longChars < longEnd)
+ {
+ // See if we potentially have surrogates (0x8000 bit set)
+ // (We're either big endian on a big endian machine or little endian on
+ // a little endian machine so this'll work)
+ if ((0x8000800080008000 & *longChars) != 0)
+ {
+ // See if any of these are high or low surrogates (0xd800 - 0xdfff). If the high
+ // 5 bits looks like 11011, then its a high or low surrogate.
+ // We do the & f800 to filter the 5 bits, then ^ d800 to ensure the 0 isn't set.
+ // Note that we expect BMP characters to be more common than surrogates
+ // & each char with 11111... then ^ with 11011. Zeroes then indicate surrogates
+ ulong uTemp = (0xf800f800f800f800 & *longChars) ^ 0xd800d800d800d800;
+
+ // Check each of the 4 chars. 0 for those 16 bits means it was a surrogate
+ // but no clue if they're high or low.
+ // If each of the 4 characters are non-zero, then none are surrogates.
+ if ((uTemp & 0xFFFF000000000000) == 0 ||
+ (uTemp & 0x0000FFFF00000000) == 0 ||
+ (uTemp & 0x00000000FFFF0000) == 0 ||
+ (uTemp & 0x000000000000FFFF) == 0)
+ {
+ // It has at least 1 surrogate, but we don't know if they're high or low surrogates,
+ // or if there's 1 or 4 surrogates
+
+ // If they happen to be high/low/high/low, we may as well continue. Check the next
+ // bit to see if its set (low) or not (high) in the right pattern
+#if BIGENDIAN
+ if (((0xfc00fc00fc00fc00 & *longChars) ^ 0xd800dc00d800dc00) != 0)
+#else
+ if (((0xfc00fc00fc00fc00 & *longChars) ^ 0xdc00d800dc00d800) != 0)
+#endif
+ {
+ // Either there weren't 4 surrogates, or the 0x0400 bit was set when a high
+ // was hoped for or the 0x0400 bit wasn't set where a low was hoped for.
+
+ // Drop out to the slow loop to resolve the surrogates
+ break;
+ }
+ // else they are all surrogates in High/Low/High/Low order, so we can use them.
+ }
+ // else none are surrogates, so we can use them.
+ }
+ // else all < 0x8000 so we can use them
+
+ // We can use these 4 chars.
+ *longBytes = *longChars;
+ longChars++;
+ longBytes++;
+ }
+
+ chars = (char*)longChars;
+ bytes = (byte*)longBytes;
+
+ if (chars >= charEnd)
+ break;
+ }
+ // Not aligned, but maybe we can still be somewhat faster
+ // Also somehow this optimizes the above loop? It seems to cause something above
+ // to get enregistered, but I haven't figured out how to make that happen without this loop.
+ else if ((charLeftOver == 0) &&
+#if BIGENDIAN
+ bigEndian &&
+#else
+ !bigEndian &&
+#endif // BIGENDIAN
+
+#if BIT64
+ (unchecked((long)chars) & 7) != (unchecked((long)bytes) & 7) && // Only do this if chars & bytes are out of line, otherwise faster loop'll be faster next time
+#else
+ (unchecked((int)chars) & 3) != (unchecked((int)bytes) & 3) && // Only do this if chars & bytes are out of line, otherwise faster loop'll be faster next time
+#endif // BIT64
+ (unchecked((int)(bytes)) & 1) == 0)
+ {
+ // # to use
+ long iCount = ((byteEnd - bytes) >> 1 < charEnd - chars) ?
+ (byteEnd - bytes) >> 1 : charEnd - chars;
+
+ // Need new char*
+ char* charOut = ((char*)bytes); // a char* for our output
+ char* tempEnd = chars + iCount - 1; // Our end pointer
+
+ while (chars < tempEnd)
+ {
+ if (*chars >= (char)0xd800 && *chars <= (char)0xdfff)
+ {
+ // break for fallback for low surrogate
+ if (*chars >= 0xdc00)
+ break;
+
+ // break if next one's not a low surrogate (will do fallback)
+ if (*(chars + 1) < 0xdc00 || *(chars + 1) > 0xdfff)
+ break;
+
+ // They both exist, use them
+ }
+ // If 2nd char is surrogate & this one isn't then only add one
+ else if (*(chars + 1) >= (char)0xd800 && *(chars + 1) <= 0xdfff)
+ {
+ *charOut = *chars;
+ charOut++;
+ chars++;
+ continue;
+ }
+
+ *charOut = *chars;
+ *(charOut + 1) = *(chars + 1);
+ charOut += 2;
+ chars += 2;
+ }
+
+ bytes = (byte*)charOut;
+
+ if (chars >= charEnd)
+ break;
+ }
+#endif // !NO_FAST_UNICODE_LOOP
+
+ // No fallback, just get next char
+ ch = *chars;
+ chars++;
+ }
+
+ // Check for high or low surrogates
+ if (ch >= 0xd800 && ch <= 0xdfff)
+ {
+ // Was it a high surrogate?
+ if (ch <= 0xdbff)
+ {
+ // Its a high surrogate, see if we already had a high surrogate
+ if (charLeftOver > 0)
+ {
+ // Unwind the current character, this should be safe because we
+ // don't have leftover data in the fallback, so chars must have
+ // advanced already.
+ Debug.Assert(chars > charStart,
+ "[UnicodeEncoding.GetBytes]Expected chars to have advanced in unexpected high surrogate");
+ chars--;
+
+ // Fallback the previous surrogate
+ // Might need to create our fallback buffer
+ if (fallbackBuffer == null)
+ {
+ if (encoder == null)
+ fallbackBuffer = this.encoderFallback.CreateFallbackBuffer();
+ else
+ fallbackBuffer = encoder.FallbackBuffer;
+
+ // Set our internal fallback interesting things.
+ fallbackBuffer.InternalInitialize(charStart, charEnd, encoder, true);
+ }
+
+ charsForFallback = chars; // Avoid passing chars by reference to allow it to be enregistered
+ fallbackBuffer.InternalFallback(charLeftOver, ref charsForFallback);
+ chars = charsForFallback;
+
+ charLeftOver = (char)0;
+ continue;
+ }
+
+ // Remember this high surrogate
+ charLeftOver = ch;
+ continue;
+ }
+
+ // Its a low surrogate
+ if (charLeftOver == 0)
+ {
+ // We'll fall back this one
+ // Might need to create our fallback buffer
+ if (fallbackBuffer == null)
+ {
+ if (encoder == null)
+ fallbackBuffer = this.encoderFallback.CreateFallbackBuffer();
+ else
+ fallbackBuffer = encoder.FallbackBuffer;
+
+ // Set our internal fallback interesting things.
+ fallbackBuffer.InternalInitialize(charStart, charEnd, encoder, true);
+ }
+
+ charsForFallback = chars; // Avoid passing chars by reference to allow it to be enregistered
+ fallbackBuffer.InternalFallback(ch, ref charsForFallback);
+ chars = charsForFallback;
+ continue;
+ }
+
+ // Valid surrogate pair, add our charLeftOver
+ if (bytes + 3 >= byteEnd)
+ {
+ // Not enough room to add this surrogate pair
+ if (fallbackBuffer != null && fallbackBuffer.bFallingBack)
+ {
+ // These must have both been from the fallbacks.
+ // Both of these MUST have been from a fallback because if the 1st wasn't
+ // from a fallback, then a high surrogate followed by an illegal char
+ // would've caused the high surrogate to fall back. If a high surrogate
+ // fell back, then it was consumed and both chars came from the fallback.
+ fallbackBuffer.MovePrevious(); // Didn't use either fallback surrogate
+ fallbackBuffer.MovePrevious();
+ }
+ else
+ {
+ // If we don't have enough room, then either we should've advanced a while
+ // or we should have bytes==byteStart and throw below
+ Debug.Assert(chars > charStart + 1 || bytes == byteStart,
+ "[UnicodeEncoding.GetBytes]Expected chars to have when no room to add surrogate pair");
+ chars -= 2; // Didn't use either surrogate
+ }
+ ThrowBytesOverflow(encoder, bytes == byteStart); // Throw maybe (if no bytes written)
+ charLeftOver = (char)0; // we'll retry it later
+ break; // Didn't throw, but stop 'til next time.
+ }
+
+ if (bigEndian)
+ {
+ *(bytes++) = (byte)(charLeftOver >> 8);
+ *(bytes++) = (byte)charLeftOver;
+ }
+ else
+ {
+ *(bytes++) = (byte)charLeftOver;
+ *(bytes++) = (byte)(charLeftOver >> 8);
+ }
+
+ charLeftOver = (char)0;
+ }
+ else if (charLeftOver > 0)
+ {
+ // Expected a low surrogate, but this char is normal
+
+ // Rewind the current character, fallback previous character.
+ // this should be safe because we don't have leftover data in the
+ // fallback, so chars must have advanced already.
+ Debug.Assert(chars > charStart,
+ "[UnicodeEncoding.GetBytes]Expected chars to have advanced after expecting low surrogate");
+ chars--;
+
+ // fallback previous chars
+ // Might need to create our fallback buffer
+ if (fallbackBuffer == null)
+ {
+ if (encoder == null)
+ fallbackBuffer = this.encoderFallback.CreateFallbackBuffer();
+ else
+ fallbackBuffer = encoder.FallbackBuffer;
+
+ // Set our internal fallback interesting things.
+ fallbackBuffer.InternalInitialize(charStart, charEnd, encoder, true);
+ }
+
+ charsForFallback = chars; // Avoid passing chars by reference to allow it to be enregistered
+ fallbackBuffer.InternalFallback(charLeftOver, ref charsForFallback);
+ chars = charsForFallback;
+
+ // Ignore charLeftOver or throw
+ charLeftOver = (char)0;
+ continue;
+ }
+
+ // Ok, we have a char to add
+ if (bytes + 1 >= byteEnd)
+ {
+ // Couldn't add this char
+ if (fallbackBuffer != null && fallbackBuffer.bFallingBack)
+ fallbackBuffer.MovePrevious(); // Not using this fallback char
+ else
+ {
+ // Lonely charLeftOver (from previous call) would've been caught up above,
+ // so this must be a case where we've already read an input char.
+ Debug.Assert(chars > charStart,
+ "[UnicodeEncoding.GetBytes]Expected chars to have advanced for failed fallback");
+ chars--; // Not using this char
+ }
+ ThrowBytesOverflow(encoder, bytes == byteStart); // Throw maybe (if no bytes written)
+ break; // didn't throw, just stop
+ }
+
+ if (bigEndian)
+ {
+ *(bytes++) = (byte)(ch >> 8);
+ *(bytes++) = (byte)ch;
+ }
+ else
+ {
+ *(bytes++) = (byte)ch;
+ *(bytes++) = (byte)(ch >> 8);
+ }
+ }
+
+ // Don't allocate space for left over char
+ if (charLeftOver > 0)
+ {
+ // If we aren't flushing we need to fall this back
+ if (encoder == null || encoder.MustFlush)
+ {
+ if (wasHereBefore)
+ {
+ // Throw it, using our complete character
+ throw new ArgumentException(
+ SR.Format(SR.Argument_RecursiveFallback, charLeftOver), nameof(chars));
+ }
+ else
+ {
+ // If we have to flush, stick it in fallback and try again
+ // Might need to create our fallback buffer
+ if (fallbackBuffer == null)
+ {
+ if (encoder == null)
+ fallbackBuffer = this.encoderFallback.CreateFallbackBuffer();
+ else
+ fallbackBuffer = encoder.FallbackBuffer;
+
+ // Set our internal fallback interesting things.
+ fallbackBuffer.InternalInitialize(charStart, charEnd, encoder, true);
+ }
+
+ // If we're not flushing, this'll remember the left over character.
+ charsForFallback = chars; // Avoid passing chars by reference to allow it to be enregistered
+ fallbackBuffer.InternalFallback(charLeftOver, ref charsForFallback);
+ chars = charsForFallback;
+
+ charLeftOver = (char)0;
+ wasHereBefore = true;
+ goto TryAgain;
+ }
+ }
+ }
+
+ // Not flushing, remember it in the encoder
+ if (encoder != null)
+ {
+ encoder.charLeftOver = charLeftOver;
+ encoder.m_charsUsed = (int)(chars - charStart);
+ }
+
+ // Remember charLeftOver if we must, or clear it if we're flushing
+ // (charLeftOver should be 0 if we're flushing)
+ Debug.Assert((encoder != null && !encoder.MustFlush) || charLeftOver == (char)0,
+ "[UnicodeEncoding.GetBytes] Expected no left over characters if flushing");
+
+ Debug.Assert(fallbackBuffer == null || fallbackBuffer.Remaining == 0 ||
+ encoder == null || !encoder.m_throwOnOverflow,
+ "[UnicodeEncoding.GetBytes]Expected empty fallback buffer if not converting");
+
+ // We used to copy it fast, but this doesn't check for surrogates
+ // System.IO.__UnmanagedMemoryStream.memcpyimpl(bytes, (byte*)chars, usedByteCount);
+
+ return (int)(bytes - byteStart);
+ }
+
+ internal override unsafe int GetCharCount(byte* bytes, int count, DecoderNLS baseDecoder)
+ {
+ Debug.Assert(bytes != null, "[UnicodeEncoding.GetCharCount]bytes!=null");
+ Debug.Assert(count >= 0, "[UnicodeEncoding.GetCharCount]count >=0");
+
+ UnicodeEncoding.Decoder decoder = (UnicodeEncoding.Decoder)baseDecoder;
+
+ byte* byteEnd = bytes + count;
+ byte* byteStart = bytes;
+
+ // Need last vars
+ int lastByte = -1;
+ char lastChar = (char)0;
+
+ // Start by assuming same # of chars as bytes
+ int charCount = count >> 1;
+
+ // Need -1 to check 2 at a time. If we have an even #, longBytes will go
+ // from longEnd - 1/2 long to longEnd + 1/2 long. If we're odd, longBytes
+ // will go from longEnd - 1 long to longEnd. (Might not get to use this)
+ ulong* longEnd = (ulong*)(byteEnd - 7);
+
+ // For fallback we may need a fallback buffer
+ DecoderFallbackBuffer fallbackBuffer = null;
+
+ if (decoder != null)
+ {
+ lastByte = decoder.lastByte;
+ lastChar = decoder.lastChar;
+
+ // Assume extra char if last char was around
+ if (lastChar > 0)
+ charCount++;
+
+ // Assume extra char if extra last byte makes up odd # of input bytes
+ if (lastByte >= 0 && (count & 1) == 1)
+ {
+ charCount++;
+ }
+
+ // Shouldn't have anything in fallback buffer for GetCharCount
+ // (don't have to check m_throwOnOverflow for count)
+ Debug.Assert(!decoder.InternalHasFallbackBuffer || decoder.FallbackBuffer.Remaining == 0,
+ "[UnicodeEncoding.GetCharCount]Expected empty fallback buffer at start");
+ }
+
+ while (bytes < byteEnd)
+ {
+ // If we're aligned then maybe we can do it fast
+ // This'll hurt if we're unaligned because we'll always test but never be aligned
+#if !NO_FAST_UNICODE_LOOP
+#if BIGENDIAN
+ if (bigEndian &&
+#else // BIGENDIAN
+ if (!bigEndian &&
+#endif // BIGENDIAN
+#if BIT64 // win64 has to be long aligned
+ (unchecked((long)bytes) & 7) == 0 &&
+#else
+ (unchecked((int)bytes) & 3) == 0 &&
+#endif // BIT64
+ lastByte == -1 && lastChar == 0)
+ {
+ // Need new char* so we can check 4 at a time
+ ulong* longBytes = (ulong*)bytes;
+
+ while (longBytes < longEnd)
+ {
+ // See if we potentially have surrogates (0x8000 bit set)
+ // (We're either big endian on a big endian machine or little endian on
+ // a little endian machine so this'll work)
+ if ((0x8000800080008000 & *longBytes) != 0)
+ {
+ // See if any of these are high or low surrogates (0xd800 - 0xdfff). If the high
+ // 5 bits looks like 11011, then its a high or low surrogate.
+ // We do the & f800 to filter the 5 bits, then ^ d800 to ensure the 0 isn't set.
+ // Note that we expect BMP characters to be more common than surrogates
+ // & each char with 11111... then ^ with 11011. Zeroes then indicate surrogates
+ ulong uTemp = (0xf800f800f800f800 & *longBytes) ^ 0xd800d800d800d800;
+
+ // Check each of the 4 chars. 0 for those 16 bits means it was a surrogate
+ // but no clue if they're high or low.
+ // If each of the 4 characters are non-zero, then none are surrogates.
+ if ((uTemp & 0xFFFF000000000000) == 0 ||
+ (uTemp & 0x0000FFFF00000000) == 0 ||
+ (uTemp & 0x00000000FFFF0000) == 0 ||
+ (uTemp & 0x000000000000FFFF) == 0)
+ {
+ // It has at least 1 surrogate, but we don't know if they're high or low surrogates,
+ // or if there's 1 or 4 surrogates
+
+ // If they happen to be high/low/high/low, we may as well continue. Check the next
+ // bit to see if its set (low) or not (high) in the right pattern
+#if BIGENDIAN
+ if (((0xfc00fc00fc00fc00 & *longBytes) ^ 0xd800dc00d800dc00) != 0)
+#else
+ if (((0xfc00fc00fc00fc00 & *longBytes) ^ 0xdc00d800dc00d800) != 0)
+#endif
+ {
+ // Either there weren't 4 surrogates, or the 0x0400 bit was set when a high
+ // was hoped for or the 0x0400 bit wasn't set where a low was hoped for.
+
+ // Drop out to the slow loop to resolve the surrogates
+ break;
+ }
+ // else they are all surrogates in High/Low/High/Low order, so we can use them.
+ }
+ // else none are surrogates, so we can use them.
+ }
+ // else all < 0x8000 so we can use them
+
+ // We can use these 4 chars.
+ longBytes++;
+ }
+
+ bytes = (byte*)longBytes;
+
+ if (bytes >= byteEnd)
+ break;
+ }
+#endif // !NO_FAST_UNICODE_LOOP
+
+ // Get 1st byte
+ if (lastByte < 0)
+ {
+ lastByte = *bytes++;
+ if (bytes >= byteEnd) break;
+ }
+
+ // Get full char
+ char ch;
+ if (bigEndian)
+ {
+ ch = (char)(lastByte << 8 | *(bytes++));
+ }
+ else
+ {
+ ch = (char)(*(bytes++) << 8 | lastByte);
+ }
+ lastByte = -1;
+
+ // See if the char's valid
+ if (ch >= 0xd800 && ch <= 0xdfff)
+ {
+ // Was it a high surrogate?
+ if (ch <= 0xdbff)
+ {
+ // Its a high surrogate, if we had one then do fallback for previous one
+ if (lastChar > 0)
+ {
+ // Ignore previous bad high surrogate
+ charCount--;
+
+ // Get fallback for previous high surrogate
+ // Note we have to reconstruct bytes because some may have been in decoder
+ byte[] byteBuffer = null;
+ if (bigEndian)
+ {
+ byteBuffer = new byte[]
+ { unchecked((byte)(lastChar >> 8)), unchecked((byte)lastChar) };
+ }
+ else
+ {
+ byteBuffer = new byte[]
+ { unchecked((byte)lastChar), unchecked((byte)(lastChar >> 8)) };
+ }
+
+ if (fallbackBuffer == null)
+ {
+ if (decoder == null)
+ fallbackBuffer = this.decoderFallback.CreateFallbackBuffer();
+ else
+ fallbackBuffer = decoder.FallbackBuffer;
+
+ // Set our internal fallback interesting things.
+ fallbackBuffer.InternalInitialize(byteStart, null);
+ }
+
+ // Get fallback.
+ charCount += fallbackBuffer.InternalFallback(byteBuffer, bytes);
+ }
+
+ // Ignore the last one which fell back already,
+ // and remember the new high surrogate
+ lastChar = ch;
+ continue;
+ }
+
+ // Its a low surrogate
+ if (lastChar == 0)
+ {
+ // Expected a previous high surrogate
+ charCount--;
+
+ // Get fallback for this low surrogate
+ // Note we have to reconstruct bytes because some may have been in decoder
+ byte[] byteBuffer = null;
+ if (bigEndian)
+ {
+ byteBuffer = new byte[]
+ { unchecked((byte)(ch >> 8)), unchecked((byte)ch) };
+ }
+ else
+ {
+ byteBuffer = new byte[]
+ { unchecked((byte)ch), unchecked((byte)(ch >> 8)) };
+ }
+
+ if (fallbackBuffer == null)
+ {
+ if (decoder == null)
+ fallbackBuffer = this.decoderFallback.CreateFallbackBuffer();
+ else
+ fallbackBuffer = decoder.FallbackBuffer;
+
+ // Set our internal fallback interesting things.
+ fallbackBuffer.InternalInitialize(byteStart, null);
+ }
+
+ charCount += fallbackBuffer.InternalFallback(byteBuffer, bytes);
+
+ // Ignore this one (we already did its fallback)
+ continue;
+ }
+
+ // Valid surrogate pair, already counted.
+ lastChar = (char)0;
+ }
+ else if (lastChar > 0)
+ {
+ // Had a high surrogate, expected a low surrogate
+ // Uncount the last high surrogate
+ charCount--;
+
+ // fall back the high surrogate.
+ byte[] byteBuffer = null;
+ if (bigEndian)
+ {
+ byteBuffer = new byte[]
+ { unchecked((byte)(lastChar >> 8)), unchecked((byte)lastChar) };
+ }
+ else
+ {
+ byteBuffer = new byte[]
+ { unchecked((byte)lastChar), unchecked((byte)(lastChar >> 8)) };
+ }
+
+ if (fallbackBuffer == null)
+ {
+ if (decoder == null)
+ fallbackBuffer = this.decoderFallback.CreateFallbackBuffer();
+ else
+ fallbackBuffer = decoder.FallbackBuffer;
+
+ // Set our internal fallback interesting things.
+ fallbackBuffer.InternalInitialize(byteStart, null);
+ }
+
+ // Already subtracted high surrogate
+ charCount += fallbackBuffer.InternalFallback(byteBuffer, bytes);
+
+ // Not left over now, clear previous high surrogate and continue to add current char
+ lastChar = (char)0;
+ }
+
+ // Valid char, already counted
+ }
+
+ // Extra space if we can't use decoder
+ if (decoder == null || decoder.MustFlush)
+ {
+ if (lastChar > 0)
+ {
+ // No hanging high surrogates allowed, do fallback and remove count for it
+ charCount--;
+ byte[] byteBuffer = null;
+ if (bigEndian)
+ {
+ byteBuffer = new byte[]
+ { unchecked((byte)(lastChar >> 8)), unchecked((byte)lastChar) };
+ }
+ else
+ {
+ byteBuffer = new byte[]
+ { unchecked((byte)lastChar), unchecked((byte)(lastChar >> 8)) };
+ }
+
+ if (fallbackBuffer == null)
+ {
+ if (decoder == null)
+ fallbackBuffer = this.decoderFallback.CreateFallbackBuffer();
+ else
+ fallbackBuffer = decoder.FallbackBuffer;
+
+ // Set our internal fallback interesting things.
+ fallbackBuffer.InternalInitialize(byteStart, null);
+ }
+
+ charCount += fallbackBuffer.InternalFallback(byteBuffer, bytes);
+
+ lastChar = (char)0;
+ }
+
+ if (lastByte >= 0)
+ {
+ if (fallbackBuffer == null)
+ {
+ if (decoder == null)
+ fallbackBuffer = this.decoderFallback.CreateFallbackBuffer();
+ else
+ fallbackBuffer = decoder.FallbackBuffer;
+
+ // Set our internal fallback interesting things.
+ fallbackBuffer.InternalInitialize(byteStart, null);
+ }
+
+ // No hanging odd bytes allowed if must flush
+ charCount += fallbackBuffer.InternalFallback(new byte[] { unchecked((byte)lastByte) }, bytes);
+ lastByte = -1;
+ }
+ }
+
+ // If we had a high surrogate left over, we can't count it
+ if (lastChar > 0)
+ charCount--;
+
+ // Shouldn't have anything in fallback buffer for GetCharCount
+ // (don't have to check m_throwOnOverflow for count)
+ Debug.Assert(fallbackBuffer == null || fallbackBuffer.Remaining == 0,
+ "[UnicodeEncoding.GetCharCount]Expected empty fallback buffer at end");
+
+ return charCount;
+ }
+
+ internal override unsafe int GetChars(byte* bytes, int byteCount,
+ char* chars, int charCount, DecoderNLS baseDecoder)
+ {
+ Debug.Assert(chars != null, "[UnicodeEncoding.GetChars]chars!=null");
+ Debug.Assert(byteCount >= 0, "[UnicodeEncoding.GetChars]byteCount >=0");
+ Debug.Assert(charCount >= 0, "[UnicodeEncoding.GetChars]charCount >=0");
+ Debug.Assert(bytes != null, "[UnicodeEncoding.GetChars]bytes!=null");
+
+ UnicodeEncoding.Decoder decoder = (UnicodeEncoding.Decoder)baseDecoder;
+
+ // Need last vars
+ int lastByte = -1;
+ char lastChar = (char)0;
+
+ // Get our decoder (but don't clear it yet)
+ if (decoder != null)
+ {
+ lastByte = decoder.lastByte;
+ lastChar = decoder.lastChar;
+
+ // Shouldn't have anything in fallback buffer for GetChars
+ // (don't have to check m_throwOnOverflow for chars)
+ Debug.Assert(!decoder.InternalHasFallbackBuffer || decoder.FallbackBuffer.Remaining == 0,
+ "[UnicodeEncoding.GetChars]Expected empty fallback buffer at start");
+ }
+
+ // For fallback we may need a fallback buffer
+ DecoderFallbackBuffer fallbackBuffer = null;
+ char* charsForFallback;
+
+ byte* byteEnd = bytes + byteCount;
+ char* charEnd = chars + charCount;
+ byte* byteStart = bytes;
+ char* charStart = chars;
+
+ while (bytes < byteEnd)
+ {
+ // If we're aligned then maybe we can do it fast
+ // This'll hurt if we're unaligned because we'll always test but never be aligned
+#if !NO_FAST_UNICODE_LOOP
+#if BIGENDIAN
+ if (bigEndian &&
+#else // BIGENDIAN
+ if (!bigEndian &&
+#endif // BIGENDIAN
+#if BIT64 // win64 has to be long aligned
+ (unchecked((long)chars) & 7) == 0 && (unchecked((long)bytes) & 7) == 0 &&
+#else
+ (unchecked((int)chars) & 3) == 0 && (unchecked((int)bytes) & 3) == 0 &&
+#endif // BIT64
+ lastByte == -1 && lastChar == 0)
+ {
+ // Need -1 to check 2 at a time. If we have an even #, longChars will go
+ // from longEnd - 1/2 long to longEnd + 1/2 long. If we're odd, longChars
+ // will go from longEnd - 1 long to longEnd. (Might not get to use this)
+ // We can only go iCount units (limited by shorter of char or byte buffers.
+ ulong* longEnd = (ulong*)(bytes - 7 +
+ (((byteEnd - bytes) >> 1 < charEnd - chars) ?
+ (byteEnd - bytes) : (charEnd - chars) << 1));
+
+ // Need new char* so we can check 4 at a time
+ ulong* longBytes = (ulong*)bytes;
+ ulong* longChars = (ulong*)chars;
+
+ while (longBytes < longEnd)
+ {
+ // See if we potentially have surrogates (0x8000 bit set)
+ // (We're either big endian on a big endian machine or little endian on
+ // a little endian machine so this'll work)
+ if ((0x8000800080008000 & *longBytes) != 0)
+ {
+ // See if any of these are high or low surrogates (0xd800 - 0xdfff). If the high
+ // 5 bits looks like 11011, then its a high or low surrogate.
+ // We do the & f800 to filter the 5 bits, then ^ d800 to ensure the 0 isn't set.
+ // Note that we expect BMP characters to be more common than surrogates
+ // & each char with 11111... then ^ with 11011. Zeroes then indicate surrogates
+ ulong uTemp = (0xf800f800f800f800 & *longBytes) ^ 0xd800d800d800d800;
+
+ // Check each of the 4 chars. 0 for those 16 bits means it was a surrogate
+ // but no clue if they're high or low.
+ // If each of the 4 characters are non-zero, then none are surrogates.
+ if ((uTemp & 0xFFFF000000000000) == 0 ||
+ (uTemp & 0x0000FFFF00000000) == 0 ||
+ (uTemp & 0x00000000FFFF0000) == 0 ||
+ (uTemp & 0x000000000000FFFF) == 0)
+ {
+ // It has at least 1 surrogate, but we don't know if they're high or low surrogates,
+ // or if there's 1 or 4 surrogates
+
+ // If they happen to be high/low/high/low, we may as well continue. Check the next
+ // bit to see if its set (low) or not (high) in the right pattern
+#if BIGENDIAN
+ if (((0xfc00fc00fc00fc00 & *longBytes) ^ 0xd800dc00d800dc00) != 0)
+#else
+ if (((0xfc00fc00fc00fc00 & *longBytes) ^ 0xdc00d800dc00d800) != 0)
+#endif
+ {
+ // Either there weren't 4 surrogates, or the 0x0400 bit was set when a high
+ // was hoped for or the 0x0400 bit wasn't set where a low was hoped for.
+
+ // Drop out to the slow loop to resolve the surrogates
+ break;
+ }
+ // else they are all surrogates in High/Low/High/Low order, so we can use them.
+ }
+ // else none are surrogates, so we can use them.
+ }
+ // else all < 0x8000 so we can use them
+
+ // We can use these 4 chars.
+ *longChars = *longBytes;
+ longBytes++;
+ longChars++;
+ }
+
+ chars = (char*)longChars;
+ bytes = (byte*)longBytes;
+
+ if (bytes >= byteEnd)
+ break;
+ }
+#endif // !NO_FAST_UNICODE_LOOP
+
+ // Get 1st byte
+ if (lastByte < 0)
+ {
+ lastByte = *bytes++;
+ continue;
+ }
+
+ // Get full char
+ char ch;
+ if (bigEndian)
+ {
+ ch = (char)(lastByte << 8 | *(bytes++));
+ }
+ else
+ {
+ ch = (char)(*(bytes++) << 8 | lastByte);
+ }
+ lastByte = -1;
+
+ // See if the char's valid
+ if (ch >= 0xd800 && ch <= 0xdfff)
+ {
+ // Was it a high surrogate?
+ if (ch <= 0xdbff)
+ {
+ // Its a high surrogate, if we had one then do fallback for previous one
+ if (lastChar > 0)
+ {
+ // Get fallback for previous high surrogate
+ // Note we have to reconstruct bytes because some may have been in decoder
+ byte[] byteBuffer = null;
+ if (bigEndian)
+ {
+ byteBuffer = new byte[]
+ { unchecked((byte)(lastChar >> 8)), unchecked((byte)lastChar) };
+ }
+ else
+ {
+ byteBuffer = new byte[]
+ { unchecked((byte)lastChar), unchecked((byte)(lastChar >> 8)) };
+ }
+
+ if (fallbackBuffer == null)
+ {
+ if (decoder == null)
+ fallbackBuffer = this.decoderFallback.CreateFallbackBuffer();
+ else
+ fallbackBuffer = decoder.FallbackBuffer;
+
+ // Set our internal fallback interesting things.
+ fallbackBuffer.InternalInitialize(byteStart, charEnd);
+ }
+
+ charsForFallback = chars; // Avoid passing chars by reference to allow it to be enregistered
+ bool fallbackResult = fallbackBuffer.InternalFallback(byteBuffer, bytes, ref charsForFallback);
+ chars = charsForFallback;
+
+ if (!fallbackResult)
+ {
+ // couldn't fall back lonely surrogate
+ // We either advanced bytes or chars should == charStart and throw below
+ Debug.Assert(bytes >= byteStart + 2 || chars == charStart,
+ "[UnicodeEncoding.GetChars]Expected bytes to have advanced or no output (bad surrogate)");
+ bytes -= 2; // didn't use these 2 bytes
+ fallbackBuffer.InternalReset();
+ ThrowCharsOverflow(decoder, chars == charStart);// Might throw, if no chars output
+ break; // couldn't fallback but didn't throw
+ }
+ }
+
+ // Ignore the previous high surrogate which fell back already,
+ // yet remember the current high surrogate for next time.
+ lastChar = ch;
+ continue;
+ }
+
+ // Its a low surrogate
+ if (lastChar == 0)
+ {
+ // Expected a previous high surrogate
+ // Get fallback for this low surrogate
+ // Note we have to reconstruct bytes because some may have been in decoder
+ byte[] byteBuffer = null;
+ if (bigEndian)
+ {
+ byteBuffer = new byte[]
+ { unchecked((byte)(ch >> 8)), unchecked((byte)ch) };
+ }
+ else
+ {
+ byteBuffer = new byte[]
+ { unchecked((byte)ch), unchecked((byte)(ch >> 8)) };
+ }
+
+ if (fallbackBuffer == null)
+ {
+ if (decoder == null)
+ fallbackBuffer = this.decoderFallback.CreateFallbackBuffer();
+ else
+ fallbackBuffer = decoder.FallbackBuffer;
+
+ // Set our internal fallback interesting things.
+ fallbackBuffer.InternalInitialize(byteStart, charEnd);
+ }
+
+ charsForFallback = chars; // Avoid passing chars by reference to allow it to be enregistered
+ bool fallbackResult = fallbackBuffer.InternalFallback(byteBuffer, bytes, ref charsForFallback);
+ chars = charsForFallback;
+
+ if (!fallbackResult)
+ {
+ // couldn't fall back lonely surrogate
+ // We either advanced bytes or chars should == charStart and throw below
+ Debug.Assert(bytes >= byteStart + 2 || chars == charStart,
+ "[UnicodeEncoding.GetChars]Expected bytes to have advanced or no output (lonely surrogate)");
+ bytes -= 2; // didn't use these 2 bytes
+ fallbackBuffer.InternalReset();
+ ThrowCharsOverflow(decoder, chars == charStart);// Might throw, if no chars output
+ break; // couldn't fallback but didn't throw
+ }
+
+ // Didn't throw, ignore this one (we already did its fallback)
+ continue;
+ }
+
+ // Valid surrogate pair, add our lastChar (will need 2 chars)
+ if (chars >= charEnd - 1)
+ {
+ // couldn't find room for this surrogate pair
+ // We either advanced bytes or chars should == charStart and throw below
+ Debug.Assert(bytes >= byteStart + 2 || chars == charStart,
+ "[UnicodeEncoding.GetChars]Expected bytes to have advanced or no output (surrogate pair)");
+ bytes -= 2; // didn't use these 2 bytes
+ ThrowCharsOverflow(decoder, chars == charStart);// Might throw, if no chars output
+ // Leave lastChar for next call to Convert()
+ break; // couldn't fallback but didn't throw
+ }
+
+ *chars++ = lastChar;
+ lastChar = (char)0;
+ }
+ else if (lastChar > 0)
+ {
+ // Had a high surrogate, expected a low surrogate, fall back the high surrogate.
+ byte[] byteBuffer = null;
+ if (bigEndian)
+ {
+ byteBuffer = new byte[]
+ { unchecked((byte)(lastChar >> 8)), unchecked((byte)lastChar) };
+ }
+ else
+ {
+ byteBuffer = new byte[]
+ { unchecked((byte)lastChar), unchecked((byte)(lastChar >> 8)) };
+ }
+
+ if (fallbackBuffer == null)
+ {
+ if (decoder == null)
+ fallbackBuffer = this.decoderFallback.CreateFallbackBuffer();
+ else
+ fallbackBuffer = decoder.FallbackBuffer;
+
+ // Set our internal fallback interesting things.
+ fallbackBuffer.InternalInitialize(byteStart, charEnd);
+ }
+
+ charsForFallback = chars; // Avoid passing chars by reference to allow it to be enregistered
+ bool fallbackResult = fallbackBuffer.InternalFallback(byteBuffer, bytes, ref charsForFallback);
+ chars = charsForFallback;
+
+ if (!fallbackResult)
+ {
+ // couldn't fall back high surrogate, or char that would be next
+ // We either advanced bytes or chars should == charStart and throw below
+ Debug.Assert(bytes >= byteStart + 2 || chars == charStart,
+ "[UnicodeEncoding.GetChars]Expected bytes to have advanced or no output (no low surrogate)");
+ bytes -= 2; // didn't use these 2 bytes
+ fallbackBuffer.InternalReset();
+ ThrowCharsOverflow(decoder, chars == charStart);// Might throw, if no chars output
+ break; // couldn't fallback but didn't throw
+ }
+
+ // Not left over now, clear previous high surrogate and continue to add current char
+ lastChar = (char)0;
+ }
+
+ // Valid char, room for it?
+ if (chars >= charEnd)
+ {
+ // 2 bytes couldn't fall back
+ // We either advanced bytes or chars should == charStart and throw below
+ Debug.Assert(bytes >= byteStart + 2 || chars == charStart,
+ "[UnicodeEncoding.GetChars]Expected bytes to have advanced or no output (normal)");
+ bytes -= 2; // didn't use these bytes
+ ThrowCharsOverflow(decoder, chars == charStart);// Might throw, if no chars output
+ break; // couldn't fallback but didn't throw
+ }
+
+ // add it
+ *chars++ = ch;
+ }
+
+ // Remember our decoder if we must
+ if (decoder == null || decoder.MustFlush)
+ {
+ if (lastChar > 0)
+ {
+ // No hanging high surrogates allowed, do fallback and remove count for it
+ byte[] byteBuffer = null;
+ if (bigEndian)
+ {
+ byteBuffer = new byte[]
+ { unchecked((byte)(lastChar >> 8)), unchecked((byte)lastChar) };
+ }
+ else
+ {
+ byteBuffer = new byte[]
+ { unchecked((byte)lastChar), unchecked((byte)(lastChar >> 8)) };
+ }
+
+ if (fallbackBuffer == null)
+ {
+ if (decoder == null)
+ fallbackBuffer = this.decoderFallback.CreateFallbackBuffer();
+ else
+ fallbackBuffer = decoder.FallbackBuffer;
+
+ // Set our internal fallback interesting things.
+ fallbackBuffer.InternalInitialize(byteStart, charEnd);
+ }
+
+ charsForFallback = chars; // Avoid passing chars by reference to allow it to be enregistered
+ bool fallbackResult = fallbackBuffer.InternalFallback(byteBuffer, bytes, ref charsForFallback);
+ chars = charsForFallback;
+
+ if (!fallbackResult)
+ {
+ // 2 bytes couldn't fall back
+ // We either advanced bytes or chars should == charStart and throw below
+ Debug.Assert(bytes >= byteStart + 2 || chars == charStart,
+ "[UnicodeEncoding.GetChars]Expected bytes to have advanced or no output (decoder)");
+ bytes -= 2; // didn't use these bytes
+ if (lastByte >= 0)
+ bytes--; // had an extra last byte hanging around
+ fallbackBuffer.InternalReset();
+ ThrowCharsOverflow(decoder, chars == charStart);// Might throw, if no chars output
+ // We'll remember these in our decoder though
+ bytes += 2;
+ if (lastByte >= 0)
+ bytes++;
+ goto End;
+ }
+
+ // done with this one
+ lastChar = (char)0;
+ }
+
+ if (lastByte >= 0)
+ {
+ if (fallbackBuffer == null)
+ {
+ if (decoder == null)
+ fallbackBuffer = this.decoderFallback.CreateFallbackBuffer();
+ else
+ fallbackBuffer = decoder.FallbackBuffer;
+
+ // Set our internal fallback interesting things.
+ fallbackBuffer.InternalInitialize(byteStart, charEnd);
+ }
+
+ // No hanging odd bytes allowed if must flush
+ charsForFallback = chars; // Avoid passing chars by reference to allow it to be enregistered
+ bool fallbackResult = fallbackBuffer.InternalFallback(new byte[] { unchecked((byte)lastByte) }, bytes, ref charsForFallback);
+ chars = charsForFallback;
+
+ if (!fallbackResult)
+ {
+ // odd byte couldn't fall back
+ bytes--; // didn't use this byte
+ fallbackBuffer.InternalReset();
+ ThrowCharsOverflow(decoder, chars == charStart);// Might throw, if no chars output
+ // didn't throw, but we'll remember it in the decoder
+ bytes++;
+ goto End;
+ }
+
+ // Didn't fail, clear buffer
+ lastByte = -1;
+ }
+ }
+
+ End:
+
+ // Remember our decoder if we must
+ if (decoder != null)
+ {
+ Debug.Assert((decoder.MustFlush == false) || ((lastChar == (char)0) && (lastByte == -1)),
+ "[UnicodeEncoding.GetChars] Expected no left over chars or bytes if flushing"
+ // + " " + ((int)lastChar).ToString("X4") + " " + lastByte.ToString("X2")
+ );
+
+ decoder.m_bytesUsed = (int)(bytes - byteStart);
+ decoder.lastChar = lastChar;
+ decoder.lastByte = lastByte;
+ }
+
+ // Used to do this the old way
+ // System.IO.__UnmanagedMemoryStream.memcpyimpl((byte*)chars, bytes, byteCount);
+
+ // Shouldn't have anything in fallback buffer for GetChars
+ // (don't have to check m_throwOnOverflow for count or chars)
+ Debug.Assert(fallbackBuffer == null || fallbackBuffer.Remaining == 0,
+ "[UnicodeEncoding.GetChars]Expected empty fallback buffer at end");
+
+ return (int)(chars - charStart);
+ }
+
+
+ public override System.Text.Encoder GetEncoder()
+ {
+ return new EncoderNLS(this);
+ }
+
+
+ public override System.Text.Decoder GetDecoder()
+ {
+ return new UnicodeEncoding.Decoder(this);
+ }
+
+
+ public override byte[] GetPreamble()
+ {
+ if (byteOrderMark)
+ {
+ // Note - we must allocate new byte[]'s here to prevent someone
+ // from modifying a cached byte[].
+ if (bigEndian)
+ return new byte[2] { 0xfe, 0xff };
+ else
+ return new byte[2] { 0xff, 0xfe };
+ }
+ return Array.Empty<Byte>();
+ }
+
+
+ public override int GetMaxByteCount(int charCount)
+ {
+ if (charCount < 0)
+ throw new ArgumentOutOfRangeException(nameof(charCount),
+ SR.ArgumentOutOfRange_NeedNonNegNum);
+ Contract.EndContractBlock();
+
+ // Characters would be # of characters + 1 in case left over high surrogate is ? * max fallback
+ long byteCount = (long)charCount + 1;
+
+ if (EncoderFallback.MaxCharCount > 1)
+ byteCount *= EncoderFallback.MaxCharCount;
+
+ // 2 bytes per char
+ byteCount <<= 1;
+
+ if (byteCount > 0x7fffffff)
+ throw new ArgumentOutOfRangeException(nameof(charCount), SR.ArgumentOutOfRange_GetByteCountOverflow);
+
+ return (int)byteCount;
+ }
+
+
+ public override int GetMaxCharCount(int byteCount)
+ {
+ if (byteCount < 0)
+ throw new ArgumentOutOfRangeException(nameof(byteCount),
+ SR.ArgumentOutOfRange_NeedNonNegNum);
+ Contract.EndContractBlock();
+
+ // long because byteCount could be biggest int.
+ // 1 char per 2 bytes. Round up in case 1 left over in decoder.
+ // Round up using &1 in case byteCount is max size
+ // Might also need an extra 1 if there's a left over high surrogate in the decoder.
+ long charCount = (long)(byteCount >> 1) + (byteCount & 1) + 1;
+
+ // Don't forget fallback (in case they have a bunch of lonely surrogates or something bizzare like that)
+ if (DecoderFallback.MaxCharCount > 1)
+ charCount *= DecoderFallback.MaxCharCount;
+
+ if (charCount > 0x7fffffff)
+ throw new ArgumentOutOfRangeException(nameof(byteCount), SR.ArgumentOutOfRange_GetCharCountOverflow);
+
+ return (int)charCount;
+ }
+
+
+ public override bool Equals(Object value)
+ {
+ UnicodeEncoding that = value as UnicodeEncoding;
+ if (that != null)
+ {
+ //
+ // Big Endian Unicode has different code page (1201) than small Endian one (1200),
+ // so we still have to check m_codePage here.
+ //
+ return (CodePage == that.CodePage) &&
+ byteOrderMark == that.byteOrderMark &&
+ // isThrowException == that.isThrowException && // Same as Encoder/Decoder being exception fallbacks
+ bigEndian == that.bigEndian &&
+ (EncoderFallback.Equals(that.EncoderFallback)) &&
+ (DecoderFallback.Equals(that.DecoderFallback));
+ }
+ return (false);
+ }
+
+ public override int GetHashCode()
+ {
+ return CodePage + this.EncoderFallback.GetHashCode() + this.DecoderFallback.GetHashCode() +
+ (byteOrderMark ? 4 : 0) + (bigEndian ? 8 : 0);
+ }
+
+ [Serializable]
+ private sealed class Decoder : System.Text.DecoderNLS, ISerializable
+ {
+ internal int lastByte = -1;
+ internal char lastChar = '\0';
+
+ public Decoder(UnicodeEncoding encoding) : base(encoding)
+ {
+ // base calls reset
+ }
+
+ // Constructor called by serialization, have to handle deserializing from Everett
+ internal Decoder(SerializationInfo info, StreamingContext context)
+ {
+ // Any info?
+ if (info == null) throw new ArgumentNullException(nameof(info));
+ Contract.EndContractBlock();
+
+ // Get Common Info
+ this.lastByte = (int)info.GetValue("lastByte", typeof(int));
+
+ try
+ {
+ // Try the encoding, which is only serialized in Whidbey
+ this.m_encoding = (Encoding)info.GetValue("m_encoding", typeof(Encoding));
+ this.lastChar = (char)info.GetValue("lastChar", typeof(char));
+ this.m_fallback = (DecoderFallback)info.GetValue("m_fallback", typeof(DecoderFallback));
+ }
+ catch (SerializationException)
+ {
+ // Everett didn't serialize the UnicodeEncoding, get the default one
+ bool bigEndian = (bool)info.GetValue("bigEndian", typeof(bool));
+ this.m_encoding = new UnicodeEncoding(bigEndian, false);
+ }
+ }
+
+ // ISerializable implementation, get data for this object
+ void ISerializable.GetObjectData(SerializationInfo info, StreamingContext context)
+ {
+ // Any info?
+ if (info == null) throw new ArgumentNullException(nameof(info));
+ Contract.EndContractBlock();
+
+ // Save Whidbey data
+ info.AddValue("m_encoding", this.m_encoding);
+ info.AddValue("m_fallback", this.m_fallback);
+ info.AddValue("lastChar", this.lastChar); // Unused by everett so it'll probably get lost
+ info.AddValue("lastByte", this.lastByte);
+
+ // Everett Only
+ info.AddValue("bigEndian", ((UnicodeEncoding)(this.m_encoding)).bigEndian);
+ }
+
+ public override void Reset()
+ {
+ lastByte = -1;
+ lastChar = '\0';
+ if (m_fallbackBuffer != null)
+ m_fallbackBuffer.Reset();
+ }
+
+ // Anything left in our decoder?
+ internal override bool HasState
+ {
+ get
+ {
+ return (this.lastByte != -1 || this.lastChar != '\0');
+ }
+ }
+ }
+ }
+}
+