summaryrefslogtreecommitdiff
path: root/src/mscorlib/shared/System/Text/UTF32Encoding.cs
diff options
context:
space:
mode:
Diffstat (limited to 'src/mscorlib/shared/System/Text/UTF32Encoding.cs')
-rw-r--r--src/mscorlib/shared/System/Text/UTF32Encoding.cs1234
1 files changed, 1234 insertions, 0 deletions
diff --git a/src/mscorlib/shared/System/Text/UTF32Encoding.cs b/src/mscorlib/shared/System/Text/UTF32Encoding.cs
new file mode 100644
index 0000000000..e4cd6c960e
--- /dev/null
+++ b/src/mscorlib/shared/System/Text/UTF32Encoding.cs
@@ -0,0 +1,1234 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+//
+// Don't override IsAlwaysNormalized because it is just a Unicode Transformation and could be confused.
+//
+
+using System;
+using System.Diagnostics;
+using System.Diagnostics.Contracts;
+using System.Globalization;
+
+namespace System.Text
+{
+ // Encodes text into and out of UTF-32. UTF-32 is a way of writing
+ // Unicode characters with a single storage unit (32 bits) per character,
+ //
+ // The UTF-32 byte order mark is simply the Unicode byte order mark
+ // (0x00FEFF) written in UTF-32 (0x0000FEFF or 0xFFFE0000). The byte order
+ // mark is used mostly to distinguish UTF-32 text from other encodings, and doesn't
+ // switch the byte orderings.
+
+ [Serializable]
+ public sealed class UTF32Encoding : Encoding
+ {
+ /*
+ words bits UTF-32 representation
+ ----- ---- -----------------------------------
+ 1 16 00000000 00000000 xxxxxxxx xxxxxxxx
+ 2 21 00000000 000xxxxx hhhhhhll llllllll
+ ----- ---- -----------------------------------
+
+ Surrogate:
+ Real Unicode value = (HighSurrogate - 0xD800) * 0x400 + (LowSurrogate - 0xDC00) + 0x10000
+ */
+
+ // Used by Encoding.UTF32/BigEndianUTF32 for lazy initialization
+ // The initialization code will not be run until a static member of the class is referenced
+ internal static readonly UTF32Encoding s_default = new UTF32Encoding(bigEndian: false, byteOrderMark: true);
+ internal static readonly UTF32Encoding s_bigEndianDefault = new UTF32Encoding(bigEndian: true, byteOrderMark: true);
+
+ private bool _emitUTF32ByteOrderMark = false;
+ private bool _isThrowException = false;
+ private bool _bigEndian = false;
+
+
+ public UTF32Encoding() : this(false, true, false)
+ {
+ }
+
+
+ public UTF32Encoding(bool bigEndian, bool byteOrderMark) :
+ this(bigEndian, byteOrderMark, false)
+ {
+ }
+
+
+ public UTF32Encoding(bool bigEndian, bool byteOrderMark, bool throwOnInvalidCharacters) :
+ base(bigEndian ? 12001 : 12000)
+ {
+ _bigEndian = bigEndian;
+ _emitUTF32ByteOrderMark = byteOrderMark;
+ _isThrowException = throwOnInvalidCharacters;
+
+ // Encoding constructor already did this, but it'll be wrong if we're throwing exceptions
+ if (_isThrowException)
+ SetDefaultFallbacks();
+ }
+
+ internal override void SetDefaultFallbacks()
+ {
+ // For UTF-X encodings, we use a replacement fallback with an empty string
+ if (_isThrowException)
+ {
+ this.encoderFallback = EncoderFallback.ExceptionFallback;
+ this.decoderFallback = DecoderFallback.ExceptionFallback;
+ }
+ else
+ {
+ this.encoderFallback = new EncoderReplacementFallback("\xFFFD");
+ this.decoderFallback = new DecoderReplacementFallback("\xFFFD");
+ }
+ }
+
+
+ // The following methods are copied from EncodingNLS.cs.
+ // Unfortunately EncodingNLS.cs is internal and we're public, so we have to reimpliment them here.
+ // These should be kept in sync for the following classes:
+ // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
+
+ // Returns the number of bytes required to encode a range of characters in
+ // a character array.
+ //
+ // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
+ // So if you fix this, fix the others. Currently those include:
+ // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
+ // parent method is safe
+
+ public override unsafe int GetByteCount(char[] chars, int index, int count)
+ {
+ // Validate input parameters
+ if (chars == null)
+ throw new ArgumentNullException("chars", SR.ArgumentNull_Array);
+
+ if (index < 0 || count < 0)
+ throw new ArgumentOutOfRangeException((index < 0 ? "index" : "count"), SR.ArgumentOutOfRange_NeedNonNegNum);
+
+ if (chars.Length - index < count)
+ throw new ArgumentOutOfRangeException("chars", SR.ArgumentOutOfRange_IndexCountBuffer);
+ Contract.EndContractBlock();
+
+ // If no input, return 0, avoid fixed empty array problem
+ if (count == 0)
+ return 0;
+
+ // Just call the pointer version
+ fixed (char* pChars = chars)
+ return GetByteCount(pChars + index, count, null);
+ }
+
+ // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
+ // So if you fix this, fix the others. Currently those include:
+ // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
+ // parent method is safe
+
+ public override unsafe int GetByteCount(String s)
+ {
+ // Validate input
+ if (s==null)
+ throw new ArgumentNullException("s");
+ Contract.EndContractBlock();
+
+ fixed (char* pChars = s)
+ return GetByteCount(pChars, s.Length, null);
+ }
+
+ // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
+ // So if you fix this, fix the others. Currently those include:
+ // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
+
+ [CLSCompliant(false)]
+ public override unsafe int GetByteCount(char* chars, int count)
+ {
+ // Validate Parameters
+ if (chars == null)
+ throw new ArgumentNullException("chars", SR.ArgumentNull_Array);
+
+ if (count < 0)
+ throw new ArgumentOutOfRangeException("count", SR.ArgumentOutOfRange_NeedNonNegNum);
+ Contract.EndContractBlock();
+
+ // Call it with empty encoder
+ return GetByteCount(chars, count, null);
+ }
+
+ // Parent method is safe.
+ // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
+ // So if you fix this, fix the others. Currently those include:
+ // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
+
+ public override unsafe int GetBytes(String s, int charIndex, int charCount,
+ byte[] bytes, int byteIndex)
+ {
+ if (s == null || bytes == null)
+ throw new ArgumentNullException((s == null ? "s" : "bytes"), SR.ArgumentNull_Array);
+
+ if (charIndex < 0 || charCount < 0)
+ throw new ArgumentOutOfRangeException((charIndex < 0 ? "charIndex" : "charCount"), SR.ArgumentOutOfRange_NeedNonNegNum);
+
+ if (s.Length - charIndex < charCount)
+ throw new ArgumentOutOfRangeException("s", SR.ArgumentOutOfRange_IndexCount);
+
+ if (byteIndex < 0 || byteIndex > bytes.Length)
+ throw new ArgumentOutOfRangeException("byteIndex", SR.ArgumentOutOfRange_Index);
+ Contract.EndContractBlock();
+
+ int byteCount = bytes.Length - byteIndex;
+
+ // Fix our input array if 0 length because fixed doesn't like 0 length arrays
+ if (bytes.Length == 0)
+ bytes = new byte[1];
+
+ fixed (char* pChars = s) fixed (byte* pBytes = &bytes[0])
+ return GetBytes(pChars + charIndex, charCount, pBytes + byteIndex, byteCount, null);
+ }
+
+ // Encodes a range of characters in a character array into a range of bytes
+ // in a byte array. An exception occurs if the byte array is not large
+ // enough to hold the complete encoding of the characters. The
+ // GetByteCount method can be used to determine the exact number of
+ // bytes that will be produced for a given range of characters.
+ // Alternatively, the GetMaxByteCount method can be used to
+ // determine the maximum number of bytes that will be produced for a given
+ // number of characters, regardless of the actual character values.
+ //
+ // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
+ // So if you fix this, fix the others. Currently those include:
+ // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
+ // parent method is safe
+
+ public override unsafe int GetBytes(char[] chars, int charIndex, int charCount,
+ byte[] bytes, int byteIndex)
+ {
+ // Validate parameters
+ if (chars == null || bytes == null)
+ throw new ArgumentNullException((chars == null ? "chars" : "bytes"), SR.ArgumentNull_Array);
+
+ if (charIndex < 0 || charCount < 0)
+ throw new ArgumentOutOfRangeException((charIndex < 0 ? "charIndex" : "charCount"), SR.ArgumentOutOfRange_NeedNonNegNum);
+
+ if (chars.Length - charIndex < charCount)
+ throw new ArgumentOutOfRangeException("chars", SR.ArgumentOutOfRange_IndexCountBuffer);
+
+ if (byteIndex < 0 || byteIndex > bytes.Length)
+ throw new ArgumentOutOfRangeException("byteIndex", SR.ArgumentOutOfRange_Index);
+ Contract.EndContractBlock();
+
+ // If nothing to encode return 0, avoid fixed problem
+ if (charCount == 0)
+ return 0;
+
+ // Just call pointer version
+ int byteCount = bytes.Length - byteIndex;
+
+ // Fix our input array if 0 length because fixed doesn't like 0 length arrays
+ if (bytes.Length == 0)
+ bytes = new byte[1];
+
+ fixed (char* pChars = chars) fixed (byte* pBytes = &bytes[0])
+ // Remember that byteCount is # to decode, not size of array.
+ return GetBytes(pChars + charIndex, charCount, pBytes + byteIndex, byteCount, null);
+ }
+
+ // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
+ // So if you fix this, fix the others. Currently those include:
+ // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
+
+ [CLSCompliant(false)]
+ public override unsafe int GetBytes(char* chars, int charCount, byte* bytes, int byteCount)
+ {
+ // Validate Parameters
+ if (bytes == null || chars == null)
+ throw new ArgumentNullException(bytes == null ? "bytes" : "chars", SR.ArgumentNull_Array);
+
+ if (charCount < 0 || byteCount < 0)
+ throw new ArgumentOutOfRangeException((charCount < 0 ? "charCount" : "byteCount"), SR.ArgumentOutOfRange_NeedNonNegNum);
+ Contract.EndContractBlock();
+
+ return GetBytes(chars, charCount, bytes, byteCount, null);
+ }
+
+ // Returns the number of characters produced by decoding a range of bytes
+ // in a byte array.
+ //
+ // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
+ // So if you fix this, fix the others. Currently those include:
+ // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
+ // parent method is safe
+
+ public override unsafe int GetCharCount(byte[] bytes, int index, int count)
+ {
+ // Validate Parameters
+ if (bytes == null)
+ throw new ArgumentNullException("bytes", SR.ArgumentNull_Array);
+
+ if (index < 0 || count < 0)
+ throw new ArgumentOutOfRangeException((index < 0 ? "index" : "count"), SR.ArgumentOutOfRange_NeedNonNegNum);
+
+ if (bytes.Length - index < count)
+ throw new ArgumentOutOfRangeException("bytes", SR.ArgumentOutOfRange_IndexCountBuffer);
+ Contract.EndContractBlock();
+
+ // If no input just return 0, fixed doesn't like 0 length arrays.
+ if (count == 0)
+ return 0;
+
+ // Just call pointer version
+ fixed (byte* pBytes = bytes)
+ return GetCharCount(pBytes + index, count, null);
+ }
+
+ // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
+ // So if you fix this, fix the others. Currently those include:
+ // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
+
+ [CLSCompliant(false)]
+ public override unsafe int GetCharCount(byte* bytes, int count)
+ {
+ // Validate Parameters
+ if (bytes == null)
+ throw new ArgumentNullException("bytes", SR.ArgumentNull_Array);
+
+ if (count < 0)
+ throw new ArgumentOutOfRangeException("count", SR.ArgumentOutOfRange_NeedNonNegNum);
+ Contract.EndContractBlock();
+
+ return GetCharCount(bytes, count, null);
+ }
+
+ // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
+ // So if you fix this, fix the others. Currently those include:
+ // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
+ // parent method is safe
+
+ public override unsafe int GetChars(byte[] bytes, int byteIndex, int byteCount,
+ char[] chars, int charIndex)
+ {
+ // Validate Parameters
+ if (bytes == null || chars == null)
+ throw new ArgumentNullException(bytes == null ? "bytes" : "chars", SR.ArgumentNull_Array);
+
+ if (byteIndex < 0 || byteCount < 0)
+ throw new ArgumentOutOfRangeException((byteIndex < 0 ? "byteIndex" : "byteCount"), SR.ArgumentOutOfRange_NeedNonNegNum);
+
+ if ( bytes.Length - byteIndex < byteCount)
+ throw new ArgumentOutOfRangeException("bytes", SR.ArgumentOutOfRange_IndexCountBuffer);
+
+ if (charIndex < 0 || charIndex > chars.Length)
+ throw new ArgumentOutOfRangeException("charIndex", SR.ArgumentOutOfRange_Index);
+ Contract.EndContractBlock();
+
+ // If no input, return 0 & avoid fixed problem
+ if (byteCount == 0)
+ return 0;
+
+ // Just call pointer version
+ int charCount = chars.Length - charIndex;
+
+ // Fix our input array if 0 length because fixed doesn't like 0 length arrays
+ if (chars.Length == 0)
+ chars = new char[1];
+
+ fixed (byte* pBytes = bytes) fixed (char* pChars = &chars[0])
+ // Remember that charCount is # to decode, not size of array
+ return GetChars(pBytes + byteIndex, byteCount, pChars + charIndex, charCount, null);
+ }
+
+ // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
+ // So if you fix this, fix the others. Currently those include:
+ // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
+
+ [CLSCompliant(false)]
+ public unsafe override int GetChars(byte* bytes, int byteCount, char* chars, int charCount)
+ {
+ // Validate Parameters
+ if (bytes == null || chars == null)
+ throw new ArgumentNullException(bytes == null ? "bytes" : "chars", SR.ArgumentNull_Array);
+
+ if (charCount < 0 || byteCount < 0)
+ throw new ArgumentOutOfRangeException((charCount < 0 ? "charCount" : "byteCount"), SR.ArgumentOutOfRange_NeedNonNegNum);
+ Contract.EndContractBlock();
+
+ return GetChars(bytes, byteCount, chars, charCount, null);
+ }
+
+ // Returns a string containing the decoded representation of a range of
+ // bytes in a byte array.
+ //
+ // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
+ // So if you fix this, fix the others. Currently those include:
+ // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
+ // parent method is safe
+
+ public override unsafe String GetString(byte[] bytes, int index, int count)
+ {
+ // Validate Parameters
+ if (bytes == null)
+ throw new ArgumentNullException("bytes", SR.ArgumentNull_Array);
+
+ if (index < 0 || count < 0)
+ throw new ArgumentOutOfRangeException((index < 0 ? "index" : "count"), SR.ArgumentOutOfRange_NeedNonNegNum);
+
+ if (bytes.Length - index < count)
+ throw new ArgumentOutOfRangeException("bytes", SR.ArgumentOutOfRange_IndexCountBuffer);
+ Contract.EndContractBlock();
+
+ // Avoid problems with empty input buffer
+ if (count == 0) return String.Empty;
+
+ fixed (byte* pBytes = bytes)
+ return String.CreateStringFromEncoding(
+ pBytes + index, count, this);
+ }
+
+ //
+ // End of standard methods copied from EncodingNLS.cs
+ //
+
+ internal override unsafe int GetByteCount(char* chars, int count, EncoderNLS encoder)
+ {
+ Debug.Assert(chars != null, "[UTF32Encoding.GetByteCount]chars!=null");
+ Debug.Assert(count >= 0, "[UTF32Encoding.GetByteCount]count >=0");
+
+ char* end = chars + count;
+ char* charStart = chars;
+ int byteCount = 0;
+
+ char highSurrogate = '\0';
+
+ // For fallback we may need a fallback buffer
+ EncoderFallbackBuffer fallbackBuffer = null;
+ char* charsForFallback;
+
+ if (encoder != null)
+ {
+ highSurrogate = encoder.charLeftOver;
+ fallbackBuffer = encoder.FallbackBuffer;
+
+ // We mustn't have left over fallback data when counting
+ if (fallbackBuffer.Remaining > 0)
+ throw new ArgumentException(SR.Format(SR.Argument_EncoderFallbackNotEmpty, this.EncodingName, encoder.Fallback.GetType()));
+ }
+ else
+ {
+ fallbackBuffer = this.encoderFallback.CreateFallbackBuffer();
+ }
+
+ // Set our internal fallback interesting things.
+ fallbackBuffer.InternalInitialize(charStart, end, encoder, false);
+
+ char ch;
+ TryAgain:
+
+ while (((ch = fallbackBuffer.InternalGetNextChar()) != 0) || chars < end)
+ {
+ // First unwind any fallback
+ if (ch == 0)
+ {
+ // No fallback, just get next char
+ ch = *chars;
+ chars++;
+ }
+
+ // Do we need a low surrogate?
+ if (highSurrogate != '\0')
+ {
+ //
+ // In previous char, we encounter a high surrogate, so we are expecting a low surrogate here.
+ //
+ if (Char.IsLowSurrogate(ch))
+ {
+ // They're all legal
+ highSurrogate = '\0';
+
+ //
+ // One surrogate pair will be translated into 4 bytes UTF32.
+ //
+
+ byteCount += 4;
+ continue;
+ }
+
+ // We are missing our low surrogate, decrement chars and fallback the high surrogate
+ // The high surrogate may have come from the encoder, but nothing else did.
+ Debug.Assert(chars > charStart,
+ "[UTF32Encoding.GetByteCount]Expected chars to have advanced if no low surrogate");
+ chars--;
+
+ // Do the fallback
+ charsForFallback = chars;
+ fallbackBuffer.InternalFallback(highSurrogate, ref charsForFallback);
+ chars = charsForFallback;
+
+ // We're going to fallback the old high surrogate.
+ highSurrogate = '\0';
+ continue;
+ }
+
+ // Do we have another high surrogate?
+ if (Char.IsHighSurrogate(ch))
+ {
+ //
+ // We'll have a high surrogate to check next time.
+ //
+ highSurrogate = ch;
+ continue;
+ }
+
+ // Check for illegal characters
+ if (Char.IsLowSurrogate(ch))
+ {
+ // We have a leading low surrogate, do the fallback
+ charsForFallback = chars;
+ fallbackBuffer.InternalFallback(ch, ref charsForFallback);
+ chars = charsForFallback;
+
+ // Try again with fallback buffer
+ continue;
+ }
+
+ // We get to add the character (4 bytes UTF32)
+ byteCount += 4;
+ }
+
+ // May have to do our last surrogate
+ if ((encoder == null || encoder.MustFlush) && highSurrogate > 0)
+ {
+ // We have to do the fallback for the lonely high surrogate
+ charsForFallback = chars;
+ fallbackBuffer.InternalFallback(highSurrogate, ref charsForFallback);
+ chars = charsForFallback;
+
+ highSurrogate = (char)0;
+ goto TryAgain;
+ }
+
+ // Check for overflows.
+ if (byteCount < 0)
+ throw new ArgumentOutOfRangeException(nameof(count), SR.ArgumentOutOfRange_GetByteCountOverflow);
+
+ // Shouldn't have anything in fallback buffer for GetByteCount
+ // (don't have to check m_throwOnOverflow for count)
+ Debug.Assert(fallbackBuffer.Remaining == 0,
+ "[UTF32Encoding.GetByteCount]Expected empty fallback buffer at end");
+
+ // Return our count
+ return byteCount;
+ }
+
+ internal override unsafe int GetBytes(char* chars, int charCount,
+ byte* bytes, int byteCount, EncoderNLS encoder)
+ {
+ Debug.Assert(chars != null, "[UTF32Encoding.GetBytes]chars!=null");
+ Debug.Assert(bytes != null, "[UTF32Encoding.GetBytes]bytes!=null");
+ Debug.Assert(byteCount >= 0, "[UTF32Encoding.GetBytes]byteCount >=0");
+ Debug.Assert(charCount >= 0, "[UTF32Encoding.GetBytes]charCount >=0");
+
+ char* charStart = chars;
+ char* charEnd = chars + charCount;
+ byte* byteStart = bytes;
+ byte* byteEnd = bytes + byteCount;
+
+ char highSurrogate = '\0';
+
+ // For fallback we may need a fallback buffer
+ EncoderFallbackBuffer fallbackBuffer = null;
+ char* charsForFallback;
+
+ if (encoder != null)
+ {
+ highSurrogate = encoder.charLeftOver;
+ fallbackBuffer = encoder.FallbackBuffer;
+
+ // We mustn't have left over fallback data when not converting
+ if (encoder.m_throwOnOverflow && fallbackBuffer.Remaining > 0)
+ throw new ArgumentException(SR.Format(SR.Argument_EncoderFallbackNotEmpty, this.EncodingName, encoder.Fallback.GetType()));
+ }
+ else
+ {
+ fallbackBuffer = this.encoderFallback.CreateFallbackBuffer();
+ }
+
+ // Set our internal fallback interesting things.
+ fallbackBuffer.InternalInitialize(charStart, charEnd, encoder, true);
+
+ char ch;
+ TryAgain:
+
+ while (((ch = fallbackBuffer.InternalGetNextChar()) != 0) || chars < charEnd)
+ {
+ // First unwind any fallback
+ if (ch == 0)
+ {
+ // No fallback, just get next char
+ ch = *chars;
+ chars++;
+ }
+
+ // Do we need a low surrogate?
+ if (highSurrogate != '\0')
+ {
+ //
+ // In previous char, we encountered a high surrogate, so we are expecting a low surrogate here.
+ //
+ if (Char.IsLowSurrogate(ch))
+ {
+ // Is it a legal one?
+ uint iTemp = GetSurrogate(highSurrogate, ch);
+ highSurrogate = '\0';
+
+ //
+ // One surrogate pair will be translated into 4 bytes UTF32.
+ //
+ if (bytes + 3 >= byteEnd)
+ {
+ // Don't have 4 bytes
+ if (fallbackBuffer.bFallingBack)
+ {
+ fallbackBuffer.MovePrevious(); // Aren't using these 2 fallback chars
+ fallbackBuffer.MovePrevious();
+ }
+ else
+ {
+ // If we don't have enough room, then either we should've advanced a while
+ // or we should have bytes==byteStart and throw below
+ Debug.Assert(chars > charStart + 1 || bytes == byteStart,
+ "[UnicodeEncoding.GetBytes]Expected chars to have when no room to add surrogate pair");
+ chars -= 2; // Aren't using those 2 chars
+ }
+ ThrowBytesOverflow(encoder, bytes == byteStart); // Throw maybe (if no bytes written)
+ highSurrogate = (char)0; // Nothing left over (we backed up to start of pair if supplimentary)
+ break;
+ }
+
+ if (_bigEndian)
+ {
+ *(bytes++) = (byte)(0x00);
+ *(bytes++) = (byte)(iTemp >> 16); // Implies & 0xFF, which isn't needed cause high are all 0
+ *(bytes++) = (byte)(iTemp >> 8); // Implies & 0xFF
+ *(bytes++) = (byte)(iTemp); // Implies & 0xFF
+ }
+ else
+ {
+ *(bytes++) = (byte)(iTemp); // Implies & 0xFF
+ *(bytes++) = (byte)(iTemp >> 8); // Implies & 0xFF
+ *(bytes++) = (byte)(iTemp >> 16); // Implies & 0xFF, which isn't needed cause high are all 0
+ *(bytes++) = (byte)(0x00);
+ }
+ continue;
+ }
+
+ // We are missing our low surrogate, decrement chars and fallback the high surrogate
+ // The high surrogate may have come from the encoder, but nothing else did.
+ Debug.Assert(chars > charStart,
+ "[UTF32Encoding.GetBytes]Expected chars to have advanced if no low surrogate");
+ chars--;
+
+ // Do the fallback
+ charsForFallback = chars;
+ fallbackBuffer.InternalFallback(highSurrogate, ref charsForFallback);
+ chars = charsForFallback;
+
+ // We're going to fallback the old high surrogate.
+ highSurrogate = '\0';
+ continue;
+ }
+
+ // Do we have another high surrogate?, if so remember it
+ if (Char.IsHighSurrogate(ch))
+ {
+ //
+ // We'll have a high surrogate to check next time.
+ //
+ highSurrogate = ch;
+ continue;
+ }
+
+ // Check for illegal characters (low surrogate)
+ if (Char.IsLowSurrogate(ch))
+ {
+ // We have a leading low surrogate, do the fallback
+ charsForFallback = chars;
+ fallbackBuffer.InternalFallback(ch, ref charsForFallback);
+ chars = charsForFallback;
+
+ // Try again with fallback buffer
+ continue;
+ }
+
+ // We get to add the character, yippee.
+ if (bytes + 3 >= byteEnd)
+ {
+ // Don't have 4 bytes
+ if (fallbackBuffer.bFallingBack)
+ fallbackBuffer.MovePrevious(); // Aren't using this fallback char
+ else
+ {
+ // Must've advanced already
+ Debug.Assert(chars > charStart,
+ "[UTF32Encoding.GetBytes]Expected chars to have advanced if normal character");
+ chars--; // Aren't using this char
+ }
+ ThrowBytesOverflow(encoder, bytes == byteStart); // Throw maybe (if no bytes written)
+ break; // Didn't throw, stop
+ }
+
+ if (_bigEndian)
+ {
+ *(bytes++) = (byte)(0x00);
+ *(bytes++) = (byte)(0x00);
+ *(bytes++) = (byte)((uint)ch >> 8); // Implies & 0xFF
+ *(bytes++) = (byte)(ch); // Implies & 0xFF
+ }
+ else
+ {
+ *(bytes++) = (byte)(ch); // Implies & 0xFF
+ *(bytes++) = (byte)((uint)ch >> 8); // Implies & 0xFF
+ *(bytes++) = (byte)(0x00);
+ *(bytes++) = (byte)(0x00);
+ }
+ }
+
+ // May have to do our last surrogate
+ if ((encoder == null || encoder.MustFlush) && highSurrogate > 0)
+ {
+ // We have to do the fallback for the lonely high surrogate
+ charsForFallback = chars;
+ fallbackBuffer.InternalFallback(highSurrogate, ref charsForFallback);
+ chars = charsForFallback;
+
+ highSurrogate = (char)0;
+ goto TryAgain;
+ }
+
+ // Fix our encoder if we have one
+ Debug.Assert(highSurrogate == 0 || (encoder != null && !encoder.MustFlush),
+ "[UTF32Encoding.GetBytes]Expected encoder to be flushed.");
+
+ if (encoder != null)
+ {
+ // Remember our left over surrogate (or 0 if flushing)
+ encoder.charLeftOver = highSurrogate;
+
+ // Need # chars used
+ encoder.m_charsUsed = (int)(chars - charStart);
+ }
+
+ // return the new length
+ return (int)(bytes - byteStart);
+ }
+
+ internal override unsafe int GetCharCount(byte* bytes, int count, DecoderNLS baseDecoder)
+ {
+ Debug.Assert(bytes != null, "[UTF32Encoding.GetCharCount]bytes!=null");
+ Debug.Assert(count >= 0, "[UTF32Encoding.GetCharCount]count >=0");
+
+ UTF32Decoder decoder = (UTF32Decoder)baseDecoder;
+
+ // None so far!
+ int charCount = 0;
+ byte* end = bytes + count;
+ byte* byteStart = bytes;
+
+ // Set up decoder
+ int readCount = 0;
+ uint iChar = 0;
+
+ // For fallback we may need a fallback buffer
+ DecoderFallbackBuffer fallbackBuffer = null;
+
+ // See if there's anything in our decoder
+ if (decoder != null)
+ {
+ readCount = decoder.readByteCount;
+ iChar = (uint)decoder.iChar;
+ fallbackBuffer = decoder.FallbackBuffer;
+
+ // Shouldn't have anything in fallback buffer for GetCharCount
+ // (don't have to check m_throwOnOverflow for chars or count)
+ Debug.Assert(fallbackBuffer.Remaining == 0,
+ "[UTF32Encoding.GetCharCount]Expected empty fallback buffer at start");
+ }
+ else
+ {
+ fallbackBuffer = this.decoderFallback.CreateFallbackBuffer();
+ }
+
+ // Set our internal fallback interesting things.
+ fallbackBuffer.InternalInitialize(byteStart, null);
+
+ // Loop through our input, 4 characters at a time!
+ while (bytes < end && charCount >= 0)
+ {
+ // Get our next character
+ if (_bigEndian)
+ {
+ // Scoot left and add it to the bottom
+ iChar <<= 8;
+ iChar += *(bytes++);
+ }
+ else
+ {
+ // Scoot right and add it to the top
+ iChar >>= 8;
+ iChar += (uint)(*(bytes++)) << 24;
+ }
+
+ readCount++;
+
+ // See if we have all the bytes yet
+ if (readCount < 4)
+ continue;
+
+ // Have the bytes
+ readCount = 0;
+
+ // See if its valid to encode
+ if (iChar > 0x10FFFF || (iChar >= 0xD800 && iChar <= 0xDFFF))
+ {
+ // Need to fall back these 4 bytes
+ byte[] fallbackBytes;
+ if (_bigEndian)
+ {
+ fallbackBytes = new byte[] {
+ unchecked((byte)(iChar>>24)), unchecked((byte)(iChar>>16)),
+ unchecked((byte)(iChar>>8)), unchecked((byte)(iChar)) };
+ }
+ else
+ {
+ fallbackBytes = new byte[] {
+ unchecked((byte)(iChar)), unchecked((byte)(iChar>>8)),
+ unchecked((byte)(iChar>>16)), unchecked((byte)(iChar>>24)) };
+ }
+
+ charCount += fallbackBuffer.InternalFallback(fallbackBytes, bytes);
+
+ // Ignore the illegal character
+ iChar = 0;
+ continue;
+ }
+
+ // Ok, we have something we can add to our output
+ if (iChar >= 0x10000)
+ {
+ // Surrogates take 2
+ charCount++;
+ }
+
+ // Add the rest of the surrogate or our normal character
+ charCount++;
+
+ // iChar is back to 0
+ iChar = 0;
+ }
+
+ // See if we have something left over that has to be decoded
+ if (readCount > 0 && (decoder == null || decoder.MustFlush))
+ {
+ // Oops, there's something left over with no place to go.
+ byte[] fallbackBytes = new byte[readCount];
+ if (_bigEndian)
+ {
+ while (readCount > 0)
+ {
+ fallbackBytes[--readCount] = unchecked((byte)iChar);
+ iChar >>= 8;
+ }
+ }
+ else
+ {
+ while (readCount > 0)
+ {
+ fallbackBytes[--readCount] = unchecked((byte)(iChar >> 24));
+ iChar <<= 8;
+ }
+ }
+
+ charCount += fallbackBuffer.InternalFallback(fallbackBytes, bytes);
+ }
+
+ // Check for overflows.
+ if (charCount < 0)
+ throw new ArgumentOutOfRangeException(nameof(count), SR.ArgumentOutOfRange_GetByteCountOverflow);
+
+ // Shouldn't have anything in fallback buffer for GetCharCount
+ // (don't have to check m_throwOnOverflow for chars or count)
+ Debug.Assert(fallbackBuffer.Remaining == 0,
+ "[UTF32Encoding.GetCharCount]Expected empty fallback buffer at end");
+
+ // Return our count
+ return charCount;
+ }
+
+ internal override unsafe int GetChars(byte* bytes, int byteCount,
+ char* chars, int charCount, DecoderNLS baseDecoder)
+ {
+ Debug.Assert(chars != null, "[UTF32Encoding.GetChars]chars!=null");
+ Debug.Assert(bytes != null, "[UTF32Encoding.GetChars]bytes!=null");
+ Debug.Assert(byteCount >= 0, "[UTF32Encoding.GetChars]byteCount >=0");
+ Debug.Assert(charCount >= 0, "[UTF32Encoding.GetChars]charCount >=0");
+
+ UTF32Decoder decoder = (UTF32Decoder)baseDecoder;
+
+ // None so far!
+ char* charStart = chars;
+ char* charEnd = chars + charCount;
+
+ byte* byteStart = bytes;
+ byte* byteEnd = bytes + byteCount;
+
+ // See if there's anything in our decoder (but don't clear it yet)
+ int readCount = 0;
+ uint iChar = 0;
+
+ // For fallback we may need a fallback buffer
+ DecoderFallbackBuffer fallbackBuffer = null;
+ char* charsForFallback;
+
+ // See if there's anything in our decoder
+ if (decoder != null)
+ {
+ readCount = decoder.readByteCount;
+ iChar = (uint)decoder.iChar;
+ fallbackBuffer = baseDecoder.FallbackBuffer;
+
+ // Shouldn't have anything in fallback buffer for GetChars
+ // (don't have to check m_throwOnOverflow for chars)
+ Debug.Assert(fallbackBuffer.Remaining == 0,
+ "[UTF32Encoding.GetChars]Expected empty fallback buffer at start");
+ }
+ else
+ {
+ fallbackBuffer = this.decoderFallback.CreateFallbackBuffer();
+ }
+
+ // Set our internal fallback interesting things.
+ fallbackBuffer.InternalInitialize(bytes, chars + charCount);
+
+ // Loop through our input, 4 characters at a time!
+ while (bytes < byteEnd)
+ {
+ // Get our next character
+ if (_bigEndian)
+ {
+ // Scoot left and add it to the bottom
+ iChar <<= 8;
+ iChar += *(bytes++);
+ }
+ else
+ {
+ // Scoot right and add it to the top
+ iChar >>= 8;
+ iChar += (uint)(*(bytes++)) << 24;
+ }
+
+ readCount++;
+
+ // See if we have all the bytes yet
+ if (readCount < 4)
+ continue;
+
+ // Have the bytes
+ readCount = 0;
+
+ // See if its valid to encode
+ if (iChar > 0x10FFFF || (iChar >= 0xD800 && iChar <= 0xDFFF))
+ {
+ // Need to fall back these 4 bytes
+ byte[] fallbackBytes;
+ if (_bigEndian)
+ {
+ fallbackBytes = new byte[] {
+ unchecked((byte)(iChar>>24)), unchecked((byte)(iChar>>16)),
+ unchecked((byte)(iChar>>8)), unchecked((byte)(iChar)) };
+ }
+ else
+ {
+ fallbackBytes = new byte[] {
+ unchecked((byte)(iChar)), unchecked((byte)(iChar>>8)),
+ unchecked((byte)(iChar>>16)), unchecked((byte)(iChar>>24)) };
+ }
+
+ // Chars won't be updated unless this works.
+ charsForFallback = chars;
+ bool fallbackResult = fallbackBuffer.InternalFallback(fallbackBytes, bytes, ref charsForFallback);
+ chars = charsForFallback;
+
+ if (!fallbackResult)
+ {
+
+ // Couldn't fallback, throw or wait til next time
+ // We either read enough bytes for bytes-=4 to work, or we're
+ // going to throw in ThrowCharsOverflow because chars == charStart
+ Debug.Assert(bytes >= byteStart + 4 || chars == charStart,
+ "[UTF32Encoding.GetChars]Expected to have consumed bytes or throw (bad surrogate)");
+ bytes -= 4; // get back to where we were
+ iChar = 0; // Remembering nothing
+ fallbackBuffer.InternalReset();
+ ThrowCharsOverflow(decoder, chars == charStart);// Might throw, if no chars output
+ break; // Stop here, didn't throw
+ }
+
+ // Ignore the illegal character
+ iChar = 0;
+ continue;
+ }
+
+
+ // Ok, we have something we can add to our output
+ if (iChar >= 0x10000)
+ {
+ // Surrogates take 2
+ if (chars >= charEnd - 1)
+ {
+ // Throwing or stopping
+ // We either read enough bytes for bytes-=4 to work, or we're
+ // going to throw in ThrowCharsOverflow because chars == charStart
+ Debug.Assert(bytes >= byteStart + 4 || chars == charStart,
+ "[UTF32Encoding.GetChars]Expected to have consumed bytes or throw (surrogate)");
+ bytes -= 4; // get back to where we were
+ iChar = 0; // Remembering nothing
+ ThrowCharsOverflow(decoder, chars == charStart);// Might throw, if no chars output
+ break; // Stop here, didn't throw
+ }
+
+ *(chars++) = GetHighSurrogate(iChar);
+ iChar = GetLowSurrogate(iChar);
+ }
+ // Bounds check for normal character
+ else if (chars >= charEnd)
+ {
+ // Throwing or stopping
+ // We either read enough bytes for bytes-=4 to work, or we're
+ // going to throw in ThrowCharsOverflow because chars == charStart
+ Debug.Assert(bytes >= byteStart + 4 || chars == charStart,
+ "[UTF32Encoding.GetChars]Expected to have consumed bytes or throw (normal char)");
+ bytes -= 4; // get back to where we were
+ iChar = 0; // Remembering nothing
+ ThrowCharsOverflow(decoder, chars == charStart);// Might throw, if no chars output
+ break; // Stop here, didn't throw
+ }
+
+ // Add the rest of the surrogate or our normal character
+ *(chars++) = (char)iChar;
+
+ // iChar is back to 0
+ iChar = 0;
+ }
+
+ // See if we have something left over that has to be decoded
+ if (readCount > 0 && (decoder == null || decoder.MustFlush))
+ {
+ // Oops, there's something left over with no place to go.
+ byte[] fallbackBytes = new byte[readCount];
+ int tempCount = readCount;
+ if (_bigEndian)
+ {
+ while (tempCount > 0)
+ {
+ fallbackBytes[--tempCount] = unchecked((byte)iChar);
+ iChar >>= 8;
+ }
+ }
+ else
+ {
+ while (tempCount > 0)
+ {
+ fallbackBytes[--tempCount] = unchecked((byte)(iChar >> 24));
+ iChar <<= 8;
+ }
+ }
+
+ charsForFallback = chars;
+ bool fallbackResult = fallbackBuffer.InternalFallback(fallbackBytes, bytes, ref charsForFallback);
+ chars = charsForFallback;
+
+ if (!fallbackResult)
+ {
+ // Couldn't fallback.
+ fallbackBuffer.InternalReset();
+ ThrowCharsOverflow(decoder, chars == charStart);// Might throw, if no chars output
+ // Stop here, didn't throw, backed up, so still nothing in buffer
+ }
+ else
+ {
+ // Don't clear our decoder unless we could fall it back.
+ // If we caught the if above, then we're a convert() and will catch this next time.
+ readCount = 0;
+ iChar = 0;
+ }
+ }
+
+ // Remember any left over stuff, clearing buffer as well for MustFlush
+ if (decoder != null)
+ {
+ decoder.iChar = (int)iChar;
+ decoder.readByteCount = readCount;
+ decoder.m_bytesUsed = (int)(bytes - byteStart);
+ }
+
+ // Shouldn't have anything in fallback buffer for GetChars
+ // (don't have to check m_throwOnOverflow for chars)
+ Debug.Assert(fallbackBuffer.Remaining == 0,
+ "[UTF32Encoding.GetChars]Expected empty fallback buffer at end");
+
+ // Return our count
+ return (int)(chars - charStart);
+ }
+
+
+ private uint GetSurrogate(char cHigh, char cLow)
+ {
+ return (((uint)cHigh - 0xD800) * 0x400) + ((uint)cLow - 0xDC00) + 0x10000;
+ }
+
+ private char GetHighSurrogate(uint iChar)
+ {
+ return (char)((iChar - 0x10000) / 0x400 + 0xD800);
+ }
+
+ private char GetLowSurrogate(uint iChar)
+ {
+ return (char)((iChar - 0x10000) % 0x400 + 0xDC00);
+ }
+
+
+ public override Decoder GetDecoder()
+ {
+ return new UTF32Decoder(this);
+ }
+
+
+ public override Encoder GetEncoder()
+ {
+ return new EncoderNLS(this);
+ }
+
+
+ public override int GetMaxByteCount(int charCount)
+ {
+ if (charCount < 0)
+ throw new ArgumentOutOfRangeException(nameof(charCount),
+ SR.ArgumentOutOfRange_NeedNonNegNum);
+ Contract.EndContractBlock();
+
+ // Characters would be # of characters + 1 in case left over high surrogate is ? * max fallback
+ long byteCount = (long)charCount + 1;
+
+ if (EncoderFallback.MaxCharCount > 1)
+ byteCount *= EncoderFallback.MaxCharCount;
+
+ // 4 bytes per char
+ byteCount *= 4;
+
+ if (byteCount > 0x7fffffff)
+ throw new ArgumentOutOfRangeException(nameof(charCount), SR.ArgumentOutOfRange_GetByteCountOverflow);
+
+ return (int)byteCount;
+ }
+
+
+ public override int GetMaxCharCount(int byteCount)
+ {
+ if (byteCount < 0)
+ throw new ArgumentOutOfRangeException(nameof(byteCount),
+ SR.ArgumentOutOfRange_NeedNonNegNum);
+ Contract.EndContractBlock();
+
+ // A supplementary character becomes 2 surrogate characters, so 4 input bytes becomes 2 chars,
+ // plus we may have 1 surrogate char left over if the decoder has 3 bytes in it already for a non-bmp char.
+ // Have to add another one because 1/2 == 0, but 3 bytes left over could be 2 char surrogate pair
+ int charCount = (byteCount / 2) + 2;
+
+ // Also consider fallback because our input bytes could be out of range of unicode.
+ // Since fallback would fallback 4 bytes at a time, we'll only fall back 1/2 of MaxCharCount.
+ if (DecoderFallback.MaxCharCount > 2)
+ {
+ // Multiply time fallback size
+ charCount *= DecoderFallback.MaxCharCount;
+
+ // We were already figuring 2 chars per 4 bytes, but fallback will be different #
+ charCount /= 2;
+ }
+
+ if (charCount > 0x7fffffff)
+ throw new ArgumentOutOfRangeException(nameof(byteCount), SR.ArgumentOutOfRange_GetCharCountOverflow);
+
+ return (int)charCount;
+ }
+
+
+ public override byte[] GetPreamble()
+ {
+ if (_emitUTF32ByteOrderMark)
+ {
+ // Allocate new array to prevent users from modifying it.
+ if (_bigEndian)
+ {
+ return new byte[4] { 0x00, 0x00, 0xFE, 0xFF };
+ }
+ else
+ {
+ return new byte[4] { 0xFF, 0xFE, 0x00, 0x00 }; // 00 00 FE FF
+ }
+ }
+ else
+ return Array.Empty<byte>();
+ }
+
+
+ public override bool Equals(Object value)
+ {
+ UTF32Encoding that = value as UTF32Encoding;
+ if (that != null)
+ {
+ return (_emitUTF32ByteOrderMark == that._emitUTF32ByteOrderMark) &&
+ (_bigEndian == that._bigEndian) &&
+ (EncoderFallback.Equals(that.EncoderFallback)) &&
+ (DecoderFallback.Equals(that.DecoderFallback));
+ }
+ return (false);
+ }
+
+
+ public override int GetHashCode()
+ {
+ //Not great distribution, but this is relatively unlikely to be used as the key in a hashtable.
+ return this.EncoderFallback.GetHashCode() + this.DecoderFallback.GetHashCode() +
+ CodePage + (_emitUTF32ByteOrderMark ? 4 : 0) + (_bigEndian ? 8 : 0);
+ }
+
+ [Serializable]
+ private sealed class UTF32Decoder : DecoderNLS
+ {
+ // Need a place to store any extra bytes we may have picked up
+ internal int iChar = 0;
+ internal int readByteCount = 0;
+
+ public UTF32Decoder(UTF32Encoding encoding) : base(encoding)
+ {
+ // base calls reset
+ }
+
+ public override void Reset()
+ {
+ this.iChar = 0;
+ this.readByteCount = 0;
+ if (m_fallbackBuffer != null)
+ m_fallbackBuffer.Reset();
+ }
+
+ // Anything left in our decoder?
+ internal override bool HasState
+ {
+ get
+ {
+ // ReadByteCount is our flag. (iChar==0 doesn't mean much).
+ return (this.readByteCount != 0);
+ }
+ }
+ }
+ }
+}