diff options
Diffstat (limited to 'src/mscorlib/shared/System/Text/Encoder.cs')
-rw-r--r-- | src/mscorlib/shared/System/Text/Encoder.cs | 333 |
1 files changed, 333 insertions, 0 deletions
diff --git a/src/mscorlib/shared/System/Text/Encoder.cs b/src/mscorlib/shared/System/Text/Encoder.cs new file mode 100644 index 0000000000..e4e91765e1 --- /dev/null +++ b/src/mscorlib/shared/System/Text/Encoder.cs @@ -0,0 +1,333 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using System.Runtime.Serialization; +using System.Text; +using System; +using System.Diagnostics; +using System.Diagnostics.Contracts; + +namespace System.Text +{ + // An Encoder is used to encode a sequence of blocks of characters into + // a sequence of blocks of bytes. Following instantiation of an encoder, + // sequential blocks of characters are converted into blocks of bytes through + // calls to the GetBytes method. The encoder maintains state between the + // conversions, allowing it to correctly encode character sequences that span + // adjacent blocks. + // + // Instances of specific implementations of the Encoder abstract base + // class are typically obtained through calls to the GetEncoder method + // of Encoding objects. + // + [Serializable] + public abstract class Encoder + { + internal EncoderFallback m_fallback = null; + + [NonSerialized] + internal EncoderFallbackBuffer m_fallbackBuffer = null; + + internal void SerializeEncoder(SerializationInfo info) + { + info.AddValue("m_fallback", this.m_fallback); + } + + protected Encoder() + { + // We don't call default reset because default reset probably isn't good if we aren't initialized. + } + + public EncoderFallback Fallback + { + get + { + return m_fallback; + } + + set + { + if (value == null) + throw new ArgumentNullException(nameof(value)); + Contract.EndContractBlock(); + + // Can't change fallback if buffer is wrong + if (m_fallbackBuffer != null && m_fallbackBuffer.Remaining > 0) + throw new ArgumentException( + SR.Argument_FallbackBufferNotEmpty, nameof(value)); + + m_fallback = value; + m_fallbackBuffer = null; + } + } + + // Note: we don't test for threading here because async access to Encoders and Decoders + // doesn't work anyway. + public EncoderFallbackBuffer FallbackBuffer + { + get + { + if (m_fallbackBuffer == null) + { + if (m_fallback != null) + m_fallbackBuffer = m_fallback.CreateFallbackBuffer(); + else + m_fallbackBuffer = EncoderFallback.ReplacementFallback.CreateFallbackBuffer(); + } + + return m_fallbackBuffer; + } + } + + internal bool InternalHasFallbackBuffer + { + get + { + return m_fallbackBuffer != null; + } + } + + // Reset the Encoder + // + // Normally if we call GetBytes() and an error is thrown we don't change the state of the encoder. This + // would allow the caller to correct the error condition and try again (such as if they need a bigger buffer.) + // + // If the caller doesn't want to try again after GetBytes() throws an error, then they need to call Reset(). + // + // Virtual implementation has to call GetBytes with flush and a big enough buffer to clear a 0 char string + // We avoid GetMaxByteCount() because a) we can't call the base encoder and b) it might be really big. + public virtual void Reset() + { + char[] charTemp = { }; + byte[] byteTemp = new byte[GetByteCount(charTemp, 0, 0, true)]; + GetBytes(charTemp, 0, 0, byteTemp, 0, true); + if (m_fallbackBuffer != null) + m_fallbackBuffer.Reset(); + } + + // Returns the number of bytes the next call to GetBytes will + // produce if presented with the given range of characters and the given + // value of the flush parameter. The returned value takes into + // account the state in which the encoder was left following the last call + // to GetBytes. The state of the encoder is not affected by a call + // to this method. + // + public abstract int GetByteCount(char[] chars, int index, int count, bool flush); + + // We expect this to be the workhorse for NLS encodings + // unfortunately for existing overrides, it has to call the [] version, + // which is really slow, so avoid this method if you might be calling external encodings. + [CLSCompliant(false)] + public virtual unsafe int GetByteCount(char* chars, int count, bool flush) + { + // Validate input parameters + if (chars == null) + throw new ArgumentNullException(nameof(chars), + SR.ArgumentNull_Array); + + if (count < 0) + throw new ArgumentOutOfRangeException(nameof(count), + SR.ArgumentOutOfRange_NeedNonNegNum); + Contract.EndContractBlock(); + + char[] arrChar = new char[count]; + int index; + + for (index = 0; index < count; index++) + arrChar[index] = chars[index]; + + return GetByteCount(arrChar, 0, count, flush); + } + + // Encodes a range of characters in a character array into a range of bytes + // in a byte array. The method encodes charCount characters from + // chars starting at index charIndex, storing the resulting + // bytes in bytes starting at index byteIndex. The encoding + // takes into account the state in which the encoder was left following the + // last call to this method. The flush parameter indicates whether + // the encoder should flush any shift-states and partial characters at the + // end of the conversion. To ensure correct termination of a sequence of + // blocks of encoded bytes, the last call to GetBytes should specify + // a value of true for the flush parameter. + // + // An exception occurs if the byte array is not large enough to hold the + // complete encoding of the characters. The GetByteCount method can + // be used to determine the exact number of bytes that will be produced for + // a given range of characters. Alternatively, the GetMaxByteCount + // method of the Encoding that produced this encoder can be used to + // determine the maximum number of bytes that will be produced for a given + // number of characters, regardless of the actual character values. + // + public abstract int GetBytes(char[] chars, int charIndex, int charCount, + byte[] bytes, int byteIndex, bool flush); + + // We expect this to be the workhorse for NLS Encodings, but for existing + // ones we need a working (if slow) default implementation) + // + // WARNING WARNING WARNING + // + // WARNING: If this breaks it could be a security threat. Obviously we + // call this internally, so you need to make sure that your pointers, counts + // and indexes are correct when you call this method. + // + // In addition, we have internal code, which will be marked as "safe" calling + // this code. However this code is dependent upon the implementation of an + // external GetBytes() method, which could be overridden by a third party and + // the results of which cannot be guaranteed. We use that result to copy + // the byte[] to our byte* output buffer. If the result count was wrong, we + // could easily overflow our output buffer. Therefore we do an extra test + // when we copy the buffer so that we don't overflow byteCount either. + [CLSCompliant(false)] + public virtual unsafe int GetBytes(char* chars, int charCount, + byte* bytes, int byteCount, bool flush) + { + // Validate input parameters + if (bytes == null || chars == null) + throw new ArgumentNullException(bytes == null ? nameof(bytes) : nameof(chars), + SR.ArgumentNull_Array); + + if (charCount < 0 || byteCount < 0) + throw new ArgumentOutOfRangeException((charCount < 0 ? nameof(charCount) : nameof(byteCount)), + SR.ArgumentOutOfRange_NeedNonNegNum); + Contract.EndContractBlock(); + + // Get the char array to convert + char[] arrChar = new char[charCount]; + + int index; + for (index = 0; index < charCount; index++) + arrChar[index] = chars[index]; + + // Get the byte array to fill + byte[] arrByte = new byte[byteCount]; + + // Do the work + int result = GetBytes(arrChar, 0, charCount, arrByte, 0, flush); + + Debug.Assert(result <= byteCount, "Returned more bytes than we have space for"); + + // Copy the byte array + // WARNING: We MUST make sure that we don't copy too many bytes. We can't + // rely on result because it could be a 3rd party implementation. We need + // to make sure we never copy more than byteCount bytes no matter the value + // of result + if (result < byteCount) + byteCount = result; + + // Don't copy too many bytes! + for (index = 0; index < byteCount; index++) + bytes[index] = arrByte[index]; + + return byteCount; + } + + // This method is used to avoid running out of output buffer space. + // It will encode until it runs out of chars, and then it will return + // true if it the entire input was converted. In either case it + // will also return the number of converted chars and output bytes used. + // It will only throw a buffer overflow exception if the entire lenght of bytes[] is + // too small to store the next byte. (like 0 or maybe 1 or 4 for some encodings) + // We're done processing this buffer only if completed returns true. + // + // Might consider checking Max...Count to avoid the extra counting step. + // + // Note that if all of the input chars are not consumed, then we'll do a /2, which means + // that its likely that we didn't consume as many chars as we could have. For some + // applications this could be slow. (Like trying to exactly fill an output buffer from a bigger stream) + public virtual void Convert(char[] chars, int charIndex, int charCount, + byte[] bytes, int byteIndex, int byteCount, bool flush, + out int charsUsed, out int bytesUsed, out bool completed) + { + // Validate parameters + if (chars == null || bytes == null) + throw new ArgumentNullException((chars == null ? nameof(chars) : nameof(bytes)), + SR.ArgumentNull_Array); + + if (charIndex < 0 || charCount < 0) + throw new ArgumentOutOfRangeException((charIndex < 0 ? nameof(charIndex) : nameof(charCount)), + SR.ArgumentOutOfRange_NeedNonNegNum); + + if (byteIndex < 0 || byteCount < 0) + throw new ArgumentOutOfRangeException((byteIndex < 0 ? nameof(byteIndex) : nameof(byteCount)), + SR.ArgumentOutOfRange_NeedNonNegNum); + + if (chars.Length - charIndex < charCount) + throw new ArgumentOutOfRangeException(nameof(chars), + SR.ArgumentOutOfRange_IndexCountBuffer); + + if (bytes.Length - byteIndex < byteCount) + throw new ArgumentOutOfRangeException(nameof(bytes), + SR.ArgumentOutOfRange_IndexCountBuffer); + Contract.EndContractBlock(); + + charsUsed = charCount; + + // Its easy to do if it won't overrun our buffer. + // Note: We don't want to call unsafe version because that might be an untrusted version + // which could be really unsafe and we don't want to mix it up. + while (charsUsed > 0) + { + if (GetByteCount(chars, charIndex, charsUsed, flush) <= byteCount) + { + bytesUsed = GetBytes(chars, charIndex, charsUsed, bytes, byteIndex, flush); + completed = (charsUsed == charCount && + (m_fallbackBuffer == null || m_fallbackBuffer.Remaining == 0)); + return; + } + + // Try again with 1/2 the count, won't flush then 'cause won't read it all + flush = false; + charsUsed /= 2; + } + + // Oops, we didn't have anything, we'll have to throw an overflow + throw new ArgumentException(SR.Argument_ConversionOverflow); + } + + // Same thing, but using pointers + // + // Might consider checking Max...Count to avoid the extra counting step. + // + // Note that if all of the input chars are not consumed, then we'll do a /2, which means + // that its likely that we didn't consume as many chars as we could have. For some + // applications this could be slow. (Like trying to exactly fill an output buffer from a bigger stream) + [CLSCompliant(false)] + public virtual unsafe void Convert(char* chars, int charCount, + byte* bytes, int byteCount, bool flush, + out int charsUsed, out int bytesUsed, out bool completed) + { + // Validate input parameters + if (bytes == null || chars == null) + throw new ArgumentNullException(bytes == null ? nameof(bytes) : nameof(chars), + SR.ArgumentNull_Array); + if (charCount < 0 || byteCount < 0) + throw new ArgumentOutOfRangeException((charCount < 0 ? nameof(charCount) : nameof(byteCount)), + SR.ArgumentOutOfRange_NeedNonNegNum); + Contract.EndContractBlock(); + + // Get ready to do it + charsUsed = charCount; + + // Its easy to do if it won't overrun our buffer. + while (charsUsed > 0) + { + if (GetByteCount(chars, charsUsed, flush) <= byteCount) + { + bytesUsed = GetBytes(chars, charsUsed, bytes, byteCount, flush); + completed = (charsUsed == charCount && + (m_fallbackBuffer == null || m_fallbackBuffer.Remaining == 0)); + return; + } + + // Try again with 1/2 the count, won't flush then 'cause won't read it all + flush = false; + charsUsed /= 2; + } + + // Oops, we didn't have anything, we'll have to throw an overflow + throw new ArgumentException(SR.Argument_ConversionOverflow); + } + } +} + |