diff options
Diffstat (limited to 'src/mscorlib/src/System/Text/DBCSCodePageEncoding.cs')
-rw-r--r-- | src/mscorlib/src/System/Text/DBCSCodePageEncoding.cs | 1194 |
1 files changed, 0 insertions, 1194 deletions
diff --git a/src/mscorlib/src/System/Text/DBCSCodePageEncoding.cs b/src/mscorlib/src/System/Text/DBCSCodePageEncoding.cs deleted file mode 100644 index 28b85d591e..0000000000 --- a/src/mscorlib/src/System/Text/DBCSCodePageEncoding.cs +++ /dev/null @@ -1,1194 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. -// See the LICENSE file in the project root for more information. - -#if FEATURE_CODEPAGES_FILE // requires BaseCodePageEncooding -namespace System.Text -{ - using System; - using System.Diagnostics; - using System.Diagnostics.Contracts; - using System.Text; - using System.Threading; - using System.Runtime.Serialization; - using System.Security; - using System.Security.Permissions; - - // DBCSCodePageEncoding - // - [Serializable] - internal class DBCSCodePageEncoding : BaseCodePageEncoding, ISerializable - { - // Pointers to our memory section parts - [NonSerialized] - protected unsafe char* mapBytesToUnicode = null; // char 65536 - [NonSerialized] - protected unsafe ushort* mapUnicodeToBytes = null; // byte 65536 - [NonSerialized] - protected unsafe int* mapCodePageCached = null; // to remember which CP is cached - - [NonSerialized] - protected const char UNKNOWN_CHAR_FLAG=(char)0x0; - [NonSerialized] - protected const char UNICODE_REPLACEMENT_CHAR=(char)0xFFFD; - [NonSerialized] - protected const char LEAD_BYTE_CHAR=(char)0xFFFE; // For lead bytes - - // Note that even though we provide bytesUnknown and byteCountUnknown, - // They aren't actually used because of the fallback mechanism. (char is though) - [NonSerialized] - ushort bytesUnknown; - [NonSerialized] - int byteCountUnknown; - [NonSerialized] - protected char charUnknown = (char)0; - - public DBCSCodePageEncoding(int codePage) : this(codePage, codePage) - { - } - - internal DBCSCodePageEncoding(int codePage, int dataCodePage) : base(codePage, dataCodePage) - { - } - - // Constructor called by serialization. - // Note: We use the base GetObjectData however - internal DBCSCodePageEncoding(SerializationInfo info, StreamingContext context) : base(0) - { - // Actually this can't ever get called, CodePageEncoding is our proxy - Debug.Assert(false, "Didn't expect to make it to DBCSCodePageEncoding serialization constructor"); - throw new ArgumentNullException("this"); - } - - // MBCS data section: - // - // We treat each multibyte pattern as 2 bytes in our table. If its a single byte, then the high byte - // for that position will be 0. When the table is loaded, leading bytes are flagged with 0xFFFE, so - // when reading the table look up with each byte. If the result is 0xFFFE, then use 2 bytes to read - // further data. FFFF is a special value indicating that the unicode code is the same as the - // character code (this helps us support code points < 0x20). FFFD is used as replacement character. - // - // Normal table: - // WCHAR* - Starting with MB code point 0. - // FFFF indicates we are to use the multibyte value for our code point. - // FFFE is the lead byte mark. (This should only appear in positions < 0x100) - // FFFD is the replacement (unknown character) mark. - // 2-20 means to advance the pointer 2-0x20 characters. - // 1 means that to advance to the multibyte position contained in the next char. - // 0 nothing special (I don't think its possible.) - // - // Table ends when multibyte position has advanced to 0xFFFF. - // - // Bytes->Unicode Best Fit table: - // WCHAR* - Same as normal table, except first wchar is byte position to start at. - // - // Unicode->Bytes Best Fit Table: - // WCHAR* - Same as normal table, except first wchar is char position to start at and - // we loop through unicode code points and the table has the byte points that - // corrospond to those unicode code points. - // We have a managed code page entry, so load our tables - // - protected override unsafe void LoadManagedCodePage() - { - // Should be loading OUR code page - Debug.Assert(pCodePage->CodePage == this.dataTableCodePage, - "[DBCSCodePageEncoding.LoadManagedCodePage]Expected to load data table code page"); - - // Make sure we're really a 1 byte code page - if (pCodePage->ByteCount != 2) - throw new NotSupportedException( - Environment.GetResourceString("NotSupported_NoCodepageData", CodePage)); - // Remember our unknown bytes & chars - bytesUnknown = pCodePage->ByteReplace; - charUnknown = pCodePage->UnicodeReplace; - - // Need to make sure the fallback buffer's fallback char is correct - if (this.DecoderFallback.IsMicrosoftBestFitFallback) - { - ((InternalDecoderBestFitFallback)(this.DecoderFallback)).cReplacement = charUnknown; - } - - // Is our replacement bytesUnknown a single or double byte character? - byteCountUnknown = 1; - if (bytesUnknown > 0xff) - byteCountUnknown++; - - // We use fallback encoder, which uses ?, which so far all of our tables do as well - Debug.Assert(bytesUnknown == 0x3f, - "[DBCSCodePageEncoding.LoadManagedCodePage]Expected 0x3f (?) as unknown byte character"); - - // Get our mapped section (bytes to allocate = 2 bytes per 65536 Unicode chars + 2 bytes per 65536 DBCS chars) - // Plus 4 byte to remember CP # when done loading it. (Don't want to get IA64 or anything out of alignment) - byte *pMemorySection = GetSharedMemory(65536 * 2 * 2 + 4 + this.iExtraBytes); - - mapBytesToUnicode = (char*)pMemorySection; - mapUnicodeToBytes = (ushort*)(pMemorySection + 65536 * 2); - mapCodePageCached = (int*)(pMemorySection + 65536 * 2 * 2 + this.iExtraBytes); - - // If its cached (& filled in) we don't have to do anything else - if (*mapCodePageCached != 0) - { - Debug.Assert(((*mapCodePageCached == this.dataTableCodePage && this.bFlagDataTable) || - (*mapCodePageCached == this.CodePage && !this.bFlagDataTable)), - "[DBCSCodePageEncoding.LoadManagedCodePage]Expected mapped section cached page flag to be set to data table or regular code page."); - - // Special case for GB18030 because it mangles its own code page after this function - if ((*mapCodePageCached != this.dataTableCodePage && this.bFlagDataTable) || - (*mapCodePageCached != this.CodePage && !this.bFlagDataTable)) - throw new OutOfMemoryException( - Environment.GetResourceString("Arg_OutOfMemoryException")); - - // If its cached (& filled in) we don't have to do anything else - return; - } - - // Need to read our data file and fill in our section. - // WARNING: Multiple code pieces could do this at once (so we don't have to lock machine-wide) - // so be careful here. Only stick legal values in here, don't stick temporary values. - - // Move to the beginning of the data section - char* pData = (char*)&(pCodePage->FirstDataWord); - - // We start at bytes position 0 - int bytePosition = 0; - int useBytes = 0; - - while (bytePosition < 0x10000) - { - // Get the next byte - char input = *pData; - pData++; - - // build our table: - if (input == 1) - { - // Use next data as our byte position - bytePosition = (int)(*pData); - pData++; - continue; - } - else if (input < 0x20 && input > 0) - { - // Advance input characters - bytePosition += input; - continue; - } - else if (input == 0xFFFF) - { - // Same as our bytePosition - useBytes = bytePosition; - input = unchecked((char)bytePosition); - } - else if (input == LEAD_BYTE_CHAR) // 0xfffe - { - // Lead byte mark - Debug.Assert(bytePosition < 0x100, "[DBCSCodePageEncoding.LoadManagedCodePage]expected lead byte to be < 0x100"); - useBytes = bytePosition; - // input stays 0xFFFE - } - else if (input == UNICODE_REPLACEMENT_CHAR) - { - // Replacement char is already done - bytePosition++; - continue; - } - else - { - // Use this character - useBytes = bytePosition; - // input == input; - } - - // We may need to clean up the selected character & position - if (CleanUpBytes(ref useBytes)) - { - // Use this selected character at the selected position, don't do this if not supposed to. - if (input != LEAD_BYTE_CHAR) - { - // Don't do this for lead byte marks. - mapUnicodeToBytes[input] = unchecked((ushort)useBytes); - } - mapBytesToUnicode[useBytes] = input; - } - bytePosition++; - } - - // See if we have any clean up junk to do - CleanUpEndBytes(mapBytesToUnicode); - - // We're done with our mapped section, set our flag so others don't have to rebuild table. - // We only do this if we're flagging(using) the data table as our primary mechanism - if (this.bFlagDataTable) - *mapCodePageCached = this.dataTableCodePage; - } - - // Any special processing for this code page - protected virtual bool CleanUpBytes(ref int bytes) - { - return true; - } - - // Any special processing for this code page - protected virtual unsafe void CleanUpEndBytes(char* chars) - { - } - - // Private object for locking instead of locking on a public type for SQL reliability work. - private static Object s_InternalSyncObject; - private static Object InternalSyncObject - { - get - { - if (s_InternalSyncObject == null) - { - Object o = new Object(); - Interlocked.CompareExchange<Object>(ref s_InternalSyncObject, o, null); - } - return s_InternalSyncObject; - } - } - - // Read in our best fit table - protected unsafe override void ReadBestFitTable() - { - // Lock so we don't confuse ourselves. - lock(InternalSyncObject) - { - // If we got a best fit array already then don't do this - if (arrayUnicodeBestFit == null) - { - // - // Read in Best Fit table. - // - - // First we have to advance past original character mapping table - // Move to the beginning of the data section - char* pData = (char*)&(pCodePage->FirstDataWord); - - // We start at bytes position 0 - int bytesPosition = 0; - - while (bytesPosition < 0x10000) - { - // Get the next byte - char input = *pData; - pData++; - - // build our table: - if (input == 1) - { - // Use next data as our byte position - bytesPosition = (int)(*pData); - pData++; - } - else if (input < 0x20 && input > 0) - { - // Advance input characters - bytesPosition += input; - } - else - { - // All other cases add 1 to bytes position - bytesPosition++; - } - } - - // Now bytesPosition is at start of bytes->unicode best fit table - char* pBytes2Unicode = pData; - - // Now pData should be pointing to first word of bytes -> unicode best fit table - // (which we're also not using at the moment) - int iBestFitCount = 0; - bytesPosition = *pData; - pData++; - - while (bytesPosition < 0x10000) - { - // Get the next byte - char input = *pData; - pData++; - - // build our table: - if (input == 1) - { - // Use next data as our byte position - bytesPosition = (int)(*pData); - pData++; - } - else if (input < 0x20 && input > 0) - { - // Advance input characters - bytesPosition += input; - } - else - { - // Use this character (unless its unknown, unk just skips 1) - if (input != UNICODE_REPLACEMENT_CHAR) - { - int correctedChar = bytesPosition; - if (CleanUpBytes(ref correctedChar)) - { - // Sometimes correction makes them same as no best fit, skip those. - if (mapBytesToUnicode[correctedChar] != input) - { - iBestFitCount++; - } - } - } - - // Position gets incremented in any case. - bytesPosition++; - } - - } - - // Now we know how big the best fit table has to be - char[] arrayTemp = new char[iBestFitCount * 2]; - - // Now we know how many best fits we have, so go back & read them in - iBestFitCount = 0; - pData = pBytes2Unicode; - bytesPosition = *pData; - pData++; - bool bOutOfOrder = false; - - // Read it all in again - while (bytesPosition < 0x10000) - { - // Get the next byte - char input = *pData; - pData++; - - // build our table: - if (input == 1) - { - // Use next data as our byte position - bytesPosition = (int)(*pData); - pData++; - } - else if (input < 0x20 && input > 0) - { - // Advance input characters - bytesPosition += input; - } - else - { - // Use this character (unless its unknown, unk just skips 1) - if (input != UNICODE_REPLACEMENT_CHAR) - { - int correctedChar = bytesPosition; - if (CleanUpBytes(ref correctedChar)) - { - // Sometimes correction makes them same as no best fit, skip those. - if (mapBytesToUnicode[correctedChar] != input) - { - if (correctedChar != bytesPosition) - bOutOfOrder = true; - - arrayTemp[iBestFitCount++] = unchecked((char)correctedChar); - arrayTemp[iBestFitCount++] = input; - } - } - } - - // Position gets incremented in any case. - bytesPosition++; - } - } - - // If they're out of order we need to sort them. - if (bOutOfOrder) - { - Debug.Assert((arrayTemp.Length / 2) < 20, - "[DBCSCodePageEncoding.ReadBestFitTable]Expected small best fit table < 20 for code page " + CodePage + ", not " + arrayTemp.Length / 2); - - for (int i = 0; i < arrayTemp.Length - 2; i+=2) - { - int iSmallest = i; - char cSmallest = arrayTemp[i]; - - for (int j = i + 2; j < arrayTemp.Length; j+=2) - { - // Find smallest one for front - if (cSmallest > arrayTemp[j]) - { - cSmallest = arrayTemp[j]; - iSmallest = j; - } - } - - // If smallest one is something else, switch them - if (iSmallest != i) - { - char temp = arrayTemp[iSmallest]; - arrayTemp[iSmallest] = arrayTemp[i]; - arrayTemp[i] = temp; - temp = arrayTemp[iSmallest+1]; - arrayTemp[iSmallest+1] = arrayTemp[i+1]; - arrayTemp[i+1] = temp; - } - } - } - - // Remember our array - arrayBytesBestFit = arrayTemp; - - // Now were at beginning of Unicode -> Bytes best fit table, need to count them - char* pUnicode2Bytes = pData; - int unicodePosition = *(pData++); - iBestFitCount = 0; - - while (unicodePosition < 0x10000) - { - // Get the next byte - char input = *pData; - pData++; - - // build our table: - if (input == 1) - { - // Use next data as our byte position - unicodePosition = (int)*pData; - pData++; - } - else if (input < 0x20 && input > 0) - { - // Advance input characters - unicodePosition += input; - } - else - { - // Same as our unicodePosition or use this character - if (input > 0) - iBestFitCount++; - unicodePosition++; - } - } - - // Allocate our table - arrayTemp = new char[iBestFitCount*2]; - - // Now do it again to fill the array with real values - pData = pUnicode2Bytes; - unicodePosition = *(pData++); - iBestFitCount = 0; - - while (unicodePosition < 0x10000) - { - // Get the next byte - char input = *pData; - pData++; - - // build our table: - if (input == 1) - { - // Use next data as our byte position - unicodePosition = (int)*pData; - pData++; - } - else if (input < 0x20 && input > 0) - { - // Advance input characters - unicodePosition += input; - } - else - { - if (input > 0) - { - // Use this character, may need to clean it up - int correctedChar = (int)input; - if (CleanUpBytes(ref correctedChar)) - { - arrayTemp[iBestFitCount++] = unchecked((char)unicodePosition); - // Have to map it to Unicode because best fit will need unicode value of best fit char. - arrayTemp[iBestFitCount++] = mapBytesToUnicode[correctedChar]; - - // This won't work if it won't round trip. - // We can't do this assert for CP 51932 & 50220 because they aren't - // calling CleanUpBytes() for best fit. All the string stuff here - // also makes this assert slow. - // Debug.Assert(arrayTemp[iBestFitCount-1] != (char)0xFFFD, String.Format( - // "[DBCSCodePageEncoding.ReadBestFitTable] No valid Unicode value {0:X4} for round trip bytes {1:X4}, encoding {2}", - // (int)mapBytesToUnicode[input], (int)input, CodePage)); - } - } - unicodePosition++; - } - } - - // Remember our array - arrayUnicodeBestFit = arrayTemp; - } - - } - } - - // GetByteCount - // Note: We start by assuming that the output will be the same as count. Having - // an encoder or fallback may change that assumption - internal override unsafe int GetByteCount(char* chars, int count, EncoderNLS encoder) - { - // Just need to ASSERT, this is called by something else internal that checked parameters already - Debug.Assert(count >= 0, "[DBCSCodePageEncoding.GetByteCount]count is negative"); - Debug.Assert(chars != null, "[DBCSCodePageEncoding.GetByteCount]chars is null"); - - // Assert because we shouldn't be able to have a null encoder. - Debug.Assert(encoderFallback != null, "[DBCSCodePageEncoding.GetByteCount]Attempting to use null fallback"); - - CheckMemorySection(); - - // Get any left over characters - char charLeftOver = (char)0; - if (encoder != null) - { - charLeftOver = encoder.charLeftOver; - - // Only count if encoder.m_throwOnOverflow - if (encoder.InternalHasFallbackBuffer && encoder.FallbackBuffer.Remaining > 0) - throw new ArgumentException(Environment.GetResourceString("Argument_EncoderFallbackNotEmpty", - this.EncodingName, encoder.Fallback.GetType())); - } - - // prepare our end - int byteCount = 0; - char* charEnd = chars + count; - - // For fallback we will need a fallback buffer - EncoderFallbackBuffer fallbackBuffer = null; - - // We may have a left over character from last time, try and process it. - if (charLeftOver > 0) - { - Debug.Assert(Char.IsHighSurrogate(charLeftOver), "[DBCSCodePageEncoding.GetByteCount]leftover character should be high surrogate"); - Debug.Assert(encoder != null, - "[DBCSCodePageEncoding.GetByteCount]Expect to have encoder if we have a charLeftOver"); - - // Since left over char was a surrogate, it'll have to be fallen back. - // Get Fallback - fallbackBuffer = encoder.FallbackBuffer; - fallbackBuffer.InternalInitialize(chars, charEnd, encoder, false); - // This will fallback a pair if *chars is a low surrogate - fallbackBuffer.InternalFallback(charLeftOver, ref chars); - } - - // Now we may have fallback char[] already (from the encoder) - - // We have to use fallback method. - char ch; - while ((ch = (fallbackBuffer == null) ? '\0' : fallbackBuffer.InternalGetNextChar()) != 0 || - chars < charEnd) - { - // First unwind any fallback - if (ch == 0) - { - // No fallback, just get next char - ch = *chars; - chars++; - } - - // get byte for this char - ushort sTemp = mapUnicodeToBytes[ch]; - - // Check for fallback, this'll catch surrogate pairs too. - if (sTemp == 0 && ch != (char)0) - { - if (fallbackBuffer == null) - { - // Initialize the buffer - if (encoder == null) - fallbackBuffer = this.encoderFallback.CreateFallbackBuffer(); - else - fallbackBuffer = encoder.FallbackBuffer; - fallbackBuffer.InternalInitialize(charEnd - count, charEnd, encoder, false); - } - - // Get Fallback - fallbackBuffer.InternalFallback(ch, ref chars); - continue; - } - - // We'll use this one - byteCount++; - if (sTemp >= 0x100) - byteCount++; - } - - return (int)byteCount; - } - - internal override unsafe int GetBytes(char* chars, int charCount, - byte* bytes, int byteCount, EncoderNLS encoder) - { - // Just need to ASSERT, this is called by something else internal that checked parameters already - Debug.Assert(bytes != null, "[DBCSCodePageEncoding.GetBytes]bytes is null"); - Debug.Assert(byteCount >= 0, "[DBCSCodePageEncoding.GetBytes]byteCount is negative"); - Debug.Assert(chars != null, "[DBCSCodePageEncoding.GetBytes]chars is null"); - Debug.Assert(charCount >= 0, "[DBCSCodePageEncoding.GetBytes]charCount is negative"); - - // Assert because we shouldn't be able to have a null encoder. - Debug.Assert(encoderFallback != null, "[DBCSCodePageEncoding.GetBytes]Attempting to use null encoder fallback"); - - CheckMemorySection(); - - // For fallback we will need a fallback buffer - EncoderFallbackBuffer fallbackBuffer = null; - - // prepare our end - char* charEnd = chars + charCount; - char* charStart = chars; - byte* byteStart = bytes; - byte* byteEnd = bytes + byteCount; - - // Get any left over characters - char charLeftOver = (char)0; - if (encoder != null) - { - charLeftOver = encoder.charLeftOver; - Debug.Assert(charLeftOver == 0 || Char.IsHighSurrogate(charLeftOver), - "[DBCSCodePageEncoding.GetBytes]leftover character should be high surrogate"); - - // Go ahead and get the fallback buffer (need leftover fallback if converting) - fallbackBuffer = encoder.FallbackBuffer; - fallbackBuffer.InternalInitialize(chars, charEnd, encoder, true); - - // If we're not converting we must not have a fallback buffer - if (encoder.m_throwOnOverflow && fallbackBuffer.Remaining > 0) - throw new ArgumentException(Environment.GetResourceString("Argument_EncoderFallbackNotEmpty", - this.EncodingName, encoder.Fallback.GetType())); - - // We may have a left over character from last time, try and process it. - if (charLeftOver > 0) - { - Debug.Assert(encoder != null, - "[DBCSCodePageEncoding.GetBytes]Expect to have encoder if we have a charLeftOver"); - - // Since left over char was a surrogate, it'll have to be fallen back. - // Get Fallback - fallbackBuffer.InternalFallback(charLeftOver, ref chars); - } - } - - // Now we may have fallback char[] already from the encoder - - // Go ahead and do it, including the fallback. - char ch; - while ((ch = (fallbackBuffer == null) ? '\0' : fallbackBuffer.InternalGetNextChar()) != 0 || - chars < charEnd) - { - // First unwind any fallback - if (ch == 0) - { - // No fallback, just get next char - ch = *chars; - chars++; - } - - // get byte for this char - ushort sTemp = mapUnicodeToBytes[ch]; - - // Check for fallback, this'll catch surrogate pairs too. - if (sTemp == 0 && ch != (char)0) - { - if (fallbackBuffer == null) - { - // Initialize the buffer - Debug.Assert(encoder == null, - "[DBCSCodePageEncoding.GetBytes]Expected delayed create fallback only if no encoder."); - fallbackBuffer = this.encoderFallback.CreateFallbackBuffer(); - fallbackBuffer.InternalInitialize(charEnd - charCount, charEnd, encoder, true); - } - - // Get Fallback - fallbackBuffer.InternalFallback(ch, ref chars); - continue; - } - - // We'll use this one (or two) - // Bounds check - - // Go ahead and add it, lead byte 1st if necessary - if (sTemp >= 0x100) - { - if (bytes + 1 >= byteEnd) - { - // didn't use this char, we'll throw or use buffer - if (fallbackBuffer == null || fallbackBuffer.bFallingBack == false) - { - Debug.Assert(chars > charStart, - "[DBCSCodePageEncoding.GetBytes]Expected chars to have advanced (double byte case)"); - chars--; // don't use last char - } - else - fallbackBuffer.MovePrevious(); // don't use last fallback - ThrowBytesOverflow(encoder, chars == charStart); // throw ? - break; // don't throw, stop - } - - *bytes = unchecked((byte)(sTemp >> 8)); - bytes++; - } - // Single byte - else if (bytes >= byteEnd) - { - // didn't use this char, we'll throw or use buffer - if (fallbackBuffer == null || fallbackBuffer.bFallingBack == false) - { - Debug.Assert(chars > charStart, - "[DBCSCodePageEncoding.GetBytes]Expected chars to have advanced (single byte case)"); - chars--; // don't use last char - } - else - fallbackBuffer.MovePrevious(); // don't use last fallback - ThrowBytesOverflow(encoder, chars == charStart); // throw ? - break; // don't throw, stop - } - - *bytes = unchecked((byte)(sTemp & 0xff)); - bytes++; - } - - // encoder stuff if we have one - if (encoder != null) - { - // Fallback stuck it in encoder if necessary, but we have to clear MustFlush cases - if (fallbackBuffer != null && !fallbackBuffer.bUsedEncoder) - // Clear it in case of MustFlush - encoder.charLeftOver = (char)0; - - // Set our chars used count - encoder.m_charsUsed = (int)(chars - charStart); - } - - // If we're not converting we must not have a fallback buffer - // (We don't really have a way to clear none-encoder using fallbacks however) -// Debug.Assert((encoder == null || encoder.m_throwOnOverflow) && -// (fallbackBuffer == null || fallbackBuffer.Remaining == 0), -// "[DBCSEncoding.GetBytes]Expected empty fallback buffer at end if not converting"); - - return (int)(bytes - byteStart); - } - - // This is internal and called by something else, - internal override unsafe int GetCharCount(byte* bytes, int count, DecoderNLS baseDecoder) - { - // Just assert, we're called internally so these should be safe, checked already - Debug.Assert(bytes != null, "[DBCSCodePageEncoding.GetCharCount]bytes is null"); - Debug.Assert(count >= 0, "[DBCSCodePageEncoding.GetCharCount]byteCount is negative"); - - CheckMemorySection(); - - // Fix our decoder - DBCSDecoder decoder = (DBCSDecoder)baseDecoder; - - // Get our fallback - DecoderFallbackBuffer fallbackBuffer = null; - - // We'll need to know where the end is - byte* byteEnd = bytes + count; - int charCount = count; // Assume 1 char / byte - - // Shouldn't have anything in fallback buffer for GetCharCount - // (don't have to check m_throwOnOverflow for count) - Debug.Assert(decoder == null || - !decoder.InternalHasFallbackBuffer || decoder.FallbackBuffer.Remaining == 0, - "[DBCSCodePageEncoding.GetCharCount]Expected empty fallback buffer at start"); - - // If we have a left over byte, use it - if (decoder != null && decoder.bLeftOver > 0) - { - // We have a left over byte? - if (count == 0) - { - // No input though - if (!decoder.MustFlush) - { - // Don't have to flush - return 0; - } - - - Debug.Assert(fallbackBuffer == null, - "[DBCSCodePageEncoding.GetCharCount]Expected empty fallback buffer"); - fallbackBuffer = decoder.FallbackBuffer; - fallbackBuffer.InternalInitialize(bytes, null); - - byte[] byteBuffer = new byte[] { unchecked((byte)decoder.bLeftOver) }; - return fallbackBuffer.InternalFallback(byteBuffer, bytes); - } - - // Get our full info - int iBytes = decoder.bLeftOver << 8; - iBytes |= (*bytes); - bytes++; - - // This is either 1 known char or fallback - // Already counted 1 char - // Look up our bytes - char cDecoder = mapBytesToUnicode[iBytes]; - if (cDecoder == 0 && iBytes != 0) - { - // Deallocate preallocated one - charCount--; - - // We'll need a fallback - Debug.Assert(fallbackBuffer == null, - "[DBCSCodePageEncoding.GetCharCount]Expected empty fallback buffer for unknown pair"); - fallbackBuffer = decoder.FallbackBuffer; - fallbackBuffer.InternalInitialize(byteEnd - count, null); - - // Do fallback, we know there're 2 bytes - byte[] byteBuffer = new byte[] { unchecked((byte)(iBytes >> 8)), unchecked((byte)iBytes) }; - charCount += fallbackBuffer.InternalFallback(byteBuffer, bytes); - } - // else we already reserved space for this one. - } - - // Loop, watch out for fallbacks - while (bytes < byteEnd) - { - // Faster if don't use *bytes++; - int iBytes = *bytes; - bytes++; - char c = mapBytesToUnicode[iBytes]; - - // See if it was a double byte character - if (c == LEAD_BYTE_CHAR) - { - // Its a lead byte - charCount--; // deallocate preallocated lead byte - if (bytes < byteEnd) - { - // Have another to use, so use it - iBytes <<= 8; - iBytes |= *bytes; - bytes++; - c = mapBytesToUnicode[iBytes]; - } - else - { - // No input left - if (decoder == null || decoder.MustFlush) - { - // have to flush anyway, set to unknown so we use fallback in a 'sec - charCount++; // reallocate deallocated lead byte - c = UNKNOWN_CHAR_FLAG; - } - else - { - // We'll stick it in decoder - break; - } - } - } - - // See if it was unknown. - // Unknown and known chars already allocated, but fallbacks aren't - if (c == UNKNOWN_CHAR_FLAG && iBytes != 0) - { - if (fallbackBuffer == null) - { - if (decoder == null) - fallbackBuffer = this.DecoderFallback.CreateFallbackBuffer(); - else - fallbackBuffer = decoder.FallbackBuffer; - fallbackBuffer.InternalInitialize(byteEnd - count, null); - } - - // Do fallback - charCount--; // Get rid of preallocated extra char - byte[] byteBuffer = null; - if (iBytes < 0x100) - byteBuffer = new byte[] { unchecked((byte)iBytes) }; - else - byteBuffer = new byte[] { unchecked((byte)(iBytes >> 8)), unchecked((byte)iBytes) }; - charCount += fallbackBuffer.InternalFallback(byteBuffer, bytes); - } - } - - // Shouldn't have anything in fallback buffer for GetChars - Debug.Assert(decoder == null || !decoder.m_throwOnOverflow || - !decoder.InternalHasFallbackBuffer || decoder.FallbackBuffer.Remaining == 0, - "[DBCSCodePageEncoding.GetCharCount]Expected empty fallback buffer at end"); - - // Return our count - return charCount; - } - - internal override unsafe int GetChars(byte* bytes, int byteCount, - char* chars, int charCount, DecoderNLS baseDecoder) - { - // Just need to ASSERT, this is called by something else internal that checked parameters already - Debug.Assert(bytes != null, "[DBCSCodePageEncoding.GetChars]bytes is null"); - Debug.Assert(byteCount >= 0, "[DBCSCodePageEncoding.GetChars]byteCount is negative"); - Debug.Assert(chars != null, "[DBCSCodePageEncoding.GetChars]chars is null"); - Debug.Assert(charCount >= 0, "[DBCSCodePageEncoding.GetChars]charCount is negative"); - - CheckMemorySection(); - - // Fix our decoder - DBCSDecoder decoder = (DBCSDecoder)baseDecoder; - - // We'll need to know where the end is - byte* byteStart = bytes; - byte* byteEnd = bytes + byteCount; - char* charStart = chars; - char* charEnd = chars + charCount; - bool bUsedDecoder = false; - - // Get our fallback - DecoderFallbackBuffer fallbackBuffer = null; - - // Shouldn't have anything in fallback buffer for GetChars - Debug.Assert(decoder == null || !decoder.m_throwOnOverflow || - !decoder.InternalHasFallbackBuffer || decoder.FallbackBuffer.Remaining == 0, - "[DBCSCodePageEncoding.GetChars]Expected empty fallback buffer at start"); - - // If we have a left over byte, use it - if (decoder != null && decoder.bLeftOver > 0) - { - // We have a left over byte? - if (byteCount == 0) - { - // No input though - if (!decoder.MustFlush) - { - // Don't have to flush - return 0; - } - - // Well, we're flushing, so use '?' or fallback - // fallback leftover byte - Debug.Assert(fallbackBuffer == null, - "[DBCSCodePageEncoding.GetChars]Expected empty fallback"); - fallbackBuffer = decoder.FallbackBuffer; - fallbackBuffer.InternalInitialize(bytes, charEnd); - - // If no room its hopeless, this was 1st fallback - byte[] byteBuffer = new byte[] { unchecked((byte)decoder.bLeftOver) }; - if (!fallbackBuffer.InternalFallback(byteBuffer, bytes, ref chars)) - ThrowCharsOverflow(decoder, true); - - decoder.bLeftOver = 0; - - // Done, return it - return (int)(chars-charStart); - } - - // Get our full info - int iBytes = decoder.bLeftOver << 8; - iBytes |= (*bytes); - bytes++; - - // Look up our bytes - char cDecoder = mapBytesToUnicode[iBytes]; - if (cDecoder == UNKNOWN_CHAR_FLAG && iBytes != 0) - { - Debug.Assert(fallbackBuffer == null, - "[DBCSCodePageEncoding.GetChars]Expected empty fallback for two bytes"); - fallbackBuffer = decoder.FallbackBuffer; - fallbackBuffer.InternalInitialize(byteEnd - byteCount, charEnd); - - byte[] byteBuffer = new byte[] { unchecked((byte)(iBytes >> 8)), unchecked((byte)iBytes) }; - if (!fallbackBuffer.InternalFallback(byteBuffer, bytes, ref chars)) - ThrowCharsOverflow(decoder, true); - } - else - { - // Do we have output room?, hopeless if not, this is first char - if (chars >= charEnd) - ThrowCharsOverflow(decoder, true); - - *(chars++) = cDecoder; - } - } - - // Loop, paying attention to our fallbacks. - while (bytes < byteEnd) - { - // Faster if don't use *bytes++; - int iBytes = *bytes; - bytes++; - char c = mapBytesToUnicode[iBytes]; - - // See if it was a double byte character - if (c == LEAD_BYTE_CHAR) - { - // Its a lead byte - if (bytes < byteEnd) - { - // Have another to use, so use it - iBytes <<= 8; - iBytes |= *bytes; - bytes++; - c = mapBytesToUnicode[iBytes]; - } - else - { - // No input left - if (decoder == null || decoder.MustFlush) - { - // have to flush anyway, set to unknown so we use fallback in a 'sec - c = UNKNOWN_CHAR_FLAG; - } - else - { - // Stick it in decoder - bUsedDecoder = true; - decoder.bLeftOver = (byte)iBytes; - break; - } - } - } - - // See if it was unknown - if (c == UNKNOWN_CHAR_FLAG && iBytes != 0) - { - if (fallbackBuffer == null) - { - if (decoder == null) - fallbackBuffer = this.DecoderFallback.CreateFallbackBuffer(); - else - fallbackBuffer = decoder.FallbackBuffer; - fallbackBuffer.InternalInitialize(byteEnd - byteCount, charEnd); - } - - // Do fallback - byte[] byteBuffer = null; - if (iBytes < 0x100) - byteBuffer = new byte[] { unchecked((byte)iBytes) }; - else - byteBuffer = new byte[] { unchecked((byte)(iBytes >> 8)), unchecked((byte)iBytes) }; - if (!fallbackBuffer.InternalFallback(byteBuffer, bytes, ref chars)) - { - // May or may not throw, but we didn't get these byte(s) - Debug.Assert(bytes >= byteStart + byteBuffer.Length, - "[DBCSCodePageEncoding.GetChars]Expected bytes to have advanced for fallback"); - bytes-=byteBuffer.Length; // didn't use these byte(s) - fallbackBuffer.InternalReset(); // Didn't fall this back - ThrowCharsOverflow(decoder, bytes == byteStart); // throw? - break; // don't throw, but stop loop - } - } - else - { - // Do we have buffer room? - if (chars >= charEnd) - { - // May or may not throw, but we didn't get these byte(s) - Debug.Assert(bytes > byteStart, - "[DBCSCodePageEncoding.GetChars]Expected bytes to have advanced for lead byte"); - bytes--; // unused byte - if (iBytes >= 0x100) - { - Debug.Assert(bytes > byteStart, - "[DBCSCodePageEncoding.GetChars]Expected bytes to have advanced for trail byte"); - bytes--; // 2nd unused byte - } - ThrowCharsOverflow(decoder, bytes == byteStart); // throw? - break; // don't throw, but stop loop - } - - *(chars++) = c; - } - } - - // We already stuck it in encoder if necessary, but we have to clear cases where nothing new got into decoder - if (decoder != null) - { - // Clear it in case of MustFlush - if (bUsedDecoder == false) - { - decoder.bLeftOver = 0; - } - - // Remember our count - decoder.m_bytesUsed = (int)(bytes - byteStart); - } - - // Shouldn't have anything in fallback buffer for GetChars - Debug.Assert(decoder == null || !decoder.m_throwOnOverflow || - !decoder.InternalHasFallbackBuffer || decoder.FallbackBuffer.Remaining == 0, - "[DBCSCodePageEncoding.GetChars]Expected empty fallback buffer at end"); - - // Return length of our output - return (int)(chars - charStart); - } - - public override int GetMaxByteCount(int charCount) - { - if (charCount < 0) - throw new ArgumentOutOfRangeException(nameof(charCount), - Environment.GetResourceString("ArgumentOutOfRange_NeedNonNegNum")); - Contract.EndContractBlock(); - - // Characters would be # of characters + 1 in case high surrogate is ? * max fallback - long byteCount = (long)charCount + 1; - - if (EncoderFallback.MaxCharCount > 1) - byteCount *= EncoderFallback.MaxCharCount; - - // 2 to 1 is worst case. Already considered surrogate fallback - byteCount *= 2; - - if (byteCount > 0x7fffffff) - throw new ArgumentOutOfRangeException(nameof(charCount), Environment.GetResourceString("ArgumentOutOfRange_GetByteCountOverflow")); - - return (int)byteCount; - } - - public override int GetMaxCharCount(int byteCount) - { - if (byteCount < 0) - throw new ArgumentOutOfRangeException(nameof(byteCount), - Environment.GetResourceString("ArgumentOutOfRange_NeedNonNegNum")); - Contract.EndContractBlock(); - - // DBCS is pretty much the same, but could have hanging high byte making extra ? and fallback for unknown - long charCount = ((long)byteCount + 1); - - // 1 to 1 for most characters. Only surrogates with fallbacks have less, unknown fallbacks could be longer. - if (DecoderFallback.MaxCharCount > 1) - charCount *= DecoderFallback.MaxCharCount; - - if (charCount > 0x7fffffff) - throw new ArgumentOutOfRangeException(nameof(byteCount), Environment.GetResourceString("ArgumentOutOfRange_GetCharCountOverflow")); - - return (int)charCount; - } - - public override Decoder GetDecoder() - { - return new DBCSDecoder(this); - } - - [Serializable] - internal class DBCSDecoder : DecoderNLS - { - // Need a place for the last left over byte - internal byte bLeftOver = 0; - - public DBCSDecoder(DBCSCodePageEncoding encoding) : base(encoding) - { - // Base calls reset - } - - public override void Reset() - { - this.bLeftOver = 0; - if (m_fallbackBuffer != null) - m_fallbackBuffer.Reset(); - } - - // Anything left in our decoder? - internal override bool HasState - { - get - { - return (this.bLeftOver != 0); - } - } - } - } -} -#endif // FEATURE_CODEPAGES_FILE - |