summary | refs | log | tree | commit | diff
path: root/src/mscorlib/src/System/Text
diff options
context:
space:
mode:
Diffstat (limited to 'src/mscorlib/src/System/Text')
-rw-r--r-- src/mscorlib/src/System/Text/ASCIIEncoding.cs | 9
-rw-r--r-- src/mscorlib/src/System/Text/BaseCodePageEncoding.cs | 332
-rw-r--r-- src/mscorlib/src/System/Text/CodePageEncoding.cs | 136
-rw-r--r-- src/mscorlib/src/System/Text/DBCSCodePageEncoding.cs | 1194
-rw-r--r-- src/mscorlib/src/System/Text/Decoder.cs | 9
-rw-r--r-- src/mscorlib/src/System/Text/DecoderFallback.cs | 8
-rw-r--r-- src/mscorlib/src/System/Text/DecoderNLS.cs | 11
-rw-r--r-- src/mscorlib/src/System/Text/EUCJPEncoding.cs | 183
-rw-r--r-- src/mscorlib/src/System/Text/Encoder.cs | 8
-rw-r--r-- src/mscorlib/src/System/Text/EncoderBestFitFallback.cs | 2
-rw-r--r-- src/mscorlib/src/System/Text/EncoderExceptionFallback.cs | 2
-rw-r--r-- src/mscorlib/src/System/Text/EncoderNLS.cs | 11
-rw-r--r-- src/mscorlib/src/System/Text/EncoderReplacementFallback.cs | 2
-rw-r--r-- src/mscorlib/src/System/Text/Encoding.cs | 232
-rw-r--r-- src/mscorlib/src/System/Text/EncodingForwarder.cs | 6
-rw-r--r-- src/mscorlib/src/System/Text/EncodingNLS.cs | 1
-rw-r--r-- src/mscorlib/src/System/Text/EncodingProvider.cs | 1
-rw-r--r-- src/mscorlib/src/System/Text/GB18030Encoding.cs | 1365
-rw-r--r-- src/mscorlib/src/System/Text/ISCIIEncoding.cs | 2621
-rw-r--r-- src/mscorlib/src/System/Text/ISO2022Encoding.cs | 1983
-rw-r--r-- src/mscorlib/src/System/Text/Latin1Encoding.cs | 3
-rw-r--r-- src/mscorlib/src/System/Text/MLangCodePageEncoding.cs | 172
-rw-r--r-- src/mscorlib/src/System/Text/Normalization.Windows.cs | 8
-rw-r--r-- src/mscorlib/src/System/Text/Normalization.cs | 7
-rw-r--r-- src/mscorlib/src/System/Text/SBCSCodePageEncoding.cs | 1009
-rw-r--r-- src/mscorlib/src/System/Text/StringBuilder.cs | 12
-rw-r--r-- src/mscorlib/src/System/Text/SurrogateEncoder.cs | 57
-rw-r--r-- src/mscorlib/src/System/Text/UTF7Encoding.cs | 11
-rw-r--r-- src/mscorlib/src/System/Text/UTF8Encoding.cs | 7
-rw-r--r-- src/mscorlib/src/System/Text/UnicodeEncoding.cs | 8
30 files changed, 20 insertions, 9390 deletions
diff --git a/src/mscorlib/src/System/Text/ASCIIEncoding.cs b/src/mscorlib/src/System/Text/ASCIIEncoding.cs
index fc7589f2d8..07b7f3e890 100644
--- a/src/mscorlib/src/System/Text/ASCIIEncoding.cs
+++ b/src/mscorlib/src/System/Text/ASCIIEncoding.cs
@@ -6,7 +6,6 @@ namespace System.Text
{
using System;
using System.Runtime.Serialization;
- using System.Security.Permissions;
using System.Diagnostics;
using System.Diagnostics.Contracts;
@@ -22,7 +21,6 @@ namespace System.Text
//
[Serializable]
-[System.Runtime.InteropServices.ComVisible(true)]
public class ASCIIEncoding : Encoding
{
// Used by Encoding.ASCII for lazy initialization
@@ -72,7 +70,6 @@ namespace System.Text
}
[CLSCompliant(false)]
- [System.Runtime.InteropServices.ComVisible(false)]
public override unsafe int GetByteCount(char* chars, int count)
{
return EncodingForwarder.GetByteCount(this, chars, count);
@@ -100,7 +97,6 @@ namespace System.Text
}
[CLSCompliant(false)]
- [System.Runtime.InteropServices.ComVisible(false)]
public override unsafe int GetBytes(char* chars, int charCount, byte* bytes, int byteCount)
{
return EncodingForwarder.GetBytes(this, chars, charCount, bytes, byteCount);
@@ -115,7 +111,6 @@ namespace System.Text
}
[CLSCompliant(false)]
- [System.Runtime.InteropServices.ComVisible(false)]
public override unsafe int GetCharCount(byte* bytes, int count)
{
return EncodingForwarder.GetCharCount(this, bytes, count);
@@ -128,7 +123,6 @@ namespace System.Text
}
[CLSCompliant(false)]
- [System.Runtime.InteropServices.ComVisible(false)]
public unsafe override int GetChars(byte* bytes, int byteCount, char* chars, int charCount)
{
return EncodingForwarder.GetChars(this, bytes, byteCount, chars, charCount);
@@ -741,7 +735,6 @@ namespace System.Text
// True if and only if the encoding only uses single byte code points. (Ie, ASCII, 1252, etc)
- [System.Runtime.InteropServices.ComVisible(false)]
public override bool IsSingleByte
{
get
@@ -750,14 +743,12 @@ namespace System.Text
}
}
- [System.Runtime.InteropServices.ComVisible(false)]
public override Decoder GetDecoder()
{
return new DecoderNLS(this);
}
- [System.Runtime.InteropServices.ComVisible(false)]
public override Encoder GetEncoder()
{
return new EncoderNLS(this);
diff --git a/src/mscorlib/src/System/Text/BaseCodePageEncoding.cs b/src/mscorlib/src/System/Text/BaseCodePageEncoding.cs
deleted file mode 100644
index 0a42237dc1..0000000000
--- a/src/mscorlib/src/System/Text/BaseCodePageEncoding.cs
+++ /dev/null
@@ -1,332 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-// See the LICENSE file in the project root for more information.
-
-#if FEATURE_CODEPAGES_FILE
-namespace System.Text
-{
- using System;
- using System.Diagnostics;
- using System.Diagnostics.Contracts;
- using System.Globalization;
- using System.Runtime.InteropServices;
- using System.Security;
- using System.Collections;
- using System.Runtime.CompilerServices;
- using System.Runtime.Serialization;
- using System.Runtime.Versioning;
- using System.Security.Permissions;
- using Microsoft.Win32.SafeHandles;
-
- // Our input file data structures look like:
- //
- // Header Structure Looks Like:
- // struct NLSPlusHeader
- // {
- // WORD[16] filename; // 32 bytes
- // WORD[4] version; // 8 bytes = 40 // I.e: 3, 2, 0, 0
- // WORD count; // 2 bytes = 42 // Number of code page index's that'll follow
- // }
- //
- // Each code page section looks like:
- // struct NLSCodePageIndex
- // {
- // WORD[16] codePageName; // 32 bytes
- // WORD codePage; // +2 bytes = 34
- // WORD byteCount; // +2 bytes = 36
- // DWORD offset; // +4 bytes = 40 // Bytes from beginning of FILE.
- // }
- //
- // Each code page then has its own header
- // struct NLSCodePage
- // {
- // WORD[16] codePageName; // 32 bytes
- // WORD[4] version; // 8 bytes = 40 // I.e: 3.2.0.0
- // WORD codePage; // 2 bytes = 42
- // WORD byteCount; // 2 bytes = 44 // 1 or 2 byte code page (SBCS or DBCS)
- // WORD unicodeReplace; // 2 bytes = 46 // default replacement unicode character
- // WORD byteReplace; // 2 bytes = 48 // default replacement byte(s)
- // BYTE[] data; // data section
- // }
-
- [Serializable]
- internal abstract class BaseCodePageEncoding : EncodingNLS, ISerializable
- {
- // Static & Const stuff
- internal const String CODE_PAGE_DATA_FILE_NAME = "codepages.nlp";
- [NonSerialized]
- protected int dataTableCodePage;
-
- // Variables to help us allocate/mark our memory section correctly
- [NonSerialized]
- protected bool bFlagDataTable = true;
- [NonSerialized]
- protected int iExtraBytes = 0;
-
- // Our private unicode to bytes best fit array and visa versa.
- [NonSerialized]
- protected char[] arrayUnicodeBestFit = null;
- [NonSerialized]
- protected char[] arrayBytesBestFit = null;
-
- // This is used to help ISCII, EUCJP and ISO2022 figure out they're MlangEncodings
- [NonSerialized]
- protected bool m_bUseMlangTypeForSerialization = false;
-
- static BaseCodePageEncoding()
- {
- }
-
- //
- // This is the header for the native data table that we load from CODE_PAGE_DATA_FILE_NAME.
- //
- // Explicit layout is used here since a syntax like char[16] can not be used in sequential layout.
- [StructLayout(LayoutKind.Explicit)]
- internal unsafe struct CodePageDataFileHeader
- {
- [FieldOffset(0)]
- internal char TableName; // WORD[16]
- [FieldOffset(0x20)]
- internal ushort Version; // WORD[4]
- [FieldOffset(0x28)]
- internal short CodePageCount; // WORD
- [FieldOffset(0x2A)]
- internal short unused1; // Add a unused WORD so that CodePages is aligned with DWORD boundary.
- // Otherwise, 64-bit version will fail.
- [FieldOffset(0x2C)]
- internal CodePageIndex CodePages; // Start of code page index
- }
-
- [StructLayout(LayoutKind.Explicit, Pack=2)]
- internal unsafe struct CodePageIndex
- {
- [FieldOffset(0)]
- internal char CodePageName; // WORD[16]
- [FieldOffset(0x20)]
- internal short CodePage; // WORD
- [FieldOffset(0x22)]
- internal short ByteCount; // WORD
- [FieldOffset(0x24)]
- internal int Offset; // DWORD
- }
-
- [StructLayout(LayoutKind.Explicit)]
- internal unsafe struct CodePageHeader
- {
- [FieldOffset(0)]
- internal char CodePageName; // WORD[16]
- [FieldOffset(0x20)]
- internal ushort VersionMajor; // WORD
- [FieldOffset(0x22)]
- internal ushort VersionMinor; // WORD
- [FieldOffset(0x24)]
- internal ushort VersionRevision;// WORD
- [FieldOffset(0x26)]
- internal ushort VersionBuild; // WORD
- [FieldOffset(0x28)]
- internal short CodePage; // WORD
- [FieldOffset(0x2a)]
- internal short ByteCount; // WORD // 1 or 2 byte code page (SBCS or DBCS)
- [FieldOffset(0x2c)]
- internal char UnicodeReplace; // WORD // default replacement unicode character
- [FieldOffset(0x2e)]
- internal ushort ByteReplace; // WORD // default replacement bytes
- [FieldOffset(0x30)]
- internal short FirstDataWord; // WORD[]
- }
-
- // Initialize our global stuff
- unsafe static CodePageDataFileHeader* m_pCodePageFileHeader =
- (CodePageDataFileHeader*)GlobalizationAssembly.GetGlobalizationResourceBytePtr(
- typeof(CharUnicodeInfo).Assembly, CODE_PAGE_DATA_FILE_NAME);
-
- // Real variables
- [NonSerialized]
- unsafe protected CodePageHeader* pCodePage = null;
-
- // Safe handle wrapper around section map view
- [NonSerialized]
- protected SafeViewOfFileHandle safeMemorySectionHandle = null;
-
- // Safe handle wrapper around mapped file handle
- [NonSerialized]
- protected SafeFileMappingHandle safeFileMappingHandle = null;
-
- internal BaseCodePageEncoding(int codepage) : this(codepage, codepage)
- {
- }
-
- internal BaseCodePageEncoding(int codepage, int dataCodePage) :
- base(codepage == 0? Microsoft.Win32.Win32Native.GetACP(): codepage)
- {
- // Remember number of code page that we'll be using the table for.
- dataTableCodePage = dataCodePage;
- LoadCodePageTables();
- }
-
- // Constructor called by serialization.
- internal BaseCodePageEncoding(SerializationInfo info, StreamingContext context) : base(0)
- {
- // We cannot ever call this, we've proxied ourselved to CodePageEncoding
- throw new ArgumentNullException("this");
- }
-
- // ISerializable implementation
- void ISerializable.GetObjectData(SerializationInfo info, StreamingContext context)
- {
- // Make sure to get the base stuff too This throws if info is null
- SerializeEncoding(info, context);
- Debug.Assert(info!=null, "[BaseCodePageEncoding.GetObjectData] Expected null info to throw");
-
- // Just need Everett maxCharSize (BaseCodePageEncoding) or m_maxByteSize (MLangBaseCodePageEncoding)
- info.AddValue(m_bUseMlangTypeForSerialization ? "m_maxByteSize" : "maxCharSize",
- this.IsSingleByte ? 1 : 2);
-
- // Use this class or MLangBaseCodePageEncoding as our deserializer.
- info.SetType(m_bUseMlangTypeForSerialization ? typeof(MLangCodePageEncoding) :
- typeof(CodePageEncoding));
- }
-
- // We need to load tables for our code page
- private unsafe void LoadCodePageTables()
- {
- CodePageHeader* pCodePage = FindCodePage(dataTableCodePage);
-
- // Make sure we have one
- if (pCodePage == null)
- {
- // Didn't have one
- throw new NotSupportedException(
- Environment.GetResourceString("NotSupported_NoCodepageData", CodePage));
- }
-
- // Remember our code page
- this.pCodePage = pCodePage;
-
- // We had it, so load it
- LoadManagedCodePage();
- }
-
- // Look up the code page pointer
- private static unsafe CodePageHeader* FindCodePage(int codePage)
- {
- // We'll have to loop through all of the m_pCodePageIndex[] items to find our code page, this isn't
- // binary or anything so its not monsterously fast.
- for (int i = 0; i < m_pCodePageFileHeader->CodePageCount; i++)
- {
- CodePageIndex* pCodePageIndex = (&(m_pCodePageFileHeader->CodePages)) + i;
-
- if (pCodePageIndex->CodePage == codePage)
- {
- // Found it!
- CodePageHeader* pCodePage =
- (CodePageHeader*)((byte*)m_pCodePageFileHeader + pCodePageIndex->Offset);
- return pCodePage;
- }
- }
-
- // Couldn't find it
- return null;
- }
-
- // Get our code page byte count
- internal static unsafe int GetCodePageByteSize(int codePage)
- {
- // Get our code page info
- CodePageHeader* pCodePage = FindCodePage(codePage);
-
- // If null return 0
- if (pCodePage == null)
- return 0;
-
- Debug.Assert(pCodePage->ByteCount == 1 || pCodePage->ByteCount == 2,
- "[BaseCodePageEncoding] Code page (" + codePage + ") has invalid byte size (" + pCodePage->ByteCount + ") in table");
- // Return what it says for byte count
- return pCodePage->ByteCount;
- }
-
- // We have a managed code page entry, so load our tables
- protected abstract unsafe void LoadManagedCodePage();
-
- // Allocate memory to load our code page
- protected unsafe byte* GetSharedMemory(int iSize)
- {
- // Build our name
- String strName = GetMemorySectionName();
-
- IntPtr mappedFileHandle;
-
- // This gets shared memory for our map. If its can't, it gives us clean memory.
- Byte *pMemorySection = EncodingTable.nativeCreateOpenFileMapping(strName, iSize, out mappedFileHandle);
- Debug.Assert(pMemorySection != null,
- "[BaseCodePageEncoding.GetSharedMemory] Expected non-null memory section to be opened");
-
- // If that failed, we have to die.
- if (pMemorySection == null)
- throw new OutOfMemoryException(
- Environment.GetResourceString("Arg_OutOfMemoryException"));
-
- // if we have null file handle. this means memory was allocated after
- // failing to open the mapped file.
-
- if (mappedFileHandle != IntPtr.Zero)
- {
- safeMemorySectionHandle = new SafeViewOfFileHandle((IntPtr) pMemorySection, true);
- safeFileMappingHandle = new SafeFileMappingHandle(mappedFileHandle, true);
- }
-
- return pMemorySection;
- }
-
- protected unsafe virtual String GetMemorySectionName()
- {
- int iUseCodePage = this.bFlagDataTable ? dataTableCodePage : CodePage;
-
- String strName = String.Format(CultureInfo.InvariantCulture, "NLS_CodePage_{0}_{1}_{2}_{3}_{4}",
- iUseCodePage, this.pCodePage->VersionMajor, this.pCodePage->VersionMinor,
- this.pCodePage->VersionRevision, this.pCodePage->VersionBuild);
-
- return strName;
- }
-
- protected abstract unsafe void ReadBestFitTable();
-
- internal override char[] GetBestFitUnicodeToBytesData()
- {
- // Read in our best fit table if necessary
- if (arrayUnicodeBestFit == null) ReadBestFitTable();
-
- Debug.Assert(arrayUnicodeBestFit != null,
- "[BaseCodePageEncoding.GetBestFitUnicodeToBytesData]Expected non-null arrayUnicodeBestFit");
-
- // Normally we don't have any best fit data.
- return arrayUnicodeBestFit;
- }
-
- internal override char[] GetBestFitBytesToUnicodeData()
- {
- // Read in our best fit table if necessary
- if (arrayBytesBestFit == null) ReadBestFitTable();
-
- Debug.Assert(arrayBytesBestFit != null,
- "[BaseCodePageEncoding.GetBestFitBytesToUnicodeData]Expected non-null arrayBytesBestFit");
-
- // Normally we don't have any best fit data.
- return arrayBytesBestFit;
- }
-
- // During the AppDomain shutdown the Encoding class may already finalized and the memory section
- // is invalid. so we detect that by validating the memory section handle then re-initialize the memory
- // section by calling LoadManagedCodePage() method and eventually the mapped file handle and
- // the memory section pointer will get finalized one more time.
- internal unsafe void CheckMemorySection()
- {
- if (safeMemorySectionHandle != null && safeMemorySectionHandle.DangerousGetHandle() == IntPtr.Zero)
- {
- LoadManagedCodePage();
- }
- }
- }
-}
-
-#endif // FEATURE_CODEPAGES_FILE
diff --git a/src/mscorlib/src/System/Text/CodePageEncoding.cs b/src/mscorlib/src/System/Text/CodePageEncoding.cs
deleted file mode 100644
index 7805c6580a..0000000000
--- a/src/mscorlib/src/System/Text/CodePageEncoding.cs
+++ /dev/null
@@ -1,136 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-// See the LICENSE file in the project root for more information.
-
-
-// WARNING:
-//
-// This is just an IObjectReference proxy for the Code Page Encodings.
-namespace System.Text
-{
- using System;
- using System.Runtime.Serialization;
- using System.Security.Permissions;
- using System.Diagnostics;
- using System.Diagnostics.Contracts;
-
- /*=================================CodePageEncoding==================================
- ** This class is here only to deserialize the Code Page classes from Everett (V1.1) into
- ** Appropriate Whidbey (V2.0) objects. We also serialize the Whidbey classes
- ** using this proxy since we pretty much need one anyway and that solves Whidbey
- ** to Everett compatibility as well.
- ==============================================================================*/
-
- [Serializable]
- internal sealed class CodePageEncoding : IObjectReference, ISerializable
- {
- // Temp stuff
- [NonSerialized]
- private int m_codePage;
- [NonSerialized]
- private bool m_isReadOnly;
- [NonSerialized]
- private bool m_deserializedFromEverett = false;
-
- [NonSerialized]
- private EncoderFallback encoderFallback = null;
- [NonSerialized]
- private DecoderFallback decoderFallback = null;
-
- // Might need this when GetRealObjecting
- [NonSerialized]
- private Encoding realEncoding = null;
-
- // Constructor called by serialization.
- internal CodePageEncoding(SerializationInfo info, StreamingContext context)
- {
- // Any info?
- if (info==null) throw new ArgumentNullException(nameof(info));
- Contract.EndContractBlock();
-
- // All versions have a code page
- this.m_codePage = (int)info.GetValue("m_codePage", typeof(int));
-
- // See if we have a code page
- try
- {
- //
- // Try Whidbey V2.0 Fields
- //
- this.m_isReadOnly = (bool)info.GetValue("m_isReadOnly", typeof(bool));
-
- this.encoderFallback = (EncoderFallback)info.GetValue("encoderFallback", typeof(EncoderFallback));
- this.decoderFallback = (DecoderFallback)info.GetValue("decoderFallback", typeof(DecoderFallback));
- }
- catch (SerializationException)
- {
- //
- // Didn't have Whidbey things, must be Everett
- //
- this.m_deserializedFromEverett = true;
-
- // May as well be read only
- this.m_isReadOnly = true;
- }
- }
-
- // Just get it from GetEncoding
- public Object GetRealObject(StreamingContext context)
- {
- // Get our encoding (Note: This has default fallbacks for readonly and everett cases)
- this.realEncoding = Encoding.GetEncoding(this.m_codePage);
-
- // If its read only then it uses default fallbacks, otherwise pick up the new ones
- // Otherwise we want to leave the new one read only
- if (!this.m_deserializedFromEverett && !this.m_isReadOnly)
- {
- this.realEncoding = (Encoding)this.realEncoding.Clone();
- this.realEncoding.EncoderFallback = this.encoderFallback;
- this.realEncoding.DecoderFallback = this.decoderFallback;
- }
-
- return this.realEncoding;
- }
-
- // ISerializable implementation
- void ISerializable.GetObjectData(SerializationInfo info, StreamingContext context)
- {
- // We cannot ever call this.
- Debug.Assert(false, "Didn't expect to make it to CodePageEncoding ISerializable.GetObjectData");
- throw new ArgumentException(Environment.GetResourceString("Arg_ExecutionEngineException"));
- }
-
- // Same problem with the Decoder, this only happens with Everett Decoders
- [Serializable]
- internal sealed class Decoder : IObjectReference, ISerializable
- {
- // Might need this when GetRealObjecting
- [NonSerialized]
- private Encoding realEncoding = null;
-
- // Constructor called by serialization, have to handle deserializing from Everett
- internal Decoder(SerializationInfo info, StreamingContext context)
- {
- // Any info?
- if (info==null) throw new ArgumentNullException(nameof(info));
- Contract.EndContractBlock();
-
- this.realEncoding = (Encoding)info.GetValue("encoding", typeof(Encoding));
- }
-
- // Just get it from GetDecider
- public Object GetRealObject(StreamingContext context)
- {
- return this.realEncoding.GetDecoder();
- }
-
- // ISerializable implementation, get data for this object
- void ISerializable.GetObjectData(SerializationInfo info, StreamingContext context)
- {
- // We cannot ever call this.
- Debug.Assert(false, "Didn't expect to make it to CodePageEncoding.Decoder.GetObjectData");
- throw new ArgumentException(Environment.GetResourceString("Arg_ExecutionEngineException"));
- }
- }
- }
-}
diff --git a/src/mscorlib/src/System/Text/DBCSCodePageEncoding.cs b/src/mscorlib/src/System/Text/DBCSCodePageEncoding.cs
deleted file mode 100644
index 28b85d591e..0000000000
--- a/src/mscorlib/src/System/Text/DBCSCodePageEncoding.cs
+++ /dev/null
@@ -1,1194 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-// See the LICENSE file in the project root for more information.
-
-#if FEATURE_CODEPAGES_FILE // requires BaseCodePageEncooding
-namespace System.Text
-{
- using System;
- using System.Diagnostics;
- using System.Diagnostics.Contracts;
- using System.Text;
- using System.Threading;
- using System.Runtime.Serialization;
- using System.Security;
- using System.Security.Permissions;
-
- // DBCSCodePageEncoding
- //
- [Serializable]
- internal class DBCSCodePageEncoding : BaseCodePageEncoding, ISerializable
- {
- // Pointers to our memory section parts
- [NonSerialized]
- protected unsafe char* mapBytesToUnicode = null; // char 65536
- [NonSerialized]
- protected unsafe ushort* mapUnicodeToBytes = null; // byte 65536
- [NonSerialized]
- protected unsafe int* mapCodePageCached = null; // to remember which CP is cached
-
- [NonSerialized]
- protected const char UNKNOWN_CHAR_FLAG=(char)0x0;
- [NonSerialized]
- protected const char UNICODE_REPLACEMENT_CHAR=(char)0xFFFD;
- [NonSerialized]
- protected const char LEAD_BYTE_CHAR=(char)0xFFFE; // For lead bytes
-
- // Note that even though we provide bytesUnknown and byteCountUnknown,
- // They aren't actually used because of the fallback mechanism. (char is though)
- [NonSerialized]
- ushort bytesUnknown;
- [NonSerialized]
- int byteCountUnknown;
- [NonSerialized]
- protected char charUnknown = (char)0;
-
- public DBCSCodePageEncoding(int codePage) : this(codePage, codePage)
- {
- }
-
- internal DBCSCodePageEncoding(int codePage, int dataCodePage) : base(codePage, dataCodePage)
- {
- }
-
- // Constructor called by serialization.
- // Note: We use the base GetObjectData however
- internal DBCSCodePageEncoding(SerializationInfo info, StreamingContext context) : base(0)
- {
- // Actually this can't ever get called, CodePageEncoding is our proxy
- Debug.Assert(false, "Didn't expect to make it to DBCSCodePageEncoding serialization constructor");
- throw new ArgumentNullException("this");
- }
-
- // MBCS data section:
- //
- // We treat each multibyte pattern as 2 bytes in our table. If its a single byte, then the high byte
- // for that position will be 0. When the table is loaded, leading bytes are flagged with 0xFFFE, so
- // when reading the table look up with each byte. If the result is 0xFFFE, then use 2 bytes to read
- // further data. FFFF is a special value indicating that the unicode code is the same as the
- // character code (this helps us support code points < 0x20). FFFD is used as replacement character.
- //
- // Normal table:
- // WCHAR* - Starting with MB code point 0.
- // FFFF indicates we are to use the multibyte value for our code point.
- // FFFE is the lead byte mark. (This should only appear in positions < 0x100)
- // FFFD is the replacement (unknown character) mark.
- // 2-20 means to advance the pointer 2-0x20 characters.
- // 1 means that to advance to the multibyte position contained in the next char.
- // 0 nothing special (I don't think its possible.)
- //
- // Table ends when multibyte position has advanced to 0xFFFF.
- //
- // Bytes->Unicode Best Fit table:
- // WCHAR* - Same as normal table, except first wchar is byte position to start at.
- //
- // Unicode->Bytes Best Fit Table:
- // WCHAR* - Same as normal table, except first wchar is char position to start at and
- // we loop through unicode code points and the table has the byte points that
- // corrospond to those unicode code points.
- // We have a managed code page entry, so load our tables
- //
- protected override unsafe void LoadManagedCodePage()
- {
- // Should be loading OUR code page
- Debug.Assert(pCodePage->CodePage == this.dataTableCodePage,
- "[DBCSCodePageEncoding.LoadManagedCodePage]Expected to load data table code page");
-
- // Make sure we're really a 1 byte code page
- if (pCodePage->ByteCount != 2)
- throw new NotSupportedException(
- Environment.GetResourceString("NotSupported_NoCodepageData", CodePage));
- // Remember our unknown bytes & chars
- bytesUnknown = pCodePage->ByteReplace;
- charUnknown = pCodePage->UnicodeReplace;
-
- // Need to make sure the fallback buffer's fallback char is correct
- if (this.DecoderFallback.IsMicrosoftBestFitFallback)
- {
- ((InternalDecoderBestFitFallback)(this.DecoderFallback)).cReplacement = charUnknown;
- }
-
- // Is our replacement bytesUnknown a single or double byte character?
- byteCountUnknown = 1;
- if (bytesUnknown > 0xff)
- byteCountUnknown++;
-
- // We use fallback encoder, which uses ?, which so far all of our tables do as well
- Debug.Assert(bytesUnknown == 0x3f,
- "[DBCSCodePageEncoding.LoadManagedCodePage]Expected 0x3f (?) as unknown byte character");
-
- // Get our mapped section (bytes to allocate = 2 bytes per 65536 Unicode chars + 2 bytes per 65536 DBCS chars)
- // Plus 4 byte to remember CP # when done loading it. (Don't want to get IA64 or anything out of alignment)
- byte *pMemorySection = GetSharedMemory(65536 * 2 * 2 + 4 + this.iExtraBytes);
-
- mapBytesToUnicode = (char*)pMemorySection;
- mapUnicodeToBytes = (ushort*)(pMemorySection + 65536 * 2);
- mapCodePageCached = (int*)(pMemorySection + 65536 * 2 * 2 + this.iExtraBytes);
-
- // If its cached (& filled in) we don't have to do anything else
- if (*mapCodePageCached != 0)
- {
- Debug.Assert(((*mapCodePageCached == this.dataTableCodePage && this.bFlagDataTable) ||
- (*mapCodePageCached == this.CodePage && !this.bFlagDataTable)),
- "[DBCSCodePageEncoding.LoadManagedCodePage]Expected mapped section cached page flag to be set to data table or regular code page.");
-
- // Special case for GB18030 because it mangles its own code page after this function
- if ((*mapCodePageCached != this.dataTableCodePage && this.bFlagDataTable) ||
- (*mapCodePageCached != this.CodePage && !this.bFlagDataTable))
- throw new OutOfMemoryException(
- Environment.GetResourceString("Arg_OutOfMemoryException"));
-
- // If its cached (& filled in) we don't have to do anything else
- return;
- }
-
- // Need to read our data file and fill in our section.
- // WARNING: Multiple code pieces could do this at once (so we don't have to lock machine-wide)
- // so be careful here. Only stick legal values in here, don't stick temporary values.
-
- // Move to the beginning of the data section
- char* pData = (char*)&(pCodePage->FirstDataWord);
-
- // We start at bytes position 0
- int bytePosition = 0;
- int useBytes = 0;
-
- while (bytePosition < 0x10000)
- {
- // Get the next byte
- char input = *pData;
- pData++;
-
- // build our table:
- if (input == 1)
- {
- // Use next data as our byte position
- bytePosition = (int)(*pData);
- pData++;
- continue;
- }
- else if (input < 0x20 && input > 0)
- {
- // Advance input characters
- bytePosition += input;
- continue;
- }
- else if (input == 0xFFFF)
- {
- // Same as our bytePosition
- useBytes = bytePosition;
- input = unchecked((char)bytePosition);
- }
- else if (input == LEAD_BYTE_CHAR) // 0xfffe
- {
- // Lead byte mark
- Debug.Assert(bytePosition < 0x100, "[DBCSCodePageEncoding.LoadManagedCodePage]expected lead byte to be < 0x100");
- useBytes = bytePosition;
- // input stays 0xFFFE
- }
- else if (input == UNICODE_REPLACEMENT_CHAR)
- {
- // Replacement char is already done
- bytePosition++;
- continue;
- }
- else
- {
- // Use this character
- useBytes = bytePosition;
- // input == input;
- }
-
- // We may need to clean up the selected character & position
- if (CleanUpBytes(ref useBytes))
- {
- // Use this selected character at the selected position, don't do this if not supposed to.
- if (input != LEAD_BYTE_CHAR)
- {
- // Don't do this for lead byte marks.
- mapUnicodeToBytes[input] = unchecked((ushort)useBytes);
- }
- mapBytesToUnicode[useBytes] = input;
- }
- bytePosition++;
- }
-
- // See if we have any clean up junk to do
- CleanUpEndBytes(mapBytesToUnicode);
-
- // We're done with our mapped section, set our flag so others don't have to rebuild table.
- // We only do this if we're flagging(using) the data table as our primary mechanism
- if (this.bFlagDataTable)
- *mapCodePageCached = this.dataTableCodePage;
- }
-
- // Any special processing for this code page
- protected virtual bool CleanUpBytes(ref int bytes)
- {
- return true;
- }
-
- // Any special processing for this code page
- protected virtual unsafe void CleanUpEndBytes(char* chars)
- {
- }
-
- // Private object for locking instead of locking on a public type for SQL reliability work.
- private static Object s_InternalSyncObject;
- private static Object InternalSyncObject
- {
- get
- {
- if (s_InternalSyncObject == null)
- {
- Object o = new Object();
- Interlocked.CompareExchange<Object>(ref s_InternalSyncObject, o, null);
- }
- return s_InternalSyncObject;
- }
- }
-
- // Read in our best fit table
- protected unsafe override void ReadBestFitTable()
- {
- // Lock so we don't confuse ourselves.
- lock(InternalSyncObject)
- {
- // If we got a best fit array already then don't do this
- if (arrayUnicodeBestFit == null)
- {
- //
- // Read in Best Fit table.
- //
-
- // First we have to advance past original character mapping table
- // Move to the beginning of the data section
- char* pData = (char*)&(pCodePage->FirstDataWord);
-
- // We start at bytes position 0
- int bytesPosition = 0;
-
- while (bytesPosition < 0x10000)
- {
- // Get the next byte
- char input = *pData;
- pData++;
-
- // build our table:
- if (input == 1)
- {
- // Use next data as our byte position
- bytesPosition = (int)(*pData);
- pData++;
- }
- else if (input < 0x20 && input > 0)
- {
- // Advance input characters
- bytesPosition += input;
- }
- else
- {
- // All other cases add 1 to bytes position
- bytesPosition++;
- }
- }
-
- // Now bytesPosition is at start of bytes->unicode best fit table
- char* pBytes2Unicode = pData;
-
- // Now pData should be pointing to first word of bytes -> unicode best fit table
- // (which we're also not using at the moment)
- int iBestFitCount = 0;
- bytesPosition = *pData;
- pData++;
-
- while (bytesPosition < 0x10000)
- {
- // Get the next byte
- char input = *pData;
- pData++;
-
- // build our table:
- if (input == 1)
- {
- // Use next data as our byte position
- bytesPosition = (int)(*pData);
- pData++;
- }
- else if (input < 0x20 && input > 0)
- {
- // Advance input characters
- bytesPosition += input;
- }
- else
- {
- // Use this character (unless its unknown, unk just skips 1)
- if (input != UNICODE_REPLACEMENT_CHAR)
- {
- int correctedChar = bytesPosition;
- if (CleanUpBytes(ref correctedChar))
- {
- // Sometimes correction makes them same as no best fit, skip those.
- if (mapBytesToUnicode[correctedChar] != input)
- {
- iBestFitCount++;
- }
- }
- }
-
- // Position gets incremented in any case.
- bytesPosition++;
- }
-
- }
-
- // Now we know how big the best fit table has to be
- char[] arrayTemp = new char[iBestFitCount * 2];
-
- // Now we know how many best fits we have, so go back & read them in
- iBestFitCount = 0;
- pData = pBytes2Unicode;
- bytesPosition = *pData;
- pData++;
- bool bOutOfOrder = false;
-
- // Read it all in again
- while (bytesPosition < 0x10000)
- {
- // Get the next byte
- char input = *pData;
- pData++;
-
- // build our table:
- if (input == 1)
- {
- // Use next data as our byte position
- bytesPosition = (int)(*pData);
- pData++;
- }
- else if (input < 0x20 && input > 0)
- {
- // Advance input characters
- bytesPosition += input;
- }
- else
- {
- // Use this character (unless its unknown, unk just skips 1)
- if (input != UNICODE_REPLACEMENT_CHAR)
- {
- int correctedChar = bytesPosition;
- if (CleanUpBytes(ref correctedChar))
- {
- // Sometimes correction makes them same as no best fit, skip those.
- if (mapBytesToUnicode[correctedChar] != input)
- {
- if (correctedChar != bytesPosition)
- bOutOfOrder = true;
-
- arrayTemp[iBestFitCount++] = unchecked((char)correctedChar);
- arrayTemp[iBestFitCount++] = input;
- }
- }
- }
-
- // Position gets incremented in any case.
- bytesPosition++;
- }
- }
-
- // If they're out of order we need to sort them.
- if (bOutOfOrder)
- {
- Debug.Assert((arrayTemp.Length / 2) < 20,
- "[DBCSCodePageEncoding.ReadBestFitTable]Expected small best fit table < 20 for code page " + CodePage + ", not " + arrayTemp.Length / 2);
-
- for (int i = 0; i < arrayTemp.Length - 2; i+=2)
- {
- int iSmallest = i;
- char cSmallest = arrayTemp[i];
-
- for (int j = i + 2; j < arrayTemp.Length; j+=2)
- {
- // Find smallest one for front
- if (cSmallest > arrayTemp[j])
- {
- cSmallest = arrayTemp[j];
- iSmallest = j;
- }
- }
-
- // If smallest one is something else, switch them
- if (iSmallest != i)
- {
- char temp = arrayTemp[iSmallest];
- arrayTemp[iSmallest] = arrayTemp[i];
- arrayTemp[i] = temp;
- temp = arrayTemp[iSmallest+1];
- arrayTemp[iSmallest+1] = arrayTemp[i+1];
- arrayTemp[i+1] = temp;
- }
- }
- }
-
- // Remember our array
- arrayBytesBestFit = arrayTemp;
-
- // Now were at beginning of Unicode -> Bytes best fit table, need to count them
- char* pUnicode2Bytes = pData;
- int unicodePosition = *(pData++);
- iBestFitCount = 0;
-
- while (unicodePosition < 0x10000)
- {
- // Get the next byte
- char input = *pData;
- pData++;
-
- // build our table:
- if (input == 1)
- {
- // Use next data as our byte position
- unicodePosition = (int)*pData;
- pData++;
- }
- else if (input < 0x20 && input > 0)
- {
- // Advance input characters
- unicodePosition += input;
- }
- else
- {
- // Same as our unicodePosition or use this character
- if (input > 0)
- iBestFitCount++;
- unicodePosition++;
- }
- }
-
- // Allocate our table
- arrayTemp = new char[iBestFitCount*2];
-
- // Now do it again to fill the array with real values
- pData = pUnicode2Bytes;
- unicodePosition = *(pData++);
- iBestFitCount = 0;
-
- while (unicodePosition < 0x10000)
- {
- // Get the next byte
- char input = *pData;
- pData++;
-
- // build our table:
- if (input == 1)
- {
- // Use next data as our byte position
- unicodePosition = (int)*pData;
- pData++;
- }
- else if (input < 0x20 && input > 0)
- {
- // Advance input characters
- unicodePosition += input;
- }
- else
- {
- if (input > 0)
- {
- // Use this character, may need to clean it up
- int correctedChar = (int)input;
- if (CleanUpBytes(ref correctedChar))
- {
- arrayTemp[iBestFitCount++] = unchecked((char)unicodePosition);
- // Have to map it to Unicode because best fit will need unicode value of best fit char.
- arrayTemp[iBestFitCount++] = mapBytesToUnicode[correctedChar];
-
- // This won't work if it won't round trip.
- // We can't do this assert for CP 51932 & 50220 because they aren't
- // calling CleanUpBytes() for best fit. All the string stuff here
- // also makes this assert slow.
- // Debug.Assert(arrayTemp[iBestFitCount-1] != (char)0xFFFD, String.Format(
- // "[DBCSCodePageEncoding.ReadBestFitTable] No valid Unicode value {0:X4} for round trip bytes {1:X4}, encoding {2}",
- // (int)mapBytesToUnicode[input], (int)input, CodePage));
- }
- }
- unicodePosition++;
- }
- }
-
- // Remember our array
- arrayUnicodeBestFit = arrayTemp;
- }
-
- }
- }
-
- // GetByteCount
- // Returns the number of bytes needed to encode the `count` chars at `chars`. The tally starts at zero and grows by one
- // byte per mapped char, or by two when its mapUnicodeToBytes entry is >= 0x100; unmapped chars go through the encoder fallback.
- internal override unsafe int GetByteCount(char* chars, int count, EncoderNLS encoder)
- {
- // Debug-only checks: this is called by something else internal that checked parameters already
- Debug.Assert(count >= 0, "[DBCSCodePageEncoding.GetByteCount]count is negative");
- Debug.Assert(chars != null, "[DBCSCodePageEncoding.GetByteCount]chars is null");
-
- // The encoding's own encoderFallback must be set: it creates the fallback buffer below when no encoder is supplied.
- Debug.Assert(encoderFallback != null, "[DBCSCodePageEncoding.GetByteCount]Attempting to use null fallback");
-
- CheckMemorySection();
-
- // Pick up the char (expected to be a high surrogate) that the encoder kept from a previous call, if any
- char charLeftOver = (char)0;
- if (encoder != null)
- {
- charLeftOver = encoder.charLeftOver;
-
- // Refuse to count while the encoder's fallback buffer still holds chars. NOTE(review): unlike GetBytes, this check does not consult encoder.m_throwOnOverflow - confirm that is intended.
- if (encoder.InternalHasFallbackBuffer && encoder.FallbackBuffer.Remaining > 0)
- throw new ArgumentException(Environment.GetResourceString("Argument_EncoderFallbackNotEmpty",
- this.EncodingName, encoder.Fallback.GetType()));
- }
-
- // Running total and end-of-input sentinel
- int byteCount = 0;
- char* charEnd = chars + count;
-
- // Fallback buffer is obtained lazily: it stays null until a leftover or unmapped char needs it
- EncoderFallbackBuffer fallbackBuffer = null;
-
- // We may have a left over character from last time, try and process it.
- if (charLeftOver > 0)
- {
- Debug.Assert(Char.IsHighSurrogate(charLeftOver), "[DBCSCodePageEncoding.GetByteCount]leftover character should be high surrogate");
- Debug.Assert(encoder != null,
- "[DBCSCodePageEncoding.GetByteCount]Expect to have encoder if we have a charLeftOver");
-
- // Since left over char was a surrogate, it'll have to be fallen back.
- // Use the encoder's own fallback buffer; the final `false` is the same flag GetBytes passes as `true`
- fallbackBuffer = encoder.FallbackBuffer;
- fallbackBuffer.InternalInitialize(chars, charEnd, encoder, false);
- // Per the original note: this will fallback a pair if *chars is a low surrogate (chars is passed by ref)
- fallbackBuffer.InternalFallback(charLeftOver, ref chars);
- }
-
- // Now we may have fallback char[] already (from the encoder)
-
- // Each pass takes a pending fallback char first; only when that yields 0 do we read the next input char.
- char ch;
- while ((ch = (fallbackBuffer == null) ? '\0' : fallbackBuffer.InternalGetNextChar()) != 0 ||
- chars < charEnd)
- {
- // First unwind any fallback
- if (ch == 0)
- {
- // No fallback, just get next char
- ch = *chars;
- chars++;
- }
-
- // Table lookup: 0 means no mapping, < 0x100 a single byte, >= 0x100 a two-byte sequence
- ushort sTemp = mapUnicodeToBytes[ch];
-
- // Check for fallback, this'll catch surrogate pairs too. U+0000 is excluded because it legitimately maps to 0.
- if (sTemp == 0 && ch != (char)0)
- {
- if (fallbackBuffer == null)
- {
- // First fallback of this call: take the encoder's buffer, or create one from the encoding's fallback. charEnd - count is the original start of the input.
- if (encoder == null)
- fallbackBuffer = this.encoderFallback.CreateFallbackBuffer();
- else
- fallbackBuffer = encoder.FallbackBuffer;
- fallbackBuffer.InternalInitialize(charEnd - count, charEnd, encoder, false);
- }
-
- // Queue the fallback for this char; the loop head drains whatever it produced
- fallbackBuffer.InternalFallback(ch, ref chars);
- continue;
- }
-
- // Mapped char: one byte, plus a second when the value carries a lead byte
- byteCount++;
- if (sTemp >= 0x100)
- byteCount++;
- }
-
- return (int)byteCount;
- }
-
- internal override unsafe int GetBytes(char* chars, int charCount,
- byte* bytes, int byteCount, EncoderNLS encoder)
- {
- // Encodes charCount chars at `chars` into `bytes` (capacity byteCount) and returns the number of bytes written. Parameters are only asserted: the internal caller checked them already.
- Debug.Assert(bytes != null, "[DBCSCodePageEncoding.GetBytes]bytes is null");
- Debug.Assert(byteCount >= 0, "[DBCSCodePageEncoding.GetBytes]byteCount is negative");
- Debug.Assert(chars != null, "[DBCSCodePageEncoding.GetBytes]chars is null");
- Debug.Assert(charCount >= 0, "[DBCSCodePageEncoding.GetBytes]charCount is negative");
-
- // The encoding's own encoderFallback must be set: it creates the fallback buffer below when no encoder is supplied.
- Debug.Assert(encoderFallback != null, "[DBCSCodePageEncoding.GetBytes]Attempting to use null encoder fallback");
-
- CheckMemorySection();
-
- // Fallback buffer: taken from the encoder up front when there is one, otherwise created lazily in the loop
- EncoderFallbackBuffer fallbackBuffer = null;
-
- // Remember both ends of both buffers; the start pointers are used for the "nothing consumed yet" test and the final counts
- char* charEnd = chars + charCount;
- char* charStart = chars;
- byte* byteStart = bytes;
- byte* byteEnd = bytes + byteCount;
-
- // Pick up the char (expected to be a high surrogate) that the encoder kept from a previous call, if any
- char charLeftOver = (char)0;
- if (encoder != null)
- {
- charLeftOver = encoder.charLeftOver;
- Debug.Assert(charLeftOver == 0 || Char.IsHighSurrogate(charLeftOver),
- "[DBCSCodePageEncoding.GetBytes]leftover character should be high surrogate");
-
- // Go ahead and get the fallback buffer (need leftover fallback if converting)
- fallbackBuffer = encoder.FallbackBuffer;
- fallbackBuffer.InternalInitialize(chars, charEnd, encoder, true);
-
- // With m_throwOnOverflow set (per the original note: "not converting") the fallback buffer must start out empty
- if (encoder.m_throwOnOverflow && fallbackBuffer.Remaining > 0)
- throw new ArgumentException(Environment.GetResourceString("Argument_EncoderFallbackNotEmpty",
- this.EncodingName, encoder.Fallback.GetType()));
-
- // We may have a left over character from last time, try and process it.
- if (charLeftOver > 0)
- {
- Debug.Assert(encoder != null,
- "[DBCSCodePageEncoding.GetBytes]Expect to have encoder if we have a charLeftOver");
-
- // Since left over char was a surrogate, it'll have to be fallen back.
- // chars is passed by ref, so the fallback may move the input position
- fallbackBuffer.InternalFallback(charLeftOver, ref chars);
- }
- }
-
- // Now we may have fallback char[] already from the encoder
-
- // Each pass takes a pending fallback char first; only when that yields 0 do we read the next input char.
- char ch;
- while ((ch = (fallbackBuffer == null) ? '\0' : fallbackBuffer.InternalGetNextChar()) != 0 ||
- chars < charEnd)
- {
- // First unwind any fallback
- if (ch == 0)
- {
- // No fallback, just get next char
- ch = *chars;
- chars++;
- }
-
- // Table lookup: 0 means no mapping, < 0x100 a single byte, >= 0x100 a lead byte (high 8 bits) plus trail byte (low 8 bits)
- ushort sTemp = mapUnicodeToBytes[ch];
-
- // Check for fallback, this'll catch surrogate pairs too. U+0000 is excluded because it legitimately maps to 0.
- if (sTemp == 0 && ch != (char)0)
- {
- if (fallbackBuffer == null)
- {
- // Lazy creation only happens without an encoder; charEnd - charCount is the original start of the input
- Debug.Assert(encoder == null,
- "[DBCSCodePageEncoding.GetBytes]Expected delayed create fallback only if no encoder.");
- fallbackBuffer = this.encoderFallback.CreateFallbackBuffer();
- fallbackBuffer.InternalInitialize(charEnd - charCount, charEnd, encoder, true);
- }
-
- // Queue the fallback for this char; the loop head drains whatever it produced
- fallbackBuffer.InternalFallback(ch, ref chars);
- continue;
- }
-
- // We'll use this one (or two)
- // Bounds check comes first: on overflow the char is "un-read" so it can be retried by a later call
-
- // Go ahead and add it, lead byte 1st if necessary
- if (sTemp >= 0x100)
- {
- if (bytes + 1 >= byteEnd)
- {
- // Need room for two bytes and don't have it: back out of whichever source (input or fallback) supplied this char
- if (fallbackBuffer == null || fallbackBuffer.bFallingBack == false)
- {
- Debug.Assert(chars > charStart,
- "[DBCSCodePageEncoding.GetBytes]Expected chars to have advanced (double byte case)");
- chars--; // don't use last char
- }
- else
- fallbackBuffer.MovePrevious(); // don't use last fallback
- ThrowBytesOverflow(encoder, chars == charStart); // throw ?
- break; // don't throw, stop
- }
-
- *bytes = unchecked((byte)(sTemp >> 8));
- bytes++;
- }
- // Single byte: same back-out logic, but only one byte of room is required
- else if (bytes >= byteEnd)
- {
- // didn't use this char, we'll throw or use buffer
- if (fallbackBuffer == null || fallbackBuffer.bFallingBack == false)
- {
- Debug.Assert(chars > charStart,
- "[DBCSCodePageEncoding.GetBytes]Expected chars to have advanced (single byte case)");
- chars--; // don't use last char
- }
- else
- fallbackBuffer.MovePrevious(); // don't use last fallback
- ThrowBytesOverflow(encoder, chars == charStart); // throw ?
- break; // don't throw, stop
- }
-
- *bytes = unchecked((byte)(sTemp & 0xff));
- bytes++;
- }
-
- // Publish state back to the encoder if we have one
- if (encoder != null)
- {
- // Fallback stuck it in encoder if necessary, but we have to clear MustFlush cases
- if (fallbackBuffer != null && !fallbackBuffer.bUsedEncoder)
- // The fallback did not park a char in the encoder, so drop the old leftover
- encoder.charLeftOver = (char)0;
-
- // Report how many input chars were consumed
- encoder.m_charsUsed = (int)(chars - charStart);
- }
-
- // If we're not converting we must not have a fallback buffer
- // (We don't really have a way to clear none-encoder using fallbacks however), so the assert below stays disabled
-// Debug.Assert((encoder == null || encoder.m_throwOnOverflow) &&
-// (fallbackBuffer == null || fallbackBuffer.Remaining == 0),
-// "[DBCSEncoding.GetBytes]Expected empty fallback buffer at end if not converting");
-
- return (int)(bytes - byteStart);
- }
-
- // Returns the number of chars that decoding the `count` bytes at `bytes` would produce. Internal: called by something else that validated the arguments.
- internal override unsafe int GetCharCount(byte* bytes, int count, DecoderNLS baseDecoder)
- {
- // Just assert, we're called internally so these should be safe, checked already
- Debug.Assert(bytes != null, "[DBCSCodePageEncoding.GetCharCount]bytes is null");
- Debug.Assert(count >= 0, "[DBCSCodePageEncoding.GetCharCount]byteCount is negative");
-
- CheckMemorySection();
-
- // Downcast to reach bLeftOver; the result may be null, which every use below checks for
- DBCSDecoder decoder = (DBCSDecoder)baseDecoder;
-
- // Fallback buffer is obtained lazily, on the first unknown byte sequence
- DecoderFallbackBuffer fallbackBuffer = null;
-
- // We'll need to know where the end is
- byte* byteEnd = bytes + count;
- int charCount = count; // Assume 1 char / byte
-
- // Shouldn't have anything in fallback buffer for GetCharCount
- // (don't have to check m_throwOnOverflow for count)
- Debug.Assert(decoder == null ||
- !decoder.InternalHasFallbackBuffer || decoder.FallbackBuffer.Remaining == 0,
- "[DBCSCodePageEncoding.GetCharCount]Expected empty fallback buffer at start");
-
- // If we have a left over byte, use it
- if (decoder != null && decoder.bLeftOver > 0)
- {
- // We have a left over byte?
- if (count == 0)
- {
- // No input though
- if (!decoder.MustFlush)
- {
- // Don't have to flush
- return 0;
- }
-
-
- Debug.Assert(fallbackBuffer == null,
- "[DBCSCodePageEncoding.GetCharCount]Expected empty fallback buffer");
- fallbackBuffer = decoder.FallbackBuffer;
- fallbackBuffer.InternalInitialize(bytes, null);
-
- byte[] byteBuffer = new byte[] { unchecked((byte)decoder.bLeftOver) };
- return fallbackBuffer.InternalFallback(byteBuffer, bytes);
- }
-
- // Combine the stored byte (high 8 bits) with the first input byte (low 8 bits)
- int iBytes = decoder.bLeftOver << 8;
- iBytes |= (*bytes);
- bytes++;
-
- // This is either 1 known char or fallback
- // Already counted 1 char (the slot preallocated for the first input byte)
- // Look up our bytes. NOTE(review): GetChars tests UNKNOWN_CHAR_FLAG here instead of a literal 0 - presumably the same value, confirm.
- char cDecoder = mapBytesToUnicode[iBytes];
- if (cDecoder == 0 && iBytes != 0)
- {
- // Deallocate preallocated one
- charCount--;
-
- // We'll need a fallback
- Debug.Assert(fallbackBuffer == null,
- "[DBCSCodePageEncoding.GetCharCount]Expected empty fallback buffer for unknown pair");
- fallbackBuffer = decoder.FallbackBuffer;
- fallbackBuffer.InternalInitialize(byteEnd - count, null);
-
- // Do fallback, we know there're 2 bytes
- byte[] byteBuffer = new byte[] { unchecked((byte)(iBytes >> 8)), unchecked((byte)iBytes) };
- charCount += fallbackBuffer.InternalFallback(byteBuffer, bytes);
- }
- // else we already reserved space for this one.
- }
-
- // Main loop: charCount already holds one slot per input byte, so each case below only corrects that estimate
- while (bytes < byteEnd)
- {
- // Per the original note: faster if don't use *bytes++;
- int iBytes = *bytes;
- bytes++;
- char c = mapBytesToUnicode[iBytes];
-
- // See if it was a double byte character
- if (c == LEAD_BYTE_CHAR)
- {
- // Its a lead byte
- charCount--; // deallocate preallocated lead byte
- if (bytes < byteEnd)
- {
- // Have another to use, so use it
- iBytes <<= 8;
- iBytes |= *bytes;
- bytes++;
- c = mapBytesToUnicode[iBytes];
- }
- else
- {
- // No input left
- if (decoder == null || decoder.MustFlush)
- {
- // have to flush anyway, set to unknown so we use fallback in a 'sec
- charCount++; // reallocate deallocated lead byte
- c = UNKNOWN_CHAR_FLAG;
- }
- else
- {
- // GetChars would park this lead byte in the decoder; when counting we simply stop (nothing is written to the decoder here)
- break;
- }
- }
- }
-
- // See if it was unknown.
- // Unknown and known chars already allocated, but fallbacks aren't
- if (c == UNKNOWN_CHAR_FLAG && iBytes != 0)
- {
- if (fallbackBuffer == null)
- {
- if (decoder == null)
- fallbackBuffer = this.DecoderFallback.CreateFallbackBuffer();
- else
- fallbackBuffer = decoder.FallbackBuffer;
- fallbackBuffer.InternalInitialize(byteEnd - count, null);
- }
-
- // Do fallback: swap the preallocated slot for however many chars the fallback reports
- charCount--; // Get rid of preallocated extra char
- byte[] byteBuffer = null;
- if (iBytes < 0x100)
- byteBuffer = new byte[] { unchecked((byte)iBytes) };
- else
- byteBuffer = new byte[] { unchecked((byte)(iBytes >> 8)), unchecked((byte)iBytes) };
- charCount += fallbackBuffer.InternalFallback(byteBuffer, bytes);
- }
- }
-
- // Shouldn't have anything in fallback buffer at the end of GetCharCount
- Debug.Assert(decoder == null || !decoder.m_throwOnOverflow ||
- !decoder.InternalHasFallbackBuffer || decoder.FallbackBuffer.Remaining == 0,
- "[DBCSCodePageEncoding.GetCharCount]Expected empty fallback buffer at end");
-
- // Return our count
- return charCount;
- }
-
- internal override unsafe int GetChars(byte* bytes, int byteCount,
- char* chars, int charCount, DecoderNLS baseDecoder)
- {
- // Decodes byteCount bytes at `bytes` into `chars` (capacity charCount) and returns the number of chars written. Parameters are only asserted: the internal caller checked them already.
- Debug.Assert(bytes != null, "[DBCSCodePageEncoding.GetChars]bytes is null");
- Debug.Assert(byteCount >= 0, "[DBCSCodePageEncoding.GetChars]byteCount is negative");
- Debug.Assert(chars != null, "[DBCSCodePageEncoding.GetChars]chars is null");
- Debug.Assert(charCount >= 0, "[DBCSCodePageEncoding.GetChars]charCount is negative");
-
- CheckMemorySection();
-
- // Downcast to reach bLeftOver; the result may be null, which every use below checks for
- DBCSDecoder decoder = (DBCSDecoder)baseDecoder;
-
- // Remember both ends of both buffers; bUsedDecoder records whether this call parked a new lead byte in the decoder
- byte* byteStart = bytes;
- byte* byteEnd = bytes + byteCount;
- char* charStart = chars;
- char* charEnd = chars + charCount;
- bool bUsedDecoder = false;
-
- // Fallback buffer is obtained lazily, on the first unknown byte sequence
- DecoderFallbackBuffer fallbackBuffer = null;
-
- // Shouldn't have anything in fallback buffer for GetChars
- Debug.Assert(decoder == null || !decoder.m_throwOnOverflow ||
- !decoder.InternalHasFallbackBuffer || decoder.FallbackBuffer.Remaining == 0,
- "[DBCSCodePageEncoding.GetChars]Expected empty fallback buffer at start");
-
- // If we have a left over byte, use it
- if (decoder != null && decoder.bLeftOver > 0)
- {
- // We have a left over byte?
- if (byteCount == 0)
- {
- // No input though
- if (!decoder.MustFlush)
- {
- // Don't have to flush
- return 0;
- }
-
- // Well, we're flushing, so use '?' or fallback
- // fallback leftover byte on its own, since no trail byte will arrive
- Debug.Assert(fallbackBuffer == null,
- "[DBCSCodePageEncoding.GetChars]Expected empty fallback");
- fallbackBuffer = decoder.FallbackBuffer;
- fallbackBuffer.InternalInitialize(bytes, charEnd);
-
- // If no room its hopeless, this was 1st fallback
- byte[] byteBuffer = new byte[] { unchecked((byte)decoder.bLeftOver) };
- if (!fallbackBuffer.InternalFallback(byteBuffer, bytes, ref chars))
- ThrowCharsOverflow(decoder, true);
-
- decoder.bLeftOver = 0;
-
- // Done, return it
- return (int)(chars-charStart);
- }
-
- // Combine the stored byte (high 8 bits) with the first input byte (low 8 bits)
- int iBytes = decoder.bLeftOver << 8;
- iBytes |= (*bytes);
- bytes++;
-
- // Look up our bytes
- char cDecoder = mapBytesToUnicode[iBytes];
- if (cDecoder == UNKNOWN_CHAR_FLAG && iBytes != 0)
- {
- Debug.Assert(fallbackBuffer == null,
- "[DBCSCodePageEncoding.GetChars]Expected empty fallback for two bytes");
- fallbackBuffer = decoder.FallbackBuffer;
- fallbackBuffer.InternalInitialize(byteEnd - byteCount, charEnd);
-
- byte[] byteBuffer = new byte[] { unchecked((byte)(iBytes >> 8)), unchecked((byte)iBytes) };
- if (!fallbackBuffer.InternalFallback(byteBuffer, bytes, ref chars))
- ThrowCharsOverflow(decoder, true);
- }
- else
- {
- // Do we have output room?, hopeless if not, this is first char
- if (chars >= charEnd)
- ThrowCharsOverflow(decoder, true);
-
- *(chars++) = cDecoder;
- }
- }
-
- // Loop, paying attention to our fallbacks.
- while (bytes < byteEnd)
- {
- // Per the original note: faster if don't use *bytes++;
- int iBytes = *bytes;
- bytes++;
- char c = mapBytesToUnicode[iBytes];
-
- // See if it was a double byte character
- if (c == LEAD_BYTE_CHAR)
- {
- // Its a lead byte
- if (bytes < byteEnd)
- {
- // Have another to use, so use it
- iBytes <<= 8;
- iBytes |= *bytes;
- bytes++;
- c = mapBytesToUnicode[iBytes];
- }
- else
- {
- // No input left
- if (decoder == null || decoder.MustFlush)
- {
- // have to flush anyway, set to unknown so we use fallback in a 'sec
- c = UNKNOWN_CHAR_FLAG;
- }
- else
- {
- // Park the lone lead byte in the decoder so the next call can pair it with its trail byte
- bUsedDecoder = true;
- decoder.bLeftOver = (byte)iBytes;
- break;
- }
- }
- }
-
- // See if it was unknown
- if (c == UNKNOWN_CHAR_FLAG && iBytes != 0)
- {
- if (fallbackBuffer == null)
- {
- if (decoder == null)
- fallbackBuffer = this.DecoderFallback.CreateFallbackBuffer();
- else
- fallbackBuffer = decoder.FallbackBuffer;
- fallbackBuffer.InternalInitialize(byteEnd - byteCount, charEnd);
- }
-
- // Do fallback with the one or two bytes that made up this unknown sequence
- byte[] byteBuffer = null;
- if (iBytes < 0x100)
- byteBuffer = new byte[] { unchecked((byte)iBytes) };
- else
- byteBuffer = new byte[] { unchecked((byte)(iBytes >> 8)), unchecked((byte)iBytes) };
- if (!fallbackBuffer.InternalFallback(byteBuffer, bytes, ref chars))
- {
- // May or may not throw, but we didn't get these byte(s)
- Debug.Assert(bytes >= byteStart + byteBuffer.Length,
- "[DBCSCodePageEncoding.GetChars]Expected bytes to have advanced for fallback");
- bytes-=byteBuffer.Length; // didn't use these byte(s)
- fallbackBuffer.InternalReset(); // Didn't fall this back
- ThrowCharsOverflow(decoder, bytes == byteStart); // throw?
- break; // don't throw, but stop loop
- }
- }
- else
- {
- // Do we have buffer room?
- if (chars >= charEnd)
- {
- // May or may not throw, but we didn't get these byte(s): rewind one byte, or two for a double-byte char
- Debug.Assert(bytes > byteStart,
- "[DBCSCodePageEncoding.GetChars]Expected bytes to have advanced for lead byte");
- bytes--; // unused byte
- if (iBytes >= 0x100)
- {
- Debug.Assert(bytes > byteStart,
- "[DBCSCodePageEncoding.GetChars]Expected bytes to have advanced for trail byte");
- bytes--; // 2nd unused byte
- }
- ThrowCharsOverflow(decoder, bytes == byteStart); // throw?
- break; // don't throw, but stop loop
- }
-
- *(chars++) = c;
- }
- }
-
- // We already stuck it in decoder if necessary, but we have to clear cases where nothing new got into decoder
- if (decoder != null)
- {
- // Clear it in case of MustFlush
- if (bUsedDecoder == false)
- {
- decoder.bLeftOver = 0;
- }
-
- // Report how many input bytes were consumed
- decoder.m_bytesUsed = (int)(bytes - byteStart);
- }
-
- // Shouldn't have anything in fallback buffer for GetChars
- Debug.Assert(decoder == null || !decoder.m_throwOnOverflow ||
- !decoder.InternalHasFallbackBuffer || decoder.FallbackBuffer.Remaining == 0,
- "[DBCSCodePageEncoding.GetChars]Expected empty fallback buffer at end");
-
- // Return length of our output
- return (int)(chars - charStart);
- }
-
- public override int GetMaxByteCount(int charCount)
- {
- if (charCount < 0)
- throw new ArgumentOutOfRangeException(nameof(charCount),
- Environment.GetResourceString("ArgumentOutOfRange_NeedNonNegNum"));
- Contract.EndContractBlock();
-
- // Upper bound on bytes for charCount chars. Start from charCount + 1 (per the original note, the extra one covers a pending high surrogate), computed in long so the multiplications below cannot wrap.
- long byteCount = (long)charCount + 1;
-
- if (EncoderFallback.MaxCharCount > 1)
- byteCount *= EncoderFallback.MaxCharCount;
-
- // 2 to 1 is worst case (every char becoming a double-byte sequence). Already considered surrogate fallback
- byteCount *= 2;
-
- if (byteCount > 0x7fffffff)
- throw new ArgumentOutOfRangeException(nameof(charCount), Environment.GetResourceString("ArgumentOutOfRange_GetByteCountOverflow"));
-
- return (int)byteCount;
- }
-
- public override int GetMaxCharCount(int byteCount)
- {
- if (byteCount < 0)
- throw new ArgumentOutOfRangeException(nameof(byteCount),
- Environment.GetResourceString("ArgumentOutOfRange_NeedNonNegNum"));
- Contract.EndContractBlock();
-
- // Upper bound on chars for byteCount bytes. Start from byteCount + 1 (per the original note, the extra one covers a hanging lead byte that gets flushed), computed in long so the multiplication below cannot wrap.
- long charCount = ((long)byteCount + 1);
-
- // At most one char per byte from the tables; a decoder fallback may expand each unknown sequence to up to MaxCharCount chars.
- if (DecoderFallback.MaxCharCount > 1)
- charCount *= DecoderFallback.MaxCharCount;
-
- if (charCount > 0x7fffffff)
- throw new ArgumentOutOfRangeException(nameof(byteCount), Environment.GetResourceString("ArgumentOutOfRange_GetCharCountOverflow"));
-
- return (int)charCount;
- }
-
- public override Decoder GetDecoder()
- {
- return new DBCSDecoder(this); // a new DBCSDecoder bound to this encoding; it carries a pending lead byte between calls
- }
-
- [Serializable]
- internal class DBCSDecoder : DecoderNLS
- {
- // Lead byte left over from the previous call while its trail byte has not arrived yet; 0 means nothing is pending
- internal byte bLeftOver = 0;
-
- public DBCSDecoder(DBCSCodePageEncoding encoding) : base(encoding)
- {
- // Nothing to do here. Per the original note the base constructor calls Reset() - not visible in this file, confirm in DecoderNLS.
- }
-
- public override void Reset()
- {
- this.bLeftOver = 0;
- if (m_fallbackBuffer != null)
- m_fallbackBuffer.Reset();
- }
-
- // True while a lead byte is still waiting for its trail byte
- internal override bool HasState
- {
- get
- {
- return (this.bLeftOver != 0);
- }
- }
- }
- }
-}
-#endif // FEATURE_CODEPAGES_FILE
-
diff --git a/src/mscorlib/src/System/Text/Decoder.cs b/src/mscorlib/src/System/Text/Decoder.cs
index 0ebbacddcf..a9fea82a39 100644
--- a/src/mscorlib/src/System/Text/Decoder.cs
+++ b/src/mscorlib/src/System/Text/Decoder.cs
@@ -20,7 +20,6 @@ namespace System.Text
// class are typically obtained through calls to the GetDecoder method
// of Encoding objects.
//
- [System.Runtime.InteropServices.ComVisible(true)]
[Serializable]
public abstract class Decoder
{
@@ -39,7 +38,6 @@ namespace System.Text
// We don't call default reset because default reset probably isn't good if we aren't initialized.
}
- [System.Runtime.InteropServices.ComVisible(false)]
public DecoderFallback Fallback
{
get
@@ -65,7 +63,6 @@ namespace System.Text
// Note: we don't test for threading here because async access to Encoders and Decoders
// doesn't work anyway.
- [System.Runtime.InteropServices.ComVisible(false)]
public DecoderFallbackBuffer FallbackBuffer
{
get
@@ -99,7 +96,6 @@ namespace System.Text
//
 // Virtual implementation has to call GetChars with flush and a big enough buffer to clear a 0 byte string
// We avoid GetMaxCharCount() because a) we can't call the base encoder and b) it might be really big.
- [System.Runtime.InteropServices.ComVisible(false)]
public virtual void Reset()
{
byte[] byteTemp = Array.Empty<byte>();
@@ -117,7 +113,6 @@ namespace System.Text
//
public abstract int GetCharCount(byte[] bytes, int index, int count);
- [System.Runtime.InteropServices.ComVisible(false)]
public virtual int GetCharCount(byte[] bytes, int index, int count, bool flush)
{
return GetCharCount(bytes, index, count);
@@ -126,7 +121,6 @@ namespace System.Text
// We expect this to be the workhorse for NLS Encodings, but for existing
 // ones we need a working (if slow) default implementation)
[CLSCompliant(false)]
- [System.Runtime.InteropServices.ComVisible(false)]
public virtual unsafe int GetCharCount(byte* bytes, int count, bool flush)
{
// Validate input parameters
@@ -190,7 +184,6 @@ namespace System.Text
// could easily overflow our output buffer. Therefore we do an extra test
// when we copy the buffer so that we don't overflow charCount either.
[CLSCompliant(false)]
- [System.Runtime.InteropServices.ComVisible(false)]
public virtual unsafe int GetChars(byte* bytes, int byteCount,
char* chars, int charCount, bool flush)
{
@@ -248,7 +241,6 @@ namespace System.Text
// Note that if all of the input bytes are not consumed, then we'll do a /2, which means
// that its likely that we didn't consume as many bytes as we could have. For some
// applications this could be slow. (Like trying to exactly fill an output buffer from a bigger stream)
- [System.Runtime.InteropServices.ComVisible(false)]
public virtual void Convert(byte[] bytes, int byteIndex, int byteCount,
char[] chars, int charIndex, int charCount, bool flush,
out int bytesUsed, out int charsUsed, out bool completed)
@@ -306,7 +298,6 @@ namespace System.Text
// that its likely that we didn't consume as many bytes as we could have. For some
// applications this could be slow. (Like trying to exactly fill an output buffer from a bigger stream)
[CLSCompliant(false)]
- [System.Runtime.InteropServices.ComVisible(false)]
public virtual unsafe void Convert(byte* bytes, int byteCount,
char* chars, int charCount, bool flush,
out int bytesUsed, out int charsUsed, out bool completed)
diff --git a/src/mscorlib/src/System/Text/DecoderFallback.cs b/src/mscorlib/src/System/Text/DecoderFallback.cs
index 42483a724d..bfd4a2852d 100644
--- a/src/mscorlib/src/System/Text/DecoderFallback.cs
+++ b/src/mscorlib/src/System/Text/DecoderFallback.cs
@@ -75,14 +75,6 @@ namespace System.Text
// Maximum number of characters that this instance of this fallback could return
public abstract int MaxCharCount { get; }
-
- internal bool IsMicrosoftBestFitFallback
- {
- get
- {
- return bIsMicrosoftBestFitFallback;
- }
- }
}
diff --git a/src/mscorlib/src/System/Text/DecoderNLS.cs b/src/mscorlib/src/System/Text/DecoderNLS.cs
index e44c43adef..79474f8d8c 100644
--- a/src/mscorlib/src/System/Text/DecoderNLS.cs
+++ b/src/mscorlib/src/System/Text/DecoderNLS.cs
@@ -5,7 +5,6 @@
namespace System.Text
{
using System.Runtime.Serialization;
- using System.Security.Permissions;
using System.Text;
using System;
using System.Diagnostics.Contracts;
@@ -98,7 +97,7 @@ namespace System.Text
bytes = new byte[1];
// Just call pointer version
- fixed (byte* pBytes = bytes)
+ fixed (byte* pBytes = &bytes[0])
return GetCharCount(pBytes + index, count, flush);
}
@@ -159,8 +158,8 @@ namespace System.Text
chars = new char[1];
// Just call pointer version
- fixed (byte* pBytes = bytes)
- fixed (char* pChars = chars)
+ fixed (byte* pBytes = &bytes[0])
+ fixed (char* pChars = &chars[0])
// Remember that charCount is # to decode, not size of array
return GetChars(pBytes + byteIndex, byteCount,
pChars + charIndex, charCount, flush);
@@ -223,9 +222,9 @@ namespace System.Text
chars = new char[1];
// Just call the pointer version (public overrides can't do this)
- fixed (byte* pBytes = bytes)
+ fixed (byte* pBytes = &bytes[0])
{
- fixed (char* pChars = chars)
+ fixed (char* pChars = &chars[0])
{
Convert(pBytes + byteIndex, byteCount, pChars + charIndex, charCount, flush,
out bytesUsed, out charsUsed, out completed);
diff --git a/src/mscorlib/src/System/Text/EUCJPEncoding.cs b/src/mscorlib/src/System/Text/EUCJPEncoding.cs
deleted file mode 100644
index 44345b22b9..0000000000
--- a/src/mscorlib/src/System/Text/EUCJPEncoding.cs
+++ /dev/null
@@ -1,183 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-// See the LICENSE file in the project root for more information.
-
-#if FEATURE_CODEPAGES_FILE // requires BaseCodePageEncooding
-namespace System.Text
-{
- using System.Text;
- using System.Globalization;
-
- // EUCJPEncoding
- //
- // EUC-JP Encoding (51932)
- //
- // EUC-JP has the following code points:
- // 00-7F - ASCII
- // 80-8D & 90-9F - Control. (Like Unicode, except for 8e and 8f)
- // A1-FE, A1-FE - 2 byte JIS X 0208 range.
- // 8E, A1-DF - 2 byte half-width Katakana
- // 8F, A1-FE, A1-FE - 3 byte JIX X 0212 range. WE DON'T USE JIS 0212!!!
- //
- // New thoughts:
- // Fixing windows 20932 code page so that all characters can be looked up there.
- //
- // Old thoughts:
- // Windows NLS uses a special CP20932 for EUC-JP, but it is not used by mlang. Windows
- // Maps the 3 byte ranges to the 2 byte CP20932 by masking the 2nd byte with & 0x7F.
- // MLang uses the native windows 932 code page, which is more reliable, however the code points
- // don't line up as nicely as the 20932 code page, however it doesn't have JIS X 0212 support.
- //
- // So what we do is:
- // 1. For ASCII, leave it alone
- // 2. For half-width Katakana, use the leading byte and convert with 20936 code page.
- // 3. For JIS X 0208, Use the leading & trailing bytes with 20936 code page
- // 4. For JIS X 0212, Remove the lead byte, & 0xFF7F, and use the CP20936 table to convert.
- //
- // Regarding Normalization:
- // Forms KC & KD are precluded because of things like halfwidth Katakana that has compatibility mappings
- // Form D is precluded because of 0x00a8, which changes to space + dierises.
- //
- // I think that IsAlwaysNormalized should probably return true for form C (but not certain)
- //
- // NOTE: We don't use JIS 0212 so we are basically a DBCS code page, we just have to modify
- // the 932 table we're basing this on.
- //
-
- using System;
-
- [Serializable]
- internal class EUCJPEncoding : DBCSCodePageEncoding
- {
- // This pretends to be CP 932 as far as memory tables are concerned.
- public EUCJPEncoding() : base(51932, 932)
- {
- this.m_bUseMlangTypeForSerialization = true;
- }
-
- protected unsafe override String GetMemorySectionName()
- {
- int iUseCodePage = this.bFlagDataTable ? dataTableCodePage : CodePage;
-
- String strName = String.Format(CultureInfo.InvariantCulture, "CodePage_{0}_{1}_{2}_{3}_{4}_EUCJP",
- iUseCodePage, this.pCodePage->VersionMajor, this.pCodePage->VersionMinor,
- this.pCodePage->VersionRevision, this.pCodePage->VersionBuild);
-
- return strName;
- }
-
- // Clean up characters for EUC-JP code pages, etc.
- protected override bool CleanUpBytes(ref int bytes)
- {
- if (bytes >= 0x100)
- {
- // map extended char (0xfa40-0xfc4b) to a special range
- // (ported from mlang)
- if (bytes >= 0xfa40 && bytes <= 0xfc4b)
- {
- if ( bytes >= 0xfa40 && bytes <= 0xfa5b )
- {
- if ( bytes <= 0xfa49 )
- bytes = bytes - 0x0b51 ;
- else if ( bytes >= 0xfa4a && bytes <= 0xfa53 )
- bytes = bytes - 0x072f6 ;
- else if ( bytes >= 0xfa54 && bytes <= 0xfa57 )
- bytes = bytes - 0x0b5b ;
- else if ( bytes == 0xfa58 )
- bytes = 0x878a ;
- else if ( bytes == 0xfa59 )
- bytes = 0x8782 ;
- else if ( bytes == 0xfa5a )
- bytes = 0x8784 ;
- else if ( bytes == 0xfa5b )
- bytes = 0x879a ;
- }
- else if ( bytes >= 0xfa5c && bytes <= 0xfc4b )
- {
- byte tc = unchecked((byte)bytes);
- if ( tc < 0x5c )
- bytes = bytes - 0x0d5f;
- else if ( tc >= 0x80 && tc <= 0x9B )
- bytes = bytes - 0x0d1d;
- else
- bytes = bytes - 0x0d1c;
- }
- }
-
- // Convert 932 code page to 20932 like code page range
- // (also ported from mlang)
- byte bLead = unchecked((byte)(bytes >> 8));
- byte bTrail = unchecked((byte)bytes);
-
- bLead -= ((bLead > (byte)0x9f) ? (byte)0xb1 : (byte)0x71);
- bLead = (byte)((bLead << 1) + 1);
- if (bTrail > (byte)0x9e)
- {
- bTrail -= (byte)0x7e;
- bLead++;
- }
- else
- {
- if (bTrail > (byte)0x7e)
- bTrail--;
- bTrail -= (byte)0x1f;
- }
-
- bytes = ((int)bLead) << 8 | (int)bTrail | 0x8080;
-
- // // Don't step on our katakana special plane, if katakana space return false.
- // if (bytes >= 0x8E00 && bytes <= 0x8EFF)
- // return false;
-
- // Don't step out of our allocated lead byte area.
- // All DBCS lead and trail bytes should be >= 0xa1 and <= 0xfe
- if ((bytes & 0xFF00) < 0xa100 || (bytes & 0xFF00) > 0xfe00 ||
- (bytes & 0xFF) < 0xa1 || (bytes & 0xFF) > 0xfe)
- return false;
-
- // WARNING: Our funky mapping allows illegal values, which we continue to use
- // so that we're compatible with Everett.
- }
- else
- {
- // For 51932 1/2 Katakana gets a 0x8E lead byte
- // Adjust 1/2 Katakana
- if (bytes >= 0xa1 && bytes <= 0xdf)
- {
- bytes |= 0x8E00;
- return true;
- }
-
- // 0x81-0x9f and 0xe0-0xfc CP 932
- // 0x8e and 0xa1-0xfe CP 20932 (we don't use 8e though)
- // b0-df is 1/2 Katakana
- // So 81-9f & e0-fc are 932 lead bytes, a1-fe are our lead bytes
- // so ignore everything above 0x80 except 0xa0 and 0xff
- if (bytes >= 0x81 && bytes != 0xa0 && bytes != 0xff)
- {
- // We set diffent lead bytes later, so just return false
- return false;
- }
- }
-
- return true;
- }
-
- protected override unsafe void CleanUpEndBytes(char* chars)
- {
- // Need to special case CP 51932
- // 0x81-0x9f and 0xe0-0xfc CP 932
- // 0x8e and 0xa1-0xfe CP 20932
- // 0x10 and 0x21-0x9? Us (remapping 932)
- // b0-df is 1/2 Katakana (trail byte)
-
- // A1-FE are DBCS code points
- for (int i = 0xA1; i <= 0xFE; i++)
- chars[i] = LEAD_BYTE_CHAR;
-
- // And 8E is lead byte for Katakana (already set)
- chars[0x8e] = LEAD_BYTE_CHAR;
- }
- }
-}
-#endif // FEATURE_CODEPAGES_FILE
diff --git a/src/mscorlib/src/System/Text/Encoder.cs b/src/mscorlib/src/System/Text/Encoder.cs
index b9d4581276..f766f98142 100644
--- a/src/mscorlib/src/System/Text/Encoder.cs
+++ b/src/mscorlib/src/System/Text/Encoder.cs
@@ -20,7 +20,6 @@ namespace System.Text
// class are typically obtained through calls to the GetEncoder method
// of Encoding objects.
//
- [System.Runtime.InteropServices.ComVisible(true)]
[Serializable]
public abstract class Encoder
{
@@ -39,7 +38,6 @@ namespace System.Text
// We don't call default reset because default reset probably isn't good if we aren't initialized.
}
- [System.Runtime.InteropServices.ComVisible(false)]
public EncoderFallback Fallback
{
get
@@ -65,7 +63,6 @@ namespace System.Text
// Note: we don't test for threading here because async access to Encoders and Decoders
// doesn't work anyway.
- [System.Runtime.InteropServices.ComVisible(false)]
public EncoderFallbackBuffer FallbackBuffer
{
get
@@ -99,7 +96,6 @@ namespace System.Text
//
// Virtual implimentation has to call GetBytes with flush and a big enough buffer to clear a 0 char string
// We avoid GetMaxByteCount() because a) we can't call the base encoder and b) it might be really big.
- [System.Runtime.InteropServices.ComVisible(false)]
public virtual void Reset()
{
char[] charTemp = {};
@@ -122,7 +118,6 @@ namespace System.Text
// unfortunately for existing overrides, it has to call the [] version,
// which is really slow, so avoid this method if you might be calling external encodings.
[CLSCompliant(false)]
- [System.Runtime.InteropServices.ComVisible(false)]
public virtual unsafe int GetByteCount(char* chars, int count, bool flush)
{
// Validate input parameters
@@ -183,7 +178,6 @@ namespace System.Text
// could easily overflow our output buffer. Therefore we do an extra test
// when we copy the buffer so that we don't overflow byteCount either.
[CLSCompliant(false)]
- [System.Runtime.InteropServices.ComVisible(false)]
public virtual unsafe int GetBytes(char* chars, int charCount,
byte* bytes, int byteCount, bool flush)
{
@@ -240,7 +234,6 @@ namespace System.Text
// Note that if all of the input chars are not consumed, then we'll do a /2, which means
// that its likely that we didn't consume as many chars as we could have. For some
// applications this could be slow. (Like trying to exactly fill an output buffer from a bigger stream)
- [System.Runtime.InteropServices.ComVisible(false)]
public virtual void Convert(char[] chars, int charIndex, int charCount,
byte[] bytes, int byteIndex, int byteCount, bool flush,
out int charsUsed, out int bytesUsed, out bool completed)
@@ -299,7 +292,6 @@ namespace System.Text
// that its likely that we didn't consume as many chars as we could have. For some
// applications this could be slow. (Like trying to exactly fill an output buffer from a bigger stream)
[CLSCompliant(false)]
- [System.Runtime.InteropServices.ComVisible(false)]
public virtual unsafe void Convert(char* chars, int charCount,
byte* bytes, int byteCount, bool flush,
out int charsUsed, out int bytesUsed, out bool completed)
diff --git a/src/mscorlib/src/System/Text/EncoderBestFitFallback.cs b/src/mscorlib/src/System/Text/EncoderBestFitFallback.cs
index c5f82a299b..9be095bbd8 100644
--- a/src/mscorlib/src/System/Text/EncoderBestFitFallback.cs
+++ b/src/mscorlib/src/System/Text/EncoderBestFitFallback.cs
@@ -123,7 +123,7 @@ namespace System.Text
0xD800, 0xDBFF));
if (!Char.IsLowSurrogate(charUnknownLow))
- throw new ArgumentOutOfRangeException("CharUnknownLow",
+ throw new ArgumentOutOfRangeException(nameof(charUnknownLow),
Environment.GetResourceString("ArgumentOutOfRange_Range",
0xDC00, 0xDFFF));
Contract.EndContractBlock();
diff --git a/src/mscorlib/src/System/Text/EncoderExceptionFallback.cs b/src/mscorlib/src/System/Text/EncoderExceptionFallback.cs
index 051f50ac7c..6735e7a5f8 100644
--- a/src/mscorlib/src/System/Text/EncoderExceptionFallback.cs
+++ b/src/mscorlib/src/System/Text/EncoderExceptionFallback.cs
@@ -68,7 +68,7 @@ namespace System.Text
}
if (!Char.IsLowSurrogate(charUnknownLow))
{
- throw new ArgumentOutOfRangeException("CharUnknownLow",
+ throw new ArgumentOutOfRangeException(nameof(charUnknownLow),
Environment.GetResourceString("ArgumentOutOfRange_Range",
0xDC00, 0xDFFF));
}
diff --git a/src/mscorlib/src/System/Text/EncoderNLS.cs b/src/mscorlib/src/System/Text/EncoderNLS.cs
index 2add017d68..95901e01f4 100644
--- a/src/mscorlib/src/System/Text/EncoderNLS.cs
+++ b/src/mscorlib/src/System/Text/EncoderNLS.cs
@@ -5,7 +5,6 @@
namespace System.Text
{
using System.Runtime.Serialization;
- using System.Security.Permissions;
using System.Text;
using System;
using System.Diagnostics.Contracts;
@@ -98,7 +97,7 @@ namespace System.Text
// Just call the pointer version
int result = -1;
- fixed (char* pChars = chars)
+ fixed (char* pChars = &chars[0])
{
result = GetByteCount(pChars + index, count, flush);
}
@@ -151,8 +150,8 @@ namespace System.Text
bytes = new byte[1];
// Just call pointer version
- fixed (char* pChars = chars)
- fixed (byte* pBytes = bytes)
+ fixed (char* pChars = &chars[0])
+ fixed (byte* pBytes = &bytes[0])
// Remember that charCount is # to decode, not size of array.
return GetBytes(pChars + charIndex, charCount,
@@ -212,9 +211,9 @@ namespace System.Text
bytes = new byte[1];
// Just call the pointer version (can't do this for non-msft encoders)
- fixed (char* pChars = chars)
+ fixed (char* pChars = &chars[0])
{
- fixed (byte* pBytes = bytes)
+ fixed (byte* pBytes = &bytes[0])
{
Convert(pChars + charIndex, charCount, pBytes + byteIndex, byteCount, flush,
out charsUsed, out bytesUsed, out completed);
diff --git a/src/mscorlib/src/System/Text/EncoderReplacementFallback.cs b/src/mscorlib/src/System/Text/EncoderReplacementFallback.cs
index 604cddf9bb..b0657ff18d 100644
--- a/src/mscorlib/src/System/Text/EncoderReplacementFallback.cs
+++ b/src/mscorlib/src/System/Text/EncoderReplacementFallback.cs
@@ -153,7 +153,7 @@ namespace System.Text
0xD800, 0xDBFF));
if (!Char.IsLowSurrogate(charUnknownLow))
- throw new ArgumentOutOfRangeException("CharUnknownLow",
+ throw new ArgumentOutOfRangeException(nameof(charUnknownLow),
Environment.GetResourceString("ArgumentOutOfRange_Range",
0xDC00, 0xDFFF));
Contract.EndContractBlock();
diff --git a/src/mscorlib/src/System/Text/Encoding.cs b/src/mscorlib/src/System/Text/Encoding.cs
index 658bdbb133..8cb01e41fa 100644
--- a/src/mscorlib/src/System/Text/Encoding.cs
+++ b/src/mscorlib/src/System/Text/Encoding.cs
@@ -12,7 +12,6 @@ namespace System.Text
using System.Runtime.Serialization;
using System.Globalization;
using System.Security;
- using System.Security.Permissions;
using System.Threading;
using System.Text;
using System.Diagnostics;
@@ -83,7 +82,6 @@ namespace System.Text
// generally executes faster.
//
- [System.Runtime.InteropServices.ComVisible(true)]
[Serializable]
public abstract class Encoding : ICloneable
{
@@ -372,23 +370,6 @@ namespace System.Text
return dstEncoding.GetBytes(srcEncoding.GetChars(bytes, index, count));
}
-#if FEATURE_CODEPAGES_FILE
- // Private object for locking instead of locking on a public type for SQL reliability work.
- private static Object s_InternalSyncObject;
- private static Object InternalSyncObject {
- get {
- if (s_InternalSyncObject == null) {
- Object o = new Object();
- Interlocked.CompareExchange<Object>(ref s_InternalSyncObject, o, null);
- }
- return s_InternalSyncObject;
- }
- }
-
- // On Desktop, encoding instances that aren't cached in a static field are cached in
- // a hash table by codepage.
- private static volatile Hashtable encodings;
-#endif
public static void RegisterProvider(EncodingProvider provider)
{
@@ -441,45 +422,6 @@ namespace System.Text
"Argument_CodepageNotSupported", codepage), nameof(codepage));
}
-#if FEATURE_CODEPAGES_FILE
- object key = codepage; // Box once
-
- // See if we have a hash table with our encoding in it already.
- if (encodings != null) {
- result = (Encoding)encodings[key];
- }
-
- if (result == null)
- {
- // Don't conflict with ourselves
- lock (InternalSyncObject)
- {
- // Need a new hash table
- // in case another thread beat us to creating the Dictionary
- if (encodings == null) {
- encodings = new Hashtable();
- }
-
- // Double check that we don't have one in the table (in case another thread beat us here)
- if ((result = (Encoding)encodings[key]) != null)
- return result;
-
- if (codepage == CodePageWindows1252)
- {
- result = new SBCSCodePageEncoding(codepage);
- }
- else
- {
- result = GetEncodingCodePage(codepage) ?? GetEncodingRare(codepage);
- }
-
- Debug.Assert(result != null, "result != null");
-
- encodings.Add(key, result);
- }
- }
- return result;
-#else
// Is it a valid code page?
if (EncodingTable.GetCodePageDataItem(codepage) == null)
{
@@ -488,7 +430,6 @@ namespace System.Text
}
return UTF8;
-#endif // FEATURE_CODEPAGES_FILE
}
[Pure]
@@ -510,86 +451,6 @@ namespace System.Text
return fallbackEncoding;
}
-#if FEATURE_CODEPAGES_FILE
- private static Encoding GetEncodingRare(int codepage)
- {
- Debug.Assert(codepage != 0 && codepage != 1200 && codepage != 1201 && codepage != 65001,
- "[Encoding.GetEncodingRare]This code page (" + codepage + ") isn't supported by GetEncodingRare!");
- Encoding result;
- switch (codepage)
- {
- case ISCIIAssemese:
- case ISCIIBengali:
- case ISCIIDevanagari:
- case ISCIIGujarathi:
- case ISCIIKannada:
- case ISCIIMalayalam:
- case ISCIIOriya:
- case ISCIIPanjabi:
- case ISCIITamil:
- case ISCIITelugu:
- result = new ISCIIEncoding(codepage);
- break;
- // GB2312-80 uses same code page for 20936 and mac 10008
- case CodePageMacGB2312:
- // case CodePageGB2312:
- // result = new DBCSCodePageEncoding(codepage, EUCCN);
- result = new DBCSCodePageEncoding(CodePageMacGB2312, CodePageGB2312);
- break;
-
- // Mac Korean 10003 and 20949 are the same
- case CodePageMacKorean:
- result = new DBCSCodePageEncoding(CodePageMacKorean, CodePageDLLKorean);
- break;
- // GB18030 Code Pages
- case GB18030:
- result = new GB18030Encoding();
- break;
- // ISO2022 Code Pages
- case ISOKorean:
- // case ISOSimplifiedCN
- case ChineseHZ:
- case ISO2022JP: // JIS JP, full-width Katakana mode (no half-width Katakana)
- case ISO2022JPESC: // JIS JP, esc sequence to do Katakana.
- case ISO2022JPSISO: // JIS JP with Shift In/ Shift Out Katakana support
- result = new ISO2022Encoding(codepage);
- break;
- // Duplicate EUC-CN (51936) just calls a base code page 936,
- // so does ISOSimplifiedCN (50227), which's gotta be broken
- case DuplicateEUCCN:
- case ISOSimplifiedCN:
- result = new DBCSCodePageEncoding(codepage, EUCCN); // Just maps to 936
- break;
- case EUCJP:
- result = new EUCJPEncoding();
- break;
- case EUCKR:
- result = new DBCSCodePageEncoding(codepage, CodePageDLLKorean); // Maps to 20949
- break;
- case ENC50229:
- throw new NotSupportedException(Environment.GetResourceString("NotSupported_CodePage50229"));
- case ISO_8859_8I:
- result = new SBCSCodePageEncoding(codepage, ISO_8859_8_Visual); // Hebrew maps to a different code page
- break;
- default:
- // Not found, already tried codepage table code pages in GetEncoding()
- throw new NotSupportedException(
- Environment.GetResourceString("NotSupported_NoCodepageData", codepage));
- }
- return result;
- }
-
- private static Encoding GetEncodingCodePage(int CodePage)
- {
- // Single Byte or Double Byte Code Page? (0 if not found)
- int i = BaseCodePageEncoding.GetCodePageByteSize(CodePage);
- if (i == 1) return new SBCSCodePageEncoding(CodePage);
- else if (i == 2) return new DBCSCodePageEncoding(CodePage);
-
- // Return null if we didn't find one.
- return null;
- }
-#endif // FEATURE_CODEPAGES_FILE
// Returns an Encoding object for a given name or a given code page value.
//
[Pure]
@@ -764,7 +625,6 @@ namespace System.Text
// True if and only if the encoding only uses single byte code points. (Ie, ASCII, 1252, etc)
- [System.Runtime.InteropServices.ComVisible(false)]
public virtual bool IsSingleByte
{
get
@@ -774,7 +634,6 @@ namespace System.Text
}
- [System.Runtime.InteropServices.ComVisible(false)]
public EncoderFallback EncoderFallback
{
get
@@ -796,7 +655,6 @@ namespace System.Text
}
- [System.Runtime.InteropServices.ComVisible(false)]
public DecoderFallback DecoderFallback
{
get
@@ -818,7 +676,6 @@ namespace System.Text
}
- [System.Runtime.InteropServices.ComVisible(false)]
public virtual Object Clone()
{
Encoding newEncoding = (Encoding)this.MemberwiseClone();
@@ -829,7 +686,6 @@ namespace System.Text
}
- [System.Runtime.InteropServices.ComVisible(false)]
public bool IsReadOnly
{
get
@@ -918,7 +774,6 @@ namespace System.Text
// a 3rd party encoding.
[Pure]
[CLSCompliant(false)]
- [System.Runtime.InteropServices.ComVisible(false)]
public virtual unsafe int GetByteCount(char* chars, int count)
{
// Validate input parameters
@@ -1080,7 +935,6 @@ namespace System.Text
// when we copy the buffer so that we don't overflow byteCount either.
[CLSCompliant(false)]
- [System.Runtime.InteropServices.ComVisible(false)]
public virtual unsafe int GetBytes(char* chars, int charCount,
byte* bytes, int byteCount)
{
@@ -1149,7 +1003,6 @@ namespace System.Text
// ones we need a working (if slow) default implimentation)
[Pure]
[CLSCompliant(false)]
- [System.Runtime.InteropServices.ComVisible(false)]
public virtual unsafe int GetCharCount(byte* bytes, int count)
{
// Validate input parameters
@@ -1236,7 +1089,6 @@ namespace System.Text
// when we copy the buffer so that we don't overflow charCount either.
[CLSCompliant(false)]
- [System.Runtime.InteropServices.ComVisible(false)]
public virtual unsafe int GetChars(byte* bytes, int byteCount,
char* chars, int charCount)
{
@@ -1291,7 +1143,6 @@ namespace System.Text
[CLSCompliant(false)]
- [System.Runtime.InteropServices.ComVisible(false)]
public unsafe string GetString(byte* bytes, int byteCount)
{
if (bytes == null)
@@ -1320,18 +1171,12 @@ namespace System.Text
// IsAlwaysNormalized
// Returns true if the encoding is always normalized for the specified encoding form
[Pure]
- [System.Runtime.InteropServices.ComVisible(false)]
public bool IsAlwaysNormalized()
{
-#if !FEATURE_NORM_IDNA_ONLY
return this.IsAlwaysNormalized(NormalizationForm.FormC);
-#else
- return this.IsAlwaysNormalized((NormalizationForm)ExtendedNormalizationForms.FormIdna);
-#endif
}
[Pure]
- [System.Runtime.InteropServices.ComVisible(false)]
public virtual bool IsAlwaysNormalized(NormalizationForm form)
{
// Assume false unless the encoding knows otherwise
@@ -1364,23 +1209,10 @@ namespace System.Text
Encoding enc;
-#if FEATURE_CODEPAGES_FILE
- int codePage = Win32Native.GetACP();
-
- // For US English, we can save some startup working set by not calling
- // GetEncoding(int codePage) since JITting GetEncoding will force us to load
- // all the Encoding classes for ASCII, UTF7 & UTF8, & UnicodeEncoding.
-
- if (codePage == 1252)
- enc = new SBCSCodePageEncoding(codePage);
- else
- enc = GetEncoding(codePage);
-#else // FEATURE_CODEPAGES_FILE
// For silverlight we use UTF8 since ANSI isn't available
enc = UTF8;
-#endif // FEATURE_CODEPAGES_FILE
// This method should only ever return one Encoding instance
return Interlocked.CompareExchange(ref defaultEncoding, enc, null) ?? enc;
@@ -1882,20 +1714,6 @@ namespace System.Text
return AddChar(ch,1);
}
-
- internal unsafe bool AddChar(char ch1, char ch2, int numBytes)
- {
- // Need room for 2 chars
- if (chars >= charEnd - 1)
- {
- // Throw maybe
- bytes-=numBytes; // Didn't encode these bytes
- enc.ThrowCharsOverflow(decoder, bytes <= byteStart); // Throw?
- return false; // No throw, but no store either
- }
- return AddChar(ch1, numBytes) && AddChar(ch2, numBytes);
- }
-
internal unsafe void AdjustBytes(int count)
{
bytes += count;
@@ -1909,12 +1727,6 @@ namespace System.Text
}
}
- // Do we have count more bytes?
- internal unsafe bool EvenMoreData(int count)
- {
- return (bytes <= byteEnd - count);
- }
-
// GetNextByte shouldn't be called unless the caller's already checked more data or even more data,
// but we'll double check just to make sure.
internal unsafe byte GetNextByte()
@@ -1942,24 +1754,6 @@ namespace System.Text
return Fallback(byteBuffer);
}
- internal unsafe bool Fallback(byte byte1, byte byte2)
- {
- // Build our buffer
- byte[] byteBuffer = new byte[] { byte1, byte2 };
-
- // Do the fallback and add the data.
- return Fallback(byteBuffer);
- }
-
- internal unsafe bool Fallback(byte byte1, byte byte2, byte byte3, byte byte4)
- {
- // Build our buffer
- byte[] byteBuffer = new byte[] { byte1, byte2, byte3, byte4 };
-
- // Do the fallback and add the data.
- return Fallback(byteBuffer);
- }
-
internal unsafe bool Fallback(byte[] byteBuffer)
{
// Do the fallback and add the data.
@@ -2067,26 +1861,6 @@ namespace System.Text
return (AddByte(b1, 1 + moreBytesExpected) && AddByte(b2, moreBytesExpected));
}
- internal unsafe bool AddByte(byte b1, byte b2, byte b3)
- {
- return AddByte(b1, b2, b3, (int)0);
- }
-
- internal unsafe bool AddByte(byte b1, byte b2, byte b3, int moreBytesExpected)
- {
- return (AddByte(b1, 2 + moreBytesExpected) &&
- AddByte(b2, 1 + moreBytesExpected) &&
- AddByte(b3, moreBytesExpected));
- }
-
- internal unsafe bool AddByte(byte b1, byte b2, byte b3, byte b4)
- {
- return (AddByte(b1, 3) &&
- AddByte(b2, 2) &&
- AddByte(b3, 1) &&
- AddByte(b4, 0));
- }
-
internal unsafe void MovePrevious(bool bThrow)
{
if (fallbackBuffer.bFallingBack)
@@ -2104,12 +1878,6 @@ namespace System.Text
enc.ThrowBytesOverflow(encoder, bytes == byteStart); // Throw? (and reset fallback if not converting)
}
- internal unsafe bool Fallback(char charFallback)
- {
- // Do the fallback
- return fallbackBuffer.InternalFallback(charFallback, ref chars);
- }
-
internal unsafe bool MoreData
{
get
diff --git a/src/mscorlib/src/System/Text/EncodingForwarder.cs b/src/mscorlib/src/System/Text/EncodingForwarder.cs
index 9a8dd26627..50ccbd9333 100644
--- a/src/mscorlib/src/System/Text/EncodingForwarder.cs
+++ b/src/mscorlib/src/System/Text/EncodingForwarder.cs
@@ -130,7 +130,7 @@ namespace System.Text
if (bytes.Length == 0)
bytes = new byte[1];
- fixed (char* pChars = s) fixed (byte* pBytes = bytes)
+ fixed (char* pChars = s) fixed (byte* pBytes = &bytes[0])
{
return encoding.GetBytes(pChars + charIndex, charCount, pBytes + byteIndex, byteCount, encoder: null);
}
@@ -170,7 +170,7 @@ namespace System.Text
bytes = new byte[1];
// Just call the (internal) pointer version
- fixed (char* pChars = chars) fixed (byte* pBytes = bytes)
+ fixed (char* pChars = chars) fixed (byte* pBytes = &bytes[0])
{
return encoding.GetBytes(pChars + charIndex, charCount, pBytes + byteIndex, byteCount, encoder: null);
}
@@ -266,7 +266,7 @@ namespace System.Text
if (chars.Length == 0)
chars = new char[1];
- fixed (byte* pBytes = bytes) fixed (char* pChars = chars)
+ fixed (byte* pBytes = bytes) fixed (char* pChars = &chars[0])
{
return encoding.GetChars(pBytes + byteIndex, byteCount, pChars + charIndex, charCount, decoder: null);
}
diff --git a/src/mscorlib/src/System/Text/EncodingNLS.cs b/src/mscorlib/src/System/Text/EncodingNLS.cs
index fbddf37e88..cb6ed8a52c 100644
--- a/src/mscorlib/src/System/Text/EncodingNLS.cs
+++ b/src/mscorlib/src/System/Text/EncodingNLS.cs
@@ -15,7 +15,6 @@ namespace System.Text
// This class overrides Encoding with the things we need for our NLS Encodings
- [System.Runtime.InteropServices.ComVisible(true)]
[Serializable]
internal abstract class EncodingNLS : Encoding
{
diff --git a/src/mscorlib/src/System/Text/EncodingProvider.cs b/src/mscorlib/src/System/Text/EncodingProvider.cs
index a7f745a753..734d1ac761 100644
--- a/src/mscorlib/src/System/Text/EncodingProvider.cs
+++ b/src/mscorlib/src/System/Text/EncodingProvider.cs
@@ -8,7 +8,6 @@ namespace System.Text
using System.Collections;
using System.Collections.Generic;
- [System.Runtime.InteropServices.ComVisible(true)]
public abstract class EncodingProvider
{
public EncodingProvider() { }
diff --git a/src/mscorlib/src/System/Text/GB18030Encoding.cs b/src/mscorlib/src/System/Text/GB18030Encoding.cs
deleted file mode 100644
index 8ed52a6ab8..0000000000
--- a/src/mscorlib/src/System/Text/GB18030Encoding.cs
+++ /dev/null
@@ -1,1365 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-// See the LICENSE file in the project root for more information.
-
-
-//
-// Ported to managed code from c_gb18030.c and related gb18030 dll files
-//
-//
-// Abstract:
-//
-// Managed implementation of GB18030-2000 (code page 54936) ported from implimentation in c_g18030.dll
-// This file contains functions to convert GB18030-2000 (code page 54936) into Unicode, and vice versa.
-//
-// Notes:
-// GB18030-2000 (aka GBK2K) is designed to be mostly compatible with GBK (codepage 936),
-// while supports the full range of Unicode code points (BMP + 16 supplementary planes).
-//
-// The structure for GB18030 is:
-// * Single byte:
-// 0x00 ~ 0x7f
-// * Two-byte:
-// 0x81 ~ 0xfe, 0x40 ~ 0x7e (leading byte, trailing byte)
-// 0x81 ~ 0xfe, 0x80 ~ 0xfe (leading byte, trailing byte)
-// * Four-byte:
-// 0x81 ~ 0xfe, 0x30 ~ 0x39, 0x81 ~ 0xfe, 0x30 ~ 0x39.
-// The surrogare pair will be encoded from 0x90, 0x30, 0x81, 0x30
-//
-// The BMP range is fully supported in GB18030 using 1-byte, 2-byte and 4-byte sequences.
-// In valid 4-byte GB18030, there are two gaps that can not be mapped to Unicode characters.
-// 0x84, 0x31, 0xa5, 0x30 (just after the GB18030 bytes for U+FFFF(*)) ~ 0x8f, 0x39, 0xfe, 0x39 (just before the first GB18030 bytes for U+D800,U+DC00)
-// 0xe3, 0x32, 0x9a, 0x36 (just after the GB18030 bytes for U+DBFF U+DFFF(**)) ~ 0xfe, 0x39, 0xfe, 0x39
-//
-//
-// Note1: U+FFFF = 0x84, 0x31, 0xa4, 0x39
-// Note2: U+DBFF U+DFFF = 0xe3, 0x32, 0x9a, 0x35
-//
-// Tables used in GB18030Encoding:
-//
-// Our data is similar to the 936 Code Page, so we start from there to build our tables. We build the
-// normal double byte mapUnicodeToBytes and mapBytesToUnicode tables by applying differences from 936.
-// We also build a map4BytesToUnicode table and a mapUnicodeTo4BytesFlags
-//
-// * mapUnicodeTo4BytesFlags
-// This is an array of bytes, so we have to do a / 8 and << %8 to check the appropriate bit (see Is4Byte())
-// If the bit is set its true.
-//
-// true - If set/true this is a 4 byte code. The value in mapUnicodeToBytes will be the 4 byte offset
-// false - If cleared/false this is a 1 or 2 byte code. The value in mapUnicodeToBytes will be the 2 bytes.
-//
-// * mapUnicodeToBytes
-// Contains either the 2 byte value of double byte GB18030 or the 4 byte offset for 4 byte GB18030,
-// depending on the value of the flag in mapUnicodeTo4BytesFlags
-//
-// * mapBytesToUnicode
-// mapBytesToUnicode maps 2 byte GB 18030 to Unicode like other DBCS code pages.
-//
-// * map4BytesToUnicode
-// map4BytesToUnicode is indexed by the 4 byte offset and contains the unicode value for each 4 byte offset
-//
-//
-// 4 Byte sequences
-// We generally use the offset for the 4 byte sequence, such as:
-//
-// The index value is the offset of the 4-byte GB18030.
-//
-// 4-byte GB18030 Index value
-// ============== ===========
-// 81,30,81,30 0
-// 81,30,81,31 1
-// 81,30,81,32 2
-// ... ...
-//
-// The value of map4BytesToUnicode cotains the Unicode codepoint for the offset of the
-// corresponding 4-byte GB18030.
-//
-// E.g. map4BytesToUnicode[0] = 0x0080. This means that GB18030 0x81, 0x30, 0x81, 0x30 will be converted to Unicode U+0800.
-//
-// 4 Byte Surrogate Sequences
-// Those work similarly to the normal 4 byte sequences, but start at a different offset
-//
-// We don't override IsAlwaysNormalized because GB18030 covers all of the unicode space, so isn't guaranteed to be normal.
-//
-#if FEATURE_CODEPAGES_FILE // requires BaseCodePageEncooding
-namespace System.Text
-{
- using System;
- using System.Diagnostics;
- using System.Diagnostics.Contracts;
- using System.Text;
- using System.Runtime.InteropServices;
- using System.Security;
- using System.Runtime.CompilerServices;
- using System.Runtime.Serialization;
- using System.Runtime.Versioning;
- using System.Security.Permissions;
- using System.Globalization;
-
- /*=================================GB18030Encoding============================
- **
- ** This is used to support GB18030-2000 encoding (code page 54936).
- **
- ==============================================================================*/
-
- [Serializable]
- internal sealed class GB18030Encoding : DBCSCodePageEncoding, ISerializable
- {
- // This is the table of 4 byte conversions.
- private const int GBLast4ByteCode = 0x99FB;
- [NonSerialized]
- unsafe internal char* map4BytesToUnicode = null; // new char[GBLast4ByteCode + 1]; // Need to map all 4 byte sequences to Unicode
- [NonSerialized]
- unsafe internal byte* mapUnicodeTo4BytesFlags = null; // new byte[0x10000 / 8]; // Need 1 bit for each code point to say if its 4 byte or not
-
- private const int GB18030 = 54936;
-
- // First and last character of surrogate range as offset from 4 byte GB18030 GB81308130
- private const int GBSurrogateOffset = 0x2E248; // GB90308130
- private const int GBLastSurrogateOffset = 0x12E247; // GBE3329A35
-
- // We have to load the 936 code page tables, so impersonate 936 as our base
- internal GB18030Encoding() : base(GB18030, 936)
- {
- }
-
- // Constructor called by serialization.
- internal GB18030Encoding(SerializationInfo info, StreamingContext context) :
- base(GB18030, 936)
- {
- // Set up our base, also throws if info was empty
- DeserializeEncoding(info, context);
- Debug.Assert(info!=null, "[GB18030Encoding(Serialization...)] Expected null info to throw");
-
- // Already build our code page, fallbacks & read only, so we're good to go!
- }
-
- // ISerializable implementation
- void ISerializable.GetObjectData(SerializationInfo info, StreamingContext context)
- {
- // Make sure to get the base stuff too This throws if info is null
- SerializeEncoding(info, context);
- Debug.Assert(info!=null, "[GB18030.GetObjectData] Expected null info to throw");
-
- // Everett doesn't need more than the basics
- }
-
- // This loads our base 936 code page and then applys the changes from the tableUnicodeToGBDiffs table.
- // See table comments for table format.
- protected override unsafe void LoadManagedCodePage()
- {
- // Use base code page loading algorithm.
- // We need to use our main CP as our flag.
- this.bFlagDataTable = false;
- this.iExtraBytes = (GBLast4ByteCode + 1) * 2 + 0x10000 / 8;
-
- // Load most of our code page
- base.LoadManagedCodePage();
-
- // Point to our new data sections
- byte *pMemorySection = (byte *) safeMemorySectionHandle.DangerousGetHandle();
- mapUnicodeTo4BytesFlags = pMemorySection + 65536 * 2 * 2;
- map4BytesToUnicode = (char*)(pMemorySection + 65536 * 2 * 2 + 0x10000 / 8);
-
- // Need to check our pointer to see if we're loaded, return if we're built already
- if (*mapCodePageCached == this.CodePage)
- return;
-
- // Once we've done our base LoadManagedCodePage, we'll have to add our fixes
- char unicodeCount = (char)0;
- ushort count4Byte = 0;
- for (int index = 0; index < tableUnicodeToGBDiffs.Length; index++)
- {
- ushort data = tableUnicodeToGBDiffs[index];
-
- // Check high bit
- if ((data & 0x8000) != 0)
- {
- // Make be exact value
- if (data > 0x9000 && data != 0xD1A6)
- {
- // It was an exact value (gb18040[data] = unicode)
- mapBytesToUnicode[data] = unicodeCount;
- mapUnicodeToBytes[unicodeCount] = data;
- unicodeCount++;
- }
- else
- {
- // It was a CP 936 compatible data, that table's already loaded, just increment our pointer
- unicodeCount += unchecked((char)(data & 0x7FFF));
- }
- }
- else
- {
- // It was GB 18030 4 byte data, next <data> characters are 4 byte sequences.
- while (data > 0)
- {
- Debug.Assert(count4Byte <= GBLast4ByteCode,
- "[GB18030Encoding.LoadManagedCodePage] Found too many 4 byte codes in data table.");
-
- // Set the 4 byte -> Unicode value
- map4BytesToUnicode[count4Byte] = unicodeCount;
- // Set the unicode -> 4 bytes value, including flag that its a 4 byte sequence
- mapUnicodeToBytes[unicodeCount] = count4Byte;
- // Set the flag saying its a 4 byte sequence
- mapUnicodeTo4BytesFlags[unicodeCount / 8] |= unchecked((byte)(1 << (unicodeCount % 8)));
- unicodeCount++;
- count4Byte++;
- data--;
- }
-
- }
- }
-
- // unicodeCount should've wrapped back to 0
- Debug.Assert(unicodeCount == 0,
- "[GB18030Encoding.LoadManagedCodePage] Expected unicodeCount to wrap around to 0 as all chars were processed");
-
- // We should've read in GBLast4ByteCode 4 byte sequences
- Debug.Assert(count4Byte == GBLast4ByteCode + 1,
- "[GB18030Encoding.LoadManagedCodePage] Expected 0x99FB to be last 4 byte offset, found 0x" + count4Byte.ToString("X4", CultureInfo.InvariantCulture));
-
- // Need to flag ourselves saying we've built this CP.
- *mapCodePageCached = this.CodePage;
- }
-
- internal override void SetDefaultFallbacks()
- {
- // For GB18030Encoding just use default replacement fallbacks because its only for bad surrogates
- this.encoderFallback = EncoderFallback.ReplacementFallback;
- this.decoderFallback = DecoderFallback.ReplacementFallback;
- }
-
- // Is4Byte
- // Checks the 4 byte table and returns true if this is a 4 byte code.
- // Its a 4 byte code if the flag is set in mapUnicodeTo4BytesFlags
- internal unsafe bool Is4Byte(char charTest)
- {
- // See what kind it is
- byte b4Byte = mapUnicodeTo4BytesFlags[charTest / 8];
- return (b4Byte != 0 && (b4Byte & (1 << (charTest % 8))) != 0);
- }
-
- // GetByteCount
- internal override unsafe int GetByteCount(char* chars, int count, EncoderNLS encoder)
- {
- // Just call GetBytes() with null bytes
- return GetBytes(chars, count, null, 0, encoder);
- }
-
- internal override unsafe int GetBytes(char* chars, int charCount,
- byte* bytes, int byteCount, EncoderNLS encoder)
- {
- // Just need to ASSERT, this is called by something else internal that checked parameters already
- // We'll allow null bytes as a count
-// Debug.Assert(bytes != null, "[GB18030Encoding.GetBytes]bytes is null");
- Debug.Assert(byteCount >= 0, "[GB18030Encoding.GetBytes]byteCount is negative");
- Debug.Assert(chars != null, "[GB18030Encoding.GetBytes]chars is null");
- Debug.Assert(charCount >= 0, "[GB18030Encoding.GetBytes]charCount is negative");
-
- // Assert because we shouldn't be able to have a null encoder.
- Debug.Assert(encoderFallback != null, "[GB18030Encoding.GetBytes]Attempting to use null encoder fallback");
-
- // Get any left over characters
- char charLeftOver = (char)0;
- if (encoder != null)
- charLeftOver = encoder.charLeftOver;
-
- // prepare our helpers
- Encoding.EncodingByteBuffer buffer = new Encoding.EncodingByteBuffer(
- this, encoder, bytes, byteCount, chars, charCount);
-
- // Try again if we were MustFlush
- TryAgain:
-
- // Go ahead and do it, including the fallback.
- while (buffer.MoreData)
- {
- // Get next char
- char ch = buffer.GetNextChar();
-
- // Have to check for charLeftOver
- if (charLeftOver != 0)
- {
- Debug.Assert(Char.IsHighSurrogate(charLeftOver),
- "[GB18030Encoding.GetBytes] leftover character should be high surrogate, not 0x" + ((int)charLeftOver).ToString("X4", CultureInfo.InvariantCulture));
-
- // If our next char isn't a low surrogate, then we need to do fallback.
- if (!Char.IsLowSurrogate(ch))
- {
- // No low surrogate, fallback high surrogate & try this one again
- buffer.MovePrevious(false); // (Ignoring this character, don't thow)
- if (!buffer.Fallback(charLeftOver))
- {
- charLeftOver = (char)0;
- break;
- }
- charLeftOver = (char)0;
- continue;
- }
- else
- {
- // Next is a surrogate, add it as surrogate pair
-
- // Need 4 bytes for surrogates
- // Get our offset
- int offset = ((charLeftOver - 0xd800) << 10) + (ch - 0xdc00);
-
- byte byte4 = (byte)((offset % 0x0a) + 0x30);
- offset /= 0x0a;
- byte byte3 = (byte)((offset % 0x7e) + 0x81);
- offset /= 0x7e;
- byte byte2 = (byte)((offset % 0x0a) + 0x30);
- offset /= 0x0a;
- Debug.Assert(offset < 0x6f,
- "[GB18030Encoding.GetBytes](1) Expected offset < 0x6f, not 0x" + offset.ToString("X2", CultureInfo.InvariantCulture));
-
- charLeftOver = (char)0;
- if (!buffer.AddByte((byte)(offset + 0x90),byte2,byte3,byte4))
- {
- // Didn't work, need to back up for both surrogates (AddByte already backed up one)
- buffer.MovePrevious(false); // (don't throw)
- break;
- }
- }
- charLeftOver = '\0';
- }
- // ASCII's easiest
- else if (ch <= 0x7f)
- {
- // Need a byte
- if (!buffer.AddByte((byte)ch))
- break;
- }
- // See if its a surrogate pair
- else if (Char.IsHighSurrogate(ch))
- {
- // Remember it for next time
- charLeftOver = ch;
- }
- else if (Char.IsLowSurrogate(ch))
- {
- // Low surrogates should've been found already
- if (!buffer.Fallback(ch))
- break;
- }
- else
- {
- // Not surrogate or ASCII, get value
- ushort iBytes = mapUnicodeToBytes[ch];
-
- // See what kind it is
- if (Is4Byte(ch))
- {
- //
- // This Unicode character will be converted to four-byte GB18030.
- //
- // Need 4 bytes
- byte byte4 = (byte)((iBytes % 0x0a) + 0x30);
- iBytes /= 0x0a;
- byte byte3 = (byte)((iBytes % 0x7e) + 0x81);
- iBytes /= 0x7e;
- byte byte2 = (byte)((iBytes % 0x0a) + 0x30);
- iBytes /= 0x0a;
- Debug.Assert(iBytes < 0x7e,
- "[GB18030Encoding.GetBytes]Expected iBytes < 0x7e, not 0x" + iBytes.ToString("X2", CultureInfo.InvariantCulture));
- if (!buffer.AddByte((byte)(iBytes + 0x81), byte2, byte3, byte4))
- break;
- }
- else
- {
- // Its 2 byte, use it
- if (!buffer.AddByte(unchecked((byte)(iBytes >> 8)), unchecked((byte)(iBytes & 0xff))))
- break;
- }
- }
- }
-
- // Do we need to flush our charLeftOver?
- if ((encoder == null || encoder.MustFlush) && (charLeftOver > 0))
- {
- // Fall it back
- buffer.Fallback(charLeftOver);
- charLeftOver = (char)0;
- goto TryAgain;
- }
-
- // Fallback stuck it in encoder if necessary, but we have to clear MustFlash cases
- // (Check bytes != null, don't clear it if we're just counting)
- if (encoder != null)
- {
- // Remember our charLeftOver
- if (bytes != null)
- encoder.charLeftOver = charLeftOver;
-
- encoder.m_charsUsed = buffer.CharsUsed;
- }
-
- // Return our length
- return buffer.Count;
- }
-
- // Helper methods
- internal bool IsGBLeadByte(short ch)
- {
- // return true if we're in the lead byte range
- return ((ch) >= 0x81 && (ch) <= 0xfe);
- }
-
- internal bool IsGBTwoByteTrailing(short ch)
- {
- // Return true if we are in range for the trailing byte of a 2 byte sequence
- return (((ch) >= 0x40 && (ch) <= 0x7e) ||
- ((ch) >= 0x80 && (ch) <= 0xfe));
- }
-
- internal bool IsGBFourByteTrailing(short ch)
- {
- // Return true if we are in range for the trailing byte of a 4 byte sequence
- return ((ch) >= 0x30 && (ch) <= 0x39);
- }
-
- internal int GetFourBytesOffset(short offset1, short offset2, short offset3, short offset4)
- {
- return ((offset1 - 0x81) * 0x0a * 0x7e * 0x0a +
- (offset2 - 0x30) * 0x7e * 0x0a +
- (offset3 - 0x81) * 0x0a +
- offset4 - 0x30);
- }
-
- // This is internal and called by something else,
- internal override unsafe int GetCharCount(byte* bytes, int count, DecoderNLS baseDecoder)
- {
- // Just call GetChars() with null chars to count
- return GetChars(bytes, count, null, 0, baseDecoder);
- }
-
- internal override unsafe int GetChars(byte* bytes, int byteCount,
- char* chars, int charCount, DecoderNLS baseDecoder)
- {
- // Just need to ASSERT, this is called by something else internal that checked parameters already
- // We'll allow null chars as a count
- Debug.Assert(bytes != null, "[GB18030Encoding.GetChars]bytes is null");
- Debug.Assert(byteCount >= 0, "[GB18030Encoding.GetChars]byteCount is negative");
-// Debug.Assert(chars != null, "[GB18030Encoding.GetChars]chars is null");
- Debug.Assert(charCount >= 0, "[GB18030Encoding.GetChars]charCount is negative");
-
- // Fix our decoder
- GB18030Decoder decoder = (GB18030Decoder)baseDecoder;
-
- // Get our info.
- Encoding.EncodingCharBuffer buffer = new Encoding.EncodingCharBuffer(
- this, decoder, chars, charCount, bytes, byteCount);
-
- // Need temp bytes because we can't muss up decoder
- short byte1 = -1;
- short byte2 = -1;
- short byte3 = -1;
- short byte4 = -1;
-
- // See if there was anything to get out of the decoder
- if (decoder != null && decoder.bLeftOver1 != -1)
- {
- // Need temp bytes because we can't muss up decoder
- byte1 = decoder.bLeftOver1;
- byte2 = decoder.bLeftOver2;
- byte3 = decoder.bLeftOver3;
- byte4 = decoder.bLeftOver4;
-
- // Loop because we might have too many in buffer
- // This could happen if we are working on a 4 byte sequence, but it isn't valid.
- while (byte1 != -1)
- {
- // If its not a lead byte, use ? or its value, then scoot them down & try again
- // This could happen if we previously had a bad 4 byte sequence and this is a trail byte
- if (!IsGBLeadByte(byte1))
- {
- // This is either a ? or ASCII, need 1 char output
- if (byte1 <= 0x7f)
- {
- if (!buffer.AddChar((char)byte1)) // Its ASCII
- break;
- }
- else
- {
- if (!buffer.Fallback((byte)byte1)) // Not a valid byte
- break;
- }
-
- byte1 = byte2;
- byte2 = byte3;
- byte3 = byte4;
- byte4 = -1;
- continue;
- }
-
- // Read in more bytes as needed
- while (byte2 == -1 ||
- (IsGBFourByteTrailing(byte2) && byte4 == -1))
- {
- // Do we have room?
- if (!buffer.MoreData)
- {
- // No input left to read, do we have to flush?
- if (!decoder.MustFlush)
- {
- // Don't stick stuff in decoder when counting
- if (chars != null)
- {
- // Don't have to flush, won't have any chars
- // Decoder is correct, just return
- decoder.bLeftOver1 = byte1;
- decoder.bLeftOver2 = byte2;
- decoder.bLeftOver3 = byte3;
- decoder.bLeftOver4 = byte4;
- }
-
- decoder.m_bytesUsed = buffer.BytesUsed;
- return buffer.Count;
- }
-
- // We'll have to flush, add a ? and scoot them down to try again
- // We could be trying for a 4 byte sequence but byte 3 could be ascii and should be spit out
- // Breaking will do this because we have zeros
- break;
- }
-
- // Read them in
- if (byte2 == -1) byte2 = buffer.GetNextByte();
- else if (byte3 == -1) byte3 = buffer.GetNextByte();
- else byte4 = buffer.GetNextByte();
- }
-
- // Now we have our 2 or 4 bytes
- if (IsGBTwoByteTrailing(byte2))
- {
- //
- // The trailing byte is a GB18030 two-byte sequence trailing byte.
- //
- int iTwoBytes = byte1 << 8;
- iTwoBytes |= unchecked((byte)byte2);
- if (!buffer.AddChar(this.mapBytesToUnicode[iTwoBytes], 2))
- break;
-
- // We're done with it
- byte1 = -1;
- byte2 = -1;
- }
- else if (IsGBFourByteTrailing(byte2) &&
- IsGBLeadByte(byte3) &&
- IsGBFourByteTrailing(byte4))
- {
- //
- // Four-byte GB18030
- //
-
- int sFourBytesOffset = GetFourBytesOffset(
- byte1, byte2, byte3, byte4);
-
- // What kind is it?
- if (sFourBytesOffset <= GBLast4ByteCode)
- {
- //
- // The Unicode will be in the BMP range.
- //
- if (!buffer.AddChar(map4BytesToUnicode[sFourBytesOffset], 4))
- break;
- }
- else if (sFourBytesOffset >= GBSurrogateOffset &&
- sFourBytesOffset <= GBLastSurrogateOffset)
- {
- //
- // This will be converted to a surrogate pair, need another char
- //
-
- // Use our surrogate
- sFourBytesOffset -= GBSurrogateOffset;
- if (!buffer.AddChar(unchecked((char)(0xd800 + (sFourBytesOffset / 0x400))),
- unchecked((char)(0xdc00 + (sFourBytesOffset % 0x400))), 4))
- break;
- }
- else
- {
- // Real GB18030 codepoint, but can't be mapped to unicode
- // We already checked our buffer space.
- // Do fallback here if we impliment decoderfallbacks.
- if (!buffer.Fallback((byte)byte1, (byte)byte2, (byte)byte3, (byte)byte4))
- break;
- }
-
- // We're done with this one
- byte1 = -1;
- byte2 = -1;
- byte3 = -1;
- byte4 = -1;
- }
- else
- {
- // Not a valid sequence, use '?' for 1st byte & scoot them all down 1
- if (!buffer.Fallback((byte)byte1))
- break;
-
- // Move all bytes down 1
- byte1 = byte2;
- byte2 = byte3;
- byte3 = byte4;
- byte4 = -1;
- }
- }
- }
-
- // Loop, just do '?' replacement because we don't have fallbacks for decodings.
- while (buffer.MoreData)
- {
- byte ch = buffer.GetNextByte();
-
- // ASCII case is easy
- if (ch <= 0x7f)
- {
- // ASCII, have room?
- if (!buffer.AddChar((char)ch))
- break; // No room in convert buffer, so stop
- }
- // See if its a lead byte
- else if (IsGBLeadByte(ch))
- {
- // ch is a lead byte, have room for more?
- if (buffer.MoreData)
- {
- byte ch2 = buffer.GetNextByte();
- if (IsGBTwoByteTrailing(ch2))
- {
- //
- // The trailing byte is a GB18030 two-byte sequence trailing byte.
- //
-
- //
- // Two-byte GB18030
- //
- int iTwoBytes = ch << 8;
- iTwoBytes |= ch2;
- if (!buffer.AddChar(this.mapBytesToUnicode[iTwoBytes], 2))
- break;
- }
- else if (IsGBFourByteTrailing(ch2))
- {
- // Do we have room for Four Byte Sequence? (already have 1 byte)
- if (buffer.EvenMoreData(2))
- {
- // Is it a valid 4 byte sequence?
- byte ch3 = buffer.GetNextByte();
- byte ch4 = buffer.GetNextByte();
- if (IsGBLeadByte(ch3) &&
- IsGBFourByteTrailing(ch4))
- {
- //
- // Four-byte GB18030
- //
- int sFourBytesOffset = GetFourBytesOffset(ch, ch2, ch3, ch4);
-
- // What kind is it?
- // We'll be at least 1 BMP char or a '?' char.
-
- if (sFourBytesOffset <= GBLast4ByteCode)
- {
- //
- // The Unicode will be in the BMP range.
- //
- if (!buffer.AddChar(map4BytesToUnicode[sFourBytesOffset],4))
- break;
- }
- else if (sFourBytesOffset >= GBSurrogateOffset &&
- sFourBytesOffset <= GBLastSurrogateOffset)
- {
- //
- // This will be converted to a surrogate pair, need another char
- //
-
- // Use our surrogate
- sFourBytesOffset -= GBSurrogateOffset;
- if (!buffer.AddChar(unchecked((char)(0xd800 + (sFourBytesOffset / 0x400))),
- unchecked((char)(0xdc00 + (sFourBytesOffset % 0x400))),4))
- break;
- }
- else
- {
- // Real GB18030 codepoint, but can't be mapped to unicode
- if (!buffer.Fallback(ch, ch2, ch3, ch4))
- break;
- }
- }
- else
- {
- // Not a valid 2 or 4 byte sequence, use '?' for ch and try other 3 again
- buffer.AdjustBytes(-3);
- if (!buffer.Fallback(ch))
- break;
- }
- }
- else
- {
- // No room for 4 bytes, have 2 already, may be one more
- // Lead byte but no place to stick it
- if (decoder != null && !decoder.MustFlush)
- {
- // (make sure not to set decoder if counting, so check chars)
- if (chars != null)
- {
- // We'll be able to stick the remainder in the decoder
- byte1 = ch;
- byte2 = ch2;
-
- if (buffer.MoreData)
- byte3 = buffer.GetNextByte();
- else
- byte3 = -1;
-
- byte4=-1;
- }
- break;
- }
-
- // Won't go in decoder, we'll use '?' for it.
- if (!buffer.Fallback(ch, ch2))
- break;
- }
- }
- else
- {
- // Unknown byte sequence, fall back lead byte and try 2nd one again
- buffer.AdjustBytes(-1);
- if (!buffer.Fallback(ch))
- break;
- }
- }
- else
- {
- // Lead byte but don't know about trail byte
- // (make sure not to set decoder if counting, so check bytes)
- if (decoder != null && !decoder.MustFlush)
- {
- // We'll be able to stick it in the decoder
- // (don't actually do it when counting though)
- if (chars != null)
- {
- byte1 = ch;
- byte2 = -1;
- byte3 = -1;
- byte4 = -1;
- }
- break;
- }
-
- if (!buffer.Fallback(ch))
- break;
- }
- }
- else
- {
- // Not ASCII and not a lead byte, we'll use '?' for it if we have room
- if (!buffer.Fallback(ch))
- break;
- }
- }
-
- // Need to flush the decoder if necessary
- // (make sure not to set decoder if counting, so check bytes)
- if (decoder != null)
- {
- if (chars != null)
- {
- decoder.bLeftOver1 = byte1;
- decoder.bLeftOver2 = byte2;
- decoder.bLeftOver3 = byte3;
- decoder.bLeftOver4 = byte4;
- }
- decoder.m_bytesUsed = buffer.BytesUsed;
- }
-
- // Return the # of characters we found
- return buffer.Count;
- }
-
- public override int GetMaxByteCount(int charCount)
- {
- if (charCount < 0)
- throw new ArgumentOutOfRangeException(nameof(charCount),
- Environment.GetResourceString("ArgumentOutOfRange_NeedNonNegNum"));
- Contract.EndContractBlock();
-
- // Characters would be # of characters + 1 in case high surrogate is ? * max fallback
- long byteCount = (long)charCount + 1;
-
- if (EncoderFallback.MaxCharCount > 1)
- byteCount *= EncoderFallback.MaxCharCount;
-
- // We could have 4 bytes for each char, no extra for surrogates because 18030 can do whole unicode range.
- byteCount *= 4;
-
- if (byteCount > 0x7fffffff)
- throw new ArgumentOutOfRangeException(nameof(charCount), Environment.GetResourceString("ArgumentOutOfRange_GetByteCountOverflow"));
-
- return (int)byteCount;
- }
-
- public override int GetMaxCharCount(int byteCount)
- {
- if (byteCount < 0)
- throw new ArgumentOutOfRangeException(nameof(byteCount),
- Environment.GetResourceString("ArgumentOutOfRange_NeedNonNegNum"));
- Contract.EndContractBlock();
-
- // Just return length, we could have a single char for each byte + whatever extra our decoder could do to us.
- // If decoder is messed up it could spit out 3 ?s.
- long charCount = ((long)byteCount) + 3;
-
- // Take fallback size into consideration
- if (DecoderFallback.MaxCharCount > 1)
- charCount *= DecoderFallback.MaxCharCount;
-
- if (charCount > 0x7fffffff)
- throw new ArgumentOutOfRangeException(nameof(byteCount), Environment.GetResourceString("ArgumentOutOfRange_GetCharCountOverflow"));
-
- return (int)charCount;
- }
-
- public override Decoder GetDecoder()
- {
- return new GB18030Decoder(this);
- }
-
- [Serializable]
- internal sealed class GB18030Decoder : System.Text.DecoderNLS, ISerializable
- {
- internal short bLeftOver1 = -1;
- internal short bLeftOver2 = -1;
- internal short bLeftOver3 = -1;
- internal short bLeftOver4 = -1;
-
- internal GB18030Decoder(EncodingNLS encoding) : base(encoding)
- {
- // DecoderNLS Calls reset
- }
-
- // Constructor called by serialization, have to handle deserializing from Everett
- internal GB18030Decoder(SerializationInfo info, StreamingContext context)
- {
- // Any info?
- if (info==null) throw new ArgumentNullException(nameof(info));
- Contract.EndContractBlock();
-
- try
- {
- //
- // Try Whidbey V2.0 Fields
- //
- this.m_encoding = (Encoding)info.GetValue("m_encoding", typeof(Encoding));
- this.m_fallback = (DecoderFallback)info.GetValue("m_fallback", typeof(DecoderFallback));
- this.bLeftOver1 = (short)info.GetValue("bLeftOver1", typeof(short));
- this.bLeftOver2 = (short)info.GetValue("bLeftOver2", typeof(short));
- this.bLeftOver3 = (short)info.GetValue("bLeftOver3", typeof(short));
- this.bLeftOver4 = (short)info.GetValue("bLeftOver4", typeof(short));
- }
- catch (SerializationException)
- {
- // Didn't have Whidbey stuff, try Everett (DecoderNLS already called Reset())
- this.m_encoding = new GB18030Encoding();
- }
- }
-
- // ISerializable implementation, get data for this object
- void ISerializable.GetObjectData(SerializationInfo info, StreamingContext context)
- {
- // Any info?
- if (info==null) throw new ArgumentNullException(nameof(info));
- Contract.EndContractBlock();
-
- // Save Whidbey data
- // Just need Everett maxCharSize (BaseCodePageEncoding) or m_maxByteSize (MLangBaseCodePageEncoding)
- info.AddValue("m_encoding", this.m_encoding);
- info.AddValue("m_fallback", this.m_fallback);
- info.AddValue("bLeftOver1", this.bLeftOver1);
- info.AddValue("bLeftOver2", this.bLeftOver2);
- info.AddValue("bLeftOver3", this.bLeftOver3);
- info.AddValue("bLeftOver4", this.bLeftOver4);
-
- // Everett needs different data (this is just empty for Everett)
- info.AddValue("m_leftOverBytes", (int)0);
- info.AddValue("leftOver", new byte[8]);
- }
-
- public override void Reset()
- {
- bLeftOver1 = -1;
- bLeftOver2 = -1;
- bLeftOver3 = -1;
- bLeftOver4 = -1;
- if (m_fallbackBuffer != null)
- m_fallbackBuffer.Reset();
- }
-
- // Anything left in our decoder?
- internal override bool HasState
- {
- get
- {
- return (this.bLeftOver1 >= 0);
- }
- }
- }
-
- // tableUnicodeToGBDiffs
- //
- // This compressed data enumerates the differences between gb18030 and the 936 code page as follows:
- //
- // <count> & 0x8000 == 0x8000 The next count <count> characters are identical to 936 characters.
- // <count> & 0x8000 == 0x0000 The next count <count> characters are 4 byte gb18030 characters.
- // Except for:
- // <count> >= 0x9000 && <count> != 0xD1A6 This character is this 2 byte GB18030 value.
- //
- readonly ushort[] tableUnicodeToGBDiffs =
- {
- 0x8080, // U+0000 - U+007F ( 128 chars) use CP 936 conversion.
- 0x0024, // U+0080 - U+00A3 ( 36 chars) are GB18030 81 30 81 30 - 81 30 84 35 (offset 0000 - 0023)
- 0x8001, // U+00A4 - U+00A4 ( 1 chars) use CP 936 conversion.
- 0x0002, // U+00A5 - U+00A6 ( 2 chars) are GB18030 81 30 84 36 - 81 30 84 37 (offset 0024 - 0025)
- 0x8002, // U+00A7 - U+00A8 ( 2 chars) use CP 936 conversion.
- 0x0007, // U+00A9 - U+00AF ( 7 chars) are GB18030 81 30 84 38 - 81 30 85 34 (offset 0026 - 002C)
- 0x8002, // U+00B0 - U+00B1 ( 2 chars) use CP 936 conversion.
- 0x0005, // U+00B2 - U+00B6 ( 5 chars) are GB18030 81 30 85 35 - 81 30 85 39 (offset 002D - 0031)
- 0x8001, // U+00B7 - U+00B7 ( 1 chars) use CP 936 conversion.
- 0x001F, // U+00B8 - U+00D6 ( 31 chars) are GB18030 81 30 86 30 - 81 30 89 30 (offset 0032 - 0050)
- 0x8001, // U+00D7 - U+00D7 ( 1 chars) use CP 936 conversion.
- 0x0008, // U+00D8 - U+00DF ( 8 chars) are GB18030 81 30 89 31 - 81 30 89 38 (offset 0051 - 0058)
- 0x8002, // U+00E0 - U+00E1 ( 2 chars) use CP 936 conversion.
- 0x0006, // U+00E2 - U+00E7 ( 6 chars) are GB18030 81 30 89 39 - 81 30 8A 34 (offset 0059 - 005E)
- 0x8003, // U+00E8 - U+00EA ( 3 chars) use CP 936 conversion.
- 0x0001, // U+00EB - U+00EB ( 1 chars) are GB18030 81 30 8A 35 - 81 30 8A 35 (offset 005F - 005F)
- 0x8002, // U+00EC - U+00ED ( 2 chars) use CP 936 conversion.
- 0x0004, // U+00EE - U+00F1 ( 4 chars) are GB18030 81 30 8A 36 - 81 30 8A 39 (offset 0060 - 0063)
- 0x8002, // U+00F2 - U+00F3 ( 2 chars) use CP 936 conversion.
- 0x0003, // U+00F4 - U+00F6 ( 3 chars) are GB18030 81 30 8B 30 - 81 30 8B 32 (offset 0064 - 0066)
- 0x8001, // U+00F7 - U+00F7 ( 1 chars) use CP 936 conversion.
- 0x0001, // U+00F8 - U+00F8 ( 1 chars) are GB18030 81 30 8B 33 - 81 30 8B 33 (offset 0067 - 0067)
- 0x8002, // U+00F9 - U+00FA ( 2 chars) use CP 936 conversion.
- 0x0001, // U+00FB - U+00FB ( 1 chars) are GB18030 81 30 8B 34 - 81 30 8B 34 (offset 0068 - 0068)
- 0x8001, // U+00FC - U+00FC ( 1 chars) use CP 936 conversion.
- 0x0004, // U+00FD - U+0100 ( 4 chars) are GB18030 81 30 8B 35 - 81 30 8B 38 (offset 0069 - 006C)
- 0x8001, // U+0101 - U+0101 ( 1 chars) use CP 936 conversion.
- 0x0011, // U+0102 - U+0112 ( 17 chars) are GB18030 81 30 8B 39 - 81 30 8D 35 (offset 006D - 007D)
- 0x8001, // U+0113 - U+0113 ( 1 chars) use CP 936 conversion.
- 0x0007, // U+0114 - U+011A ( 7 chars) are GB18030 81 30 8D 36 - 81 30 8E 32 (offset 007E - 0084)
- 0x8001, // U+011B - U+011B ( 1 chars) use CP 936 conversion.
- 0x000F, // U+011C - U+012A ( 15 chars) are GB18030 81 30 8E 33 - 81 30 8F 37 (offset 0085 - 0093)
- 0x8001, // U+012B - U+012B ( 1 chars) use CP 936 conversion.
- 0x0018, // U+012C - U+0143 ( 24 chars) are GB18030 81 30 8F 38 - 81 30 92 31 (offset 0094 - 00AB)
- 0x8001, // U+0144 - U+0144 ( 1 chars) use CP 936 conversion.
- 0x0003, // U+0145 - U+0147 ( 3 chars) are GB18030 81 30 92 32 - 81 30 92 34 (offset 00AC - 00AE)
- 0x8001, // U+0148 - U+0148 ( 1 chars) use CP 936 conversion.
- 0x0004, // U+0149 - U+014C ( 4 chars) are GB18030 81 30 92 35 - 81 30 92 38 (offset 00AF - 00B2)
- 0x8001, // U+014D - U+014D ( 1 chars) use CP 936 conversion.
- 0x001D, // U+014E - U+016A ( 29 chars) are GB18030 81 30 92 39 - 81 30 95 37 (offset 00B3 - 00CF)
- 0x8001, // U+016B - U+016B ( 1 chars) use CP 936 conversion.
- 0x0062, // U+016C - U+01CD ( 98 chars) are GB18030 81 30 95 38 - 81 30 9F 35 (offset 00D0 - 0131)
- 0x8001, // U+01CE - U+01CE ( 1 chars) use CP 936 conversion.
- 0x0001, // U+01CF - U+01CF ( 1 chars) are GB18030 81 30 9F 36 - 81 30 9F 36 (offset 0132 - 0132)
- 0x8001, // U+01D0 - U+01D0 ( 1 chars) use CP 936 conversion.
- 0x0001, // U+01D1 - U+01D1 ( 1 chars) are GB18030 81 30 9F 37 - 81 30 9F 37 (offset 0133 - 0133)
- 0x8001, // U+01D2 - U+01D2 ( 1 chars) use CP 936 conversion.
- 0x0001, // U+01D3 - U+01D3 ( 1 chars) are GB18030 81 30 9F 38 - 81 30 9F 38 (offset 0134 - 0134)
- 0x8001, // U+01D4 - U+01D4 ( 1 chars) use CP 936 conversion.
- 0x0001, // U+01D5 - U+01D5 ( 1 chars) are GB18030 81 30 9F 39 - 81 30 9F 39 (offset 0135 - 0135)
- 0x8001, // U+01D6 - U+01D6 ( 1 chars) use CP 936 conversion.
- 0x0001, // U+01D7 - U+01D7 ( 1 chars) are GB18030 81 30 A0 30 - 81 30 A0 30 (offset 0136 - 0136)
- 0x8001, // U+01D8 - U+01D8 ( 1 chars) use CP 936 conversion.
- 0x0001, // U+01D9 - U+01D9 ( 1 chars) are GB18030 81 30 A0 31 - 81 30 A0 31 (offset 0137 - 0137)
- 0x8001, // U+01DA - U+01DA ( 1 chars) use CP 936 conversion.
- 0x0001, // U+01DB - U+01DB ( 1 chars) are GB18030 81 30 A0 32 - 81 30 A0 32 (offset 0138 - 0138)
- 0x8001, // U+01DC - U+01DC ( 1 chars) use CP 936 conversion.
- 0x001C, // U+01DD - U+01F8 ( 28 chars) are GB18030 81 30 A0 33 - 81 30 A3 30 (offset 0139 - 0154)
- 0xA8BF, // U+01F9 is non-936 GB18030 value A8 BF.
- 0x0057, // U+01FA - U+0250 ( 87 chars) are GB18030 81 30 A3 31 - 81 30 AB 37 (offset 0155 - 01AB)
- 0x8001, // U+0251 - U+0251 ( 1 chars) use CP 936 conversion.
- 0x000F, // U+0252 - U+0260 ( 15 chars) are GB18030 81 30 AB 38 - 81 30 AD 32 (offset 01AC - 01BA)
- 0x8001, // U+0261 - U+0261 ( 1 chars) use CP 936 conversion.
- 0x0065, // U+0262 - U+02C6 ( 101 chars) are GB18030 81 30 AD 33 - 81 30 B7 33 (offset 01BB - 021F)
- 0x8001, // U+02C7 - U+02C7 ( 1 chars) use CP 936 conversion.
- 0x0001, // U+02C8 - U+02C8 ( 1 chars) are GB18030 81 30 B7 34 - 81 30 B7 34 (offset 0220 - 0220)
- 0x8003, // U+02C9 - U+02CB ( 3 chars) use CP 936 conversion.
- 0x000D, // U+02CC - U+02D8 ( 13 chars) are GB18030 81 30 B7 35 - 81 30 B8 37 (offset 0221 - 022D)
- 0x8001, // U+02D9 - U+02D9 ( 1 chars) use CP 936 conversion.
- 0x00B7, // U+02DA - U+0390 ( 183 chars) are GB18030 81 30 B8 38 - 81 30 CB 30 (offset 022E - 02E4)
- 0x8011, // U+0391 - U+03A1 ( 17 chars) use CP 936 conversion.
- 0x0001, // U+03A2 - U+03A2 ( 1 chars) are GB18030 81 30 CB 31 - 81 30 CB 31 (offset 02E5 - 02E5)
- 0x8007, // U+03A3 - U+03A9 ( 7 chars) use CP 936 conversion.
- 0x0007, // U+03AA - U+03B0 ( 7 chars) are GB18030 81 30 CB 32 - 81 30 CB 38 (offset 02E6 - 02EC)
- 0x8011, // U+03B1 - U+03C1 ( 17 chars) use CP 936 conversion.
- 0x0001, // U+03C2 - U+03C2 ( 1 chars) are GB18030 81 30 CB 39 - 81 30 CB 39 (offset 02ED - 02ED)
- 0x8007, // U+03C3 - U+03C9 ( 7 chars) use CP 936 conversion.
- 0x0037, // U+03CA - U+0400 ( 55 chars) are GB18030 81 30 CC 30 - 81 30 D1 34 (offset 02EE - 0324)
- 0x8001, // U+0401 - U+0401 ( 1 chars) use CP 936 conversion.
- 0x000E, // U+0402 - U+040F ( 14 chars) are GB18030 81 30 D1 35 - 81 30 D2 38 (offset 0325 - 0332)
- 0x8040, // U+0410 - U+044F ( 64 chars) use CP 936 conversion.
- 0x0001, // U+0450 - U+0450 ( 1 chars) are GB18030 81 30 D2 39 - 81 30 D2 39 (offset 0333 - 0333)
- 0x8001, // U+0451 - U+0451 ( 1 chars) use CP 936 conversion.
- 0x1BBE, // U+0452 - U+200F ( 7102 chars) are GB18030 81 30 D3 30 - 81 36 A5 31 (offset 0334 - 1EF1)
- 0x8001, // U+2010 - U+2010 ( 1 chars) use CP 936 conversion.
- 0x0002, // U+2011 - U+2012 ( 2 chars) are GB18030 81 36 A5 32 - 81 36 A5 33 (offset 1EF2 - 1EF3)
- 0x8004, // U+2013 - U+2016 ( 4 chars) use CP 936 conversion.
- 0x0001, // U+2017 - U+2017 ( 1 chars) are GB18030 81 36 A5 34 - 81 36 A5 34 (offset 1EF4 - 1EF4)
- 0x8002, // U+2018 - U+2019 ( 2 chars) use CP 936 conversion.
- 0x0002, // U+201A - U+201B ( 2 chars) are GB18030 81 36 A5 35 - 81 36 A5 36 (offset 1EF5 - 1EF6)
- 0x8002, // U+201C - U+201D ( 2 chars) use CP 936 conversion.
- 0x0007, // U+201E - U+2024 ( 7 chars) are GB18030 81 36 A5 37 - 81 36 A6 33 (offset 1EF7 - 1EFD)
- 0x8002, // U+2025 - U+2026 ( 2 chars) use CP 936 conversion.
- 0x0009, // U+2027 - U+202F ( 9 chars) are GB18030 81 36 A6 34 - 81 36 A7 32 (offset 1EFE - 1F06)
- 0x8001, // U+2030 - U+2030 ( 1 chars) use CP 936 conversion.
- 0x0001, // U+2031 - U+2031 ( 1 chars) are GB18030 81 36 A7 33 - 81 36 A7 33 (offset 1F07 - 1F07)
- 0x8002, // U+2032 - U+2033 ( 2 chars) use CP 936 conversion.
- 0x0001, // U+2034 - U+2034 ( 1 chars) are GB18030 81 36 A7 34 - 81 36 A7 34 (offset 1F08 - 1F08)
- 0x8001, // U+2035 - U+2035 ( 1 chars) use CP 936 conversion.
- 0x0005, // U+2036 - U+203A ( 5 chars) are GB18030 81 36 A7 35 - 81 36 A7 39 (offset 1F09 - 1F0D)
- 0x8001, // U+203B - U+203B ( 1 chars) use CP 936 conversion.
- 0x0070, // U+203C - U+20AB ( 112 chars) are GB18030 81 36 A8 30 - 81 36 B3 31 (offset 1F0E - 1F7D)
- 0xA2E3, // U+20AC is non-936 GB18030 value A2 E3.
- 0x0056, // U+20AD - U+2102 ( 86 chars) are GB18030 81 36 B3 32 - 81 36 BB 37 (offset 1F7E - 1FD3)
- 0x8001, // U+2103 - U+2103 ( 1 chars) use CP 936 conversion.
- 0x0001, // U+2104 - U+2104 ( 1 chars) are GB18030 81 36 BB 38 - 81 36 BB 38 (offset 1FD4 - 1FD4)
- 0x8001, // U+2105 - U+2105 ( 1 chars) use CP 936 conversion.
- 0x0003, // U+2106 - U+2108 ( 3 chars) are GB18030 81 36 BB 39 - 81 36 BC 31 (offset 1FD5 - 1FD7)
- 0x8001, // U+2109 - U+2109 ( 1 chars) use CP 936 conversion.
- 0x000C, // U+210A - U+2115 ( 12 chars) are GB18030 81 36 BC 32 - 81 36 BD 33 (offset 1FD8 - 1FE3)
- 0x8001, // U+2116 - U+2116 ( 1 chars) use CP 936 conversion.
- 0x000A, // U+2117 - U+2120 ( 10 chars) are GB18030 81 36 BD 34 - 81 36 BE 33 (offset 1FE4 - 1FED)
- 0x8001, // U+2121 - U+2121 ( 1 chars) use CP 936 conversion.
- 0x003E, // U+2122 - U+215F ( 62 chars) are GB18030 81 36 BE 34 - 81 36 C4 35 (offset 1FEE - 202B)
- 0x800C, // U+2160 - U+216B ( 12 chars) use CP 936 conversion.
- 0x0004, // U+216C - U+216F ( 4 chars) are GB18030 81 36 C4 36 - 81 36 C4 39 (offset 202C - 202F)
- 0x800A, // U+2170 - U+2179 ( 10 chars) use CP 936 conversion.
- 0x0016, // U+217A - U+218F ( 22 chars) are GB18030 81 36 C5 30 - 81 36 C7 31 (offset 2030 - 2045)
- 0x8004, // U+2190 - U+2193 ( 4 chars) use CP 936 conversion.
- 0x0002, // U+2194 - U+2195 ( 2 chars) are GB18030 81 36 C7 32 - 81 36 C7 33 (offset 2046 - 2047)
- 0x8004, // U+2196 - U+2199 ( 4 chars) use CP 936 conversion.
- 0x006E, // U+219A - U+2207 ( 110 chars) are GB18030 81 36 C7 34 - 81 36 D2 33 (offset 2048 - 20B5)
- 0x8001, // U+2208 - U+2208 ( 1 chars) use CP 936 conversion.
- 0x0006, // U+2209 - U+220E ( 6 chars) are GB18030 81 36 D2 34 - 81 36 D2 39 (offset 20B6 - 20BB)
- 0x8001, // U+220F - U+220F ( 1 chars) use CP 936 conversion.
- 0x0001, // U+2210 - U+2210 ( 1 chars) are GB18030 81 36 D3 30 - 81 36 D3 30 (offset 20BC - 20BC)
- 0x8001, // U+2211 - U+2211 ( 1 chars) use CP 936 conversion.
- 0x0003, // U+2212 - U+2214 ( 3 chars) are GB18030 81 36 D3 31 - 81 36 D3 33 (offset 20BD - 20BF)
- 0x8001, // U+2215 - U+2215 ( 1 chars) use CP 936 conversion.
- 0x0004, // U+2216 - U+2219 ( 4 chars) are GB18030 81 36 D3 34 - 81 36 D3 37 (offset 20C0 - 20C3)
- 0x8001, // U+221A - U+221A ( 1 chars) use CP 936 conversion.
- 0x0002, // U+221B - U+221C ( 2 chars) are GB18030 81 36 D3 38 - 81 36 D3 39 (offset 20C4 - 20C5)
- 0x8004, // U+221D - U+2220 ( 4 chars) use CP 936 conversion.
- 0x0002, // U+2221 - U+2222 ( 2 chars) are GB18030 81 36 D4 30 - 81 36 D4 31 (offset 20C6 - 20C7)
- 0x8001, // U+2223 - U+2223 ( 1 chars) use CP 936 conversion.
- 0x0001, // U+2224 - U+2224 ( 1 chars) are GB18030 81 36 D4 32 - 81 36 D4 32 (offset 20C8 - 20C8)
- 0x8001, // U+2225 - U+2225 ( 1 chars) use CP 936 conversion.
- 0x0001, // U+2226 - U+2226 ( 1 chars) are GB18030 81 36 D4 33 - 81 36 D4 33 (offset 20C9 - 20C9)
- 0x8005, // U+2227 - U+222B ( 5 chars) use CP 936 conversion.
- 0x0002, // U+222C - U+222D ( 2 chars) are GB18030 81 36 D4 34 - 81 36 D4 35 (offset 20CA - 20CB)
- 0x8001, // U+222E - U+222E ( 1 chars) use CP 936 conversion.
- 0x0005, // U+222F - U+2233 ( 5 chars) are GB18030 81 36 D4 36 - 81 36 D5 30 (offset 20CC - 20D0)
- 0x8004, // U+2234 - U+2237 ( 4 chars) use CP 936 conversion.
- 0x0005, // U+2238 - U+223C ( 5 chars) are GB18030 81 36 D5 31 - 81 36 D5 35 (offset 20D1 - 20D5)
- 0x8001, // U+223D - U+223D ( 1 chars) use CP 936 conversion.
- 0x000A, // U+223E - U+2247 ( 10 chars) are GB18030 81 36 D5 36 - 81 36 D6 35 (offset 20D6 - 20DF)
- 0x8001, // U+2248 - U+2248 ( 1 chars) use CP 936 conversion.
- 0x0003, // U+2249 - U+224B ( 3 chars) are GB18030 81 36 D6 36 - 81 36 D6 38 (offset 20E0 - 20E2)
- 0x8001, // U+224C - U+224C ( 1 chars) use CP 936 conversion.
- 0x0005, // U+224D - U+2251 ( 5 chars) are GB18030 81 36 D6 39 - 81 36 D7 33 (offset 20E3 - 20E7)
- 0x8001, // U+2252 - U+2252 ( 1 chars) use CP 936 conversion.
- 0x000D, // U+2253 - U+225F ( 13 chars) are GB18030 81 36 D7 34 - 81 36 D8 36 (offset 20E8 - 20F4)
- 0x8002, // U+2260 - U+2261 ( 2 chars) use CP 936 conversion.
- 0x0002, // U+2262 - U+2263 ( 2 chars) are GB18030 81 36 D8 37 - 81 36 D8 38 (offset 20F5 - 20F6)
- 0x8004, // U+2264 - U+2267 ( 4 chars) use CP 936 conversion.
- 0x0006, // U+2268 - U+226D ( 6 chars) are GB18030 81 36 D8 39 - 81 36 D9 34 (offset 20F7 - 20FC)
- 0x8002, // U+226E - U+226F ( 2 chars) use CP 936 conversion.
- 0x0025, // U+2270 - U+2294 ( 37 chars) are GB18030 81 36 D9 35 - 81 36 DD 31 (offset 20FD - 2121)
- 0x8001, // U+2295 - U+2295 ( 1 chars) use CP 936 conversion.
- 0x0003, // U+2296 - U+2298 ( 3 chars) are GB18030 81 36 DD 32 - 81 36 DD 34 (offset 2122 - 2124)
- 0x8001, // U+2299 - U+2299 ( 1 chars) use CP 936 conversion.
- 0x000B, // U+229A - U+22A4 ( 11 chars) are GB18030 81 36 DD 35 - 81 36 DE 35 (offset 2125 - 212F)
- 0x8001, // U+22A5 - U+22A5 ( 1 chars) use CP 936 conversion.
- 0x0019, // U+22A6 - U+22BE ( 25 chars) are GB18030 81 36 DE 36 - 81 36 E1 30 (offset 2130 - 2148)
- 0x8001, // U+22BF - U+22BF ( 1 chars) use CP 936 conversion.
- 0x0052, // U+22C0 - U+2311 ( 82 chars) are GB18030 81 36 E1 31 - 81 36 E9 32 (offset 2149 - 219A)
- 0x8001, // U+2312 - U+2312 ( 1 chars) use CP 936 conversion.
- 0x014D, // U+2313 - U+245F ( 333 chars) are GB18030 81 36 E9 33 - 81 37 8C 35 (offset 219B - 22E7)
- 0x800A, // U+2460 - U+2469 ( 10 chars) use CP 936 conversion.
- 0x000A, // U+246A - U+2473 ( 10 chars) are GB18030 81 37 8C 36 - 81 37 8D 35 (offset 22E8 - 22F1)
- 0x8028, // U+2474 - U+249B ( 40 chars) use CP 936 conversion.
- 0x0064, // U+249C - U+24FF ( 100 chars) are GB18030 81 37 8D 36 - 81 37 97 35 (offset 22F2 - 2355)
- 0x804C, // U+2500 - U+254B ( 76 chars) use CP 936 conversion.
- 0x0004, // U+254C - U+254F ( 4 chars) are GB18030 81 37 97 36 - 81 37 97 39 (offset 2356 - 2359)
- 0x8024, // U+2550 - U+2573 ( 36 chars) use CP 936 conversion.
- 0x000D, // U+2574 - U+2580 ( 13 chars) are GB18030 81 37 98 30 - 81 37 99 32 (offset 235A - 2366)
- 0x800F, // U+2581 - U+258F ( 15 chars) use CP 936 conversion.
- 0x0003, // U+2590 - U+2592 ( 3 chars) are GB18030 81 37 99 33 - 81 37 99 35 (offset 2367 - 2369)
- 0x8003, // U+2593 - U+2595 ( 3 chars) use CP 936 conversion.
- 0x000A, // U+2596 - U+259F ( 10 chars) are GB18030 81 37 99 36 - 81 37 9A 35 (offset 236A - 2373)
- 0x8002, // U+25A0 - U+25A1 ( 2 chars) use CP 936 conversion.
- 0x0010, // U+25A2 - U+25B1 ( 16 chars) are GB18030 81 37 9A 36 - 81 37 9C 31 (offset 2374 - 2383)
- 0x8002, // U+25B2 - U+25B3 ( 2 chars) use CP 936 conversion.
- 0x0008, // U+25B4 - U+25BB ( 8 chars) are GB18030 81 37 9C 32 - 81 37 9C 39 (offset 2384 - 238B)
- 0x8002, // U+25BC - U+25BD ( 2 chars) use CP 936 conversion.
- 0x0008, // U+25BE - U+25C5 ( 8 chars) are GB18030 81 37 9D 30 - 81 37 9D 37 (offset 238C - 2393)
- 0x8002, // U+25C6 - U+25C7 ( 2 chars) use CP 936 conversion.
- 0x0003, // U+25C8 - U+25CA ( 3 chars) are GB18030 81 37 9D 38 - 81 37 9E 30 (offset 2394 - 2396)
- 0x8001, // U+25CB - U+25CB ( 1 chars) use CP 936 conversion.
- 0x0002, // U+25CC - U+25CD ( 2 chars) are GB18030 81 37 9E 31 - 81 37 9E 32 (offset 2397 - 2398)
- 0x8002, // U+25CE - U+25CF ( 2 chars) use CP 936 conversion.
- 0x0012, // U+25D0 - U+25E1 ( 18 chars) are GB18030 81 37 9E 33 - 81 37 A0 30 (offset 2399 - 23AA)
- 0x8004, // U+25E2 - U+25E5 ( 4 chars) use CP 936 conversion.
- 0x001F, // U+25E6 - U+2604 ( 31 chars) are GB18030 81 37 A0 31 - 81 37 A3 31 (offset 23AB - 23C9)
- 0x8002, // U+2605 - U+2606 ( 2 chars) use CP 936 conversion.
- 0x0002, // U+2607 - U+2608 ( 2 chars) are GB18030 81 37 A3 32 - 81 37 A3 33 (offset 23CA - 23CB)
- 0x8001, // U+2609 - U+2609 ( 1 chars) use CP 936 conversion.
- 0x0036, // U+260A - U+263F ( 54 chars) are GB18030 81 37 A3 34 - 81 37 A8 37 (offset 23CC - 2401)
- 0x8001, // U+2640 - U+2640 ( 1 chars) use CP 936 conversion.
- 0x0001, // U+2641 - U+2641 ( 1 chars) are GB18030 81 37 A8 38 - 81 37 A8 38 (offset 2402 - 2402)
- 0x8001, // U+2642 - U+2642 ( 1 chars) use CP 936 conversion.
- 0x083E, // U+2643 - U+2E80 ( 2110 chars) are GB18030 81 37 A8 39 - 81 38 FD 38 (offset 2403 - 2C40)
- 0xFE50, // U+2E81 is non-936 GB18030 value FE 50.
- 0x0002, // U+2E82 - U+2E83 ( 2 chars) are GB18030 81 38 FD 39 - 81 38 FE 30 (offset 2C41 - 2C42)
- 0xFE54, // U+2E84 is non-936 GB18030 value FE 54.
- 0x0003, // U+2E85 - U+2E87 ( 3 chars) are GB18030 81 38 FE 31 - 81 38 FE 33 (offset 2C43 - 2C45)
- 0xFE57, // U+2E88 is non-936 GB18030 value FE 57.
- 0x0002, // U+2E89 - U+2E8A ( 2 chars) are GB18030 81 38 FE 34 - 81 38 FE 35 (offset 2C46 - 2C47)
- 0xFE58, // U+2E8B is non-936 GB18030 value FE 58.
- 0xFE5D, // U+2E8C is non-936 GB18030 value FE 5D.
- 0x000A, // U+2E8D - U+2E96 ( 10 chars) are GB18030 81 38 FE 36 - 81 39 81 35 (offset 2C48 - 2C51)
- 0xFE5E, // U+2E97 is non-936 GB18030 value FE 5E.
- 0x000F, // U+2E98 - U+2EA6 ( 15 chars) are GB18030 81 39 81 36 - 81 39 83 30 (offset 2C52 - 2C60)
- 0xFE6B, // U+2EA7 is non-936 GB18030 value FE 6B.
- 0x0002, // U+2EA8 - U+2EA9 ( 2 chars) are GB18030 81 39 83 31 - 81 39 83 32 (offset 2C61 - 2C62)
- 0xFE6E, // U+2EAA is non-936 GB18030 value FE 6E.
- 0x0003, // U+2EAB - U+2EAD ( 3 chars) are GB18030 81 39 83 33 - 81 39 83 35 (offset 2C63 - 2C65)
- 0xFE71, // U+2EAE is non-936 GB18030 value FE 71.
- 0x0004, // U+2EAF - U+2EB2 ( 4 chars) are GB18030 81 39 83 36 - 81 39 83 39 (offset 2C66 - 2C69)
- 0xFE73, // U+2EB3 is non-936 GB18030 value FE 73.
- 0x0002, // U+2EB4 - U+2EB5 ( 2 chars) are GB18030 81 39 84 30 - 81 39 84 31 (offset 2C6A - 2C6B)
- 0xFE74, // U+2EB6 is non-936 GB18030 value FE 74.
- 0xFE75, // U+2EB7 is non-936 GB18030 value FE 75.
- 0x0003, // U+2EB8 - U+2EBA ( 3 chars) are GB18030 81 39 84 32 - 81 39 84 34 (offset 2C6C - 2C6E)
- 0xFE79, // U+2EBB is non-936 GB18030 value FE 79.
- 0x000E, // U+2EBC - U+2EC9 ( 14 chars) are GB18030 81 39 84 35 - 81 39 85 38 (offset 2C6F - 2C7C)
- 0xFE84, // U+2ECA is non-936 GB18030 value FE 84.
- 0x0125, // U+2ECB - U+2FEF ( 293 chars) are GB18030 81 39 85 39 - 81 39 A3 31 (offset 2C7D - 2DA1)
- 0xA98A, // U+2FF0 is non-936 GB18030 value A9 8A.
- 0xA98B, // U+2FF1 is non-936 GB18030 value A9 8B.
- 0xA98C, // U+2FF2 is non-936 GB18030 value A9 8C.
- 0xA98D, // U+2FF3 is non-936 GB18030 value A9 8D.
- 0xA98E, // U+2FF4 is non-936 GB18030 value A9 8E.
- 0xA98F, // U+2FF5 is non-936 GB18030 value A9 8F.
- 0xA990, // U+2FF6 is non-936 GB18030 value A9 90.
- 0xA991, // U+2FF7 is non-936 GB18030 value A9 91.
- 0xA992, // U+2FF8 is non-936 GB18030 value A9 92.
- 0xA993, // U+2FF9 is non-936 GB18030 value A9 93.
- 0xA994, // U+2FFA is non-936 GB18030 value A9 94.
- 0xA995, // U+2FFB is non-936 GB18030 value A9 95.
- 0x0004, // U+2FFC - U+2FFF ( 4 chars) are GB18030 81 39 A3 32 - 81 39 A3 35 (offset 2DA2 - 2DA5)
- 0x8004, // U+3000 - U+3003 ( 4 chars) use CP 936 conversion.
- 0x0001, // U+3004 - U+3004 ( 1 chars) are GB18030 81 39 A3 36 - 81 39 A3 36 (offset 2DA6 - 2DA6)
- 0x8013, // U+3005 - U+3017 ( 19 chars) use CP 936 conversion.
- 0x0005, // U+3018 - U+301C ( 5 chars) are GB18030 81 39 A3 37 - 81 39 A4 31 (offset 2DA7 - 2DAB)
- 0x8002, // U+301D - U+301E ( 2 chars) use CP 936 conversion.
- 0x0002, // U+301F - U+3020 ( 2 chars) are GB18030 81 39 A4 32 - 81 39 A4 33 (offset 2DAC - 2DAD)
- 0x8009, // U+3021 - U+3029 ( 9 chars) use CP 936 conversion.
- 0x0014, // U+302A - U+303D ( 20 chars) are GB18030 81 39 A4 34 - 81 39 A6 33 (offset 2DAE - 2DC1)
- 0xA989, // U+303E is non-936 GB18030 value A9 89.
- 0x0002, // U+303F - U+3040 ( 2 chars) are GB18030 81 39 A6 34 - 81 39 A6 35 (offset 2DC2 - 2DC3)
- 0x8053, // U+3041 - U+3093 ( 83 chars) use CP 936 conversion.
- 0x0007, // U+3094 - U+309A ( 7 chars) are GB18030 81 39 A6 36 - 81 39 A7 32 (offset 2DC4 - 2DCA)
- 0x8004, // U+309B - U+309E ( 4 chars) use CP 936 conversion.
- 0x0002, // U+309F - U+30A0 ( 2 chars) are GB18030 81 39 A7 33 - 81 39 A7 34 (offset 2DCB - 2DCC)
- 0x8056, // U+30A1 - U+30F6 ( 86 chars) use CP 936 conversion.
- 0x0005, // U+30F7 - U+30FB ( 5 chars) are GB18030 81 39 A7 35 - 81 39 A7 39 (offset 2DCD - 2DD1)
- 0x8003, // U+30FC - U+30FE ( 3 chars) use CP 936 conversion.
- 0x0006, // U+30FF - U+3104 ( 6 chars) are GB18030 81 39 A8 30 - 81 39 A8 35 (offset 2DD2 - 2DD7)
- 0x8025, // U+3105 - U+3129 ( 37 chars) use CP 936 conversion.
- 0x00F6, // U+312A - U+321F ( 246 chars) are GB18030 81 39 A8 36 - 81 39 C1 31 (offset 2DD8 - 2ECD)
- 0x800A, // U+3220 - U+3229 ( 10 chars) use CP 936 conversion.
- 0x0007, // U+322A - U+3230 ( 7 chars) are GB18030 81 39 C1 32 - 81 39 C1 38 (offset 2ECE - 2ED4)
- 0x8001, // U+3231 - U+3231 ( 1 chars) use CP 936 conversion.
- 0x0071, // U+3232 - U+32A2 ( 113 chars) are GB18030 81 39 C1 39 - 81 39 CD 31 (offset 2ED5 - 2F45)
- 0x8001, // U+32A3 - U+32A3 ( 1 chars) use CP 936 conversion.
- 0x00EA, // U+32A4 - U+338D ( 234 chars) are GB18030 81 39 CD 32 - 81 39 E4 35 (offset 2F46 - 302F)
- 0x8002, // U+338E - U+338F ( 2 chars) use CP 936 conversion.
- 0x000C, // U+3390 - U+339B ( 12 chars) are GB18030 81 39 E4 36 - 81 39 E5 37 (offset 3030 - 303B)
- 0x8003, // U+339C - U+339E ( 3 chars) use CP 936 conversion.
- 0x0002, // U+339F - U+33A0 ( 2 chars) are GB18030 81 39 E5 38 - 81 39 E5 39 (offset 303C - 303D)
- 0x8001, // U+33A1 - U+33A1 ( 1 chars) use CP 936 conversion.
- 0x0022, // U+33A2 - U+33C3 ( 34 chars) are GB18030 81 39 E6 30 - 81 39 E9 33 (offset 303E - 305F)
- 0x8001, // U+33C4 - U+33C4 ( 1 chars) use CP 936 conversion.
- 0x0009, // U+33C5 - U+33CD ( 9 chars) are GB18030 81 39 E9 34 - 81 39 EA 32 (offset 3060 - 3068)
- 0x8001, // U+33CE - U+33CE ( 1 chars) use CP 936 conversion.
- 0x0002, // U+33CF - U+33D0 ( 2 chars) are GB18030 81 39 EA 33 - 81 39 EA 34 (offset 3069 - 306A)
- 0x8002, // U+33D1 - U+33D2 ( 2 chars) use CP 936 conversion.
- 0x0002, // U+33D3 - U+33D4 ( 2 chars) are GB18030 81 39 EA 35 - 81 39 EA 36 (offset 306B - 306C)
- 0x8001, // U+33D5 - U+33D5 ( 1 chars) use CP 936 conversion.
- 0x0071, // U+33D6 - U+3446 ( 113 chars) are GB18030 81 39 EA 37 - 81 39 F5 39 (offset 306D - 30DD)
- 0xFE56, // U+3447 is non-936 GB18030 value FE 56.
- 0x002B, // U+3448 - U+3472 ( 43 chars) are GB18030 81 39 F6 30 - 81 39 FA 32 (offset 30DE - 3108)
- 0xFE55, // U+3473 is non-936 GB18030 value FE 55.
- 0x012A, // U+3474 - U+359D ( 298 chars) are GB18030 81 39 FA 33 - 82 30 9A 30 (offset 3109 - 3232)
- 0xFE5A, // U+359E is non-936 GB18030 value FE 5A.
- 0x006F, // U+359F - U+360D ( 111 chars) are GB18030 82 30 9A 31 - 82 30 A5 31 (offset 3233 - 32A1)
- 0xFE5C, // U+360E is non-936 GB18030 value FE 5C.
- 0x000B, // U+360F - U+3619 ( 11 chars) are GB18030 82 30 A5 32 - 82 30 A6 32 (offset 32A2 - 32AC)
- 0xFE5B, // U+361A is non-936 GB18030 value FE 5B.
- 0x02FD, // U+361B - U+3917 ( 765 chars) are GB18030 82 30 A6 33 - 82 30 F2 37 (offset 32AD - 35A9)
- 0xFE60, // U+3918 is non-936 GB18030 value FE 60.
- 0x0055, // U+3919 - U+396D ( 85 chars) are GB18030 82 30 F2 38 - 82 30 FB 32 (offset 35AA - 35FE)
- 0xFE5F, // U+396E is non-936 GB18030 value FE 5F.
- 0x0060, // U+396F - U+39CE ( 96 chars) are GB18030 82 30 FB 33 - 82 31 86 38 (offset 35FF - 365E)
- 0xFE62, // U+39CF is non-936 GB18030 value FE 62.
- 0xFE65, // U+39D0 is non-936 GB18030 value FE 65.
- 0x000E, // U+39D1 - U+39DE ( 14 chars) are GB18030 82 31 86 39 - 82 31 88 32 (offset 365F - 366C)
- 0xFE63, // U+39DF is non-936 GB18030 value FE 63.
- 0x0093, // U+39E0 - U+3A72 ( 147 chars) are GB18030 82 31 88 33 - 82 31 96 39 (offset 366D - 36FF)
- 0xFE64, // U+3A73 is non-936 GB18030 value FE 64.
- 0x00DA, // U+3A74 - U+3B4D ( 218 chars) are GB18030 82 31 97 30 - 82 31 AC 37 (offset 3700 - 37D9)
- 0xFE68, // U+3B4E is non-936 GB18030 value FE 68.
- 0x011F, // U+3B4F - U+3C6D ( 287 chars) are GB18030 82 31 AC 38 - 82 31 C9 34 (offset 37DA - 38F8)
- 0xFE69, // U+3C6E is non-936 GB18030 value FE 69.
- 0x0071, // U+3C6F - U+3CDF ( 113 chars) are GB18030 82 31 C9 35 - 82 31 D4 37 (offset 38F9 - 3969)
- 0xFE6A, // U+3CE0 is non-936 GB18030 value FE 6A.
- 0x0375, // U+3CE1 - U+4055 ( 885 chars) are GB18030 82 31 D4 38 - 82 32 AF 32 (offset 396A - 3CDE)
- 0xFE6F, // U+4056 is non-936 GB18030 value FE 6F.
- 0x0108, // U+4057 - U+415E ( 264 chars) are GB18030 82 32 AF 33 - 82 32 C9 36 (offset 3CDF - 3DE6)
- 0xFE70, // U+415F is non-936 GB18030 value FE 70.
- 0x01D7, // U+4160 - U+4336 ( 471 chars) are GB18030 82 32 C9 37 - 82 32 F8 37 (offset 3DE7 - 3FBD)
- 0xFE72, // U+4337 is non-936 GB18030 value FE 72.
- 0x0074, // U+4338 - U+43AB ( 116 chars) are GB18030 82 32 F8 38 - 82 33 86 33 (offset 3FBE - 4031)
- 0xFE78, // U+43AC is non-936 GB18030 value FE 78.
- 0x0004, // U+43AD - U+43B0 ( 4 chars) are GB18030 82 33 86 34 - 82 33 86 37 (offset 4032 - 4035)
- 0xFE77, // U+43B1 is non-936 GB18030 value FE 77.
- 0x002B, // U+43B2 - U+43DC ( 43 chars) are GB18030 82 33 86 38 - 82 33 8B 30 (offset 4036 - 4060)
- 0xFE7A, // U+43DD is non-936 GB18030 value FE 7A.
- 0x00F8, // U+43DE - U+44D5 ( 248 chars) are GB18030 82 33 8B 31 - 82 33 A3 38 (offset 4061 - 4158)
- 0xFE7B, // U+44D6 is non-936 GB18030 value FE 7B.
- 0x0175, // U+44D7 - U+464B ( 373 chars) are GB18030 82 33 A3 39 - 82 33 C9 31 (offset 4159 - 42CD)
- 0xFE7D, // U+464C is non-936 GB18030 value FE 7D.
- 0x0014, // U+464D - U+4660 ( 20 chars) are GB18030 82 33 C9 32 - 82 33 CB 31 (offset 42CE - 42E1)
- 0xFE7C, // U+4661 is non-936 GB18030 value FE 7C.
- 0x00C1, // U+4662 - U+4722 ( 193 chars) are GB18030 82 33 CB 32 - 82 33 DE 34 (offset 42E2 - 43A2)
- 0xFE80, // U+4723 is non-936 GB18030 value FE 80.
- 0x0005, // U+4724 - U+4728 ( 5 chars) are GB18030 82 33 DE 35 - 82 33 DE 39 (offset 43A3 - 43A7)
- 0xFE81, // U+4729 is non-936 GB18030 value FE 81.
- 0x0052, // U+472A - U+477B ( 82 chars) are GB18030 82 33 DF 30 - 82 33 E7 31 (offset 43A8 - 43F9)
- 0xFE82, // U+477C is non-936 GB18030 value FE 82.
- 0x0010, // U+477D - U+478C ( 16 chars) are GB18030 82 33 E7 32 - 82 33 E8 37 (offset 43FA - 4409)
- 0xFE83, // U+478D is non-936 GB18030 value FE 83.
- 0x01B9, // U+478E - U+4946 ( 441 chars) are GB18030 82 33 E8 38 - 82 34 96 38 (offset 440A - 45C2)
- 0xFE85, // U+4947 is non-936 GB18030 value FE 85.
- 0x0032, // U+4948 - U+4979 ( 50 chars) are GB18030 82 34 96 39 - 82 34 9B 38 (offset 45C3 - 45F4)
- 0xFE86, // U+497A is non-936 GB18030 value FE 86.
- 0x0002, // U+497B - U+497C ( 2 chars) are GB18030 82 34 9B 39 - 82 34 9C 30 (offset 45F5 - 45F6)
- 0xFE87, // U+497D is non-936 GB18030 value FE 87.
- 0x0004, // U+497E - U+4981 ( 4 chars) are GB18030 82 34 9C 31 - 82 34 9C 34 (offset 45F7 - 45FA)
- 0xFE88, // U+4982 is non-936 GB18030 value FE 88.
- 0xFE89, // U+4983 is non-936 GB18030 value FE 89.
- 0x0001, // U+4984 - U+4984 ( 1 chars) are GB18030 82 34 9C 35 - 82 34 9C 35 (offset 45FB - 45FB)
- 0xFE8A, // U+4985 is non-936 GB18030 value FE 8A.
- 0xFE8B, // U+4986 is non-936 GB18030 value FE 8B.
- 0x0014, // U+4987 - U+499A ( 20 chars) are GB18030 82 34 9C 36 - 82 34 9E 35 (offset 45FC - 460F)
- 0xFE8D, // U+499B is non-936 GB18030 value FE 8D.
- 0x0003, // U+499C - U+499E ( 3 chars) are GB18030 82 34 9E 36 - 82 34 9E 38 (offset 4610 - 4612)
- 0xFE8C, // U+499F is non-936 GB18030 value FE 8C.
- 0x0016, // U+49A0 - U+49B5 ( 22 chars) are GB18030 82 34 9E 39 - 82 34 A1 30 (offset 4613 - 4628)
- 0xFE8F, // U+49B6 is non-936 GB18030 value FE 8F.
- 0xFE8E, // U+49B7 is non-936 GB18030 value FE 8E.
- 0x02BF, // U+49B8 - U+4C76 ( 703 chars) are GB18030 82 34 A1 31 - 82 34 E7 33 (offset 4629 - 48E7)
- 0xFE96, // U+4C77 is non-936 GB18030 value FE 96.
- 0x0027, // U+4C78 - U+4C9E ( 39 chars) are GB18030 82 34 E7 34 - 82 34 EB 32 (offset 48E8 - 490E)
- 0xFE93, // U+4C9F is non-936 GB18030 value FE 93.
- 0xFE94, // U+4CA0 is non-936 GB18030 value FE 94.
- 0xFE95, // U+4CA1 is non-936 GB18030 value FE 95.
- 0xFE97, // U+4CA2 is non-936 GB18030 value FE 97.
- 0xFE92, // U+4CA3 is non-936 GB18030 value FE 92.
- 0x006F, // U+4CA4 - U+4D12 ( 111 chars) are GB18030 82 34 EB 33 - 82 34 F6 33 (offset 490F - 497D)
- 0xFE98, // U+4D13 is non-936 GB18030 value FE 98.
- 0xFE99, // U+4D14 is non-936 GB18030 value FE 99.
- 0xFE9A, // U+4D15 is non-936 GB18030 value FE 9A.
- 0xFE9B, // U+4D16 is non-936 GB18030 value FE 9B.
- 0xFE9C, // U+4D17 is non-936 GB18030 value FE 9C.
- 0xFE9D, // U+4D18 is non-936 GB18030 value FE 9D.
- 0xFE9E, // U+4D19 is non-936 GB18030 value FE 9E.
- 0x0094, // U+4D1A - U+4DAD ( 148 chars) are GB18030 82 34 F6 34 - 82 35 87 31 (offset 497E - 4A11)
- 0xFE9F, // U+4DAE is non-936 GB18030 value FE 9F.
- 0x0051, // U+4DAF - U+4DFF ( 81 chars) are GB18030 82 35 87 32 - 82 35 8F 32 (offset 4A12 - 4A62)
- 0xD1A6, // U+4E00 - U+9FA5 (20902 chars) use CP 936 conversion.
- 0x385A, // U+9FA6 - U+D7FF (14426 chars) are GB18030 82 35 8F 33 - 83 36 C7 38 (offset 4A63 - 82BC)
- 0x8F6C, // U+D800 - U+E76B ( 3948 chars) use CP 936 conversion.
- 0x0001, // U+E76C - U+E76C ( 1 chars) are GB18030 83 36 C7 39 - 83 36 C7 39 (offset 82BD - 82BD)
- 0x805B, // U+E76D - U+E7C7 ( 91 chars) use CP 936 conversion.
- 0x0001, // U+E7C8 - U+E7C8 ( 1 chars) are GB18030 83 36 C8 30 - 83 36 C8 30 (offset 82BE - 82BE)
- 0x801E, // U+E7C9 - U+E7E6 ( 30 chars) use CP 936 conversion.
- 0x000D, // U+E7E7 - U+E7F3 ( 13 chars) are GB18030 83 36 C8 31 - 83 36 C9 33 (offset 82BF - 82CB)
- 0x8021, // U+E7F4 - U+E814 ( 33 chars) use CP 936 conversion.
- 0x0001, // U+E815 - U+E815 ( 1 chars) are GB18030 83 36 C9 34 - 83 36 C9 34 (offset 82CC - 82CC)
- 0x8003, // U+E816 - U+E818 ( 3 chars) use CP 936 conversion.
- 0x0005, // U+E819 - U+E81D ( 5 chars) are GB18030 83 36 C9 35 - 83 36 C9 39 (offset 82CD - 82D1)
- 0x8001, // U+E81E - U+E81E ( 1 chars) use CP 936 conversion.
- 0x0007, // U+E81F - U+E825 ( 7 chars) are GB18030 83 36 CA 30 - 83 36 CA 36 (offset 82D2 - 82D8)
- 0x8001, // U+E826 - U+E826 ( 1 chars) use CP 936 conversion.
- 0x0004, // U+E827 - U+E82A ( 4 chars) are GB18030 83 36 CA 37 - 83 36 CB 30 (offset 82D9 - 82DC)
- 0x8002, // U+E82B - U+E82C ( 2 chars) use CP 936 conversion.
- 0x0004, // U+E82D - U+E830 ( 4 chars) are GB18030 83 36 CB 31 - 83 36 CB 34 (offset 82DD - 82E0)
- 0x8002, // U+E831 - U+E832 ( 2 chars) use CP 936 conversion.
- 0x0008, // U+E833 - U+E83A ( 8 chars) are GB18030 83 36 CB 35 - 83 36 CC 32 (offset 82E1 - 82E8)
- 0x8001, // U+E83B - U+E83B ( 1 chars) use CP 936 conversion.
- 0x0007, // U+E83C - U+E842 ( 7 chars) are GB18030 83 36 CC 33 - 83 36 CC 39 (offset 82E9 - 82EF)
- 0x8001, // U+E843 - U+E843 ( 1 chars) use CP 936 conversion.
- 0x0010, // U+E844 - U+E853 ( 16 chars) are GB18030 83 36 CD 30 - 83 36 CE 35 (offset 82F0 - 82FF)
- 0x8002, // U+E854 - U+E855 ( 2 chars) use CP 936 conversion.
- 0x000E, // U+E856 - U+E863 ( 14 chars) are GB18030 83 36 CE 36 - 83 36 CF 39 (offset 8300 - 830D)
- 0x8001, // U+E864 - U+E864 ( 1 chars) use CP 936 conversion.
- 0x10C7, // U+E865 - U+F92B ( 4295 chars) are GB18030 83 36 D0 30 - 84 30 85 34 (offset 830E - 93D4)
- 0x8001, // U+F92C - U+F92C ( 1 chars) use CP 936 conversion.
- 0x004C, // U+F92D - U+F978 ( 76 chars) are GB18030 84 30 85 35 - 84 30 8D 30 (offset 93D5 - 9420)
- 0x8001, // U+F979 - U+F979 ( 1 chars) use CP 936 conversion.
- 0x001B, // U+F97A - U+F994 ( 27 chars) are GB18030 84 30 8D 31 - 84 30 8F 37 (offset 9421 - 943B)
- 0x8001, // U+F995 - U+F995 ( 1 chars) use CP 936 conversion.
- 0x0051, // U+F996 - U+F9E6 ( 81 chars) are GB18030 84 30 8F 38 - 84 30 97 38 (offset 943C - 948C)
- 0x8001, // U+F9E7 - U+F9E7 ( 1 chars) use CP 936 conversion.
- 0x0009, // U+F9E8 - U+F9F0 ( 9 chars) are GB18030 84 30 97 39 - 84 30 98 37 (offset 948D - 9495)
- 0x8001, // U+F9F1 - U+F9F1 ( 1 chars) use CP 936 conversion.
- 0x001A, // U+F9F2 - U+FA0B ( 26 chars) are GB18030 84 30 98 38 - 84 30 9B 33 (offset 9496 - 94AF)
- 0x8004, // U+FA0C - U+FA0F ( 4 chars) use CP 936 conversion.
- 0x0001, // U+FA10 - U+FA10 ( 1 chars) are GB18030 84 30 9B 34 - 84 30 9B 34 (offset 94B0 - 94B0)
- 0x8001, // U+FA11 - U+FA11 ( 1 chars) use CP 936 conversion.
- 0x0001, // U+FA12 - U+FA12 ( 1 chars) are GB18030 84 30 9B 35 - 84 30 9B 35 (offset 94B1 - 94B1)
- 0x8002, // U+FA13 - U+FA14 ( 2 chars) use CP 936 conversion.
- 0x0003, // U+FA15 - U+FA17 ( 3 chars) are GB18030 84 30 9B 36 - 84 30 9B 38 (offset 94B2 - 94B4)
- 0x8001, // U+FA18 - U+FA18 ( 1 chars) use CP 936 conversion.
- 0x0006, // U+FA19 - U+FA1E ( 6 chars) are GB18030 84 30 9B 39 - 84 30 9C 34 (offset 94B5 - 94BA)
- 0x8003, // U+FA1F - U+FA21 ( 3 chars) use CP 936 conversion.
- 0x0001, // U+FA22 - U+FA22 ( 1 chars) are GB18030 84 30 9C 35 - 84 30 9C 35 (offset 94BB - 94BB)
- 0x8002, // U+FA23 - U+FA24 ( 2 chars) use CP 936 conversion.
- 0x0002, // U+FA25 - U+FA26 ( 2 chars) are GB18030 84 30 9C 36 - 84 30 9C 37 (offset 94BC - 94BD)
- 0x8003, // U+FA27 - U+FA29 ( 3 chars) use CP 936 conversion.
- 0x0406, // U+FA2A - U+FE2F ( 1030 chars) are GB18030 84 30 9C 38 - 84 31 85 37 (offset 94BE - 98C3)
- 0x8002, // U+FE30 - U+FE31 ( 2 chars) use CP 936 conversion.
- 0x0001, // U+FE32 - U+FE32 ( 1 chars) are GB18030 84 31 85 38 - 84 31 85 38 (offset 98C4 - 98C4)
- 0x8012, // U+FE33 - U+FE44 ( 18 chars) use CP 936 conversion.
- 0x0004, // U+FE45 - U+FE48 ( 4 chars) are GB18030 84 31 85 39 - 84 31 86 32 (offset 98C5 - 98C8)
- 0x800A, // U+FE49 - U+FE52 ( 10 chars) use CP 936 conversion.
- 0x0001, // U+FE53 - U+FE53 ( 1 chars) are GB18030 84 31 86 33 - 84 31 86 33 (offset 98C9 - 98C9)
- 0x8004, // U+FE54 - U+FE57 ( 4 chars) use CP 936 conversion.
- 0x0001, // U+FE58 - U+FE58 ( 1 chars) are GB18030 84 31 86 34 - 84 31 86 34 (offset 98CA - 98CA)
- 0x800E, // U+FE59 - U+FE66 ( 14 chars) use CP 936 conversion.
- 0x0001, // U+FE67 - U+FE67 ( 1 chars) are GB18030 84 31 86 35 - 84 31 86 35 (offset 98CB - 98CB)
- 0x8004, // U+FE68 - U+FE6B ( 4 chars) use CP 936 conversion.
- 0x0095, // U+FE6C - U+FF00 ( 149 chars) are GB18030 84 31 86 36 - 84 31 95 34 (offset 98CC - 9960)
- 0x805E, // U+FF01 - U+FF5E ( 94 chars) use CP 936 conversion.
- 0x0081, // U+FF5F - U+FFDF ( 129 chars) are GB18030 84 31 95 35 - 84 31 A2 33 (offset 9961 - 99E1)
- 0x8006, // U+FFE0 - U+FFE5 ( 6 chars) use CP 936 conversion.
- 0x001A, // U+FFE6 - U+FFFF ( 26 chars) are GB18030 84 31 A2 34 - 84 31 A4 39 (offset 99E2 - 99FB)
- };
- }
-}
-#endif // FEATURE_CODEPAGES_FILE
-
diff --git a/src/mscorlib/src/System/Text/ISCIIEncoding.cs b/src/mscorlib/src/System/Text/ISCIIEncoding.cs
deleted file mode 100644
index 751b8217c0..0000000000
--- a/src/mscorlib/src/System/Text/ISCIIEncoding.cs
+++ /dev/null
@@ -1,2621 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-// See the LICENSE file in the project root for more information.
-
-// ISCIIEncoding
-//
-// Ported from windows c_iscii. If you find bugs here, there're likely similar
-// bugs in the windows version
-namespace System.Text
-{
- using System;
- using System.Diagnostics;
- using System.Diagnostics.Contracts;
- using System.Globalization;
- using System.Runtime.Serialization;
- using System.Security.Permissions;
-
- // Encodes text into and out of the ISCII encodings.
- // ISCII contains characters to encode indic scripts by mapping indic scripts
- // to the same code page. This works because they are all related scripts.
- // ISCII provides a "font" selection method to switch between the appropriate
- // fonts to display the other scripts. All ISCII characters are above the
- // ASCII range to provide ASCII compatibility.
- //
- // IsAlwaysNormalized() isn't overridden
- // We don't override IsAlwaysNormalized() because it is false for all forms (like the base implementation)
- // Forms C & KC have things like 0933 + 093C == composed 0934, so they aren't normalized
- // Forms D & KD have things like 0934, which decomposes to 0933 + 093C, so not normal.
- // Form IDNA has the above problems plus case mapping, so false (like most encodings)
- //
-
- [Serializable]
- internal class ISCIIEncoding : EncodingNLS, ISerializable
- {
- // ISCII code page ("font") numbers. Trailing comments give the ATR selector byte and the Windows code page.
- private const int CodeDefault = 0; // 0x40 Default
- private const int CodeRoman = 1; // 0x41 Roman Transliteration (not supported)
- private const int CodeDevanagari = 2; // 0x42 57002
- private const int CodeBengali = 3; // 0x43 57003
- private const int CodeTamil = 4; // 0x44 57004
- private const int CodeTelugu = 5; // 0x45 57005
- private const int CodeAssamese = 6; // 0x46 57006 Assamese (Bengali)
- private const int CodeOriya = 7; // 0x47 57007
- private const int CodeKannada = 8; // 0x48 57008
- private const int CodeMalayalam = 9; // 0x49 57009
- private const int CodeGujarati = 10; // 0x4a 57010
- private const int CodePunjabi = 11; // 0x4b 57011 Punjabi (Gurmukhi)
-
- // Ranges
- private const int MultiByteBegin = 0xa0; // Beginning of MultiByte space in ISCII (lower values pass through as single bytes/chars)
- private const int IndicBegin = 0x0901; // Beginning of Unicode Indic script code points
- private const int IndicEnd = 0x0d6f; // End of Unicode Indic script code points
-
- // ISCII Control Values
- private const byte ControlATR = 0xef; // Attribute (ATR) code; the byte after it selects a code page
- private const byte ControlCodePageStart = 0x40; // Start of code page range (OR'ed with a Code* value above)
-
- // Interesting ISCII characters
- private const byte Virama = 0xe8; // Virama; a second Virama right after it stands for ZWNJ
- private const byte Nukta = 0xe9; // Nukta; right after a Virama it stands for ZWJ
- private const byte DevenagariExt = 0xf0; // Extension lead byte: F0 B8 -> U+0952, F0 BF -> U+0970
-
- // Interesting Unicode characters
- private const char ZWNJ = (char)0x200c; // Zero width non-joiner
- private const char ZWJ = (char)0x200d; // Zero width joiner
-
- // Code page this encoding was created for, stored as Windows code page number minus 57000
- private int defaultCodePage;
-
-        public ISCIIEncoding(int codePage) : base(codePage)
-        {
-            // The ISCII code page ("font") number is the Windows code page number minus 57000.
-            defaultCodePage = codePage - 57000;
-
-            // Legal Windows code pages run from Devanagari (57002) through Punjabi (57011).
-            bool isSupported = defaultCodePage >= CodeDevanagari && defaultCodePage <= CodePunjabi;
-            Debug.Assert(isSupported, "[ISCIIEncoding] Code page (" + codePage + ") isn't supported by ISCIIEncoding!");
-
-            // This shouldn't really be possible, but release builds still reject it with ArgumentException.
-            if (!isSupported)
-                throw new ArgumentException(Environment.GetResourceString(
-                    "Argument_CodepageNotSupported", codePage), nameof(codePage));
-        }
-
- // Constructor called by serialization.
-        internal ISCIIEncoding(SerializationInfo info, StreamingContext context) : base(0)
-        {
-            // Not expected to run: MLangCodePageEncoding acts as the serialization proxy for this type.
-            Debug.Assert(false, "Didn't expect to make it to ISCIIEncoding serialization constructor");
-            string message = Environment.GetResourceString("Arg_ExecutionEngineException");
-            throw new ArgumentException(message);
-        }
-
- // ISerializable implementation
-        void ISerializable.GetObjectData(SerializationInfo info, StreamingContext context)
-        {
-            // The base encoding data is written first. That call is expected to throw
-            // for a null info, which the assert right after it double-checks.
-            SerializeEncoding(info, context);
-            Debug.Assert(info != null, "[ISCIIEncoding.GetObjectData] Expected null info to throw");
-
-            // The one extra value stored for the Everett-era MLangCodePageEncoding proxy.
-            const int proxyMaxByteSize = 2;
-            info.AddValue("m_maxByteSize", proxyMaxByteSize);
-            info.SetType(typeof(MLangCodePageEncoding)); // always serialize as the proxy type
-        }
-
- // Our MaxByteCount is 4 times the input size. That could be because
- // the first input character could be in the wrong code page ("font") and
- // then that character could also be encoded in 2 code points
-        public override int GetMaxByteCount(int charCount)
-        {
-            if (charCount < 0)
-                throw new ArgumentOutOfRangeException(nameof(charCount),
-                    Environment.GetResourceString("ArgumentOutOfRange_NeedNonNegNum"));
-            Contract.EndContractBlock();
-
-            // One extra char covers a high surrogate left over in the encoder; the
-            // encoder fallback may then expand every char to MaxCharCount replacements.
-            long worstCaseChars = (long)charCount + 1;
-            int fallbackExpansion = EncoderFallback.MaxCharCount;
-            if (fallbackExpansion > 1)
-                worstCaseChars *= fallbackExpansion;
-
-            // Up to 4 bytes per char: a code page change ahead of it, and 2 code points for the char itself.
-            long worstCaseBytes = worstCaseChars * 4;
-            if (worstCaseBytes > int.MaxValue)
-                throw new ArgumentOutOfRangeException(nameof(charCount), Environment.GetResourceString("ArgumentOutOfRange_GetByteCountOverflow"));
-
-            return (int)worstCaseBytes;
-        }
-
- // Our MaxCharCount is the same as the byteCount. There are a few sequences
- // where 2 (or more) bytes could become 2 chars, but that's still 1 to 1.
-        public override int GetMaxCharCount(int byteCount)
-        {
-            if (byteCount < 0)
-                throw new ArgumentOutOfRangeException(nameof(byteCount),
-                    Environment.GetResourceString("ArgumentOutOfRange_NeedNonNegNum"));
-            Contract.EndContractBlock();
-
-            // Char count tracks byte count: the few multi-byte sequences that yield 2 chars
-            // still consume at least 2 bytes. The extra slot covers a char the decoder
-            // may be holding while it waits to see whether the next byte is a nukta.
-            long worstCaseChars = (long)byteCount + 1;
-            // Undefined code points go through the decoder fallback, which may expand them.
-            int fallbackExpansion = DecoderFallback.MaxCharCount;
-            if (fallbackExpansion > 1)
-                worstCaseChars *= fallbackExpansion;
-
-            if (worstCaseChars > int.MaxValue)
-                throw new ArgumentOutOfRangeException(nameof(byteCount), Environment.GetResourceString("ArgumentOutOfRange_GetCharCountOverflow"));
-
-            return (int)worstCaseChars;
-        }
-
- // Our workhorse version
-        internal override unsafe int GetByteCount(char* chars, int count, EncoderNLS baseEncoder)
-        {
-            byte* countOnly = null; // a null destination asks GetBytes for the count only
-            return GetBytes(chars, count, countOnly, 0, baseEncoder);
-        }
-
- // Workhorse
- internal override unsafe int GetBytes(char *chars, int charCount,
- byte* bytes, int byteCount, EncoderNLS baseEncoder)
- {
- // Encodes chars to ISCII bytes and returns buffer.Count. A null bytes pointer is allowed and means "count only".
- Debug.Assert(chars != null, "[ISCIIEncoding.GetBytes]chars!=null");
- // (bytes is deliberately not asserted non-null: GetByteCount passes null to ask for the count.)
- Debug.Assert(charCount >=0, "[ISCIIEncoding.GetBytes]charCount >=0");
- Debug.Assert(byteCount >=0, "[ISCIIEncoding.GetBytes]byteCount >=0");
-
- // Need the ISCII Encoder
- ISCIIEncoder encoder = (ISCIIEncoder) baseEncoder;
-
- // prepare our helpers
- Encoding.EncodingByteBuffer buffer = new Encoding.EncodingByteBuffer(
- this, encoder, bytes, byteCount, chars, charCount);
-
- int currentCodePage = this.defaultCodePage;
- bool bLastVirama = false;
-
- // Use encoder info if available
- if (encoder != null)
- {
- // Remember our old state
- currentCodePage = encoder.currentCodePage;
- bLastVirama = encoder.bLastVirama;
-
- // If we have a high surrogate left over, then fall it back
- if (encoder.charLeftOver > 0)
- {
- buffer.Fallback(encoder.charLeftOver);
- bLastVirama = false; // Redundant
- }
- }
-
- while (buffer.MoreData)
- {
- // Get our data
- char ch = buffer.GetNextChar();
-
- // Chars below MultiByteBegin (0xa0) are not multi byte characters
- if (ch < MultiByteBegin)
- {
- // It's a boring low character, add it as a single byte.
- if (!buffer.AddByte((byte)ch))
- break;
- bLastVirama = false;
- continue;
- }
-
- // See if it's outside of the Indic script range
- if ((ch < IndicBegin) || (ch > IndicEnd))
- {
- // See if it's a ZWJ or ZWNJ and the previous char was a Virama (bLastVirama)
- if (bLastVirama && (ch == ZWNJ || ch == ZWJ))
- {
- // The Virama byte is already out: ZWNJ adds a second Virama, ZWJ adds a Nukta
- if (ch == ZWNJ)
- {
- if (!buffer.AddByte(Virama))
- break;
- }
- else // ZWJ
- {
- if (!buffer.AddByte(Nukta))
- break;
- }
-
- // bLastVirama now counts as false
- bLastVirama = false;
- continue;
- }
-
- // Have to do our fallback
- //
- // Note that this will fallback 2 chars if this is a high surrogate.
- // Throws if recursive (knows because we called InternalGetNextChar)
- buffer.Fallback(ch);
- bLastVirama = false;
- continue;
- }
-
- // It's in the Unicode Indic script range
- int indicInfo = UnicodeToIndicChar[ch - IndicBegin]; // 0xXYZZ table entry, 0 when undefined
- byte byteIndic = (byte)indicInfo; // ZZ: the ISCII byte
- int indicScript = (0x000f & (indicInfo >> 8)); // Y: the code page ("font")
- int indicTwoBytes = (0xf000 & indicInfo); // X: non-zero when a second byte from SecondIndicByte is needed
-
- // If IndicInfo is 0 then have to do fallback
- if (indicInfo == 0)
- {
- // It's some Unicode character we don't have indic for.
- // Have to do our fallback
- // Add Fallback Count
- // Note that chars was preincremented, and GetEncoderFallbackString might add an extra
- // if chars != charEnd and there's a surrogate.
- // Throws if recursive (knows because we called InternalGetNextChar)
- buffer.Fallback(ch);
-
- bLastVirama = false;
- continue;
- }
-
- // See if our code page ("font" in ISCII spec) has to change
- // (This if doesn't add character, just changes character set)
- Debug.Assert(indicScript!=0, "[ISCIIEncoding.GetBytes]expected an indic script value");
- if (indicScript != currentCodePage)
- {
- // It changed, spit out the ATR followed by the code page selector byte
- if (!buffer.AddByte(ControlATR, (byte)(indicScript | ControlCodePageStart)))
- break;
-
- // Now remember the new code page (do this afterwards in case AddByte failed)
- currentCodePage = indicScript;
-
- // We only know how to map from Unicode to pages from Devanagari to Punjabi (2 to 11)
- Debug.Assert(currentCodePage >= CodeDevanagari && currentCodePage <= CodePunjabi,
- "[ISCIIEncoding.GetBytes]Code page (" + currentCodePage + " shouldn't appear in ISCII from Unicode table!");
- }
-
- // Safe to add our byte now
- if (!buffer.AddByte(byteIndic, indicTwoBytes != 0 ? 1:0))
- break;
-
- // Remember if this one was a Virama
- bLastVirama = (byteIndic == Virama);
-
- // Some characters need extra bytes
- if (indicTwoBytes != 0)
- {
- // This one needs another byte
- Debug.Assert((indicTwoBytes >> 12) > 0 && (indicTwoBytes >> 12) <= 3,
- "[ISCIIEncoding.GetBytes]Expected indicTwoBytes from 1-3, not " + (indicTwoBytes >> 12));
-
- // Already did buffer checking, but...
- if (!buffer.AddByte(SecondIndicByte[indicTwoBytes >> 12]))
- break;
- }
- }
-
- // May need to switch back to our default code page
- if (currentCodePage != defaultCodePage && (encoder == null || encoder.MustFlush))
- {
- // It changed, spit out the ATR
- if (buffer.AddByte(ControlATR, (byte)(defaultCodePage | ControlCodePageStart)))
- currentCodePage = defaultCodePage;
- else
- // If not successful, convert will maintain state for next time, also
- // AddByte will have decremented our char count, however we need it to remain the same
- buffer.GetNextChar();
- bLastVirama = false;
- }
-
- // Make sure we remember our state if necessary
- // Note that we don't care about flush because Virama and code page
- // changes are legal at the end.
- // Don't set encoder if we're just counting
- if (encoder != null && bytes != null)
- {
- // Clear Encoder if necessary.
- if (!buffer.fallbackBuffer.bUsedEncoder)
- {
- encoder.charLeftOver = (char)0;
- }
-
- // Remember our code page/virama state
- encoder.currentCodePage = currentCodePage;
- encoder.bLastVirama = bLastVirama;
-
- // How many chars were used?
- encoder.m_charsUsed = buffer.CharsUsed;
- }
-
- // Return our length
- return buffer.Count;
- }
-
- // Workhorse
-        internal override unsafe int GetCharCount(byte* bytes, int count, DecoderNLS baseDecoder)
-        {
-            char* countOnly = null; // a null destination asks GetChars for the count only
-            return GetChars(bytes, count, countOnly, 0, baseDecoder);
-        }
-
- // For decoding, the following interesting rules apply:
- // Virama followed by another Virama or Nukta becomes Virama + ZWNJ or Virama + ZWJ
- // ATR is followed by a byte to switch code pages ("fonts")
- // Devenagari F0, B8 -> \u0952
- // Devenagari F0, BF -> \u0970
- // Some characters followed by E9 become a different character instead.
- internal override unsafe int GetChars(byte* bytes, int byteCount,
- char* chars, int charCount, DecoderNLS baseDecoder)
- {
- // Just need to ASSERT, this is called by something else internal that checked parameters already
- // Allow null chars for counting
- Debug.Assert(bytes != null, "[ISCIIEncoding.GetChars]bytes is null");
- Debug.Assert(byteCount >= 0, "[ISCIIEncoding.GetChars]byteCount is negative");
- // (chars is deliberately not asserted non-null: GetCharCount passes null to ask for the count.)
- Debug.Assert(charCount >= 0, "[ISCIIEncoding.GetChars]charCount is negative");
-
- // Need the ISCII Decoder
- ISCIIDecoder decoder = (ISCIIDecoder) baseDecoder;
-
- // Get our info.
- Encoding.EncodingCharBuffer buffer = new Encoding.EncodingCharBuffer(
- this, decoder, chars, charCount, bytes, byteCount);
-
- int currentCodePage = this.defaultCodePage;
- bool bLastATR = false;
- bool bLastVirama = false;
- bool bLastDevenagariStressAbbr = false;
- char cLastCharForNextNukta = '\0';
- char cLastCharForNoNextNukta = '\0';
-
- // See if there's anything in our decoder
- if (decoder != null)
- {
- currentCodePage = decoder.currentCodePage;
- bLastATR = decoder.bLastATR;
- bLastVirama = decoder.bLastVirama;
- bLastDevenagariStressAbbr = decoder.bLastDevenagariStressAbbr;
- cLastCharForNextNukta = decoder.cLastCharForNextNukta;
- cLastCharForNoNextNukta = decoder.cLastCharForNoNextNukta;
- }
-
- bool bLastSpecial = bLastVirama | bLastATR | bLastDevenagariStressAbbr |
- (cLastCharForNextNukta != '\0');
-
- // Get our current code page index (some code pages are dups)
- int currentCodePageIndex = -1;
- Debug.Assert(currentCodePage >= CodeDevanagari && currentCodePage <= CodePunjabi,
- "[ISCIIEncoding.GetChars]Decoder code page must be >= Devanagari and <= Punjabi, not " + currentCodePage);
-
- if (currentCodePage >= CodeDevanagari && currentCodePage <= CodePunjabi)
- {
- currentCodePageIndex = IndicMappingIndex[currentCodePage];
- }
-
- // Loop through our input
- while (buffer.MoreData)
- {
- byte b = buffer.GetNextByte();
-
- // See if last one was special
- if (bLastSpecial)
- {
- // Now it won't be
- bLastSpecial = false;
-
- // One and only one of our flags should be set
- Debug.Assert(((bLastVirama ? 1 : 0) + (bLastATR ? 1 : 0) +
- (bLastDevenagariStressAbbr ? 1 : 0) +
- ((cLastCharForNextNukta > 0) ? 1 : 0)) == 1,
- String.Format(CultureInfo.InvariantCulture,
- "[ISCIIEncoding.GetChars]Special cases require 1 and only 1 special case flag: LastATR {0} Dev. {1} Nukta {2}",
- bLastATR, bLastDevenagariStressAbbr, cLastCharForNextNukta));
- // If the last one was an ATR, then we'll have to do ATR stuff
- if (bLastATR)
- {
- // We only support Devanagari - Punjabi
- if (b >= (0x40 | CodeDevanagari) && b <= (0x40 | CodePunjabi))
- {
- // Remember the code page
- currentCodePage = b & 0xf;
- currentCodePageIndex = IndicMappingIndex[currentCodePage];
- // No longer last ATR
- bLastATR = false;
- continue;
- }
-
- // Change back to default?
- if (b == 0x40)
- {
- currentCodePage = this.defaultCodePage;
- currentCodePageIndex = -1;
-
- if (currentCodePage >= CodeDevanagari && currentCodePage <= CodePunjabi)
- {
- currentCodePageIndex = IndicMappingIndex[currentCodePage];
- }
- // No longer last ATR
- bLastATR = false;
- continue;
- }
-
- // We don't support Roman
- if (b == 0x41)
- {
- currentCodePage = this.defaultCodePage;
- currentCodePageIndex = -1;
-
- if (currentCodePage >= CodeDevanagari && currentCodePage <= CodePunjabi)
- {
- currentCodePageIndex = IndicMappingIndex[currentCodePage];
- }
-
- // Even though we don't know how to support Roman, windows didn't add a ? so we don't either.
- // No longer last ATR
- bLastATR = false;
- continue;
- }
-
- // Other code pages & ATR codes not supported, fallback the ATR
- // If fails, decrements the buffer, which is OK, we remember ATR state.
- if (!buffer.Fallback(ControlATR))
- break;
-
- // No longer last ATR (fell back)
- bLastATR = false;
-
- // we know we can't have any of these other modes
- Debug.Assert(bLastVirama == false, "[ISCIIEncoding.GetChars] Expected no bLastVirama in bLastATR mode");
- Debug.Assert(bLastDevenagariStressAbbr == false, "[ISCIIEncoding.GetChars] Expected no bLastDevenagariStressAbbr in bLastATR mode");
- Debug.Assert(cLastCharForNextNukta == (char)0, "[ISCIIEncoding.GetChars] Expected no cLastCharForNextNukta in bLastATR mode");
- Debug.Assert(cLastCharForNoNextNukta == (char)0, "[ISCIIEncoding.GetChars] Expected no cLastCharForNoNextNukta in bLastATR mode");
-
- // Keep processing this byte
- }
- else if (bLastVirama)
- {
- // If last was Virama, then we might need ZWNJ or ZWJ instead
- if (b == Virama)
- {
- // If no room, then stop
- if (!buffer.AddChar(ZWNJ))
- break;
- bLastVirama = false;
- continue;
- }
- if (b == Nukta)
- {
- // If no room, then stop
- if (!buffer.AddChar(ZWJ))
- break;
- bLastVirama = false;
- continue;
- }
-
- // No longer in this mode, fall through to handle character
- // (Virama itself was added when flag was set last iteration)
- bLastVirama = false;
-
- // We know we can't have any of these other modes
- Debug.Assert(bLastATR == false, "[ISCIIEncoding.GetChars] Expected no bLastATR in bLastVirama mode");
- Debug.Assert(bLastDevenagariStressAbbr == false, "[ISCIIEncoding.GetChars] Expected no bLastDevenagariStressAbbr in bLastVirama mode");
- Debug.Assert(cLastCharForNextNukta == (char)0, "[ISCIIEncoding.GetChars] Expected no cLastCharForNextNukta in bLastVirama mode");
- Debug.Assert(cLastCharForNoNextNukta == (char)0, "[ISCIIEncoding.GetChars] Expected no cLastCharForNoNextNukta in bLastVirama mode");
- }
- else if (bLastDevenagariStressAbbr)
- {
- // Last byte was an 0xf0 (ext).
- // If current is b8 or bf, then we have 952 or 970. Otherwise fallback
- if (b == 0xb8)
- {
- // It was a 0xb8
- if (!buffer.AddChar('\x0952')) // Devanagari stress sign anudatta
- break;
- bLastDevenagariStressAbbr = false;
- continue;
- }
-
- if (b == 0xbf)
- {
- // It was a 0xbf
- if (!buffer.AddChar('\x0970')) // Devanagari abbr. sign
- break;
- bLastDevenagariStressAbbr = false;
- continue;
- }
-
- // Wasn't an expected pattern, do fallback for f0 (ext)
- // if fails, fallback will back up our buffer
- if (!buffer.Fallback(DevenagariExt))
- break;
-
- // Keep processing this byte (turn off mode)
- // (last character was added when mode was set)
- bLastDevenagariStressAbbr = false;
-
- Debug.Assert(bLastATR == false, "[ISCIIEncoding.GetChars] Expected no bLastATR in bLastDevenagariStressAbbr mode");
- Debug.Assert(bLastVirama == false, "[ISCIIEncoding.GetChars] Expected no bLastVirama in bLastDevenagariStressAbbr mode");
- Debug.Assert(cLastCharForNextNukta == (char)0, "[ISCIIEncoding.GetChars] Expected no cLastCharForNextNukta in bLastDevenagariStressAbbr mode");
- Debug.Assert(cLastCharForNoNextNukta == (char)0, "[ISCIIEncoding.GetChars] Expected no cLastCharForNoNextNukta in bLastDevenagariStressAbbr mode");
- }
- else
- {
- // We were checking for next char being a nukta
- Debug.Assert(cLastCharForNextNukta > 0 && cLastCharForNoNextNukta > 0,
- "[ISCIIEncoding.GetChars]No other special case found, but cLastCharFor(No)NextNukta variable(s) aren't set.");
-
- // We'll either add combined char or last char
- if (b == Nukta)
- {
- // We combine nukta with previous char
- if (!buffer.AddChar(cLastCharForNextNukta))
- break;
-
- // Done already
- cLastCharForNextNukta = cLastCharForNoNextNukta = '\0';
- continue;
- }
-
- // No Nukta, just add last character and keep processing current byte
- if (!buffer.AddChar(cLastCharForNoNextNukta))
- break;
-
- // Keep processing this byte, turn off mode.
- cLastCharForNextNukta = cLastCharForNoNextNukta = '\0';
-
- Debug.Assert(bLastATR == false, "[ISCIIEncoding.GetChars] Expected no bLastATR in cLastCharForNextNukta mode");
- Debug.Assert(bLastVirama == false, "[ISCIIEncoding.GetChars] Expected no bLastVirama in cLastCharForNextNukta mode");
- Debug.Assert(bLastDevenagariStressAbbr == false, "[ISCIIEncoding.GetChars] Expected no bLastDevenagariStressAbbr in cLastCharForNextNukta mode");
- }
- }
-
- // Now bLastSpecial should be false and all flags false.
- Debug.Assert (!bLastSpecial && !bLastDevenagariStressAbbr && !bLastVirama && !bLastATR &&
- cLastCharForNextNukta == '\0',
- "[ISCIIEncoding.GetChars]No special state for last code point should exist at this point.");
-
- // If it's a simple byte (below MultiByteBegin), just add it
- if (b < MultiByteBegin)
- {
- if (!buffer.AddChar((char)b))
- break;
- continue;
- }
-
- // See if it's an ATR marker; the next byte then selects the code page
- if (b == ControlATR)
- {
- bLastATR = bLastSpecial = true;
- continue;
- }
-
- Debug.Assert (currentCodePageIndex != -1, "[ISCIIEncoding.GetChars]Expected valid currentCodePageIndex != -1");
- char ch = IndicMapping[currentCodePageIndex, 0, b - MultiByteBegin]; // char for this byte on its own (0 if unknown)
- char cAlt = IndicMapping[currentCodePageIndex, 1, b - MultiByteBegin]; // alternate entry (0 if none), see the checks below
-
- // If no 2nd char, just add it, also lonely Nuktas get added as well.
- if (cAlt == 0 || b == Nukta)
- {
- // If it was an unknown character do fallback
-
- // ? if not known.
- if (ch == 0)
- {
- // Fallback the unknown byte
- if (!buffer.Fallback(b))
- break;
- }
- else
- {
- // Add the known character
- if (!buffer.AddChar(ch))
- break;
- }
- continue;
- }
-
- // if b == Virama set last Virama so we can do ZWJ or ZWNJ next time if needed.
- if (b == Virama)
- {
- // Add Virama
- if (!buffer.AddChar(ch))
- break;
- bLastVirama = bLastSpecial = true;
- continue;
- }
-
- // See if it's one that changes with a Nukta (alternate entry has no bits in 0xF000)
- if ((cAlt & 0xF000) == 0)
- {
- // It could change if next char is a nukta, so hold both candidates until we see the next byte
- bLastSpecial = true;
- cLastCharForNextNukta = cAlt;
- cLastCharForNoNextNukta = ch;
- continue;
- }
-
- // We must be the Devenagari special case for F0, B8 & F0, BF
- Debug.Assert(currentCodePage == CodeDevanagari && b == DevenagariExt,
- String.Format(CultureInfo.InvariantCulture,
- "[ISCIIEncoding.GetChars] Devenagari special case must {0} not {1} or in Devanagari code page {2} not {3}.",
- DevenagariExt, b, CodeDevanagari, currentCodePage));
- bLastDevenagariStressAbbr = bLastSpecial = true;
-
- }
-
- // If we don't have a decoder, or if we had to flush, then we need to get rid
- // of last ATR, LastNoNextNukta and LastDevenagariExt.
- if (decoder == null || decoder.MustFlush)
- {
- // If these fail (because of Convert with insufficient buffer), then they'll turn off MustFlush as well.
- if (bLastATR)
- {
- // Have to add ATR fallback
- if (buffer.Fallback(ControlATR))
- bLastATR = false;
- else
- // If not successful, convert will maintain state for next time, also
- // AddChar will have decremented our byte count, however we need it to remain the same
- buffer.GetNextByte();
- }
- else if (bLastDevenagariStressAbbr)
- {
- // Have to do fallback for DevenagariExt
- if (buffer.Fallback(DevenagariExt))
- bLastDevenagariStressAbbr = false;
- else
- // If not successful, convert will maintain state for next time, also
- // AddChar will have decremented our byte count, however we need it to remain the same
- buffer.GetNextByte();
- }
- else if (cLastCharForNoNextNukta != '\0')
- {
- // Have to add our last char because there was no next nukta
- if (buffer.AddChar(cLastCharForNoNextNukta))
- cLastCharForNoNextNukta = cLastCharForNextNukta = '\0';
- else
- // If not successful, convert will maintain state for next time, also
- // AddChar will have decremented our byte count, however we need it to remain the same
- buffer.GetNextByte();
- }
- // LastVirama is unimportant for flushing decoder.
- }
-
- // Remember any left over stuff
- // (only remember if we aren't counting)
- if (decoder != null && chars != null)
- {
- // If not flushing or have state (from convert) then need to remember state
- if (!decoder.MustFlush ||
- cLastCharForNoNextNukta != '\0' || bLastATR || bLastDevenagariStressAbbr)
- {
- // Either not flushing or had state (from convert)
- Debug.Assert(!decoder.MustFlush || !decoder.m_throwOnOverflow,
- "[ISCIIEncoding.GetChars]Expected no state or not converting or not flushing");
- decoder.currentCodePage = currentCodePage;
- decoder.bLastVirama = bLastVirama;
- decoder.bLastATR = bLastATR;
- decoder.bLastDevenagariStressAbbr = bLastDevenagariStressAbbr;
- decoder.cLastCharForNextNukta = cLastCharForNextNukta;
- decoder.cLastCharForNoNextNukta = cLastCharForNoNextNukta;
- }
- else
- {
- decoder.currentCodePage = this.defaultCodePage;
- decoder.bLastVirama = false;
- decoder.bLastATR = false;
- decoder.bLastDevenagariStressAbbr = false;
- decoder.cLastCharForNextNukta = '\0';
- decoder.cLastCharForNoNextNukta = '\0';
- }
- decoder.m_bytesUsed = buffer.BytesUsed;
- }
- // Otherwise we already did fallback and added extra things
-
- // Return the # of characters we found
- return buffer.Count;
- }
-
-        // Every call returns a new ISCIIDecoder bound to this encoding, so each
-        // caller gets its own code page and pending-byte state.
-        public override Decoder GetDecoder()
-            => new ISCIIDecoder(this);
-
-        // Every call returns a new ISCIIEncoder bound to this encoding, so each
-        // caller gets its own code page and virama state.
-        public override Encoder GetEncoder()
-            => new ISCIIEncoder(this);
-
-        // Weak distribution is accepted here: an encoding is relatively unlikely to be used as a hashtable key.
-        public override int GetHashCode() =>
-            defaultCodePage
-            + this.EncoderFallback.GetHashCode()
-            + this.DecoderFallback.GetHashCode();
-
-        [Serializable]
-        internal class ISCIIEncoder : EncoderNLS
-        {
-            // Code page ("font") this encoder starts out in; HasState compares against it.
-            internal int defaultCodePage = 0;
-
-            // Code page the encoded output was last left in.
-            internal int currentCodePage = 0;
-
-            // True when the previous char was a virama, because ZWJ and ZWNJ encode differently then.
-            internal bool bLastVirama = false;
-
-            public ISCIIEncoder(Encoding encoding) : base(encoding)
-            {
-                // Windows code page number minus 57000 gives the ISCII code page number.
-                int startingCodePage = encoding.CodePage - 57000;
-                this.defaultCodePage = startingCodePage;
-                this.currentCodePage = startingCodePage;
-            }
-
-            // Clears the virama flag, the left-over char and any fallback buffer.
-            // Warning: currentCodePage is not forced back to the default, so after a reset
-            // on mixed input it is ambiguous which code page encoding resumes in.
-            public override void Reset()
-            {
-                charLeftOver = '\0';
-                bLastVirama = false;
-                m_fallbackBuffer?.Reset();
-            }
-
-            // State remains while a char is left over, or while the output has not yet
-            // been switched back to the default code page.
-            internal override bool HasState
-            {
-                get
-                {
-                    return charLeftOver != '\0' || currentCodePage != defaultCodePage;
-                }
-            }
-        }
-
-        [Serializable]
-        internal class ISCIIDecoder : DecoderNLS
-        {
-            // State carried between calls: the active code page plus what the last byte left pending.
-            internal int currentCodePage = 0;
-            internal bool bLastATR = false;
-            internal bool bLastVirama = false;
-            internal bool bLastDevenagariStressAbbr = false;
-            internal char cLastCharForNextNukta = '\0';
-            internal char cLastCharForNoNextNukta = '\0';
-
-            public ISCIIDecoder(Encoding encoding) : base(encoding)
-            {
-                // Windows code page number minus 57000 gives the ISCII code page number.
-                this.currentCodePage = encoding.CodePage - 57000;
-            }
-
-            // Clears every pending flag and char, plus any fallback buffer.
-            // Warning: currentCodePage is not forced back to the default, so after a reset
-            // on mixed input it is ambiguous which code page decoding resumes in.
-            public override void Reset()
-            {
-                cLastCharForNoNextNukta = '\0';
-                cLastCharForNextNukta = '\0';
-                bLastDevenagariStressAbbr = false;
-                bLastVirama = false;
-                bLastATR = false;
-                m_fallbackBuffer?.Reset();
-            }
-
-            // State remains while a nukta candidate, an ATR or an F0 extension byte is pending.
-            // A pending virama does not count as state.
-            internal override bool HasState
-            {
-                get
-                {
-                    bool pendingNukta = cLastCharForNextNukta != '\0' || cLastCharForNoNextNukta != '\0';
-                    return pendingNukta || bLastATR || bLastDevenagariStressAbbr;
-                }
-            }
-        }
-
- //
- // ISCII Tables
- //
- // From Windows ISCII\tables.c
- //
-
- ////////////////////////////////////////////////////////////////////////////
- //
- // Char to Byte
- //
- // 0xXYZZ Where Y is the code page "font" part and ZZ is the byte character
- // The high X bits also reference the SecondIndicByte table if an
- // extra byte is needed.
- // 0x0000 For undefined characters
- //
- // This is valid for values IndicBegin to IndicEnd
- //
- // WARNING: When this was copied from windows, the ? characters (0x003F) were
- // searched/replaced with 0x0000.
- //
- ////////////////////////////////////////////////////////////////////////////
-
- static int[] UnicodeToIndicChar =
- {
- 0x02a1, // U+0901 : Devanagari Sign Candrabindu
- 0x02a2, // U+0902 : Devanagari Sign Anusvara
- 0x02a3, // U+0903 : Devanagari Sign Visarga
- 0x0000, // U+0904 : Undefined
- 0x02a4, // U+0905 : Devanagari Letter A
- 0x02a5, // U+0906 : Devanagari Letter Aa
- 0x02a6, // U+0907 : Devanagari Letter I
- 0x02a7, // U+0908 : Devanagari Letter Ii
- 0x02a8, // U+0909 : Devanagari Letter U
- 0x02a9, // U+090a : Devanagari Letter Uu
- 0x02aa, // U+090b : Devanagari Letter Vocalic R
- 0x12a6, // U+090c : Devanagari Letter Vocalic L
- 0x02ae, // U+090d : Devanagari Letter Candra E
- 0x02ab, // U+090e : Devanagari Letter Short E
- 0x02ac, // U+090f : Devanagari Letter E
- 0x02ad, // U+0910 : Devanagari Letter Ai
- 0x02b2, // U+0911 : Devanagari Letter Candra O
- 0x02af, // U+0912 : Devanagari Letter Short O
- 0x02b0, // U+0913 : Devanagari Letter O
- 0x02b1, // U+0914 : Devanagari Letter Au
- 0x02b3, // U+0915 : Devanagari Letter Ka
- 0x02b4, // U+0916 : Devanagari Letter Kha
- 0x02b5, // U+0917 : Devanagari Letter Ga
- 0x02b6, // U+0918 : Devanagari Letter Gha
- 0x02b7, // U+0919 : Devanagari Letter Nga
- 0x02b8, // U+091a : Devanagari Letter Ca
- 0x02b9, // U+091b : Devanagari Letter Cha
- 0x02ba, // U+091c : Devanagari Letter Ja
- 0x02bb, // U+091d : Devanagari Letter Jha
- 0x02bc, // U+091e : Devanagari Letter Nya
- 0x02bd, // U+091f : Devanagari Letter Tta
- 0x02be, // U+0920 : Devanagari Letter Ttha
- 0x02bf, // U+0921 : Devanagari Letter Dda
- 0x02c0, // U+0922 : Devanagari Letter Ddha
- 0x02c1, // U+0923 : Devanagari Letter Nna
- 0x02c2, // U+0924 : Devanagari Letter Ta
- 0x02c3, // U+0925 : Devanagari Letter Tha
- 0x02c4, // U+0926 : Devanagari Letter Da
- 0x02c5, // U+0927 : Devanagari Letter Dha
- 0x02c6, // U+0928 : Devanagari Letter Na
- 0x02c7, // U+0929 : Devanagari Letter Nnna
- 0x02c8, // U+092a : Devanagari Letter Pa
- 0x02c9, // U+092b : Devanagari Letter Pha
- 0x02ca, // U+092c : Devanagari Letter Ba
- 0x02cb, // U+092d : Devanagari Letter Bha
- 0x02cc, // U+092e : Devanagari Letter Ma
- 0x02cd, // U+092f : Devanagari Letter Ya
- 0x02cf, // U+0930 : Devanagari Letter Ra
- 0x02d0, // U+0931 : Devanagari Letter Rra
- 0x02d1, // U+0932 : Devanagari Letter La
- 0x02d2, // U+0933 : Devanagari Letter Lla
- 0x02d3, // U+0934 : Devanagari Letter Llla
- 0x02d4, // U+0935 : Devanagari Letter Va
- 0x02d5, // U+0936 : Devanagari Letter Sha
- 0x02d6, // U+0937 : Devanagari Letter Ssa
- 0x02d7, // U+0938 : Devanagari Letter Sa
- 0x02d8, // U+0939 : Devanagari Letter Ha
- 0x0000, // U+093a : Undefined
- 0x0000, // U+093b : Undefined
- 0x02e9, // U+093c : Devanagari Sign Nukta
- 0x12ea, // U+093d : Devanagari Sign Avagraha
- 0x02da, // U+093e : Devanagari Vowel Sign Aa
- 0x02db, // U+093f : Devanagari Vowel Sign I
- 0x02dc, // U+0940 : Devanagari Vowel Sign Ii
- 0x02dd, // U+0941 : Devanagari Vowel Sign U
- 0x02de, // U+0942 : Devanagari Vowel Sign Uu
- 0x02df, // U+0943 : Devanagari Vowel Sign Vocalic R
- 0x12df, // U+0944 : Devanagari Vowel Sign Vocalic Rr
- 0x02e3, // U+0945 : Devanagari Vowel Sign Candra E
- 0x02e0, // U+0946 : Devanagari Vowel Sign Short E
- 0x02e1, // U+0947 : Devanagari Vowel Sign E
- 0x02e2, // U+0948 : Devanagari Vowel Sign Ai
- 0x02e7, // U+0949 : Devanagari Vowel Sign Candra O
- 0x02e4, // U+094a : Devanagari Vowel Sign Short O
- 0x02e5, // U+094b : Devanagari Vowel Sign O
- 0x02e6, // U+094c : Devanagari Vowel Sign Au
- 0x02e8, // U+094d : Devanagari Sign Virama
- 0x0000, // U+094e : Undefined
- 0x0000, // U+094f : Undefined
- 0x12a1, // U+0950 : Devanagari Om
- 0x0000, // U+0951 : Devanagari Stress Sign Udatta
- 0x22f0, // U+0952 : Devanagari Stress Sign Anudatta
- 0x0000, // U+0953 : Devanagari Grave Accent
- 0x0000, // U+0954 : Devanagari Acute Accent
- 0x0000, // U+0955 : Undefined
- 0x0000, // U+0956 : Undefined
- 0x0000, // U+0957 : Undefined
- 0x12b3, // U+0958 : Devanagari Letter Qa
- 0x12b4, // U+0959 : Devanagari Letter Khha
- 0x12b5, // U+095a : Devanagari Letter Ghha
- 0x12ba, // U+095b : Devanagari Letter Za
- 0x12bf, // U+095c : Devanagari Letter Dddha
- 0x12c0, // U+095d : Devanagari Letter Rha
- 0x12c9, // U+095e : Devanagari Letter Fa
- 0x02ce, // U+095f : Devanagari Letter Yya
- 0x12aa, // U+0960 : Devanagari Letter Vocalic Rr
- 0x12a7, // U+0961 : Devanagari Letter Vocalic Ll
- 0x12db, // U+0962 : Devanagari Vowel Sign Vocalic L
- 0x12dc, // U+0963 : Devanagari Vowel Sign Vocalic Ll
- 0x02ea, // U+0964 : Devanagari Danda
- 0x0000, // U+0965 : Devanagari Double Danda
- 0x02f1, // U+0966 : Devanagari Digit Zero
- 0x02f2, // U+0967 : Devanagari Digit One
- 0x02f3, // U+0968 : Devanagari Digit Two
- 0x02f4, // U+0969 : Devanagari Digit Three
- 0x02f5, // U+096a : Devanagari Digit Four
- 0x02f6, // U+096b : Devanagari Digit Five
- 0x02f7, // U+096c : Devanagari Digit Six
- 0x02f8, // U+096d : Devanagari Digit Seven
- 0x02f9, // U+096e : Devanagari Digit Eight
- 0x02fa, // U+096f : Devanagari Digit Nine
- 0x32f0, // U+0970 : Devanagari Abbreviation Sign
- 0x0000, // U+0971 : Undefined
- 0x0000, // U+0972 : Undefined
- 0x0000, // U+0973 : Undefined
- 0x0000, // U+0974 : Undefined
- 0x0000, // U+0975 : Undefined
- 0x0000, // U+0976 : Undefined
- 0x0000, // U+0977 : Undefined
- 0x0000, // U+0978 : Undefined
- 0x0000, // U+0979 : Undefined
- 0x0000, // U+097a : Undefined
- 0x0000, // U+097b : Undefined
- 0x0000, // U+097c : Undefined
- 0x0000, // U+097d : Undefined
- 0x0000, // U+097e : Undefined
- 0x0000, // U+097f : Undefined
- 0x0000, // U+0980 : Undefined
- 0x03a1, // U+0981 : Bengali Sign Candrabindu
- 0x03a2, // U+0982 : Bengali Sign Anusvara
- 0x03a3, // U+0983 : Bengali Sign Visarga
- 0x0000, // U+0984 : Undefined
- 0x03a4, // U+0985 : Bengali Letter A
- 0x03a5, // U+0986 : Bengali Letter Aa
- 0x03a6, // U+0987 : Bengali Letter I
- 0x03a7, // U+0988 : Bengali Letter Ii
- 0x03a8, // U+0989 : Bengali Letter U
- 0x03a9, // U+098a : Bengali Letter Uu
- 0x03aa, // U+098b : Bengali Letter Vocalic R
- 0x13a6, // U+098c : Bengali Letter Vocalic L
- 0x0000, // U+098d : Undefined
- 0x0000, // U+098e : Undefined
- 0x03ab, // U+098f : Bengali Letter E
- 0x03ad, // U+0990 : Bengali Letter Ai
- 0x0000, // U+0991 : Undefined
- 0x0000, // U+0992 : Undefined
- 0x03af, // U+0993 : Bengali Letter O
- 0x03b1, // U+0994 : Bengali Letter Au
- 0x03b3, // U+0995 : Bengali Letter Ka
- 0x03b4, // U+0996 : Bengali Letter Kha
- 0x03b5, // U+0997 : Bengali Letter Ga
- 0x03b6, // U+0998 : Bengali Letter Gha
- 0x03b7, // U+0999 : Bengali Letter Nga
- 0x03b8, // U+099a : Bengali Letter Ca
- 0x03b9, // U+099b : Bengali Letter Cha
- 0x03ba, // U+099c : Bengali Letter Ja
- 0x03bb, // U+099d : Bengali Letter Jha
- 0x03bc, // U+099e : Bengali Letter Nya
- 0x03bd, // U+099f : Bengali Letter Tta
- 0x03be, // U+09a0 : Bengali Letter Ttha
- 0x03bf, // U+09a1 : Bengali Letter Dda
- 0x03c0, // U+09a2 : Bengali Letter Ddha
- 0x03c1, // U+09a3 : Bengali Letter Nna
- 0x03c2, // U+09a4 : Bengali Letter Ta
- 0x03c3, // U+09a5 : Bengali Letter Tha
- 0x03c4, // U+09a6 : Bengali Letter Da
- 0x03c5, // U+09a7 : Bengali Letter Dha
- 0x03c6, // U+09a8 : Bengali Letter Na
- 0x0000, // U+09a9 : Undefined
- 0x03c8, // U+09aa : Bengali Letter Pa
- 0x03c9, // U+09ab : Bengali Letter Pha
- 0x03ca, // U+09ac : Bengali Letter Ba
- 0x03cb, // U+09ad : Bengali Letter Bha
- 0x03cc, // U+09ae : Bengali Letter Ma
- 0x03cd, // U+09af : Bengali Letter Ya
- 0x03cf, // U+09b0 : Bengali Letter Ra
- 0x0000, // U+09b1 : Undefined
- 0x03d1, // U+09b2 : Bengali Letter La
- 0x0000, // U+09b3 : Undefined
- 0x0000, // U+09b4 : Undefined
- 0x0000, // U+09b5 : Undefined
- 0x03d5, // U+09b6 : Bengali Letter Sha
- 0x03d6, // U+09b7 : Bengali Letter Ssa
- 0x03d7, // U+09b8 : Bengali Letter Sa
- 0x03d8, // U+09b9 : Bengali Letter Ha
- 0x0000, // U+09ba : Undefined
- 0x0000, // U+09bb : Undefined
- 0x03e9, // U+09bc : Bengali Sign Nukta
- 0x0000, // U+09bd : Undefined
- 0x03da, // U+09be : Bengali Vowel Sign Aa
- 0x03db, // U+09bf : Bengali Vowel Sign I
- 0x03dc, // U+09c0 : Bengali Vowel Sign Ii
- 0x03dd, // U+09c1 : Bengali Vowel Sign U
- 0x03de, // U+09c2 : Bengali Vowel Sign Uu
- 0x03df, // U+09c3 : Bengali Vowel Sign Vocalic R
- 0x13df, // U+09c4 : Bengali Vowel Sign Vocalic Rr
- 0x0000, // U+09c5 : Undefined
- 0x0000, // U+09c6 : Undefined
- 0x03e0, // U+09c7 : Bengali Vowel Sign E
- 0x03e2, // U+09c8 : Bengali Vowel Sign Ai
- 0x0000, // U+09c9 : Undefined
- 0x0000, // U+09ca : Undefined
- 0x03e4, // U+09cb : Bengali Vowel Sign O
- 0x03e6, // U+09cc : Bengali Vowel Sign Au
- 0x03e8, // U+09cd : Bengali Sign Virama
- 0x0000, // U+09ce : Undefined
- 0x0000, // U+09cf : Undefined
- 0x0000, // U+09d0 : Undefined
- 0x0000, // U+09d1 : Undefined
- 0x0000, // U+09d2 : Undefined
- 0x0000, // U+09d3 : Undefined
- 0x0000, // U+09d4 : Undefined
- 0x0000, // U+09d5 : Undefined
- 0x0000, // U+09d6 : Undefined
- 0x0000, // U+09d7 : Bengali Au Length Mark
- 0x0000, // U+09d8 : Undefined
- 0x0000, // U+09d9 : Undefined
- 0x0000, // U+09da : Undefined
- 0x0000, // U+09db : Undefined
- 0x13bf, // U+09dc : Bengali Letter Rra
- 0x13c0, // U+09dd : Bengali Letter Rha
- 0x0000, // U+09de : Undefined
- 0x03ce, // U+09df : Bengali Letter Yya
- 0x13aa, // U+09e0 : Bengali Letter Vocalic Rr
- 0x13a7, // U+09e1 : Bengali Letter Vocalic Ll
- 0x13db, // U+09e2 : Bengali Vowel Sign Vocalic L
- 0x13dc, // U+09e3 : Bengali Vowel Sign Vocalic Ll
- 0x0000, // U+09e4 : Undefined
- 0x0000, // U+09e5 : Undefined
- 0x03f1, // U+09e6 : Bengali Digit Zero
- 0x03f2, // U+09e7 : Bengali Digit One
- 0x03f3, // U+09e8 : Bengali Digit Two
- 0x03f4, // U+09e9 : Bengali Digit Three
- 0x03f5, // U+09ea : Bengali Digit Four
- 0x03f6, // U+09eb : Bengali Digit Five
- 0x03f7, // U+09ec : Bengali Digit Six
- 0x03f8, // U+09ed : Bengali Digit Seven
- 0x03f9, // U+09ee : Bengali Digit Eight
- 0x03fa, // U+09ef : Bengali Digit Nine
- 0x0000, // U+09f0 : Bengali Letter Ra With Middle Diagonal
- 0x0000, // U+09f1 : Bengali Letter Ra With Lower Diagonal
- 0x0000, // U+09f2 : Bengali Rupee Mark
- 0x0000, // U+09f3 : Bengali Rupee Sign
- 0x0000, // U+09f4 : Bengali Currency Numerator One
- 0x0000, // U+09f5 : Bengali Currency Numerator Two
- 0x0000, // U+09f6 : Bengali Currency Numerator Three
- 0x0000, // U+09f7 : Bengali Currency Numerator Four
- 0x0000, // U+09f8 : Bengali Currency Numerator One Less Than The Denominator
- 0x0000, // U+09f9 : Bengali Currency Denominator Sixteen
- 0x0000, // U+09fa : Bengali Isshar
- 0x0000, // U+09fb : Undefined
- 0x0000, // U+09fc : Undefined
- 0x0000, // U+09fd : Undefined
- 0x0000, // U+09fe : Undefined
- 0x0000, // U+09ff : Undefined
- 0x0000, // U+0a00 : Undefined
- 0x0000, // U+0a01 : Undefined
- 0x0ba2, // U+0a02 : Gurmukhi Sign Bindi
- 0x0000, // U+0a03 : Undefined
- 0x0000, // U+0a04 : Undefined
- 0x0ba4, // U+0a05 : Gurmukhi Letter A
- 0x0ba5, // U+0a06 : Gurmukhi Letter Aa
- 0x0ba6, // U+0a07 : Gurmukhi Letter I
- 0x0ba7, // U+0a08 : Gurmukhi Letter Ii
- 0x0ba8, // U+0a09 : Gurmukhi Letter U
- 0x0ba9, // U+0a0a : Gurmukhi Letter Uu
- 0x0000, // U+0a0b : Undefined
- 0x0000, // U+0a0c : Undefined
- 0x0000, // U+0a0d : Undefined
- 0x0000, // U+0a0e : Undefined
- 0x0bab, // U+0a0f : Gurmukhi Letter Ee
- 0x0bad, // U+0a10 : Gurmukhi Letter Ai
- 0x0000, // U+0a11 : Undefined
- 0x0000, // U+0a12 : Undefined
- 0x0bb0, // U+0a13 : Gurmukhi Letter Oo
- 0x0bb1, // U+0a14 : Gurmukhi Letter Au
- 0x0bb3, // U+0a15 : Gurmukhi Letter Ka
- 0x0bb4, // U+0a16 : Gurmukhi Letter Kha
- 0x0bb5, // U+0a17 : Gurmukhi Letter Ga
- 0x0bb6, // U+0a18 : Gurmukhi Letter Gha
- 0x0bb7, // U+0a19 : Gurmukhi Letter Nga
- 0x0bb8, // U+0a1a : Gurmukhi Letter Ca
- 0x0bb9, // U+0a1b : Gurmukhi Letter Cha
- 0x0bba, // U+0a1c : Gurmukhi Letter Ja
- 0x0bbb, // U+0a1d : Gurmukhi Letter Jha
- 0x0bbc, // U+0a1e : Gurmukhi Letter Nya
- 0x0bbd, // U+0a1f : Gurmukhi Letter Tta
- 0x0bbe, // U+0a20 : Gurmukhi Letter Ttha
- 0x0bbf, // U+0a21 : Gurmukhi Letter Dda
- 0x0bc0, // U+0a22 : Gurmukhi Letter Ddha
- 0x0bc1, // U+0a23 : Gurmukhi Letter Nna
- 0x0bc2, // U+0a24 : Gurmukhi Letter Ta
- 0x0bc3, // U+0a25 : Gurmukhi Letter Tha
- 0x0bc4, // U+0a26 : Gurmukhi Letter Da
- 0x0bc5, // U+0a27 : Gurmukhi Letter Dha
- 0x0bc6, // U+0a28 : Gurmukhi Letter Na
- 0x0000, // U+0a29 : Undefined
- 0x0bc8, // U+0a2a : Gurmukhi Letter Pa
- 0x0bc9, // U+0a2b : Gurmukhi Letter Pha
- 0x0bca, // U+0a2c : Gurmukhi Letter Ba
- 0x0bcb, // U+0a2d : Gurmukhi Letter Bha
- 0x0bcc, // U+0a2e : Gurmukhi Letter Ma
- 0x0bcd, // U+0a2f : Gurmukhi Letter Ya
- 0x0bcf, // U+0a30 : Gurmukhi Letter Ra
- 0x0000, // U+0a31 : Undefined
- 0x0bd1, // U+0a32 : Gurmukhi Letter La
- 0x0bd2, // U+0a33 : Gurmukhi Letter Lla
- 0x0000, // U+0a34 : Undefined
- 0x0bd4, // U+0a35 : Gurmukhi Letter Va
- 0x0bd5, // U+0a36 : Gurmukhi Letter Sha
- 0x0000, // U+0a37 : Undefined
- 0x0bd7, // U+0a38 : Gurmukhi Letter Sa
- 0x0bd8, // U+0a39 : Gurmukhi Letter Ha
- 0x0000, // U+0a3a : Undefined
- 0x0000, // U+0a3b : Undefined
- 0x0be9, // U+0a3c : Gurmukhi Sign Nukta
- 0x0000, // U+0a3d : Undefined
- 0x0bda, // U+0a3e : Gurmukhi Vowel Sign Aa
- 0x0bdb, // U+0a3f : Gurmukhi Vowel Sign I
- 0x0bdc, // U+0a40 : Gurmukhi Vowel Sign Ii
- 0x0bdd, // U+0a41 : Gurmukhi Vowel Sign U
- 0x0bde, // U+0a42 : Gurmukhi Vowel Sign Uu
- 0x0000, // U+0a43 : Undefined
- 0x0000, // U+0a44 : Undefined
- 0x0000, // U+0a45 : Undefined
- 0x0000, // U+0a46 : Undefined
- 0x0be0, // U+0a47 : Gurmukhi Vowel Sign Ee
- 0x0be2, // U+0a48 : Gurmukhi Vowel Sign Ai
- 0x0000, // U+0a49 : Undefined
- 0x0000, // U+0a4a : Undefined
- 0x0be4, // U+0a4b : Gurmukhi Vowel Sign Oo
- 0x0be6, // U+0a4c : Gurmukhi Vowel Sign Au
- 0x0be8, // U+0a4d : Gurmukhi Sign Virama
- 0x0000, // U+0a4e : Undefined
- 0x0000, // U+0a4f : Undefined
- 0x0000, // U+0a50 : Undefined
- 0x0000, // U+0a51 : Undefined
- 0x0000, // U+0a52 : Undefined
- 0x0000, // U+0a53 : Undefined
- 0x0000, // U+0a54 : Undefined
- 0x0000, // U+0a55 : Undefined
- 0x0000, // U+0a56 : Undefined
- 0x0000, // U+0a57 : Undefined
- 0x0000, // U+0a58 : Undefined
- 0x1bb4, // U+0a59 : Gurmukhi Letter Khha
- 0x1bb5, // U+0a5a : Gurmukhi Letter Ghha
- 0x1bba, // U+0a5b : Gurmukhi Letter Za
- 0x1bc0, // U+0a5c : Gurmukhi Letter Rra
- 0x0000, // U+0a5d : Undefined
- 0x1bc9, // U+0a5e : Gurmukhi Letter Fa
- 0x0000, // U+0a5f : Undefined
- 0x0000, // U+0a60 : Undefined
- 0x0000, // U+0a61 : Undefined
- 0x0000, // U+0a62 : Undefined
- 0x0000, // U+0a63 : Undefined
- 0x0000, // U+0a64 : Undefined
- 0x0000, // U+0a65 : Undefined
- 0x0bf1, // U+0a66 : Gurmukhi Digit Zero
- 0x0bf2, // U+0a67 : Gurmukhi Digit One
- 0x0bf3, // U+0a68 : Gurmukhi Digit Two
- 0x0bf4, // U+0a69 : Gurmukhi Digit Three
- 0x0bf5, // U+0a6a : Gurmukhi Digit Four
- 0x0bf6, // U+0a6b : Gurmukhi Digit Five
- 0x0bf7, // U+0a6c : Gurmukhi Digit Six
- 0x0bf8, // U+0a6d : Gurmukhi Digit Seven
- 0x0bf9, // U+0a6e : Gurmukhi Digit Eight
- 0x0bfa, // U+0a6f : Gurmukhi Digit Nine
- 0x0000, // U+0a70 : Gurmukhi Tippi
- 0x0000, // U+0a71 : Gurmukhi Addak
- 0x0000, // U+0a72 : Gurmukhi Iri
- 0x0000, // U+0a73 : Gurmukhi Ura
- 0x0000, // U+0a74 : Gurmukhi Ek Onkar
- 0x0000, // U+0a75 : Undefined
- 0x0000, // U+0a76 : Undefined
- 0x0000, // U+0a77 : Undefined
- 0x0000, // U+0a78 : Undefined
- 0x0000, // U+0a79 : Undefined
- 0x0000, // U+0a7a : Undefined
- 0x0000, // U+0a7b : Undefined
- 0x0000, // U+0a7c : Undefined
- 0x0000, // U+0a7d : Undefined
- 0x0000, // U+0a7e : Undefined
- 0x0000, // U+0a7f : Undefined
- 0x0000, // U+0a80 : Undefined
- 0x0aa1, // U+0a81 : Gujarati Sign Candrabindu
- 0x0aa2, // U+0a82 : Gujarati Sign Anusvara
- 0x0aa3, // U+0a83 : Gujarati Sign Visarga
- 0x0000, // U+0a84 : Undefined
- 0x0aa4, // U+0a85 : Gujarati Letter A
- 0x0aa5, // U+0a86 : Gujarati Letter Aa
- 0x0aa6, // U+0a87 : Gujarati Letter I
- 0x0aa7, // U+0a88 : Gujarati Letter Ii
- 0x0aa8, // U+0a89 : Gujarati Letter U
- 0x0aa9, // U+0a8a : Gujarati Letter Uu
- 0x0aaa, // U+0a8b : Gujarati Letter Vocalic R
- 0x0000, // U+0a8c : Undefined
- 0x0aae, // U+0a8d : Gujarati Vowel Candra E
- 0x0000, // U+0a8e : Undefined
- 0x0aab, // U+0a8f : Gujarati Letter E
- 0x0aad, // U+0a90 : Gujarati Letter Ai
- 0x0ab2, // U+0a91 : Gujarati Vowel Candra O
- 0x0000, // U+0a92 : Undefined
- 0x0ab0, // U+0a93 : Gujarati Letter O
- 0x0ab1, // U+0a94 : Gujarati Letter Au
- 0x0ab3, // U+0a95 : Gujarati Letter Ka
- 0x0ab4, // U+0a96 : Gujarati Letter Kha
- 0x0ab5, // U+0a97 : Gujarati Letter Ga
- 0x0ab6, // U+0a98 : Gujarati Letter Gha
- 0x0ab7, // U+0a99 : Gujarati Letter Nga
- 0x0ab8, // U+0a9a : Gujarati Letter Ca
- 0x0ab9, // U+0a9b : Gujarati Letter Cha
- 0x0aba, // U+0a9c : Gujarati Letter Ja
- 0x0abb, // U+0a9d : Gujarati Letter Jha
- 0x0abc, // U+0a9e : Gujarati Letter Nya
- 0x0abd, // U+0a9f : Gujarati Letter Tta
- 0x0abe, // U+0aa0 : Gujarati Letter Ttha
- 0x0abf, // U+0aa1 : Gujarati Letter Dda
- 0x0ac0, // U+0aa2 : Gujarati Letter Ddha
- 0x0ac1, // U+0aa3 : Gujarati Letter Nna
- 0x0ac2, // U+0aa4 : Gujarati Letter Ta
- 0x0ac3, // U+0aa5 : Gujarati Letter Tha
- 0x0ac4, // U+0aa6 : Gujarati Letter Da
- 0x0ac5, // U+0aa7 : Gujarati Letter Dha
- 0x0ac6, // U+0aa8 : Gujarati Letter Na
- 0x0000, // U+0aa9 : Undefined
- 0x0ac8, // U+0aaa : Gujarati Letter Pa
- 0x0ac9, // U+0aab : Gujarati Letter Pha
- 0x0aca, // U+0aac : Gujarati Letter Ba
- 0x0acb, // U+0aad : Gujarati Letter Bha
- 0x0acc, // U+0aae : Gujarati Letter Ma
- 0x0acd, // U+0aaf : Gujarati Letter Ya
- 0x0acf, // U+0ab0 : Gujarati Letter Ra
- 0x0000, // U+0ab1 : Undefined
- 0x0ad1, // U+0ab2 : Gujarati Letter La
- 0x0ad2, // U+0ab3 : Gujarati Letter Lla
- 0x0000, // U+0ab4 : Undefined
- 0x0ad4, // U+0ab5 : Gujarati Letter Va
- 0x0ad5, // U+0ab6 : Gujarati Letter Sha
- 0x0ad6, // U+0ab7 : Gujarati Letter Ssa
- 0x0ad7, // U+0ab8 : Gujarati Letter Sa
- 0x0ad8, // U+0ab9 : Gujarati Letter Ha
- 0x0000, // U+0aba : Undefined
- 0x0000, // U+0abb : Undefined
- 0x0ae9, // U+0abc : Gujarati Sign Nukta
- 0x1aea, // U+0abd : Gujarati Sign Avagraha
- 0x0ada, // U+0abe : Gujarati Vowel Sign Aa
- 0x0adb, // U+0abf : Gujarati Vowel Sign I
- 0x0adc, // U+0ac0 : Gujarati Vowel Sign Ii
- 0x0add, // U+0ac1 : Gujarati Vowel Sign U
- 0x0ade, // U+0ac2 : Gujarati Vowel Sign Uu
- 0x0adf, // U+0ac3 : Gujarati Vowel Sign Vocalic R
- 0x1adf, // U+0ac4 : Gujarati Vowel Sign Vocalic Rr
- 0x0ae3, // U+0ac5 : Gujarati Vowel Sign Candra E
- 0x0000, // U+0ac6 : Undefined
- 0x0ae0, // U+0ac7 : Gujarati Vowel Sign E
- 0x0ae2, // U+0ac8 : Gujarati Vowel Sign Ai
- 0x0ae7, // U+0ac9 : Gujarati Vowel Sign Candra O
- 0x0000, // U+0aca : Undefined
- 0x0ae4, // U+0acb : Gujarati Vowel Sign O
- 0x0ae6, // U+0acc : Gujarati Vowel Sign Au
- 0x0ae8, // U+0acd : Gujarati Sign Virama
- 0x0000, // U+0ace : Undefined
- 0x0000, // U+0acf : Undefined
- 0x1aa1, // U+0ad0 : Gujarati Om
- 0x0000, // U+0ad1 : Undefined
- 0x0000, // U+0ad2 : Undefined
- 0x0000, // U+0ad3 : Undefined
- 0x0000, // U+0ad4 : Undefined
- 0x0000, // U+0ad5 : Undefined
- 0x0000, // U+0ad6 : Undefined
- 0x0000, // U+0ad7 : Undefined
- 0x0000, // U+0ad8 : Undefined
- 0x0000, // U+0ad9 : Undefined
- 0x0000, // U+0ada : Undefined
- 0x0000, // U+0adb : Undefined
- 0x0000, // U+0adc : Undefined
- 0x0000, // U+0add : Undefined
- 0x0000, // U+0ade : Undefined
- 0x0000, // U+0adf : Undefined
- 0x1aaa, // U+0ae0 : Gujarati Letter Vocalic Rr
- 0x0000, // U+0ae1 : Undefined
- 0x0000, // U+0ae2 : Undefined
- 0x0000, // U+0ae3 : Undefined
- 0x0000, // U+0ae4 : Undefined
- 0x0000, // U+0ae5 : Undefined
- 0x0af1, // U+0ae6 : Gujarati Digit Zero
- 0x0af2, // U+0ae7 : Gujarati Digit One
- 0x0af3, // U+0ae8 : Gujarati Digit Two
- 0x0af4, // U+0ae9 : Gujarati Digit Three
- 0x0af5, // U+0aea : Gujarati Digit Four
- 0x0af6, // U+0aeb : Gujarati Digit Five
- 0x0af7, // U+0aec : Gujarati Digit Six
- 0x0af8, // U+0aed : Gujarati Digit Seven
- 0x0af9, // U+0aee : Gujarati Digit Eight
- 0x0afa, // U+0aef : Gujarati Digit Nine
- 0x0000, // U+0af0 : Undefined
- 0x0000, // U+0af1 : Undefined
- 0x0000, // U+0af2 : Undefined
- 0x0000, // U+0af3 : Undefined
- 0x0000, // U+0af4 : Undefined
- 0x0000, // U+0af5 : Undefined
- 0x0000, // U+0af6 : Undefined
- 0x0000, // U+0af7 : Undefined
- 0x0000, // U+0af8 : Undefined
- 0x0000, // U+0af9 : Undefined
- 0x0000, // U+0afa : Undefined
- 0x0000, // U+0afb : Undefined
- 0x0000, // U+0afc : Undefined
- 0x0000, // U+0afd : Undefined
- 0x0000, // U+0afe : Undefined
- 0x0000, // U+0aff : Undefined
- 0x0000, // U+0b00 : Undefined
- 0x07a1, // U+0b01 : Oriya Sign Candrabindu
- 0x07a2, // U+0b02 : Oriya Sign Anusvara
- 0x07a3, // U+0b03 : Oriya Sign Visarga
- 0x0000, // U+0b04 : Undefined
- 0x07a4, // U+0b05 : Oriya Letter A
- 0x07a5, // U+0b06 : Oriya Letter Aa
- 0x07a6, // U+0b07 : Oriya Letter I
- 0x07a7, // U+0b08 : Oriya Letter Ii
- 0x07a8, // U+0b09 : Oriya Letter U
- 0x07a9, // U+0b0a : Oriya Letter Uu
- 0x07aa, // U+0b0b : Oriya Letter Vocalic R
- 0x17a6, // U+0b0c : Oriya Letter Vocalic L
- 0x0000, // U+0b0d : Undefined
- 0x0000, // U+0b0e : Undefined
- 0x07ab, // U+0b0f : Oriya Letter E
- 0x07ad, // U+0b10 : Oriya Letter Ai
- 0x0000, // U+0b11 : Undefined
- 0x0000, // U+0b12 : Undefined
- 0x07b0, // U+0b13 : Oriya Letter O
- 0x07b1, // U+0b14 : Oriya Letter Au
- 0x07b3, // U+0b15 : Oriya Letter Ka
- 0x07b4, // U+0b16 : Oriya Letter Kha
- 0x07b5, // U+0b17 : Oriya Letter Ga
- 0x07b6, // U+0b18 : Oriya Letter Gha
- 0x07b7, // U+0b19 : Oriya Letter Nga
- 0x07b8, // U+0b1a : Oriya Letter Ca
- 0x07b9, // U+0b1b : Oriya Letter Cha
- 0x07ba, // U+0b1c : Oriya Letter Ja
- 0x07bb, // U+0b1d : Oriya Letter Jha
- 0x07bc, // U+0b1e : Oriya Letter Nya
- 0x07bd, // U+0b1f : Oriya Letter Tta
- 0x07be, // U+0b20 : Oriya Letter Ttha
- 0x07bf, // U+0b21 : Oriya Letter Dda
- 0x07c0, // U+0b22 : Oriya Letter Ddha
- 0x07c1, // U+0b23 : Oriya Letter Nna
- 0x07c2, // U+0b24 : Oriya Letter Ta
- 0x07c3, // U+0b25 : Oriya Letter Tha
- 0x07c4, // U+0b26 : Oriya Letter Da
- 0x07c5, // U+0b27 : Oriya Letter Dha
- 0x07c6, // U+0b28 : Oriya Letter Na
- 0x0000, // U+0b29 : Undefined
- 0x07c8, // U+0b2a : Oriya Letter Pa
- 0x07c9, // U+0b2b : Oriya Letter Pha
- 0x07ca, // U+0b2c : Oriya Letter Ba
- 0x07cb, // U+0b2d : Oriya Letter Bha
- 0x07cc, // U+0b2e : Oriya Letter Ma
- 0x07cd, // U+0b2f : Oriya Letter Ya
- 0x07cf, // U+0b30 : Oriya Letter Ra
- 0x0000, // U+0b31 : Undefined
- 0x07d1, // U+0b32 : Oriya Letter La
- 0x07d2, // U+0b33 : Oriya Letter Lla
- 0x0000, // U+0b34 : Undefined
- 0x0000, // U+0b35 : Undefined
- 0x07d5, // U+0b36 : Oriya Letter Sha
- 0x07d6, // U+0b37 : Oriya Letter Ssa
- 0x07d7, // U+0b38 : Oriya Letter Sa
- 0x07d8, // U+0b39 : Oriya Letter Ha
- 0x0000, // U+0b3a : Undefined
- 0x0000, // U+0b3b : Undefined
- 0x07e9, // U+0b3c : Oriya Sign Nukta
- 0x17ea, // U+0b3d : Oriya Sign Avagraha
- 0x07da, // U+0b3e : Oriya Vowel Sign Aa
- 0x07db, // U+0b3f : Oriya Vowel Sign I
- 0x07dc, // U+0b40 : Oriya Vowel Sign Ii
- 0x07dd, // U+0b41 : Oriya Vowel Sign U
- 0x07de, // U+0b42 : Oriya Vowel Sign Uu
- 0x07df, // U+0b43 : Oriya Vowel Sign Vocalic R
- 0x0000, // U+0b44 : Undefined
- 0x0000, // U+0b45 : Undefined
- 0x0000, // U+0b46 : Undefined
- 0x07e0, // U+0b47 : Oriya Vowel Sign E
- 0x07e2, // U+0b48 : Oriya Vowel Sign Ai
- 0x0000, // U+0b49 : Undefined
- 0x0000, // U+0b4a : Undefined
- 0x07e4, // U+0b4b : Oriya Vowel Sign O
- 0x07e6, // U+0b4c : Oriya Vowel Sign Au
- 0x07e8, // U+0b4d : Oriya Sign Virama
- 0x0000, // U+0b4e : Undefined
- 0x0000, // U+0b4f : Undefined
- 0x0000, // U+0b50 : Undefined
- 0x0000, // U+0b51 : Undefined
- 0x0000, // U+0b52 : Undefined
- 0x0000, // U+0b53 : Undefined
- 0x0000, // U+0b54 : Undefined
- 0x0000, // U+0b55 : Undefined
- 0x0000, // U+0b56 : Oriya Ai Length Mark
- 0x0000, // U+0b57 : Oriya Au Length Mark
- 0x0000, // U+0b58 : Undefined
- 0x0000, // U+0b59 : Undefined
- 0x0000, // U+0b5a : Undefined
- 0x0000, // U+0b5b : Undefined
- 0x17bf, // U+0b5c : Oriya Letter Rra
- 0x17c0, // U+0b5d : Oriya Letter Rha
- 0x0000, // U+0b5e : Undefined
- 0x07ce, // U+0b5f : Oriya Letter Yya
- 0x17aa, // U+0b60 : Oriya Letter Vocalic Rr
- 0x17a7, // U+0b61 : Oriya Letter Vocalic Ll
- 0x0000, // U+0b62 : Undefined
- 0x0000, // U+0b63 : Undefined
- 0x0000, // U+0b64 : Undefined
- 0x0000, // U+0b65 : Undefined
- 0x07f1, // U+0b66 : Oriya Digit Zero
- 0x07f2, // U+0b67 : Oriya Digit One
- 0x07f3, // U+0b68 : Oriya Digit Two
- 0x07f4, // U+0b69 : Oriya Digit Three
- 0x07f5, // U+0b6a : Oriya Digit Four
- 0x07f6, // U+0b6b : Oriya Digit Five
- 0x07f7, // U+0b6c : Oriya Digit Six
- 0x07f8, // U+0b6d : Oriya Digit Seven
- 0x07f9, // U+0b6e : Oriya Digit Eight
- 0x07fa, // U+0b6f : Oriya Digit Nine
- 0x0000, // U+0b70 : Oriya Isshar
- 0x0000, // U+0b71 : Undefined
- 0x0000, // U+0b72 : Undefined
- 0x0000, // U+0b73 : Undefined
- 0x0000, // U+0b74 : Undefined
- 0x0000, // U+0b75 : Undefined
- 0x0000, // U+0b76 : Undefined
- 0x0000, // U+0b77 : Undefined
- 0x0000, // U+0b78 : Undefined
- 0x0000, // U+0b79 : Undefined
- 0x0000, // U+0b7a : Undefined
- 0x0000, // U+0b7b : Undefined
- 0x0000, // U+0b7c : Undefined
- 0x0000, // U+0b7d : Undefined
- 0x0000, // U+0b7e : Undefined
- 0x0000, // U+0b7f : Undefined
- 0x0000, // U+0b80 : Undefined
- 0x0000, // U+0b81 : Undefined
- 0x04a2, // U+0b82 : Tamil Sign Anusvara
- 0x04a3, // U+0b83 : Tamil Sign Visarga
- 0x0000, // U+0b84 : Undefined
- 0x04a4, // U+0b85 : Tamil Letter A
- 0x04a5, // U+0b86 : Tamil Letter Aa
- 0x04a6, // U+0b87 : Tamil Letter I
- 0x04a7, // U+0b88 : Tamil Letter Ii
- 0x04a8, // U+0b89 : Tamil Letter U
- 0x04a9, // U+0b8a : Tamil Letter Uu
- 0x0000, // U+0b8b : Undefined
- 0x0000, // U+0b8c : Undefined
- 0x0000, // U+0b8d : Undefined
- 0x0000, // U+0b8e : Tamil Letter E
- 0x04ab, // U+0b8f : Tamil Letter Ee
- 0x04ad, // U+0b90 : Tamil Letter Ai
- 0x0000, // U+0b91 : Undefined
- 0x04af, // U+0b92 : Tamil Letter O
- 0x04b0, // U+0b93 : Tamil Letter Oo
- 0x04b1, // U+0b94 : Tamil Letter Au
- 0x04b3, // U+0b95 : Tamil Letter Ka
- 0x0000, // U+0b96 : Undefined
- 0x0000, // U+0b97 : Undefined
- 0x0000, // U+0b98 : Undefined
- 0x04b7, // U+0b99 : Tamil Letter Nga
- 0x04b8, // U+0b9a : Tamil Letter Ca
- 0x0000, // U+0b9b : Undefined
- 0x04ba, // U+0b9c : Tamil Letter Ja
- 0x0000, // U+0b9d : Undefined
- 0x04bc, // U+0b9e : Tamil Letter Nya
- 0x04bd, // U+0b9f : Tamil Letter Tta
- 0x0000, // U+0ba0 : Undefined
- 0x0000, // U+0ba1 : Undefined
- 0x0000, // U+0ba2 : Undefined
- 0x04c1, // U+0ba3 : Tamil Letter Nna
- 0x04c2, // U+0ba4 : Tamil Letter Ta
- 0x0000, // U+0ba5 : Undefined
- 0x0000, // U+0ba6 : Undefined
- 0x0000, // U+0ba7 : Undefined
- 0x04c6, // U+0ba8 : Tamil Letter Na
- 0x04c7, // U+0ba9 : Tamil Letter Nnna
- 0x04c8, // U+0baa : Tamil Letter Pa
- 0x0000, // U+0bab : Undefined
- 0x0000, // U+0bac : Undefined
- 0x0000, // U+0bad : Undefined
- 0x04cc, // U+0bae : Tamil Letter Ma
- 0x04cd, // U+0baf : Tamil Letter Ya
- 0x04cf, // U+0bb0 : Tamil Letter Ra
- 0x04d0, // U+0bb1 : Tamil Letter Rra
- 0x04d1, // U+0bb2 : Tamil Letter La
- 0x04d2, // U+0bb3 : Tamil Letter Lla
- 0x04d3, // U+0bb4 : Tamil Letter Llla
- 0x04d4, // U+0bb5 : Tamil Letter Va
- 0x0000, // U+0bb6 : Undefined
- 0x04d5, // U+0bb7 : Tamil Letter Ssa
- 0x04d7, // U+0bb8 : Tamil Letter Sa
- 0x04d8, // U+0bb9 : Tamil Letter Ha
- 0x0000, // U+0bba : Undefined
- 0x0000, // U+0bbb : Undefined
- 0x0000, // U+0bbc : Undefined
- 0x0000, // U+0bbd : Undefined
- 0x04da, // U+0bbe : Tamil Vowel Sign Aa
- 0x04db, // U+0bbf : Tamil Vowel Sign I
- 0x04dc, // U+0bc0 : Tamil Vowel Sign Ii
- 0x04dd, // U+0bc1 : Tamil Vowel Sign U
- 0x04de, // U+0bc2 : Tamil Vowel Sign Uu
- 0x0000, // U+0bc3 : Undefined
- 0x0000, // U+0bc4 : Undefined
- 0x0000, // U+0bc5 : Undefined
- 0x04e0, // U+0bc6 : Tamil Vowel Sign E
- 0x04e1, // U+0bc7 : Tamil Vowel Sign Ee
- 0x04e2, // U+0bc8 : Tamil Vowel Sign Ai
- 0x0000, // U+0bc9 : Undefined
- 0x04e4, // U+0bca : Tamil Vowel Sign O
- 0x04e5, // U+0bcb : Tamil Vowel Sign Oo
- 0x04e6, // U+0bcc : Tamil Vowel Sign Au
- 0x04e8, // U+0bcd : Tamil Sign Virama
- 0x0000, // U+0bce : Undefined
- 0x0000, // U+0bcf : Undefined
- 0x0000, // U+0bd0 : Undefined
- 0x0000, // U+0bd1 : Undefined
- 0x0000, // U+0bd2 : Undefined
- 0x0000, // U+0bd3 : Undefined
- 0x0000, // U+0bd4 : Undefined
- 0x0000, // U+0bd5 : Undefined
- 0x0000, // U+0bd6 : Undefined
- 0x0000, // U+0bd7 : Tamil Au Length Mark
- 0x0000, // U+0bd8 : Undefined
- 0x0000, // U+0bd9 : Undefined
- 0x0000, // U+0bda : Undefined
- 0x0000, // U+0bdb : Undefined
- 0x0000, // U+0bdc : Undefined
- 0x0000, // U+0bdd : Undefined
- 0x0000, // U+0bde : Undefined
- 0x0000, // U+0bdf : Undefined
- 0x0000, // U+0be0 : Undefined
- 0x0000, // U+0be1 : Undefined
- 0x0000, // U+0be2 : Undefined
- 0x0000, // U+0be3 : Undefined
- 0x0000, // U+0be4 : Undefined
- 0x0000, // U+0be5 : Undefined
- 0x0000, // U+0be6 : Undefined
- 0x04f2, // U+0be7 : Tamil Digit One
- 0x04f3, // U+0be8 : Tamil Digit Two
- 0x04f4, // U+0be9 : Tamil Digit Three
- 0x04f5, // U+0bea : Tamil Digit Four
- 0x04f6, // U+0beb : Tamil Digit Five
- 0x04f7, // U+0bec : Tamil Digit Six
- 0x04f8, // U+0bed : Tamil Digit Seven
- 0x04f9, // U+0bee : Tamil Digit Eight
- 0x04fa, // U+0bef : Tamil Digit Nine
- 0x0000, // U+0bf0 : Tamil Number Ten
- 0x0000, // U+0bf1 : Tamil Number One Hundred
- 0x0000, // U+0bf2 : Tamil Number One Thousand
- 0x0000, // U+0bf3 : Undefined
- 0x0000, // U+0bf4 : Undefined
- 0x0000, // U+0bf5 : Undefined
- 0x0000, // U+0bf6 : Undefined
- 0x0000, // U+0bf7 : Undefined
- 0x0000, // U+0bf8 : Undefined
- 0x0000, // U+0bf9 : Undefined
- 0x0000, // U+0bfa : Undefined
- 0x0000, // U+0bfb : Undefined
- 0x0000, // U+0bfc : Undefined
- 0x0000, // U+0bfd : Undefined
- 0x0000, // U+0bfe : Undefined
- 0x0000, // U+0bff : Undefined
- 0x0000, // U+0c00 : Undefined
- 0x05a1, // U+0c01 : Telugu Sign Candrabindu
- 0x05a2, // U+0c02 : Telugu Sign Anusvara
- 0x05a3, // U+0c03 : Telugu Sign Visarga
- 0x0000, // U+0c04 : Undefined
- 0x05a4, // U+0c05 : Telugu Letter A
- 0x05a5, // U+0c06 : Telugu Letter Aa
- 0x05a6, // U+0c07 : Telugu Letter I
- 0x05a7, // U+0c08 : Telugu Letter Ii
- 0x05a8, // U+0c09 : Telugu Letter U
- 0x05a9, // U+0c0a : Telugu Letter Uu
- 0x05aa, // U+0c0b : Telugu Letter Vocalic R
- 0x15a6, // U+0c0c : Telugu Letter Vocalic L
- 0x0000, // U+0c0d : Undefined
- 0x05ab, // U+0c0e : Telugu Letter E
- 0x05ac, // U+0c0f : Telugu Letter Ee
- 0x05ad, // U+0c10 : Telugu Letter Ai
- 0x0000, // U+0c11 : Undefined
- 0x05af, // U+0c12 : Telugu Letter O
- 0x05b0, // U+0c13 : Telugu Letter Oo
- 0x05b1, // U+0c14 : Telugu Letter Au
- 0x05b3, // U+0c15 : Telugu Letter Ka
- 0x05b4, // U+0c16 : Telugu Letter Kha
- 0x05b5, // U+0c17 : Telugu Letter Ga
- 0x05b6, // U+0c18 : Telugu Letter Gha
- 0x05b7, // U+0c19 : Telugu Letter Nga
- 0x05b8, // U+0c1a : Telugu Letter Ca
- 0x05b9, // U+0c1b : Telugu Letter Cha
- 0x05ba, // U+0c1c : Telugu Letter Ja
- 0x05bb, // U+0c1d : Telugu Letter Jha
- 0x05bc, // U+0c1e : Telugu Letter Nya
- 0x05bd, // U+0c1f : Telugu Letter Tta
- 0x05be, // U+0c20 : Telugu Letter Ttha
- 0x05bf, // U+0c21 : Telugu Letter Dda
- 0x05c0, // U+0c22 : Telugu Letter Ddha
- 0x05c1, // U+0c23 : Telugu Letter Nna
- 0x05c2, // U+0c24 : Telugu Letter Ta
- 0x05c3, // U+0c25 : Telugu Letter Tha
- 0x05c4, // U+0c26 : Telugu Letter Da
- 0x05c5, // U+0c27 : Telugu Letter Dha
- 0x05c6, // U+0c28 : Telugu Letter Na
- 0x0000, // U+0c29 : Undefined
- 0x05c8, // U+0c2a : Telugu Letter Pa
- 0x05c9, // U+0c2b : Telugu Letter Pha
- 0x05ca, // U+0c2c : Telugu Letter Ba
- 0x05cb, // U+0c2d : Telugu Letter Bha
- 0x05cc, // U+0c2e : Telugu Letter Ma
- 0x05cd, // U+0c2f : Telugu Letter Ya
- 0x05cf, // U+0c30 : Telugu Letter Ra
- 0x05d0, // U+0c31 : Telugu Letter Rra
- 0x05d1, // U+0c32 : Telugu Letter La
- 0x05d2, // U+0c33 : Telugu Letter Lla
- 0x0000, // U+0c34 : Undefined
- 0x05d4, // U+0c35 : Telugu Letter Va
- 0x05d5, // U+0c36 : Telugu Letter Sha
- 0x05d6, // U+0c37 : Telugu Letter Ssa
- 0x05d7, // U+0c38 : Telugu Letter Sa
- 0x05d8, // U+0c39 : Telugu Letter Ha
- 0x0000, // U+0c3a : Undefined
- 0x0000, // U+0c3b : Undefined
- 0x0000, // U+0c3c : Undefined
- 0x0000, // U+0c3d : Undefined
- 0x05da, // U+0c3e : Telugu Vowel Sign Aa
- 0x05db, // U+0c3f : Telugu Vowel Sign I
- 0x05dc, // U+0c40 : Telugu Vowel Sign Ii
- 0x05dd, // U+0c41 : Telugu Vowel Sign U
- 0x05de, // U+0c42 : Telugu Vowel Sign Uu
- 0x05df, // U+0c43 : Telugu Vowel Sign Vocalic R
- 0x15df, // U+0c44 : Telugu Vowel Sign Vocalic Rr
- 0x0000, // U+0c45 : Undefined
- 0x05e0, // U+0c46 : Telugu Vowel Sign E
- 0x05e1, // U+0c47 : Telugu Vowel Sign Ee
- 0x05e2, // U+0c48 : Telugu Vowel Sign Ai
- 0x0000, // U+0c49 : Undefined
- 0x05e4, // U+0c4a : Telugu Vowel Sign O
- 0x05e5, // U+0c4b : Telugu Vowel Sign Oo
- 0x05e6, // U+0c4c : Telugu Vowel Sign Au
- 0x05e8, // U+0c4d : Telugu Sign Virama
- 0x0000, // U+0c4e : Undefined
- 0x0000, // U+0c4f : Undefined
- 0x0000, // U+0c50 : Undefined
- 0x0000, // U+0c51 : Undefined
- 0x0000, // U+0c52 : Undefined
- 0x0000, // U+0c53 : Undefined
- 0x0000, // U+0c54 : Undefined
- 0x0000, // U+0c55 : Telugu Length Mark
- 0x0000, // U+0c56 : Telugu Ai Length Mark
- 0x0000, // U+0c57 : Undefined
- 0x0000, // U+0c58 : Undefined
- 0x0000, // U+0c59 : Undefined
- 0x0000, // U+0c5a : Undefined
- 0x0000, // U+0c5b : Undefined
- 0x0000, // U+0c5c : Undefined
- 0x0000, // U+0c5d : Undefined
- 0x0000, // U+0c5e : Undefined
- 0x0000, // U+0c5f : Undefined
- 0x15aa, // U+0c60 : Telugu Letter Vocalic Rr
- 0x15a7, // U+0c61 : Telugu Letter Vocalic Ll
- 0x0000, // U+0c62 : Undefined
- 0x0000, // U+0c63 : Undefined
- 0x0000, // U+0c64 : Undefined
- 0x0000, // U+0c65 : Undefined
- 0x05f1, // U+0c66 : Telugu Digit Zero
- 0x05f2, // U+0c67 : Telugu Digit One
- 0x05f3, // U+0c68 : Telugu Digit Two
- 0x05f4, // U+0c69 : Telugu Digit Three
- 0x05f5, // U+0c6a : Telugu Digit Four
- 0x05f6, // U+0c6b : Telugu Digit Five
- 0x05f7, // U+0c6c : Telugu Digit Six
- 0x05f8, // U+0c6d : Telugu Digit Seven
- 0x05f9, // U+0c6e : Telugu Digit Eight
- 0x05fa, // U+0c6f : Telugu Digit Nine
- 0x0000, // U+0c70 : Undefined
- 0x0000, // U+0c71 : Undefined
- 0x0000, // U+0c72 : Undefined
- 0x0000, // U+0c73 : Undefined
- 0x0000, // U+0c74 : Undefined
- 0x0000, // U+0c75 : Undefined
- 0x0000, // U+0c76 : Undefined
- 0x0000, // U+0c77 : Undefined
- 0x0000, // U+0c78 : Undefined
- 0x0000, // U+0c79 : Undefined
- 0x0000, // U+0c7a : Undefined
- 0x0000, // U+0c7b : Undefined
- 0x0000, // U+0c7c : Undefined
- 0x0000, // U+0c7d : Undefined
- 0x0000, // U+0c7e : Undefined
- 0x0000, // U+0c7f : Undefined
- 0x0000, // U+0c80 : Undefined
- 0x0000, // U+0c81 : Undefined
- 0x08a2, // U+0c82 : Kannada Sign Anusvara
- 0x08a3, // U+0c83 : Kannada Sign Visarga
- 0x0000, // U+0c84 : Undefined
- 0x08a4, // U+0c85 : Kannada Letter A
- 0x08a5, // U+0c86 : Kannada Letter Aa
- 0x08a6, // U+0c87 : Kannada Letter I
- 0x08a7, // U+0c88 : Kannada Letter Ii
- 0x08a8, // U+0c89 : Kannada Letter U
- 0x08a9, // U+0c8a : Kannada Letter Uu
- 0x08aa, // U+0c8b : Kannada Letter Vocalic R
- 0x18a6, // U+0c8c : Kannada Letter Vocalic L
- 0x0000, // U+0c8d : Undefined
- 0x08ab, // U+0c8e : Kannada Letter E
- 0x08ac, // U+0c8f : Kannada Letter Ee
- 0x08ad, // U+0c90 : Kannada Letter Ai
- 0x0000, // U+0c91 : Undefined
- 0x08af, // U+0c92 : Kannada Letter O
- 0x08b0, // U+0c93 : Kannada Letter Oo
- 0x08b1, // U+0c94 : Kannada Letter Au
- 0x08b3, // U+0c95 : Kannada Letter Ka
- 0x08b4, // U+0c96 : Kannada Letter Kha
- 0x08b5, // U+0c97 : Kannada Letter Ga
- 0x08b6, // U+0c98 : Kannada Letter Gha
- 0x08b7, // U+0c99 : Kannada Letter Nga
- 0x08b8, // U+0c9a : Kannada Letter Ca
- 0x08b9, // U+0c9b : Kannada Letter Cha
- 0x08ba, // U+0c9c : Kannada Letter Ja
- 0x08bb, // U+0c9d : Kannada Letter Jha
- 0x08bc, // U+0c9e : Kannada Letter Nya
- 0x08bd, // U+0c9f : Kannada Letter Tta
- 0x08be, // U+0ca0 : Kannada Letter Ttha
- 0x08bf, // U+0ca1 : Kannada Letter Dda
- 0x08c0, // U+0ca2 : Kannada Letter Ddha
- 0x08c1, // U+0ca3 : Kannada Letter Nna
- 0x08c2, // U+0ca4 : Kannada Letter Ta
- 0x08c3, // U+0ca5 : Kannada Letter Tha
- 0x08c4, // U+0ca6 : Kannada Letter Da
- 0x08c5, // U+0ca7 : Kannada Letter Dha
- 0x08c6, // U+0ca8 : Kannada Letter Na
- 0x0000, // U+0ca9 : Undefined
- 0x08c8, // U+0caa : Kannada Letter Pa
- 0x08c9, // U+0cab : Kannada Letter Pha
- 0x08ca, // U+0cac : Kannada Letter Ba
- 0x08cb, // U+0cad : Kannada Letter Bha
- 0x08cc, // U+0cae : Kannada Letter Ma
- 0x08cd, // U+0caf : Kannada Letter Ya
- 0x08cf, // U+0cb0 : Kannada Letter Ra
- 0x08d0, // U+0cb1 : Kannada Letter Rra
- 0x08d1, // U+0cb2 : Kannada Letter La
- 0x08d2, // U+0cb3 : Kannada Letter Lla
- 0x0000, // U+0cb4 : Undefined
- 0x08d4, // U+0cb5 : Kannada Letter Va
- 0x08d5, // U+0cb6 : Kannada Letter Sha
- 0x08d6, // U+0cb7 : Kannada Letter Ssa
- 0x08d7, // U+0cb8 : Kannada Letter Sa
- 0x08d8, // U+0cb9 : Kannada Letter Ha
- 0x0000, // U+0cba : Undefined
- 0x0000, // U+0cbb : Undefined
- 0x0000, // U+0cbc : Undefined
- 0x0000, // U+0cbd : Undefined
- 0x08da, // U+0cbe : Kannada Vowel Sign Aa
- 0x08db, // U+0cbf : Kannada Vowel Sign I
- 0x08dc, // U+0cc0 : Kannada Vowel Sign Ii
- 0x08dd, // U+0cc1 : Kannada Vowel Sign U
- 0x08de, // U+0cc2 : Kannada Vowel Sign Uu
- 0x08df, // U+0cc3 : Kannada Vowel Sign Vocalic R
- 0x18df, // U+0cc4 : Kannada Vowel Sign Vocalic Rr
- 0x0000, // U+0cc5 : Undefined
- 0x08e0, // U+0cc6 : Kannada Vowel Sign E
- 0x08e1, // U+0cc7 : Kannada Vowel Sign Ee
- 0x08e2, // U+0cc8 : Kannada Vowel Sign Ai
- 0x0000, // U+0cc9 : Undefined
- 0x08e4, // U+0cca : Kannada Vowel Sign O
- 0x08e5, // U+0ccb : Kannada Vowel Sign Oo
- 0x08e6, // U+0ccc : Kannada Vowel Sign Au
- 0x08e8, // U+0ccd : Kannada Sign Virama
- 0x0000, // U+0cce : Undefined
- 0x0000, // U+0ccf : Undefined
- 0x0000, // U+0cd0 : Undefined
- 0x0000, // U+0cd1 : Undefined
- 0x0000, // U+0cd2 : Undefined
- 0x0000, // U+0cd3 : Undefined
- 0x0000, // U+0cd4 : Undefined
- 0x0000, // U+0cd5 : Kannada Length Mark
- 0x0000, // U+0cd6 : Kannada Ai Length Mark
- 0x0000, // U+0cd7 : Undefined
- 0x0000, // U+0cd8 : Undefined
- 0x0000, // U+0cd9 : Undefined
- 0x0000, // U+0cda : Undefined
- 0x0000, // U+0cdb : Undefined
- 0x0000, // U+0cdc : Undefined
- 0x0000, // U+0cdd : Undefined
- 0x18c9, // U+0cde : Kannada Letter Fa
- 0x0000, // U+0cdf : Undefined
- 0x18aa, // U+0ce0 : Kannada Letter Vocalic Rr
- 0x18a7, // U+0ce1 : Kannada Letter Vocalic Ll
- 0x0000, // U+0ce2 : Undefined
- 0x0000, // U+0ce3 : Undefined
- 0x0000, // U+0ce4 : Undefined
- 0x0000, // U+0ce5 : Undefined
- 0x08f1, // U+0ce6 : Kannada Digit Zero
- 0x08f2, // U+0ce7 : Kannada Digit One
- 0x08f3, // U+0ce8 : Kannada Digit Two
- 0x08f4, // U+0ce9 : Kannada Digit Three
- 0x08f5, // U+0cea : Kannada Digit Four
- 0x08f6, // U+0ceb : Kannada Digit Five
- 0x08f7, // U+0cec : Kannada Digit Six
- 0x08f8, // U+0ced : Kannada Digit Seven
- 0x08f9, // U+0cee : Kannada Digit Eight
- 0x08fa, // U+0cef : Kannada Digit Nine
- 0x0000, // U+0cf0 : Undefined
- 0x0000, // U+0cf1 : Undefined
- 0x0000, // U+0cf2 : Undefined
- 0x0000, // U+0cf3 : Undefined
- 0x0000, // U+0cf4 : Undefined
- 0x0000, // U+0cf5 : Undefined
- 0x0000, // U+0cf6 : Undefined
- 0x0000, // U+0cf7 : Undefined
- 0x0000, // U+0cf8 : Undefined
- 0x0000, // U+0cf9 : Undefined
- 0x0000, // U+0cfa : Undefined
- 0x0000, // U+0cfb : Undefined
- 0x0000, // U+0cfc : Undefined
- 0x0000, // U+0cfd : Undefined
- 0x0000, // U+0cfe : Undefined
- 0x0000, // U+0cff : Undefined
- 0x0000, // U+0d00 : Undefined
- 0x0000, // U+0d01 : Undefined
- 0x09a2, // U+0d02 : Malayalam Sign Anusvara
- 0x09a3, // U+0d03 : Malayalam Sign Visarga
- 0x0000, // U+0d04 : Undefined
- 0x09a4, // U+0d05 : Malayalam Letter A
- 0x09a5, // U+0d06 : Malayalam Letter Aa
- 0x09a6, // U+0d07 : Malayalam Letter I
- 0x09a7, // U+0d08 : Malayalam Letter Ii
- 0x09a8, // U+0d09 : Malayalam Letter U
- 0x09a9, // U+0d0a : Malayalam Letter Uu
- 0x09aa, // U+0d0b : Malayalam Letter Vocalic R
- 0x19a6, // U+0d0c : Malayalam Letter Vocalic L
- 0x0000, // U+0d0d : Undefined
- 0x09ab, // U+0d0e : Malayalam Letter E
- 0x09ac, // U+0d0f : Malayalam Letter Ee
- 0x09ad, // U+0d10 : Malayalam Letter Ai
- 0x0000, // U+0d11 : Undefined
- 0x09af, // U+0d12 : Malayalam Letter O
- 0x09b0, // U+0d13 : Malayalam Letter Oo
- 0x09b1, // U+0d14 : Malayalam Letter Au
- 0x09b3, // U+0d15 : Malayalam Letter Ka
- 0x09b4, // U+0d16 : Malayalam Letter Kha
- 0x09b5, // U+0d17 : Malayalam Letter Ga
- 0x09b6, // U+0d18 : Malayalam Letter Gha
- 0x09b7, // U+0d19 : Malayalam Letter Nga
- 0x09b8, // U+0d1a : Malayalam Letter Ca
- 0x09b9, // U+0d1b : Malayalam Letter Cha
- 0x09ba, // U+0d1c : Malayalam Letter Ja
- 0x09bb, // U+0d1d : Malayalam Letter Jha
- 0x09bc, // U+0d1e : Malayalam Letter Nya
- 0x09bd, // U+0d1f : Malayalam Letter Tta
- 0x09be, // U+0d20 : Malayalam Letter Ttha
- 0x09bf, // U+0d21 : Malayalam Letter Dda
- 0x09c0, // U+0d22 : Malayalam Letter Ddha
- 0x09c1, // U+0d23 : Malayalam Letter Nna
- 0x09c2, // U+0d24 : Malayalam Letter Ta
- 0x09c3, // U+0d25 : Malayalam Letter Tha
- 0x09c4, // U+0d26 : Malayalam Letter Da
- 0x09c5, // U+0d27 : Malayalam Letter Dha
- 0x09c6, // U+0d28 : Malayalam Letter Na
- 0x0000, // U+0d29 : Undefined
- 0x09c8, // U+0d2a : Malayalam Letter Pa
- 0x09c9, // U+0d2b : Malayalam Letter Pha
- 0x09ca, // U+0d2c : Malayalam Letter Ba
- 0x09cb, // U+0d2d : Malayalam Letter Bha
- 0x09cc, // U+0d2e : Malayalam Letter Ma
- 0x09cd, // U+0d2f : Malayalam Letter Ya
- 0x09cf, // U+0d30 : Malayalam Letter Ra
- 0x09d0, // U+0d31 : Malayalam Letter Rra
- 0x09d1, // U+0d32 : Malayalam Letter La
- 0x09d2, // U+0d33 : Malayalam Letter Lla
- 0x09d3, // U+0d34 : Malayalam Letter Llla
- 0x09d4, // U+0d35 : Malayalam Letter Va
- 0x09d5, // U+0d36 : Malayalam Letter Sha
- 0x09d6, // U+0d37 : Malayalam Letter Ssa
- 0x09d7, // U+0d38 : Malayalam Letter Sa
- 0x09d8, // U+0d39 : Malayalam Letter Ha
- 0x0000, // U+0d3a : Undefined
- 0x0000, // U+0d3b : Undefined
- 0x0000, // U+0d3c : Undefined
- 0x0000, // U+0d3d : Undefined
- 0x09da, // U+0d3e : Malayalam Vowel Sign Aa
- 0x09db, // U+0d3f : Malayalam Vowel Sign I
- 0x09dc, // U+0d40 : Malayalam Vowel Sign Ii
- 0x09dd, // U+0d41 : Malayalam Vowel Sign U
- 0x09de, // U+0d42 : Malayalam Vowel Sign Uu
- 0x09df, // U+0d43 : Malayalam Vowel Sign Vocalic R
- 0x0000, // U+0d44 : Undefined
- 0x0000, // U+0d45 : Undefined
- 0x09e0, // U+0d46 : Malayalam Vowel Sign E
- 0x09e1, // U+0d47 : Malayalam Vowel Sign Ee
- 0x09e2, // U+0d48 : Malayalam Vowel Sign Ai
- 0x0000, // U+0d49 : Undefined
- 0x09e4, // U+0d4a : Malayalam Vowel Sign O
- 0x09e5, // U+0d4b : Malayalam Vowel Sign Oo
- 0x09e6, // U+0d4c : Malayalam Vowel Sign Au
- 0x09e8, // U+0d4d : Malayalam Sign Virama
- 0x0000, // U+0d4e : Undefined
- 0x0000, // U+0d4f : Undefined
- 0x0000, // U+0d50 : Undefined
- 0x0000, // U+0d51 : Undefined
- 0x0000, // U+0d52 : Undefined
- 0x0000, // U+0d53 : Undefined
- 0x0000, // U+0d54 : Undefined
- 0x0000, // U+0d55 : Undefined
- 0x0000, // U+0d56 : Undefined
- 0x0000, // U+0d57 : Malayalam Au Length Mark
- 0x0000, // U+0d58 : Undefined
- 0x0000, // U+0d59 : Undefined
- 0x0000, // U+0d5a : Undefined
- 0x0000, // U+0d5b : Undefined
- 0x0000, // U+0d5c : Undefined
- 0x0000, // U+0d5d : Undefined
- 0x0000, // U+0d5e : Undefined
- 0x0000, // U+0d5f : Undefined
- 0x19aa, // U+0d60 : Malayalam Letter Vocalic Rr
- 0x19a7, // U+0d61 : Malayalam Letter Vocalic Ll
- 0x0000, // U+0d62 : Undefined
- 0x0000, // U+0d63 : Undefined
- 0x0000, // U+0d64 : Undefined
- 0x0000, // U+0d65 : Undefined
- 0x09f1, // U+0d66 : Malayalam Digit Zero
- 0x09f2, // U+0d67 : Malayalam Digit One
- 0x09f3, // U+0d68 : Malayalam Digit Two
- 0x09f4, // U+0d69 : Malayalam Digit Three
- 0x09f5, // U+0d6a : Malayalam Digit Four
- 0x09f6, // U+0d6b : Malayalam Digit Five
- 0x09f7, // U+0d6c : Malayalam Digit Six
- 0x09f8, // U+0d6d : Malayalam Digit Seven
- 0x09f9, // U+0d6e : Malayalam Digit Eight
- 0x09fa // U+0d6f : Malayalam Digit Nine
- };
-
- ////////////////////////////////////////////////////////////////////////////
- // SecondIndicByte
- //
- // Used when the 4 high bits of a UnicodeToIndic table entry are set; it
- // holds the value of the second Indic byte when applicable.
- ////////////////////////////////////////////////////////////////////////////
- static byte[] SecondIndicByte =
- {
- 0x00, // placeholder entry; presumably unused because index 0 means the high bits are not set (TODO confirm)
- 0xe9, // 0xe9 is the byte whose IndicMapping flag is 0x200D (the Nukta special case)
- 0xb8, // U+0952 == 0xf0_0xb8
- 0xbf // U+0970 == 0xf0_0xbf
- };
-
- ////////////////////////////////////////////////////////////////////////////
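- // IndicMappingIndex
- //
- // This table maps each indic code page "font" to the index of its table in
- // IndicMapping, which holds the unicode counterparts. -1 means the entry has no table.
- // There are 0x60 characters in each IndicMapping table. The tables are in pairs of 2
- // (1st char, 2nd char). Assamese reuses the Bengali table, so the 10 code pages share 9 tables.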
- ////////////////////////////////////////////////////////////////////////////
- static int[] IndicMappingIndex = // each value is the first index into IndicMapping for that code page "font"
- { // -1 marks entries that have no mapping table; per-line notes appear to be: font number, abbreviation, hex code, script name
- -1, // 0 DEF 0X40 Default // Not a real code page
- -1, // 1 RMN 0X41 Roman // Transliteration not supported
- 0, // 2 DEV 0X42 Devanagari
- 1, // 3 BNG 0X43 Bengali
- 2, // 4 TML 0X44 Tamil
- 3, // 5 TLG 0X45 Telugu
- 1, // 6 ASM 0X46 Assamese (Bengali) - Reuses table 1
- 4, // 7 ORI 0X47 Oriya
- 5, // 8 KND 0X48 Kannada
- 6, // 9 MLM 0X49 Malayalam
- 7, // 10 GJR 0X4A Gujarati
- 8 // 11 PNJ 0X4B Punjabi (Gurmukhi)
- };
-
- ////////////////////////////////////////////////////////////////////////////
- // IndicMapping
- //
- // This table contains 9 tables that map the 10 indic code pages to their unicode counterparts
- // (Assamese reuses the Bengali table). There are 0x60 characters in each table (bytes 0xa0-0xff).
- // Each table is a pair of 2 rows (1st char, 2nd char).
- //
- // The first index is the table index (from the IndicMappingIndex table),
- // the 2nd the byte index (0 = default unicode char, 1 = alternate unicode char & flags),
- // the third the character index (the indic byte minus 0xa0).
- //
- // For byte 0 a 0x0000 value indicates an unknown character
- // For byte 1 a 0 value indicates no special attributes.
- // For byte 1, 200C & 200D are Virama, Nukta special cases
- // For byte 1, B8BF is Devanagari stress & abbreviation sign special cases
- //
- // WARNING: When copying these from windows, ? 0x003F were changed to 0x0000.
- //
- ////////////////////////////////////////////////////////////////////////////
- // char[codePageMapIndex][byte][character]
- static char[,,] IndicMapping =
- {
- {
- ////////////////////////////////////////////////////////////////////////////
- //
- // Devanagari Table 0, Code Page (2, 0x42, 57002)
- //
- ////////////////////////////////////////////////////////////////////////////
-
- // Default Unicode Char
- {
- // a0, a1, a2, a3, a4, a5, a6, a7,
- '\x0000', '\x0901', '\x0902', '\x0903', '\x0905', '\x0906', '\x0907', '\x0908',
- // a8, a9, aa, ab, ac, ad, ae, af,
- '\x0909', '\x090a', '\x090b', '\x090e', '\x090f', '\x0910', '\x090d', '\x0912',
- // b0, b1, b2, b3, b4, b5, b6, b7,
- '\x0913', '\x0914', '\x0911', '\x0915', '\x0916', '\x0917', '\x0918', '\x0919',
- // b8, b9, ba, bb, bc, bd, be, bf,
- '\x091a', '\x091b', '\x091c', '\x091d', '\x091e', '\x091f', '\x0920', '\x0921',
- // c0, c1, c2, c3, c4, c5, c6, c7,
- '\x0922', '\x0923', '\x0924', '\x0925', '\x0926', '\x0927', '\x0928', '\x0929',
- // c8, c9, ca, cb, cc, cd, ce, cf,
- '\x092a', '\x092b', '\x092c', '\x092d', '\x092e', '\x092f', '\x095f', '\x0930',
- // d0, d1, d2, d3, d4, d5, d6, d7,
- '\x0931', '\x0932', '\x0933', '\x0934', '\x0935', '\x0936', '\x0937', '\x0938',
- // d8, d9, da, db, dc, dd, de, df,
- '\x0939', '\x0000', '\x093e', '\x093f', '\x0940', '\x0941', '\x0942', '\x0943',
- // e0, e1, e2, e3, e4, e5, e6, e7,
- '\x0946', '\x0947', '\x0948', '\x0945', '\x094a', '\x094b', '\x094c', '\x0949',
- // e8, e9, ea, eb, ec, ed, ee, ef,
- '\x094d', '\x093c', '\x0964', '\x0000', '\x0000', '\x0000', '\x0000', '\x0000',
- // f0, f1, f2, f3, f4, f5, f6, f7,
- '\x0000', '\x0966', '\x0967', '\x0968', '\x0969', '\x096a', '\x096b', '\x096c',
- // f8, f9, fa, fb, fc, fd, fe, ff
- '\x096d', '\x096e', '\x096f', '\x0000', '\x0000', '\x0000', '\x0000', '\x0000'
- },
-
- // Alternate Unicode Char & Flags
- {
- // a0, a1, a2, a3, a4, a5, a6, a7,
- '\x0', '\x0950', '\x0', '\x0', '\x0', '\x0', '\x090c', '\x0961',
- // a8, a9, aa, ab, ac, ad, ae, af,
- '\x0', '\x0', '\x0960', '\x0', '\x0', '\x0', '\x0', '\x0',
- // b0, b1, b2, b3, b4, b5, b6, b7,
- '\x0', '\x0', '\x0', '\x0958', '\x0959', '\x095a', '\x0', '\x0',
- // b8, b9, ba, bb, bc, bd, be, bf,
- '\x0', '\x0', '\x095b', '\x0', '\x0', '\x0', '\x0', '\x095c',
- // c0, c1, c2, c3, c4, c5, c6, c7,
- '\x095d', '\x0', '\x0', '\x0', '\x0', '\x0', '\x0', '\x0',
- // c8, c9, ca, cb, cc, cd, ce, cf,
- '\x0', '\x095e', '\x0', '\x0', '\x0', '\x0', '\x0', '\x0',
- // d0, d1, d2, d3, d4, d5, d6, d7,
- '\x0', '\x0', '\x0', '\x0', '\x0', '\x0', '\x0', '\x0',
- // d8, d9, da, db, dc, dd, de, df,
- '\x0', '\x0', '\x0', '\x0962', '\x0963', '\x0', '\x0', '\x0944',
- // e0, e1, e2, e3, e4, e5, e6, e7,
- '\x0', '\x0', '\x0', '\x0', '\x0', '\x0', '\x0', '\x0',
- // e8, e9, ea, eb, ec, ed, ee, ef,
- '\x200C', '\x200D', '\x093d', '\x0', '\x0', '\x0', '\x0', '\x0',
- // f0, f1, f2, f3, f4, f5, f6, f7,
- '\xB8BF', '\x0', '\x0', '\x0', '\x0', '\x0', '\x0', '\x0',
- // f8, f9, fa, fb, fc, fd, fe, ff
- '\x0', '\x0', '\x0', '\x0', '\x0', '\x0', '\x0', '\x0'
- }
- },
-
- {
- ////////////////////////////////////////////////////////////////////////////
- //
- // Bengali & Assamese Table 1, Code Pages (3, 0x43, 57003 & 6, 0x46, 57006)
- //
- ////////////////////////////////////////////////////////////////////////////
-
- // Default Unicode Char
- {
- // a0, a1, a2, a3, a4, a5, a6, a7,
- '\x0000', '\x0981', '\x0982', '\x0983', '\x0985', '\x0986', '\x0987', '\x0988',
- // a8, a9, aa, ab, ac, ad, ae, af,
- '\x0989', '\x098a', '\x098b', '\x098f', '\x098f', '\x0990', '\x0990', '\x0993',
- // b0, b1, b2, b3, b4, b5, b6, b7,
- '\x0993', '\x0994', '\x0994', '\x0995', '\x0996', '\x0997', '\x0998', '\x0999',
- // b8, b9, ba, bb, bc, bd, be, bf,
- '\x099a', '\x099b', '\x099c', '\x099d', '\x099e', '\x099f', '\x09a0', '\x09a1',
- // c0, c1, c2, c3, c4, c5, c6, c7,
- '\x09a2', '\x09a3', '\x09a4', '\x09a5', '\x09a6', '\x09a7', '\x09a8', '\x09a8',
- // c8, c9, ca, cb, cc, cd, ce, cf,
- '\x09aa', '\x09ab', '\x09ac', '\x09ad', '\x09ae', '\x09af', '\x09df', '\x09b0',
- // d0, d1, d2, d3, d4, d5, d6, d7,
- '\x09b0', '\x09b2', '\x09b2', '\x09b2', '\x09ac', '\x09b6', '\x09b7', '\x09b8',
- // d8, d9, da, db, dc, dd, de, df,
- '\x09b9', '\x0000', '\x09be', '\x09bf', '\x09c0', '\x09c1', '\x09c2', '\x09c3',
- // e0, e1, e2, e3, e4, e5, e6, e7,
- '\x09c7', '\x09c7', '\x09c8', '\x09c8', '\x09cb', '\x09cb', '\x09cc', '\x09cc',
- // e8, e9, ea, eb, ec, ed, ee, ef,
- '\x09cd', '\x09bc', '\x002e', '\x0000', '\x0000', '\x0000', '\x0000', '\x0000',
- // f0, f1, f2, f3, f4, f5, f6, f7,
- '\x0000', '\x09e6', '\x09e7', '\x09e8', '\x09e9', '\x09ea', '\x09eb', '\x09ec',
- // f8, f9, fa, fb, fc, fd, fe, ff
- '\x09ed', '\x09ee', '\x09ef', '\x0000', '\x0000', '\x0000', '\x0000', '\x0000'
- },
-
- // Alternate Unicode Char & Flags
- {
- // a0, a1, a2, a3, a4, a5, a6, a7,
- '\x0', '\x0', '\x0', '\x0', '\x0', '\x0', '\x098c', '\x09e1',
- // a8, a9, aa, ab, ac, ad, ae, af,
- '\x0', '\x0', '\x09e0', '\x0', '\x0', '\x0', '\x0', '\x0',
- // b0, b1, b2, b3, b4, b5, b6, b7,
- '\x0', '\x0', '\x0', '\x0', '\x0', '\x0', '\x0', '\x0',
- // b8, b9, ba, bb, bc, bd, be, bf,
- '\x0', '\x0', '\x0', '\x0', '\x0', '\x0', '\x0', '\x09dc',
- // c0, c1, c2, c3, c4, c5, c6, c7,
- '\x09dd', '\x0', '\x0', '\x0', '\x0', '\x0', '\x0', '\x0',
- // c8, c9, ca, cb, cc, cd, ce, cf,
- '\x0', '\x0', '\x0', '\x0', '\x0', '\x0', '\x0', '\x0',
- // d0, d1, d2, d3, d4, d5, d6, d7,
- '\x0', '\x0', '\x0', '\x0', '\x0', '\x0', '\x0', '\x0',
- // d8, d9, da, db, dc, dd, de, df,
- '\x0', '\x0', '\x0', '\x09e2', '\x09e3', '\x0', '\x0', '\x09c4',
- // e0, e1, e2, e3, e4, e5, e6, e7,
- '\x0', '\x0', '\x0', '\x0', '\x0', '\x0', '\x0', '\x0',
- // e8, e9, ea, eb, ec, ed, ee, ef,
- '\x200C', '\x200D', '\x0', '\x0', '\x0', '\x0', '\x0', '\x0',
- // f0, f1, f2, f3, f4, f5, f6, f7,
- '\x0', '\x0', '\x0', '\x0', '\x0', '\x0', '\x0', '\x0',
- // f8, f9, fa, fb, fc, fd, fe, ff
- '\x0', '\x0', '\x0', '\x0', '\x0', '\x0', '\x0', '\x0'
- }
- },
-
- {
- ////////////////////////////////////////////////////////////////////////////
- //
- // Tamil Table 2, Code Page (4, 0x44, 57004)
- //
- ////////////////////////////////////////////////////////////////////////////
-
- // Default Unicode Char
- {
- // a0, a1, a2, a3, a4, a5, a6, a7,
- '\x0000', '\x0000', '\x0b82', '\x0b83', '\x0b85', '\x0b86', '\x0b87', '\x0b88',
- // a8, a9, aa, ab, ac, ad, ae, af,
- '\x0b89', '\x0b8a', '\x0000', '\x0b8f', '\x0b8f', '\x0b90', '\x0b90', '\x0b92',
- // b0, b1, b2, b3, b4, b5, b6, b7,
- '\x0b93', '\x0b94', '\x0b94', '\x0b95', '\x0b95', '\x0b95', '\x0b95', '\x0b99',
- // b8, b9, ba, bb, bc, bd, be, bf,
- '\x0b9a', '\x0b9a', '\x0b9c', '\x0b9c', '\x0b9e', '\x0b9f', '\x0b9f', '\x0b9f',
- // c0, c1, c2, c3, c4, c5, c6, c7,
- '\x0b9f', '\x0ba3', '\x0ba4', '\x0ba4', '\x0ba4', '\x0ba4', '\x0ba8', '\x0ba9',
- // c8, c9, ca, cb, cc, cd, ce, cf,
- '\x0baa', '\x0baa', '\x0baa', '\x0baa', '\x0bae', '\x0baf', '\x0baf', '\x0bb0',
- // d0, d1, d2, d3, d4, d5, d6, d7,
- '\x0bb1', '\x0bb2', '\x0bb3', '\x0bb4', '\x0bb5', '\x0bb7', '\x0bb7', '\x0bb8',
- // d8, d9, da, db, dc, dd, de, df,
- '\x0bb9', '\x0000', '\x0bbe', '\x0bbf', '\x0bc0', '\x0bc1', '\x0bc2', '\x0000',
- // e0, e1, e2, e3, e4, e5, e6, e7,
- '\x0bc6', '\x0bc7', '\x0bc8', '\x0bc8', '\x0bca', '\x0bcb', '\x0bcc', '\x0bcc',
- // e8, e9, ea, eb, ec, ed, ee, ef,
- '\x0bcd', '\x0000', '\x002e', '\x0000', '\x0000', '\x0000', '\x0000', '\x0000',
- // f0, f1, f2, f3, f4, f5, f6, f7,
- '\x0000', '\x0030', '\x0be7', '\x0be8', '\x0be9', '\x0bea', '\x0beb', '\x0bec',
- // f8, f9, fa, fb, fc, fd, fe, ff
- '\x0bed', '\x0bee', '\x0bef', '\x0000', '\x0000', '\x0000', '\x0000', '\x0000'
- },
-
- // Alternate Unicode Char & Flags
- {
- // a0, a1, a2, a3, a4, a5, a6, a7,
- '\x0', '\x0', '\x0', '\x0', '\x0', '\x0', '\x0', '\x0',
- // a8, a9, aa, ab, ac, ad, ae, af,
- '\x0', '\x0', '\x0', '\x0', '\x0', '\x0', '\x0', '\x0',
- // b0, b1, b2, b3, b4, b5, b6, b7,
- '\x0', '\x0', '\x0', '\x0', '\x0', '\x0', '\x0', '\x0',
- // b8, b9, ba, bb, bc, bd, be, bf,
- '\x0', '\x0', '\x0', '\x0', '\x0', '\x0', '\x0', '\x0',
- // c0, c1, c2, c3, c4, c5, c6, c7,
- '\x0', '\x0', '\x0', '\x0', '\x0', '\x0', '\x0', '\x0',
- // c8, c9, ca, cb, cc, cd, ce, cf,
- '\x0', '\x0', '\x0', '\x0', '\x0', '\x0', '\x0', '\x0',
- // d0, d1, d2, d3, d4, d5, d6, d7,
- '\x0', '\x0', '\x0', '\x0', '\x0', '\x0', '\x0', '\x0',
- // d8, d9, da, db, dc, dd, de, df,
- '\x0', '\x0', '\x0', '\x0', '\x0', '\x0', '\x0', '\x0',
- // e0, e1, e2, e3, e4, e5, e6, e7,
- '\x0', '\x0', '\x0', '\x0', '\x0', '\x0', '\x0', '\x0',
- // e8, e9, ea, eb, ec, ed, ee, ef,
- '\x200C', '\x200D', '\x0', '\x0', '\x0', '\x0', '\x0', '\x0',
- // f0, f1, f2, f3, f4, f5, f6, f7,
- '\x0', '\x0', '\x0', '\x0', '\x0', '\x0', '\x0', '\x0',
- // f8, f9, fa, fb, fc, fd, fe, ff
- '\x0', '\x0', '\x0', '\x0', '\x0', '\x0', '\x0', '\x0'
- }
- },
-
- {
- ////////////////////////////////////////////////////////////////////////////
- //
- // Telugu Table 3, Code Page (5, 0x45, 57005)
- //
- ////////////////////////////////////////////////////////////////////////////
-
- // Default Unicode Char
- {
- // a0, a1, a2, a3, a4, a5, a6, a7,
- '\x0000', '\x0c01', '\x0c02', '\x0c03', '\x0c05', '\x0c06', '\x0c07', '\x0c08',
- // a8, a9, aa, ab, ac, ad, ae, af,
- '\x0c09', '\x0c0a', '\x0c0b', '\x0c0e', '\x0c0f', '\x0c10', '\x0c10', '\x0c12',
- // b0, b1, b2, b3, b4, b5, b6, b7,
- '\x0c13', '\x0c14', '\x0c14', '\x0c15', '\x0c16', '\x0c17', '\x0c18', '\x0c19',
- // b8, b9, ba, bb, bc, bd, be, bf,
- '\x0c1a', '\x0c1b', '\x0c1c', '\x0c1d', '\x0c1e', '\x0c1f', '\x0c20', '\x0c21',
- // c0, c1, c2, c3, c4, c5, c6, c7,
- '\x0c22', '\x0c23', '\x0c24', '\x0c25', '\x0c26', '\x0c27', '\x0c28', '\x0c28',
- // c8, c9, ca, cb, cc, cd, ce, cf,
- '\x0c2a', '\x0c2b', '\x0c2c', '\x0c2d', '\x0c2e', '\x0c2f', '\x0c2f', '\x0c30',
- // d0, d1, d2, d3, d4, d5, d6, d7,
- '\x0c31', '\x0c32', '\x0c33', '\x0c33', '\x0c35', '\x0c36', '\x0c37', '\x0c38',
- // d8, d9, da, db, dc, dd, de, df,
- '\x0c39', '\x0000', '\x0c3e', '\x0c3f', '\x0c40', '\x0c41', '\x0c42', '\x0c43',
- // e0, e1, e2, e3, e4, e5, e6, e7,
- '\x0c46', '\x0c47', '\x0c48', '\x0c48', '\x0c4a', '\x0c4b', '\x0c4c', '\x0c4c',
- // e8, e9, ea, eb, ec, ed, ee, ef,
- '\x0c4d', '\x0000', '\x002e', '\x0000', '\x0000', '\x0000', '\x0000', '\x0000',
- // f0, f1, f2, f3, f4, f5, f6, f7,
- '\x0000', '\x0c66', '\x0c67', '\x0c68', '\x0c69', '\x0c6a', '\x0c6b', '\x0c6c',
- // f8, f9, fa, fb, fc, fd, fe, ff
- '\x0c6d', '\x0c6e', '\x0c6f', '\x0000', '\x0000', '\x0000', '\x0000', '\x0000'
- },
-
- // Alternate Unicode Char & Flags
- {
- // a0, a1, a2, a3, a4, a5, a6, a7,
- '\x0', '\x0', '\x0', '\x0', '\x0', '\x0', '\x0c0c', '\x0c61',
- // a8, a9, aa, ab, ac, ad, ae, af,
- '\x0', '\x0', '\x0c60', '\x0', '\x0', '\x0', '\x0', '\x0',
- // b0, b1, b2, b3, b4, b5, b6, b7,
- '\x0', '\x0', '\x0', '\x0', '\x0', '\x0', '\x0', '\x0',
- // b8, b9, ba, bb, bc, bd, be, bf,
- '\x0', '\x0', '\x0', '\x0', '\x0', '\x0', '\x0', '\x0',
- // c0, c1, c2, c3, c4, c5, c6, c7,
- '\x0', '\x0', '\x0', '\x0', '\x0', '\x0', '\x0', '\x0',
- // c8, c9, ca, cb, cc, cd, ce, cf,
- '\x0', '\x0', '\x0', '\x0', '\x0', '\x0', '\x0', '\x0',
- // d0, d1, d2, d3, d4, d5, d6, d7,
- '\x0', '\x0', '\x0', '\x0', '\x0', '\x0', '\x0', '\x0',
- // d8, d9, da, db, dc, dd, de, df,
- '\x0', '\x0', '\x0', '\x0', '\x0', '\x0', '\x0', '\x0c44',
- // e0, e1, e2, e3, e4, e5, e6, e7,
- '\x0', '\x0', '\x0', '\x0', '\x0', '\x0', '\x0', '\x0',
- // e8, e9, ea, eb, ec, ed, ee, ef,
- '\x200C', '\x200D', '\x0', '\x0', '\x0', '\x0', '\x0', '\x0',
- // f0, f1, f2, f3, f4, f5, f6, f7,
- '\x0', '\x0', '\x0', '\x0', '\x0', '\x0', '\x0', '\x0',
- // f8, f9, fa, fb, fc, fd, fe, ff
- '\x0', '\x0', '\x0', '\x0', '\x0', '\x0', '\x0', '\x0'
- }
- },
-
- {
- ////////////////////////////////////////////////////////////////////////////
- //
- // Oriya Table 4, Code Page (7, 0x47, 57007)
- //
- ////////////////////////////////////////////////////////////////////////////
-
- // Default Unicode Char
- {
- // a0, a1, a2, a3, a4, a5, a6, a7,
- '\x0000', '\x0b01', '\x0b02', '\x0b03', '\x0b05', '\x0b06', '\x0b07', '\x0b08',
- // a8, a9, aa, ab, ac, ad, ae, af,
- '\x0b09', '\x0b0a', '\x0b0b', '\x0b0f', '\x0b0f', '\x0b10', '\x0b10', '\x0b10',
- // b0, b1, b2, b3, b4, b5, b6, b7,
- '\x0b13', '\x0b14', '\x0b14', '\x0b15', '\x0b16', '\x0b17', '\x0b18', '\x0b19',
- // b8, b9, ba, bb, bc, bd, be, bf,
- '\x0b1a', '\x0b1b', '\x0b1c', '\x0b1d', '\x0b1e', '\x0b1f', '\x0b20', '\x0b21',
- // c0, c1, c2, c3, c4, c5, c6, c7,
- '\x0b22', '\x0b23', '\x0b24', '\x0b25', '\x0b26', '\x0b27', '\x0b28', '\x0b28',
- // c8, c9, ca, cb, cc, cd, ce, cf,
- '\x0b2a', '\x0b2b', '\x0b2c', '\x0b2d', '\x0b2e', '\x0b2f', '\x0b5f', '\x0b30',
- // d0, d1, d2, d3, d4, d5, d6, d7,
- '\x0b30', '\x0b32', '\x0b33', '\x0b33', '\x0b2c', '\x0b36', '\x0b37', '\x0b38',
- // d8, d9, da, db, dc, dd, de, df,
- '\x0b39', '\x0000', '\x0b3e', '\x0b3f', '\x0b40', '\x0b41', '\x0b42', '\x0b43',
- // e0, e1, e2, e3, e4, e5, e6, e7,
- '\x0b47', '\x0b47', '\x0b48', '\x0b48', '\x0b4b', '\x0b4b', '\x0b4c', '\x0b4c',
- // e8, e9, ea, eb, ec, ed, ee, ef,
- '\x0b4d', '\x0b3c', '\x002e', '\x0000', '\x0000', '\x0000', '\x0000', '\x0000',
- // f0, f1, f2, f3, f4, f5, f6, f7,
- '\x0000', '\x0b66', '\x0b67', '\x0b68', '\x0b69', '\x0b6a', '\x0b6b', '\x0b6c',
- // f8, f9, fa, fb, fc, fd, fe, ff
- '\x0b6d', '\x0b6e', '\x0b6f', '\x0000', '\x0000', '\x0000', '\x0000', '\x0000'
- },
-
- // Alternate Unicode Char & Flags
- {
- // a0, a1, a2, a3, a4, a5, a6, a7,
- '\x0', '\x0', '\x0', '\x0', '\x0', '\x0', '\x0c0c', '\x0c61',
- // a8, a9, aa, ab, ac, ad, ae, af,
- '\x0', '\x0', '\x0c60', '\x0', '\x0', '\x0', '\x0', '\x0',
- // b0, b1, b2, b3, b4, b5, b6, b7,
- '\x0', '\x0', '\x0', '\x0', '\x0', '\x0', '\x0', '\x0',
- // b8, b9, ba, bb, bc, bd, be, bf,
- '\x0', '\x0', '\x0', '\x0', '\x0', '\x0', '\x0', '\x0b5c',
- // c0, c1, c2, c3, c4, c5, c6, c7,
- '\x0b5d', '\x0', '\x0', '\x0', '\x0', '\x0', '\x0', '\x0',
- // c8, c9, ca, cb, cc, cd, ce, cf,
- '\x0', '\x0', '\x0', '\x0', '\x0', '\x0', '\x0', '\x0',
- // d0, d1, d2, d3, d4, d5, d6, d7,
- '\x0', '\x0', '\x0', '\x0', '\x0', '\x0', '\x0', '\x0',
- // d8, d9, da, db, dc, dd, de, df,
- '\x0', '\x0', '\x0', '\x0', '\x0', '\x0', '\x0', '\x0c44',
- // e0, e1, e2, e3, e4, e5, e6, e7,
- '\x0', '\x0', '\x0', '\x0', '\x0', '\x0', '\x0', '\x0',
- // e8, e9, ea, eb, ec, ed, ee, ef,
- '\x200C', '\x200D', '\x0b3d', '\x0', '\x0', '\x0', '\x0', '\x0',
- // f0, f1, f2, f3, f4, f5, f6, f7,
- '\x0', '\x0', '\x0', '\x0', '\x0', '\x0', '\x0', '\x0',
- // f8, f9, fa, fb, fc, fd, fe, ff
- '\x0', '\x0', '\x0', '\x0', '\x0', '\x0', '\x0', '\x0'
- }
- },
-
- {
- ////////////////////////////////////////////////////////////////////////////
- //
- // Kannada Table 5, Code Page (8, 0x48, 57008)
- //
- ////////////////////////////////////////////////////////////////////////////
-
- // Default Unicode Char
- {
- // a0, a1, a2, a3, a4, a5, a6, a7,
- '\x0000', '\x0000', '\x0c82', '\x0c83', '\x0c85', '\x0c86', '\x0c87', '\x0c88',
- // a8, a9, aa, ab, ac, ad, ae, af,
- '\x0c89', '\x0c8a', '\x0c8b', '\x0c8e', '\x0c8f', '\x0c90', '\x0c90', '\x0c92',
- // b0, b1, b2, b3, b4, b5, b6, b7,
- '\x0c93', '\x0c94', '\x0c94', '\x0c95', '\x0c96', '\x0c97', '\x0c98', '\x0c99',
- // b8, b9, ba, bb, bc, bd, be, bf,
- '\x0c9a', '\x0c9b', '\x0c9c', '\x0c9d', '\x0c9e', '\x0c9f', '\x0ca0', '\x0ca1',
- // c0, c1, c2, c3, c4, c5, c6, c7,
- '\x0ca2', '\x0ca3', '\x0ca4', '\x0ca5', '\x0ca6', '\x0ca7', '\x0ca8', '\x0ca8',
- // c8, c9, ca, cb, cc, cd, ce, cf,
- '\x0caa', '\x0cab', '\x0cac', '\x0cad', '\x0cae', '\x0caf', '\x0caf', '\x0cb0',
- // d0, d1, d2, d3, d4, d5, d6, d7,
- '\x0cb1', '\x0cb2', '\x0cb3', '\x0cb3', '\x0cb5', '\x0cb6', '\x0cb7', '\x0cb8',
- // d8, d9, da, db, dc, dd, de, df,
- '\x0cb9', '\x0000', '\x0cbe', '\x0cbf', '\x0cc0', '\x0cc1', '\x0cc2', '\x0cc3',
- // e0, e1, e2, e3, e4, e5, e6, e7,
- '\x0cc6', '\x0cc7', '\x0cc8', '\x0cc8', '\x0cca', '\x0ccb', '\x0ccc', '\x0ccc',
- // e8, e9, ea, eb, ec, ed, ee, ef,
- '\x0ccd', '\x0000', '\x002e', '\x0000', '\x0000', '\x0000', '\x0000', '\x0000',
- // f0, f1, f2, f3, f4, f5, f6, f7,
- '\x0000', '\x0ce6', '\x0ce7', '\x0ce8', '\x0ce9', '\x0cea', '\x0ceb', '\x0cec',
- // f8, f9, fa, fb, fc, fd, fe, ff
- '\x0ced', '\x0cee', '\x0cef', '\x0000', '\x0000', '\x0000', '\x0000', '\x0000'
- },
-
- // Alternate Unicode Char & Flags
- {
- // a0, a1, a2, a3, a4, a5, a6, a7,
- '\x0', '\x0', '\x0', '\x0', '\x0', '\x0', '\x0c8c', '\x0ce1',
- // a8, a9, aa, ab, ac, ad, ae, af,
- '\x0', '\x0', '\x0ce0', '\x0', '\x0', '\x0', '\x0', '\x0',
- // b0, b1, b2, b3, b4, b5, b6, b7,
- '\x0', '\x0', '\x0', '\x0', '\x0', '\x0', '\x0', '\x0',
- // b8, b9, ba, bb, bc, bd, be, bf,
- '\x0', '\x0', '\x0', '\x0', '\x0', '\x0', '\x0', '\x0',
- // c0, c1, c2, c3, c4, c5, c6, c7,
- '\x0', '\x0', '\x0', '\x0', '\x0', '\x0', '\x0', '\x0',
- // c8, c9, ca, cb, cc, cd, ce, cf,
- '\x0', '\x0cde', '\x0', '\x0', '\x0', '\x0', '\x0', '\x0',
- // d0, d1, d2, d3, d4, d5, d6, d7,
- '\x0', '\x0', '\x0', '\x0', '\x0', '\x0', '\x0', '\x0',
- // d8, d9, da, db, dc, dd, de, df,
- '\x0', '\x0', '\x0', '\x0', '\x0', '\x0', '\x0', '\x0cc4',
- // e0, e1, e2, e3, e4, e5, e6, e7,
- '\x0', '\x0', '\x0', '\x0', '\x0', '\x0', '\x0', '\x0',
- // e8, e9, ea, eb, ec, ed, ee, ef,
- '\x200C', '\x200D', '\x0', '\x0', '\x0', '\x0', '\x0', '\x0',
- // f0, f1, f2, f3, f4, f5, f6, f7,
- '\x0', '\x0', '\x0', '\x0', '\x0', '\x0', '\x0', '\x0',
- // f8, f9, fa, fb, fc, fd, fe, ff
- '\x0', '\x0', '\x0', '\x0', '\x0', '\x0', '\x0', '\x0'
- }
- },
-
- {
- ////////////////////////////////////////////////////////////////////////////
- //
- // Malayalam Table 6, Code Page (9, 0x49, 57009)
- //
- ////////////////////////////////////////////////////////////////////////////
-
- // Default Unicode Char
- {
- // a0, a1, a2, a3, a4, a5, a6, a7,
- '\x0000', '\x0000', '\x0d02', '\x0d03', '\x0d05', '\x0d06', '\x0d07', '\x0d08',
- // a8, a9, aa, ab, ac, ad, ae, af,
- '\x0d09', '\x0d0a', '\x0d0b', '\x0d0e', '\x0d0f', '\x0d10', '\x0d10', '\x0d12',
- // b0, b1, b2, b3, b4, b5, b6, b7,
- '\x0d13', '\x0d14', '\x0d14', '\x0d15', '\x0d16', '\x0d17', '\x0d18', '\x0d19',
- // b8, b9, ba, bb, bc, bd, be, bf,
- '\x0d1a', '\x0d1b', '\x0d1c', '\x0d1d', '\x0d1e', '\x0d1f', '\x0d20', '\x0d21',
- // c0, c1, c2, c3, c4, c5, c6, c7,
- '\x0d22', '\x0d23', '\x0d24', '\x0d25', '\x0d26', '\x0d27', '\x0d28', '\x0d28',
- // c8, c9, ca, cb, cc, cd, ce, cf,
- '\x0d2a', '\x0d2b', '\x0d2c', '\x0d2d', '\x0d2e', '\x0d2f', '\x0d2f', '\x0d30',
- // d0, d1, d2, d3, d4, d5, d6, d7,
- '\x0d31', '\x0d32', '\x0d33', '\x0d34', '\x0d35', '\x0d36', '\x0d37', '\x0d38',
- // d8, d9, da, db, dc, dd, de, df,
- '\x0d39', '\x0000', '\x0d3e', '\x0d3f', '\x0d40', '\x0d41', '\x0d42', '\x0d43',
- // e0, e1, e2, e3, e4, e5, e6, e7,
- '\x0d46', '\x0d47', '\x0d48', '\x0d48', '\x0d4a', '\x0d4b', '\x0d4c', '\x0d4c',
- // e8, e9, ea, eb, ec, ed, ee, ef,
- '\x0d4d', '\x0000', '\x002e', '\x0000', '\x0000', '\x0000', '\x0000', '\x0000',
- // f0, f1, f2, f3, f4, f5, f6, f7,
- '\x0000', '\x0d66', '\x0d67', '\x0d68', '\x0d69', '\x0d6a', '\x0d6b', '\x0d6c',
- // f8, f9, fa, fb, fc, fd, fe, ff
- '\x0d6d', '\x0d6e', '\x0d6f', '\x0000', '\x0000', '\x0000', '\x0000', '\x0000'
- },
-
- // Alternate Unicode Char & Flags
- {
- // a0, a1, a2, a3, a4, a5, a6, a7,
- '\x0', '\x0', '\x0', '\x0', '\x0', '\x0', '\x0d0c', '\x0d61',
- // a8, a9, aa, ab, ac, ad, ae, af,
- '\x0', '\x0', '\x0d60', '\x0', '\x0', '\x0', '\x0', '\x0',
- // b0, b1, b2, b3, b4, b5, b6, b7,
- '\x0', '\x0', '\x0', '\x0', '\x0', '\x0', '\x0', '\x0',
- // b8, b9, ba, bb, bc, bd, be, bf,
- '\x0', '\x0', '\x0', '\x0', '\x0', '\x0', '\x0', '\x0',
- // c0, c1, c2, c3, c4, c5, c6, c7,
- '\x0', '\x0', '\x0', '\x0', '\x0', '\x0', '\x0', '\x0',
- // c8, c9, ca, cb, cc, cd, ce, cf,
- '\x0', '\x0', '\x0', '\x0', '\x0', '\x0', '\x0', '\x0',
- // d0, d1, d2, d3, d4, d5, d6, d7,
- '\x0', '\x0', '\x0', '\x0', '\x0', '\x0', '\x0', '\x0',
- // d8, d9, da, db, dc, dd, de, df,
- '\x0', '\x0', '\x0', '\x0', '\x0', '\x0', '\x0', '\x0',
- // e0, e1, e2, e3, e4, e5, e6, e7,
- '\x0', '\x0', '\x0', '\x0', '\x0', '\x0', '\x0', '\x0',
- // e8, e9, ea, eb, ec, ed, ee, ef,
- '\x200C', '\x200D', '\x0', '\x0', '\x0', '\x0', '\x0', '\x0',
- // f0, f1, f2, f3, f4, f5, f6, f7,
- '\x0', '\x0', '\x0', '\x0', '\x0', '\x0', '\x0', '\x0',
- // f8, f9, fa, fb, fc, fd, fe, ff
- '\x0', '\x0', '\x0', '\x0', '\x0', '\x0', '\x0', '\x0'
- }
- },
-
- {
- ////////////////////////////////////////////////////////////////////////////
- //
- // Gujarati Table 7, Code Page (10, 0x4a, 57010)
- //
- ////////////////////////////////////////////////////////////////////////////
-
- // Default Unicode Char
- {
- // a0, a1, a2, a3, a4, a5, a6, a7,
- '\x0000', '\x0a81', '\x0a82', '\x0a83', '\x0a85', '\x0a86', '\x0a87', '\x0a88',
- // a8, a9, aa, ab, ac, ad, ae, af,
- '\x0a89', '\x0a8a', '\x0a8b', '\x0a8f', '\x0a8f', '\x0a90', '\x0a8d', '\x0a8d',
- // b0, b1, b2, b3, b4, b5, b6, b7,
- '\x0a93', '\x0a94', '\x0a91', '\x0a95', '\x0a96', '\x0a97', '\x0a98', '\x0a99',
- // b8, b9, ba, bb, bc, bd, be, bf,
- '\x0a9a', '\x0a9b', '\x0a9c', '\x0a9d', '\x0a9e', '\x0a9f', '\x0aa0', '\x0aa1',
- // c0, c1, c2, c3, c4, c5, c6, c7,
- '\x0aa2', '\x0aa3', '\x0aa4', '\x0aa5', '\x0aa6', '\x0aa7', '\x0aa8', '\x0aa8',
- // c8, c9, ca, cb, cc, cd, ce, cf,
- '\x0aaa', '\x0aab', '\x0aac', '\x0aad', '\x0aae', '\x0aaf', '\x0aaf', '\x0ab0',
- // d0, d1, d2, d3, d4, d5, d6, d7,
- '\x0ab0', '\x0ab2', '\x0ab3', '\x0ab3', '\x0ab5', '\x0ab6', '\x0ab7', '\x0ab8',
- // d8, d9, da, db, dc, dd, de, df,
- '\x0ab9', '\x0000', '\x0abe', '\x0abf', '\x0ac0', '\x0ac1', '\x0ac2', '\x0ac3',
- // e0, e1, e2, e3, e4, e5, e6, e7,
- '\x0ac7', '\x0ac7', '\x0ac8', '\x0ac5', '\x0acb', '\x0acb', '\x0acc', '\x0ac9',
- // e8, e9, ea, eb, ec, ed, ee, ef,
- '\x0acd', '\x0abc', '\x002e', '\x0000', '\x0000', '\x0000', '\x0000', '\x0000',
- // f0, f1, f2, f3, f4, f5, f6, f7,
- '\x0000', '\x0ae6', '\x0ae7', '\x0ae8', '\x0ae9', '\x0aea', '\x0aeb', '\x0aec',
- // f8, f9, fa, fb, fc, fd, fe, ff
- '\x0aed', '\x0aee', '\x0aef', '\x0000', '\x0000', '\x0000', '\x0000', '\x0000'
- },
-
- // Alternate Unicode Char & Flags
- {
- // a0, a1, a2, a3, a4, a5, a6, a7,
- '\x0', '\x0ad0', '\x0', '\x0', '\x0', '\x0', '\x0', '\x0',
- // a8, a9, aa, ab, ac, ad, ae, af,
- '\x0', '\x0', '\x0ae0', '\x0', '\x0', '\x0', '\x0', '\x0',
- // b0, b1, b2, b3, b4, b5, b6, b7,
- '\x0', '\x0', '\x0', '\x0', '\x0', '\x0', '\x0', '\x0',
- // b8, b9, ba, bb, bc, bd, be, bf,
- '\x0', '\x0', '\x0', '\x0', '\x0', '\x0', '\x0', '\x0',
- // c0, c1, c2, c3, c4, c5, c6, c7,
- '\x0', '\x0', '\x0', '\x0', '\x0', '\x0', '\x0', '\x0',
- // c8, c9, ca, cb, cc, cd, ce, cf,
- '\x0', '\x0', '\x0', '\x0', '\x0', '\x0', '\x0', '\x0',
- // d0, d1, d2, d3, d4, d5, d6, d7,
- '\x0', '\x0', '\x0', '\x0', '\x0', '\x0', '\x0', '\x0',
- // d8, d9, da, db, dc, dd, de, df,
- '\x0', '\x0', '\x0', '\x0', '\x0', '\x0', '\x0', '\x0ac4',
- // e0, e1, e2, e3, e4, e5, e6, e7,
- '\x0', '\x0', '\x0', '\x0', '\x0', '\x0', '\x0', '\x0',
- // e8, e9, ea, eb, ec, ed, ee, ef,
- '\x200C', '\x200D', '\x0abd', '\x0', '\x0', '\x0', '\x0', '\x0',
- // f0, f1, f2, f3, f4, f5, f6, f7,
- '\x0', '\x0', '\x0', '\x0', '\x0', '\x0', '\x0', '\x0',
- // f8, f9, fa, fb, fc, fd, fe, ff
- '\x0', '\x0', '\x0', '\x0', '\x0', '\x0', '\x0', '\x0'
- }
- },
-
- {
- ////////////////////////////////////////////////////////////////////////////
- //
- // Punjabi (Gurmukhi) Table 8', Code Page (11', '4b', 57011)
- //
- ////////////////////////////////////////////////////////////////////////////
-
- // Default Unicode Char
- {
- // a0, a1, a2, a3, a4, a5, a6, a7,
- '\x0000', '\x0000', '\x0a02', '\x0000', '\x0a05', '\x0a06', '\x0a07', '\x0a08',
- // a8, a9, aa, ab, ac, ad, ae, af,
- '\x0a09', '\x0a0a', '\x0000', '\x0a0f', '\x0a0f', '\x0a10', '\x0a10', '\x0a10',
- // b0, b1, b2, b3, b4, b5, b6, b7,
- '\x0a13', '\x0a14', '\x0a14', '\x0a15', '\x0a16', '\x0a17', '\x0a18', '\x0a19',
- // b8, b9, ba, bb, bc, bd, be, bf,
- '\x0a1a', '\x0a1b', '\x0a1c', '\x0a1d', '\x0a1e', '\x0a1f', '\x0a20', '\x0a21',
- // c0, c1, c2, c3, c4, c5, c6, c7,
- '\x0a22', '\x0a23', '\x0a24', '\x0a25', '\x0a26', '\x0a27', '\x0a28', '\x0a28',
- // c8, c9, ca, cb, cc, cd, ce, cf,
- '\x0a2a', '\x0a2b', '\x0a2c', '\x0a2d', '\x0a2e', '\x0a2f', '\x0a2f', '\x0a30',
- // d0, d1, d2, d3, d4, d5, d6, d7,
- '\x0a30', '\x0a32', '\x0a33', '\x0a33', '\x0a35', '\x0a36', '\x0a36', '\x0a38',
- // d8, d9, da, db, dc, dd, de, df,
- '\x0a39', '\x0000', '\x0a3e', '\x0a3f', '\x0a40', '\x0a41', '\x0a42', '\x0000',
- // e0, e1, e2, e3, e4, e5, e6, e7,
- '\x0a47', '\x0a47', '\x0a48', '\x0a48', '\x0a4b', '\x0a4b', '\x0a4c', '\x0a4c',
- // e8, e9, ea, eb, ec, ed, ee, ef,
- '\x0a4d', '\x0a3c', '\x002e', '\x0000', '\x0000', '\x0000', '\x0000', '\x0000',
- // f0, f1, f2, f3, f4, f5, f6, f7,
- '\x0000', '\x0a66', '\x0a67', '\x0a68', '\x0a69', '\x0a6a', '\x0a6b', '\x0a6c',
- // f8, f9, fa, fb, fc, fd, fe, ff
- '\x0a6d', '\x0a6e', '\x0a6f', '\x0000', '\x0000', '\x0000', '\x0000', '\x0000'
- },
-
- // Alternate Unicode Char & Flags
- {
- // a0, a1, a2, a3, a4, a5, a6, a7,
- '\x0', '\x0', '\x0', '\x0', '\x0', '\x0', '\x0', '\x0',
- // a8, a9, aa, ab, ac, ad, ae, af,
- '\x0', '\x0', '\x0', '\x0', '\x0', '\x0', '\x0', '\x0',
- // b0, b1, b2, b3, b4, b5, b6, b7,
- '\x0', '\x0', '\x0', '\x0', '\x0a59', '\x0a5a', '\x0', '\x0',
- // b8, b9, ba, bb, bc, bd, be, bf,
- '\x0', '\x0', '\x0a5b', '\x0', '\x0', '\x0', '\x0', '\x0',
- // c0, c1, c2, c3, c4, c5, c6, c7,
- '\x0a5c', '\x0', '\x0', '\x0', '\x0', '\x0', '\x0', '\x0',
- // c8, c9, ca, cb, cc, cd, ce, cf,
- '\x0', '\x0a5e', '\x0', '\x0', '\x0', '\x0', '\x0', '\x0',
- // d0, d1, d2, d3, d4, d5, d6, d7,
- '\x0', '\x0', '\x0', '\x0', '\x0', '\x0', '\x0', '\x0',
- // d8, d9, da, db, dc, dd, de, df,
- '\x0', '\x0', '\x0', '\x0', '\x0', '\x0', '\x0', '\x0',
- // e0, e1, e2, e3, e4, e5, e6, e7,
- '\x0', '\x0', '\x0', '\x0', '\x0', '\x0', '\x0', '\x0',
- // e8, e9, ea, eb, ec, ed, ee, ef,
- '\x200C', '\x200D', '\x0', '\x0', '\x0', '\x0', '\x0', '\x0',
- // f0, f1, f2, f3, f4, f5, f6, f7,
- '\x0', '\x0', '\x0', '\x0', '\x0', '\x0', '\x0', '\x0',
- // f8, f9, fa, fb, fc, fd, fe, ff
- '\x0', '\x0', '\x0', '\x0', '\x0', '\x0', '\x0', '\x0'
- }
- }
- };
- }
-
-}
diff --git a/src/mscorlib/src/System/Text/ISO2022Encoding.cs b/src/mscorlib/src/System/Text/ISO2022Encoding.cs
deleted file mode 100644
index fca579fe56..0000000000
--- a/src/mscorlib/src/System/Text/ISO2022Encoding.cs
+++ /dev/null
@@ -1,1983 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-// See the LICENSE file in the project root for more information.
-
-
-//
-//
-// Notes:
-//
-// IsAlwaysNormalized ???
-// Regarding Normalization for ISO-2022-JP (50220, 50221, 50222), it's the same rules as EUCJP
-// Forms KC & KD are precluded because of things like halfwidth Katakana that have compatibility mappings
-// Form D is precluded because of 0x00a8, which changes to space + diaeresis.
-//
-// Note: I think that IsAlwaysNormalized should probably return true for form C for Japanese 20932 based CPs.
-//
-// For ISO-2022-KR
-// Never normalized, C & D (& therefore KC & KD) are precluded because of Hangul syllables and combined characters.
-//
-// IsAlwaysNormalized ???
-// Regarding Normalization for ISO-2022-CN (50227, 50229) & HZ-GB2312 (52936) I think it is similar to the Japanese case.
-// Forms KC & KD are precluded because of things like halfwidth Katakana that have compatibility mappings
-// Form D is precluded because of 0x00a8, which changes to space + diaeresis.
-//
-// Note: I think that IsAlwaysNormalized should probably return true for form C for Chinese 20936 based CPs.
-//
-#if FEATURE_CODEPAGES_FILE // requires BaseCodePageEncoding
-namespace System.Text
-{
- using System.Globalization;
- using System.Diagnostics;
- using System.Diagnostics.Contracts;
- using System.Text;
- using System.Runtime.InteropServices;
- using System;
- using System.Security;
- using System.Runtime.CompilerServices;
- using System.Runtime.Serialization;
-
-
- /*=================================ISO2022Encoding============================
- **
- ** This is used to support ISO 2022 encodings that use shift/escape sequences.
- **
- ==============================================================================*/
-
- [Serializable]
- internal class ISO2022Encoding : DBCSCodePageEncoding
- {
- const byte SHIFT_OUT = (byte)0x0E; // SO control byte: 50222 emits it before halfwidth Katakana, 50225 before double byte data
- const byte SHIFT_IN = (byte)0x0F; // SI control byte: emitted to leave the shifted out state again
- const byte ESCAPE = 0x1B; // ESC, first byte of the escape/designator sequences written by the encoders
- const byte LEADBYTE_HALFWIDTH = 0x10; // artificial lead byte that CleanUpBytes puts on halfwidth Katakana table values
-
- // Builds the encoding on top of the tables of the base code page that
- // tableBaseCodePages lists for this code page (slot = codePage % 10).
- // Only the table data is borrowed; the memory section is still named after our own code page family.
- internal ISO2022Encoding(int codePage)
- : base(codePage, tableBaseCodePages[codePage % 10])
- {
- // Mark this encoding as using the MLang type for serialization
- m_bUseMlangTypeForSerialization = true;
- }
-
- // Constructor required by the serialization pattern.
- // Note: We use the base GetObjectData however
- // Always throws ArgumentException: per the note below, CodePageEncoding acts as the
- // serialization proxy, so this constructor is not expected to run.
- internal ISO2022Encoding(SerializationInfo info, StreamingContext context) : base(info, context)
- {
- // Actually this can't ever get called, CodePageEncoding is our proxy
- // (the message names this class; it previously named DBCSCodePageEncoding by copy/paste)
- Debug.Assert(false, "Didn't expect to make it to ISO2022Encoding serialization constructor");
- throw new ArgumentException(Environment.GetResourceString("Arg_ExecutionEngineException"));
- }
-
- static int[] tableBaseCodePages = // base code page per slot; the constructor reads slot (codePage % 10)
- {
- 932, // 50220 ISO-2022-JP, No halfwidth Katakana, convert to full width
- 932, // 50221 ISO-2022-JP, Use escape sequence for half width Katakana
- 932, // 50222 ISO-2022-JP, Use shift-in/shift-out for half width Katakana
- 0, // slot 3: no base code page assigned
- 0, // slot 4: no base code page assigned
- 949, // 50225 ISO-2022-KR, Korean
- 936, // 52936 HZ-GB2312, 936 might be better source
- 0, //20936, // 50227 ISO-2022-CN, Note: This is just the same as CP 936 in Everett.
- 0, // slot 8: no base code page assigned
- // 50229 is currently unsupported, CP 20000 is currently not built in .nlp file
- 0, //20000, // 50229 ISO-2022-CN, ModeCNS11643_1
- 0, //20000, // 50229 ISO-2022-CN, ModeCNS11643_2 (slot 10 cannot be produced by codePage % 10; it lines up with the ISO2022Modes value)
- 0 // ModeASCII (slot 11, likewise only lines up with the ISO2022Modes value)
- };
-
- internal enum ISO2022Modes
- {
- ModeHalfwidthKatakana = 0, // JP: halfwidth Katakana (50222 gets here with shift out, 50221 with an escape sequence)
- ModeJIS0208 = 1, // JP: double byte JIS 0208, entered with <ESC> $ B
- ModeKR = 5, // 50225: double byte mode, entered with shift out
- ModeHZ = 6, // 52936: double byte mode, entered with ~{
- ModeGB2312 = 7,
- ModeCNS11643_1 = 9,
- ModeCNS11643_2 = 10,
- ModeASCII = 11, // single byte mode; the encoders in this file start in it when they have no saved state
-
- ModeIncompleteEscape = -1, // NOTE(review): the negative values are not used by the encoder routines; presumably decoder escape parsing results - confirm
- ModeInvalidEscape = -2,
- ModeNOOP = -3
- }
-
- // Composes the name of the memory section for this code page's tables:
- // "CodePage_<code page>_<major>_<minor>_<revision>_<build>" followed by a
- // suffix for the encoding family (ISO2022JP, ISO2022KR or HZ).
- protected unsafe override String GetMemorySectionName()
- {
- // The tables may belong to a different (data table) code page than the one we report
- int sectionCodePage;
- if (this.bFlagDataTable)
- sectionCodePage = dataTableCodePage;
- else
- sectionCodePage = CodePage;
-
- // Pick the name pattern for our encoding family
- String namePattern;
- int ownCodePage = this.CodePage;
-
- if (ownCodePage == 50220 || ownCodePage == 50221 || ownCodePage == 50222)
- {
- namePattern = "CodePage_{0}_{1}_{2}_{3}_{4}_ISO2022JP";
- }
- else if (ownCodePage == 50225)
- {
- namePattern = "CodePage_{0}_{1}_{2}_{3}_{4}_ISO2022KR";
- }
- else if (ownCodePage == 52936)
- {
- namePattern = "CodePage_{0}_{1}_{2}_{3}_{4}_HZ";
- }
- else
- {
- // Unknown code page: complain in debug builds, then use the name without a suffix
- Debug.Assert(false, "[ISO2022Encoding.GetMemorySectionName] Don't expect to get here for code page " + this.CodePage);
- namePattern = "CodePage_{0}_{1}_{2}_{3}_{4}";
- }
-
- // Version numbers come from the loaded code page header
- return String.Format(CultureInfo.InvariantCulture, namePattern,
- sectionCodePage,
- this.pCodePage->VersionMajor,
- this.pCodePage->VersionMinor,
- this.pCodePage->VersionRevision,
- this.pCodePage->VersionBuild);
- }
-
- // Adjusts one byte value of the base code page table (in place, via ref) for use by this encoding. Returns false for the values rejected below (lead byte marks, out of range double bytes), true otherwise; how the caller treats false is not visible here.
- // ISO2022-JP (50220, 50221, 50222), ISO2022-KR (50225)
- // GB-HZ (52936)
- protected override bool CleanUpBytes(ref int bytes)
- {
- switch (this.CodePage)
- {
- // 932 based code pages
- case 50220:
- case 50221:
- case 50222:
- {
- if (bytes >= 0x100)
- {
- // map extended char (0xfa40-0xfc4b) to a special range
- // (ported from mlang)
- if (bytes >= 0xfa40 && bytes <= 0xfc4b)
- {
- if ( bytes >= 0xfa40 && bytes <= 0xfa5b )
- {
- if ( bytes <= 0xfa49 )
- bytes = bytes - 0x0b51 ;
- else if ( bytes >= 0xfa4a && bytes <= 0xfa53 )
- bytes = bytes - 0x072f6 ;
- else if ( bytes >= 0xfa54 && bytes <= 0xfa57 )
- bytes = bytes - 0x0b5b ;
- else if ( bytes == 0xfa58 )
- bytes = 0x878a ;
- else if ( bytes == 0xfa59 )
- bytes = 0x8782 ;
- else if ( bytes == 0xfa5a )
- bytes = 0x8784 ;
- else if ( bytes == 0xfa5b )
- bytes = 0x879a ;
- }
- else if ( bytes >= 0xfa5c && bytes <= 0xfc4b )
- {
- byte tc = unchecked((byte)bytes);
- if ( tc < 0x5c )
- bytes = bytes - 0x0d5f;
- else if ( tc >= 0x80 && tc <= 0x9B )
- bytes = bytes - 0x0d1d;
- else
- bytes = bytes - 0x0d1c;
- }
- }
-
- // Convert 932 code page to 20932 like code page range
- // (also ported from mlang)
- byte bLead = unchecked((byte)(bytes >> 8));
- byte bTrail = unchecked((byte)bytes);
-
- bLead -= ((bLead > (byte)0x9f) ? (byte)0xb1 : (byte)0x71);
- bLead = (byte)((bLead << 1) + 1);
- if (bTrail > (byte)0x9e)
- {
- bTrail -= (byte)0x7e;
- bLead++;
- }
- else
- {
- if (bTrail > (byte)0x7e)
- bTrail--;
- bTrail -= (byte)0x1f;
- }
-
- bytes = ((int)bLead) << 8 | (int)bTrail;
-
- // Don't step out of our allocated lead byte area.
- // All DBCS lead and trail bytes should be >= 0x21 and <= 0x7e
- // This is commented out because Everett/Mlang had illegal PUA
- // mappings to ISO2022 code pages that we're maintaining.
-// if ((bytes & 0xFF00) < 0x2100 || (bytes & 0xFF00) > 0x7e00 ||
- // (bytes & 0xFF) < 0x21 || (bytes & 0xFF) > 0x7e)
- // return false;
- }
- else
- {
- // Adjust 1/2 Katakana: 0xa1-0xdf becomes 0x1021-0x105f (LEADBYTE_HALFWIDTH as lead byte, trail byte minus 0x80)
- if (bytes >= 0xa1 && bytes <= 0xdf)
- bytes += (LEADBYTE_HALFWIDTH << 8) - 0x80;
-
- // 0x81-0x9f and 0xe0-0xfc CP 932
- // 0x8e and 0xa1-0xfe CP 20932 (we don't use 8e though)
- // a1-df is 1/2 Katakana (already moved out of the single byte range above)
- if (bytes >= 0x81 &&
- (bytes <= 0x9f ||
- (bytes >= 0xe0 && bytes <= 0xfc)))
- {
- // Don't do lead bytes, we use escape sequences instead.
- return false;
- }
- }
- break;
- }
- case 50225:
- {
- // For 50225 since we don't rely on lead byte marks, return false and don't add them,
- // esp. since we're only a 7 bit code page.
- if (bytes >= 0x80 && bytes <= 0xff)
- return false;
-
- // Ignore double byte values out of range (lead and trail byte must both be a1-fe)
- if (bytes >= 0x100 &&
- ((bytes & 0xff) < 0xa1 || (bytes & 0xff) == 0xff ||
- (bytes & 0xff00) < 0xa100 || (bytes & 0xff00) == 0xff00))
- return false;
-
- // May as well get them into our 7 bit range
- bytes &= 0x7f7f;
-
- break;
- }
- case 52936:
- {
- // Since we don't rely on lead byte marks for 52936, get rid of them so we
- // don't end up with extra weird fffe mappings.
- if (bytes >= 0x81 && bytes <= 0xfe)
- return false;
-
- break;
- }
- }
-
- return true;
- }
-
- // Counts the bytes that encoding the given chars would produce.
- // Internal entry point: the caller has validated the arguments already.
- internal override unsafe int GetByteCount(char* chars, int count, EncoderNLS baseEncoder)
- {
- // Arguments were checked further up the call chain, so assertions are enough here
- Debug.Assert(count >= 0, "[ISO2022Encoding.GetByteCount]count is negative");
- Debug.Assert(chars != null, "[ISO2022Encoding.GetByteCount]chars is null");
-
- // Passing a null output buffer of size 0 turns GetBytes into a counting pass
- int bytesNeeded = GetBytes(chars, count, null, 0, baseEncoder);
- return bytesNeeded;
- }
-
- // Encodes chars into bytes by dispatching to the routine for this code page.
- // GetByteCount calls this with a null bytes pointer and byteCount 0 to obtain a count only.
- // Returns what the code page specific routine returns, or 0 when the code page is not
- // one this class handles (debug builds assert in that case, matching GetChars).
- internal override unsafe int GetBytes(char* chars, int charCount,
- byte* bytes, int byteCount, EncoderNLS baseEncoder)
- {
- // Just need to ASSERT, this is called by something else internal that checked parameters already
- Debug.Assert(chars != null, "[ISO2022Encoding.GetBytes]chars is null");
- Debug.Assert(byteCount >= 0, "[ISO2022Encoding.GetBytes]byteCount is negative");
- Debug.Assert(charCount >= 0, "[ISO2022Encoding.GetBytes]charCount is negative");
-
- // Assert because we shouldn't be able to have a null encoder.
- Debug.Assert(encoderFallback != null, "[ISO2022Encoding.GetBytes]Attempting to use null encoder fallback");
-
- // Fix our encoder (may be null, the routines below check for that)
- ISO2022Encoder encoder = (ISO2022Encoder)baseEncoder;
-
- // Our return value
- int iCount = 0;
-
- switch(CodePage)
- {
- case 50220:
- case 50221:
- case 50222:
- iCount = GetBytesCP5022xJP( chars, charCount, bytes, byteCount, encoder );
- break;
- case 50225:
- iCount = GetBytesCP50225KR( chars, charCount, bytes, byteCount, encoder );
- break;
-// Everett had 50227 the same as 936
-/* case 50227:
- iCount = GetBytesCP50227CN( chars, charCount, bytes, byteCount, encoder );
- break;
-*/
- case 52936:
- iCount = GetBytesCP52936( chars, charCount, bytes, byteCount, encoder );
- break;
- default:
- // Same handling as GetChars: an unexpected code page is a programming error,
- // release builds still return 0 as before.
- Debug.Assert(false, "[ISO2022Encoding.GetBytes] had unexpected code page");
- break;
- }
-
- return iCount;
- }
-
- // Counts the chars that decoding the given bytes would produce.
- // Internal entry point: the caller has validated the arguments already.
- internal override unsafe int GetCharCount(byte* bytes, int count, DecoderNLS baseDecoder)
- {
- // Arguments were checked further up the call chain, so assertions are enough here
- Debug.Assert(bytes != null, "[ISO2022Encoding.GetCharCount]bytes is null");
- Debug.Assert(count >= 0, "[ISO2022Encoding.GetCharCount]byteCount is negative");
-
- // Passing a null output buffer of size 0 turns GetChars into a counting pass
- int charsNeeded = GetChars(bytes, count, null, 0, baseDecoder);
- return charsNeeded;
- }
-
- // Decodes bytes into chars by dispatching to the routine for this code page.
- // Returns what that routine returns, or 0 (after a debug assert) for an unexpected code page.
- internal override unsafe int GetChars(byte* bytes, int byteCount,
- char* chars, int charCount, DecoderNLS baseDecoder)
- {
- // Internal callers validated the arguments, so only assert here
- Debug.Assert(bytes != null, "[ISO2022Encoding.GetChars]bytes is null");
- Debug.Assert(byteCount >= 0, "[ISO2022Encoding.GetChars]byteCount is negative");
- Debug.Assert(charCount >= 0, "[ISO2022Encoding.GetChars]charCount is negative");
-
- // The code page routines want the ISO 2022 flavor of the decoder
- ISO2022Decoder iso2022Decoder = (ISO2022Decoder)baseDecoder;
-
- switch (CodePage)
- {
- // Japanese (all three variants share one decoder routine)
- case 50220:
- case 50221:
- case 50222:
- return GetCharsCP5022xJP( bytes, byteCount, chars, charCount, iso2022Decoder);
-
- // Korean
- case 50225:
- return GetCharsCP50225KR( bytes, byteCount, chars, charCount, iso2022Decoder);
-
- // Note: 50227 is currently the same as 936, so it has no routine here
-
- // HZ
- case 52936:
- return GetCharsCP52936( bytes, byteCount, chars, charCount, iso2022Decoder);
- }
-
- // Nothing matched
- Debug.Assert(false, "[ISO2022Encoding.GetChars] had unexpected code page");
- return 0;
- }
-
- // ISO 2022 Code pages for JP.
- // 50220 - No halfwidth Katakana, convert to full width
- // 50221 - Use escape sequence for half width Katakana
- // 50222 - Use shift-in/shift-out for half width Katakana
- //
- // These are the JIS code pages, superset of ISO-2022 / ISO-2022-JP-1
- // 0E Shift Out (following bytes are Katakana)
- // 0F Shift In (back to "normal" behavior)
- // 21-7E Byte ranges (1 or 2 bytes)
- // <ESC> $ @ To Double Byte 0208 Mode (actually older code page, but subset of 0208)
- // <ESC> $ B To Double Byte 0208 Mode (duplicate)
- // <ESC> $ ( D To Double Byte 0212 Mode (previously we misinterpreted this)
- // <ESC> ( I To half width Katakana
- // <ESC> ( J To JIS-Roman
- // <ESC> ( H To JIS-Roman (swedish character set)
- // <ESC> ( B To ASCII
- // <ESC> & @ Alternate lead in to <ESC> $ B so just ignore it.
- //
- // So in Katakana mode we add 0x8e as a lead byte and use CP 20932 to convert it
- // In ASCII mode we just spit out the single byte.
- // In Roman mode we should change 0x5c (\) -> Yen sign and 0x7e (~) to Overline, however
- // we didn't in mLang, otherwise roman is like ASCII.
- // In 0208 double byte mode we have to |= with 0x8080 and use CP 20932 to convert it.
- // In 0212 double byte mode we have to |= with 0x8000 and use CP 20932 to convert it.
- //
- // Note that JIS Shift In/Shift Out is different than the other ISO2022 encodings. For JIS
- // Shift out always shifts to half-width Katakana. Chinese encodings use designator sequences
- // instead of escape sequences and shift out to the designated sequence or back in to ASCII.
- //
- // When decoding JIS 0208, MLang used a '*' (0x2a) character in JIS 0208 mode to map the trailing byte
- // to halfwidth katakana. I found no description of that behavior, however that block of 0208 is
- // undefined, so we maintain that behavior when decoding. We will never generate characters using
- // that technique, but the decoder will process them.
- //
- private unsafe int GetBytesCP5022xJP(char* chars, int charCount,
- byte* bytes, int byteCount, ISO2022Encoder encoder)
- {
- // prepare our helpers (bytes is null when GetByteCount only wants a count)
- Encoding.EncodingByteBuffer buffer = new Encoding.EncodingByteBuffer(
- this, encoder, bytes, byteCount, chars, charCount);
-
- // Get our mode
- ISO2022Modes currentMode = ISO2022Modes.ModeASCII; // Mode
- ISO2022Modes shiftInMode = ISO2022Modes.ModeASCII; // Mode that shift in will go back to (only used by CP 50222)
-
- // Check our encoder, it carries the state left behind by the previous call
- if (encoder != null)
- {
- char charLeftOver = encoder.charLeftOver;
-
- currentMode = encoder.currentMode;
- shiftInMode = encoder.shiftInOutMode;
-
- // We may have a left over character from last time, try and process it.
- if (charLeftOver > 0)
- {
- Debug.Assert(Char.IsHighSurrogate(charLeftOver), "[ISO2022Encoding.GetBytesCP5022xJP]leftover character should be high surrogate");
-
- // It has to be a high surrogate, which we don't support, so it has to be a fallback
- buffer.Fallback(charLeftOver);
- }
- }
-
- while (buffer.MoreData)
- {
- // Get our char
- char ch = buffer.GetNextChar();
-
- // Get our bytes
- ushort iBytes = mapUnicodeToBytes[ch];
-
- StartConvert:
- // Check for halfwidth bytes
- byte bLeadByte = (byte)(iBytes >> 8);
- byte bTrailByte = (byte)(iBytes & 0xff);
-
- if (bLeadByte == LEADBYTE_HALFWIDTH)
- {
- // It's Halfwidth Katakana
- if (CodePage == 50220)
- {
- // CodePage 50220 doesn't use halfwidth Katakana, convert to fullwidth
- // See if it's out of range, fallback if so, throws if recursive fallback
- if (bTrailByte < 0x21 || bTrailByte >= 0x21 + HalfToFullWidthKanaTable.Length)
- {
- buffer.Fallback(ch);
- continue;
- }
-
- // Get the full width katakana char to use.
- iBytes = unchecked((ushort)(HalfToFullWidthKanaTable[bTrailByte - 0x21] & 0x7F7F));
-
- // May have to do all sorts of fun stuff for mode, go back to start convert
- goto StartConvert;
- }
-
- // Can use halfwidth Katakana, make sure we're in right mode
-
- // Make sure we're in right mode
- if (currentMode != ISO2022Modes.ModeHalfwidthKatakana)
- {
- // 50222 or 50221, either shift in/out or escape to get to Katakana mode
- if (CodePage == 50222)
- {
- // Shift Out
- if (!buffer.AddByte(SHIFT_OUT))
- break; // convert out of space, stop
-
- // Don't change modes until after AddByte in case it fails for convert
- // We get to shift out to Katakana, make sure we'll go back to the right mode
- // (This ends up always being ASCII)
- shiftInMode = currentMode;
- currentMode = ISO2022Modes.ModeHalfwidthKatakana;
- }
- else
- {
- // 50221 does halfwidth katakana by escape sequence
- Debug.Assert(CodePage == 50221, "[ISO2022Encoding.GetBytesCP5022xJP]Expected Code Page 50221");
-
- // Add our escape sequence (<ESC> ( I)
- if (!buffer.AddByte(ESCAPE, unchecked((byte)'('), unchecked((byte)'I')))
- break; // convert out of space, stop
-
- currentMode = ISO2022Modes.ModeHalfwidthKatakana;
- }
- }
-
- // We know we're in Katakana mode now, so add it.
- // Go ahead and add the Katakana byte. Our table tail bytes are 0x80 too big.
- if (!buffer.AddByte(unchecked((byte)(bTrailByte & 0x7F))))
- break; // convert out of space, stop
-
- // Done with this one
- continue;
- }
- else if (bLeadByte != 0)
- {
- //
- // It's a double byte character.
- //
-
- // If we're CP 50222 we may have to shift in from Katakana mode first
- if (CodePage == 50222 && currentMode == ISO2022Modes.ModeHalfwidthKatakana)
- {
- // Shift In
- if (!buffer.AddByte(SHIFT_IN))
- break; // convert out of space, stop
-
- // Need to shift in from katakana. (Still might not be right, but won't be shifted out anyway)
- currentMode = shiftInMode;
- }
-
- // Make sure we're in the right mode (JIS 0208 or JIS 0212)
- // Note: Right now we don't use JIS 0212. Also this table'd be wrong
-
- // It's JIS extension 0208
- if (currentMode != ISO2022Modes.ModeJIS0208)
- {
- // Escape sequence (<ESC> $ B), we can fail after this, mode will be correct for convert
- if (!buffer.AddByte(ESCAPE, unchecked((byte)'$'), unchecked((byte)'B')))
- break; // Convert out of space, stop
-
- currentMode = ISO2022Modes.ModeJIS0208;
- }
-
- // Add our double bytes
- if (!buffer.AddByte(unchecked((byte)(bLeadByte)), unchecked((byte)(bTrailByte))))
- break; // Convert out of space, stop
- continue;
- }
- else if (iBytes != 0 || ch == 0)
- {
- // Single byte Char
- // If we're CP 50222 we may have to shift in from Katakana mode first
- if (CodePage == 50222 && currentMode == ISO2022Modes.ModeHalfwidthKatakana)
- {
- // Shift IN
- if (!buffer.AddByte(SHIFT_IN))
- break; // convert ran out of room
-
- // Need to shift in from katakana. (Still might not be right, but won't be shifted out anyway)
- currentMode = shiftInMode;
- }
-
- // It's a single byte character, switch to ASCII (<ESC> ( B) if we have to
- if (currentMode != ISO2022Modes.ModeASCII)
- {
- if (!buffer.AddByte(ESCAPE,unchecked((byte)'('), unchecked((byte)'B')))
- break; // convert ran out of room
-
- currentMode = ISO2022Modes.ModeASCII;
- }
-
- // Add the ASCII char
- if (!buffer.AddByte(bTrailByte))
- break; // convert had no room left
- continue;
- }
-
- // It's unknown (no mapping and not the null char), do fallback, throws if recursive
- buffer.Fallback(ch);
- }
-
- // Switch back to ASCII if MustFlush or no encoder
- if (currentMode != ISO2022Modes.ModeASCII &&
- (encoder == null || encoder.MustFlush))
- {
- // If we're CP 50222 we may have to shift in from Katakana mode first
- if (CodePage == 50222 && currentMode == ISO2022Modes.ModeHalfwidthKatakana)
- {
- // Shift IN, only shift mode if necessary.
- if (buffer.AddByte(SHIFT_IN))
- // Need to shift in from katakana. (Still might not be right, but won't be shifted out anyway)
- currentMode = shiftInMode;
- else
- // If not successful, convert will maintain state for next time, also
- // AddByte will have decremented our char count, however we need it to remain the same
- buffer.GetNextChar();
- }
-
- // switch back to ASCII to finish neatly (skipped if a 50222 shift in just failed and we are still in Katakana mode)
- if (currentMode != ISO2022Modes.ModeASCII &&
- (CodePage != 50222 || currentMode != ISO2022Modes.ModeHalfwidthKatakana))
- {
- // only shift if it was successful
- if (buffer.AddByte(ESCAPE, unchecked((byte)'('), unchecked((byte)'B')))
- currentMode = ISO2022Modes.ModeASCII;
- else
- // If not successful, convert will maintain state for next time, also
- // AddByte will have decremented our char count, however we need it to remain the same
- buffer.GetNextChar();
- }
- }
-
- // Remember our encoder state (only when really converting, a counting pass has null bytes)
- if (bytes != null && encoder != null)
- {
- // This is ASCII if we had to flush
- encoder.currentMode = currentMode;
- encoder.shiftInOutMode = shiftInMode;
-
- if (!buffer.fallbackBuffer.bUsedEncoder)
- {
- encoder.charLeftOver = (char)0;
- }
-
- encoder.m_charsUsed = buffer.CharsUsed;
- }
-
- // Return our length
- return buffer.Count;
- }
-
- // ISO 2022 Code pages for Korean - CP 50225
- //
- // CP 50225 has Shift In/Shift Out codes, and a single designator sequence that is supposed
- // to appear once in the file, at the beginning of a line, before any multibyte code points.
- // So we stick the designator at the beginning of the output.
- //
- // These are the KR code page codes for ISO-2022-KR
- // 0E Shift Out (following bytes are double byte)
- // 0F Shift In (back to ASCII behavior)
- // 21-7E Byte ranges (1 or 2 bytes)
- // <ESC> $)C Double byte ISO-2022-KR designator
- //
- // Note that this encoding is a little different than other encodings. The <esc>$)C sequence
- // should only appear once per file. (Actually I saw another spec/rfc that said at the beginning
- // of each line, but it shouldn't really matter.)
- //
- // During decoding Mlang accepted ' ', '\t', and '\n' as their respective characters, even if
- // it was in double byte mode. We maintain that behavior, although I couldn't find a reference or
- // reason for that behavior. We never generate data using that shortcut.
- //
- // Also Mlang always assumed KR mode, even if the designator wasn't found yet, so we do that as
- // well. So basically we just ignore <ESC>$)C when decoding.
- //
- private unsafe int GetBytesCP50225KR(char* chars, int charCount,
- byte* bytes, int byteCount, ISO2022Encoder encoder)
- {
- // prepare our helpers (bytes is null when GetByteCount only wants a count)
- Encoding.EncodingByteBuffer buffer = new Encoding.EncodingByteBuffer(
- this, encoder, bytes, byteCount, chars, charCount);
-
- // Get our mode
- ISO2022Modes currentMode = ISO2022Modes.ModeASCII; // Mode
- ISO2022Modes shiftOutMode = ISO2022Modes.ModeASCII; // ModeKR if already stamped lead bytes
-
- // Check our encoder, it carries the state left behind by the previous call
- if (encoder != null)
- {
- // May have leftover stuff
- char charLeftOver = encoder.charLeftOver;
- currentMode = encoder.currentMode;
- shiftOutMode = encoder.shiftInOutMode;
-
- // We may have a left over character from last time, try and process it.
- if (charLeftOver > 0)
- {
- Debug.Assert(Char.IsHighSurrogate(charLeftOver), "[ISO2022Encoding.GetBytesCP50225KR]leftover character should be high surrogate");
-
- // It has to be a high surrogate, which we don't support, so it has to be a fallback
- buffer.Fallback(charLeftOver);
- }
- }
-
- while (buffer.MoreData)
- {
- // Get our data
- char ch = buffer.GetNextChar();
-
- // Get our bytes
- ushort iBytes = mapUnicodeToBytes[ch];
-
- // Check for double byte bytes
- byte bLeadByte = (byte)(iBytes >> 8);
- byte bTrailByte = (byte)(iBytes & 0xff);
-
- if (bLeadByte != 0)
- {
- //
- // It's a double byte character.
- //
-
- // If we haven't done our Korean designator, then do so, if we have any input
- if (shiftOutMode != ISO2022Modes.ModeKR)
- {
- // Add our code page designator sequence (<ESC> $ ) C)
- if (!buffer.AddByte(ESCAPE, unchecked((byte)'$'), unchecked((byte)')'), unchecked((byte)'C')))
- break; // No room during convert.
-
- shiftOutMode = ISO2022Modes.ModeKR;
- }
-
- // May have to switch to ModeKR first
- if (currentMode != ISO2022Modes.ModeKR)
- {
- if (!buffer.AddByte(SHIFT_OUT))
- break; // No convert room
-
- currentMode = ISO2022Modes.ModeKR;
- }
-
- // Add the bytes
- if (!buffer.AddByte(bLeadByte, bTrailByte))
- break; // no convert room
- continue;
- }
- else if (iBytes != 0 || ch == 0)
- {
- // It's a single byte character, switch to ASCII if we have to
- if (currentMode != ISO2022Modes.ModeASCII)
- {
- if (!buffer.AddByte(SHIFT_IN))
- break;
-
- currentMode = ISO2022Modes.ModeASCII;
- }
-
- // Add the ASCII char
- if (!buffer.AddByte(bTrailByte))
- break;
- continue;
- }
-
- // It's unknown (no mapping and not the null char), do fallback, throws if recursive
- buffer.Fallback(ch);
- }
-
- // Switch back to ASCII if MustFlush or no encoder
- if (currentMode != ISO2022Modes.ModeASCII &&
- (encoder == null || encoder.MustFlush))
- {
- // Get back to ASCII to be safe. Only change the mode if adding the byte succeeds.
- if (buffer.AddByte(SHIFT_IN))
- currentMode = ISO2022Modes.ModeASCII;
- else
- // If not successful, convert will maintain state for next time, also
- // AddByte will have decremented our char count, however we need it to remain the same
- buffer.GetNextChar();
- }
-
- // Remember our encoder state (only when really converting, a counting pass has null bytes)
- if (bytes != null && encoder != null)
- {
- // If we didn't use the encoder, then there's no chars left over
- if (!buffer.fallbackBuffer.bUsedEncoder)
- {
- encoder.charLeftOver = (char)0;
- }
-
- // This is ASCII if we had to flush
- encoder.currentMode = currentMode;
-
- // We don't use shift out mode, but if we've flushed we need to reset it so it doesn't
- // get output again.
- if (!encoder.MustFlush || encoder.charLeftOver != (char)0)
- {
- // We should be not flushing or converting
- Debug.Assert(!encoder.MustFlush || !encoder.m_throwOnOverflow,
- "[ISO2022Encoding.GetBytesCP50225KR]Expected no left over data or not flushing or not converting");
- encoder.shiftInOutMode = shiftOutMode;
- }
- else
- encoder.shiftInOutMode = ISO2022Modes.ModeASCII;
-
- encoder.m_charsUsed = buffer.CharsUsed;
- }
-
- // Return our length
- return buffer.Count;
- }
-
- // CP52936 is HZ Encoding
- // HZ Encoding has 4 shift sequences:
- // ~~ '~' (\u7e)
- // ~} shift into 1 byte mode,
- // ~{ shift into 2 byte GB 2312-80
- // ~<NL> Maintain 2 byte mode across new lines (ignore both ~ and <NL> characters)
- // (This is for mailers that restrict to 70 or 80 or whatever character lines)
- //
- // According to comment in mlang, lead & trail byte ranges are described in RFC 1843
- // RFC 1843 => valid HZ code range: leading byte 0x21 - 0x77, 2nd byte 0x21 - 0x7e
- // Our 936 code points are or'd with 0x8080, so lead byte 0xa1 - 0xf7, trail byte 0xa1 - 0xfe
- //
- // This encoding is designed for transmission by e-mail and news. No bytes should have high bit set.
- // (all bytes <= 0x7f)
- private unsafe int GetBytesCP52936(char* chars, int charCount,
- byte* bytes, int byteCount, ISO2022Encoder encoder)
- {
- // prepare our helpers
- Encoding.EncodingByteBuffer buffer = new Encoding.EncodingByteBuffer(
- this, encoder, bytes, byteCount, chars, charCount);
-
- // Mode
- ISO2022Modes currentMode = ISO2022Modes.ModeASCII;
-
- // Check our encoder
- if (encoder != null)
- {
- char charLeftOver = encoder.charLeftOver;
- currentMode = encoder.currentMode;
-
- // We may have a left over character from last time, try and process it.
- if (charLeftOver > 0)
- {
- Debug.Assert(Char.IsHighSurrogate(charLeftOver), "[ISO2022Encoding.GetBytesCP52936]leftover character should be high surrogate");
-
- // It has to be a high surrogate, which we don't support, so it has to be a fallback
- buffer.Fallback(charLeftOver);
- }
- }
-
- while (buffer.MoreData)
- {
- // Get our char
- char ch = buffer.GetNextChar();
-
- // Get our bytes
- ushort sChar = mapUnicodeToBytes[ch];
- if (sChar == 0 && ch != 0)
- {
- // Wasn't a legal byte sequence, its a surrogate or fallback
- // Throws if recursive (knows because we called InternalGetNextChar)
- buffer.Fallback(ch);
-
- // Done with our char, now process fallback
- continue;
- }
-
- // Check for halfwidth bytes
- byte bLeadByte = (byte)(sChar >> 8);
- byte bTrailByte = (byte)(sChar & 0xff);
-
- // If its a double byte, it has to fit in the lead byte 0xa1 - 0xf7, trail byte 0xa1 - 0xfe range
- // (including the 0x8080 that our codepage or's to the value)
- if ((bLeadByte != 0 &&
- (bLeadByte < 0xa1 || bLeadByte > 0xf7 || bTrailByte < 0xa1 || bTrailByte > 0xfe)) ||
- (bLeadByte == 0 && bTrailByte > 0x80 && bTrailByte != 0xff))
- {
- // Illegal character, in 936 code page, but not in HZ subset, get fallback for it
- buffer.Fallback(ch);
- continue;
- }
-
- // sChar is now either ASCII or has an 0x8080 mask
- if (bLeadByte != 0)
- {
- // Its a double byte mode
- if (currentMode != ISO2022Modes.ModeHZ)
- {
- // Need to add the double byte mode marker
- if (!buffer.AddByte((byte)'~', (byte)'{', 2))
- break; // Stop if no buffer space in convert
-
- currentMode = ISO2022Modes.ModeHZ;
- }
-
- // Go ahead and add the 2 bytes
- if (!buffer.AddByte(unchecked((byte)(bLeadByte & 0x7f)), unchecked((byte)(bTrailByte & 0x7f))))
- break; // Stop if no buffer space in convert
- }
- else
- {
- // Its supposed to be ASCII
- if (currentMode != ISO2022Modes.ModeASCII)
- {
- // Need to add the ASCII mode marker
- // Will have 1 more byte (or 2 if ~)
- if (!buffer.AddByte((byte)'~', (byte)'}', bTrailByte == '~' ? 2:1))
- break;
-
- currentMode = ISO2022Modes.ModeASCII;
- }
-
- // If its a '~' we'll need an extra one
- if (bTrailByte == '~')
- {
- // Need to add the extra ~
- if (!buffer.AddByte((byte)'~', 1))
- break;
- }
-
- // Need to add the character
- if (!buffer.AddByte(bTrailByte))
- break;
- }
- }
-
- // Add ASCII shift out if we're at end of decoder
- if (currentMode != ISO2022Modes.ModeASCII &&
- (encoder == null || encoder.MustFlush))
- {
- // Need to add the ASCII mode marker
- // Only turn off other mode if this works
- if (buffer.AddByte((byte)'~',(byte)'}'))
- currentMode = ISO2022Modes.ModeASCII;
- else
- // If not successful, convert will maintain state for next time, also
- // AddByte will have decremented our char count, however we need it to remain the same
- buffer.GetNextChar();
- }
-
- // Need to remember our mode
- if (encoder != null && bytes != null)
- {
- // This is ASCII if we had to flush
- encoder.currentMode = currentMode;
-
- if (!buffer.fallbackBuffer.bUsedEncoder)
- {
- encoder.charLeftOver = (char)0;
- }
-
- encoder.m_charsUsed = buffer.CharsUsed;
- }
-
- // Return our length
- return buffer.Count;
- }
-
- private unsafe int GetCharsCP5022xJP(byte* bytes, int byteCount,
- char* chars, int charCount, ISO2022Decoder decoder)
- {
- // Get our info.
- Encoding.EncodingCharBuffer buffer = new Encoding.EncodingCharBuffer(
- this, decoder, chars, charCount, bytes, byteCount);
-
- // No mode information yet
- ISO2022Modes currentMode = ISO2022Modes.ModeASCII; // Our current Mode
- ISO2022Modes shiftInMode = ISO2022Modes.ModeASCII; // Mode that we'll shift in to
- byte[] escapeBytes = new byte[4];
- int escapeCount = 0;
-
- if (decoder != null)
- {
- currentMode = decoder.currentMode;
- shiftInMode = decoder.shiftInOutMode;
-
- // See if we have leftover decoder buffer to use
- // Load our bytesLeftOver
- escapeCount = decoder.bytesLeftOverCount;
-
- // Don't want to mess up decoder if we're counting or throw an exception
- for (int i = 0; i < escapeCount; i++)
- escapeBytes[i] = decoder.bytesLeftOver[i];
- }
-
- // Do this until the end
- while (buffer.MoreData || escapeCount > 0)
- {
- byte ch;
-
- if (escapeCount > 0)
- {
- // Get more escape sequences if necessary
- if (escapeBytes[0] == ESCAPE)
- {
- // Stop if no more input
- if (!buffer.MoreData)
- {
- if (decoder != null && !decoder.MustFlush)
- break;
- }
- else
- {
- // Add it to the sequence we can check
- escapeBytes[escapeCount++] = buffer.GetNextByte();
-
- // We have an escape sequence
- ISO2022Modes modeReturn =
- CheckEscapeSequenceJP(escapeBytes, escapeCount);
-
- if (modeReturn != ISO2022Modes.ModeInvalidEscape)
- {
- if (modeReturn != ISO2022Modes.ModeIncompleteEscape)
- {
- // Processed escape correctly
- escapeCount = 0;
-
- // We're now this mode
- currentMode = shiftInMode = modeReturn;
- }
-
- // Either way, continue to get next escape or real byte
- continue;
- }
- }
-
- // If ModeInvalidEscape, or no input & must flush, then fall through to add escape.
- }
-
- // Read next escape byte and move them down one.
- ch = DecrementEscapeBytes(ref escapeBytes, ref escapeCount);
- }
- else
- {
- // Get our next byte
- ch = buffer.GetNextByte();
-
- if (ch == ESCAPE)
- {
- // We'll have an escape sequence, use it if we don't have one buffered already
- if (escapeCount == 0)
- {
- // Start this new escape sequence
- escapeBytes[0] = ch;
- escapeCount = 1;
- continue;
- }
-
- // Flush the previous escape sequence, then reuse this escape byte
- buffer.AdjustBytes(-1);
- }
- }
-
- if (ch == SHIFT_OUT)
- {
- shiftInMode = currentMode;
- currentMode = ISO2022Modes.ModeHalfwidthKatakana;
- continue;
- }
- else if (ch == SHIFT_IN)
- {
- currentMode = shiftInMode;
- continue;
- }
-
- // Get our full character
- ushort iBytes = ch;
- bool b2Bytes = false;
-
- if (currentMode == ISO2022Modes.ModeJIS0208)
- {
- //
- // To handle errors, we need to check:
- // 1. if trailbyte is there
- // 2. if code is valid
- //
- if (escapeCount > 0)
- {
- // Let another escape fall through
- if (escapeBytes[0] != ESCAPE)
- {
- // Move them down one & get the next data
- iBytes <<= 8;
- iBytes |= DecrementEscapeBytes(ref escapeBytes, ref escapeCount);
- b2Bytes = true;
- }
- }
- else if (buffer.MoreData)
- {
- iBytes <<= 8;
- iBytes |= buffer.GetNextByte();
- b2Bytes = true;
- }
- else
- {
- // Not enough input, use decoder if possible
- if (decoder == null || decoder.MustFlush)
- {
- // No decoder, do fallback for this byte
- buffer.Fallback(ch);
- break;
- }
-
- // Stick it in the decoder if we're not counting
- if (chars != null)
- {
- escapeBytes[0] = ch;
- escapeCount = 1;
- }
- break;
- }
-
- // MLang treated JIS 0208 '*' lead byte like a single halfwidth katakana
- // escape, so use 0x8e00 as katakana lead byte and keep same trail byte.
- // 0x2a lead byte range is normally unused in JIS 0208, so shouldn't have
- // any wierd compatibility issues.
- if ((b2Bytes == true) && ((iBytes & 0xff00) == 0x2a00))
- {
- iBytes = (ushort)(iBytes & 0xff);
- iBytes |= (LEADBYTE_HALFWIDTH << 8); // Put us in the halfwidth katakana range
- }
- }
- else if (iBytes >= 0xA1 && iBytes <= 0xDF)
- {
- // Everett accidentally mapped Katakana like shift-jis (932),
- // even though this is a 7 bit code page. We keep that mapping
- iBytes |= (LEADBYTE_HALFWIDTH << 8); // Map to halfwidth katakana range
- iBytes &= 0xff7f; // remove extra 0x80
- }
- else if (currentMode == ISO2022Modes.ModeHalfwidthKatakana )
- {
- // Add 0x10 lead byte that our encoding expects for Katakana:
- iBytes |= (LEADBYTE_HALFWIDTH << 8);
- }
-
- // We have an iBytes to try to convert.
- char c = mapBytesToUnicode[iBytes];
-
- // See if it was unknown
- if (c == UNKNOWN_CHAR_FLAG && iBytes != 0)
- {
- // Have to do fallback
- if (b2Bytes)
- {
- if (!buffer.Fallback((byte)(iBytes >> 8), (byte)iBytes))
- break;
- }
- else
- {
- if (!buffer.Fallback(ch))
- break;
- }
- }
- else
- {
- // If we were JIS 0208, then we consumed an extra byte
- if (!buffer.AddChar(c, b2Bytes ? 2:1))
- break;
- }
- }
-
- // Make sure our decoder state matches our mode, if not counting
- if (chars != null && decoder != null)
- {
- // Remember it if we don't flush
- if (!decoder.MustFlush || escapeCount != 0)
- {
- // Either not flushing or had state (from convert)
- Debug.Assert(!decoder.MustFlush || !decoder.m_throwOnOverflow,
- "[ISO2022Encoding.GetCharsCP5022xJP]Expected no state or not converting or not flushing");
-
- decoder.currentMode = currentMode;
- decoder.shiftInOutMode = shiftInMode;
-
- // Remember escape buffer
- decoder.bytesLeftOverCount = escapeCount;
- decoder.bytesLeftOver = escapeBytes;
- }
- else
- {
- // We flush, clear buffer
- decoder.currentMode = ISO2022Modes.ModeASCII;
- decoder.shiftInOutMode = ISO2022Modes.ModeASCII;
- decoder.bytesLeftOverCount = 0;
- // Slightly different if counting/not counting
- }
-
- decoder.m_bytesUsed = buffer.BytesUsed;
- }
-
- // Return # of characters we found
- return buffer.Count;
- }
-
- // We know we have an escape sequence, so check it starting with the byte after the escape
- private ISO2022Modes CheckEscapeSequenceJP( byte[] bytes, int escapeCount )
- {
- // Have an escape sequence
- if (bytes[0] != ESCAPE)
- return ISO2022Modes.ModeInvalidEscape;
-
- if (escapeCount < 3)
- return ISO2022Modes.ModeIncompleteEscape;
-
- if (bytes[1] == '(')
- {
- if (bytes[2] == 'B') // <esc>(B
- {
- return ISO2022Modes.ModeASCII;
- }
- else if (bytes[2] == 'H') // <esc>(H
- {
- // Actually this is supposed to be Swedish
- // We treat it like ASCII though.
- return ISO2022Modes.ModeASCII;
- }
- else if (bytes[2] == 'J') // <esc>(J
- {
- // Actually this is supposed to be Roman
- // 2 characters are different, but historically we treat it as ascii
- return ISO2022Modes.ModeASCII;
- }
- else if (bytes[2] == 'I') // <esc>(I
- {
- return ISO2022Modes.ModeHalfwidthKatakana;
- }
- }
- else if (bytes[1] == '$')
- {
- if (bytes[2] == '@' || // <esc>$@
- bytes[2] == 'B') // <esc>$B
- {
- return ISO2022Modes.ModeJIS0208;
- }
- else
- {
- // Looking for <esc>$(D
- if (escapeCount < 4)
- return ISO2022Modes.ModeIncompleteEscape;
-
- if (bytes[2] == '(' && bytes[3] == 'D') // <esc>$(D
- {
- // Mlang treated 0208 like 0212 even though that's wrong
- return ISO2022Modes.ModeJIS0208;
- }
- }
- }
- else if (bytes[1] == '&')
- {
- if (bytes[2] == '@') // <esc>&@
- {
- // Ignore ESC & @ (prefix to <esc>$B)
- return ISO2022Modes.ModeNOOP;
- }
- }
-
- // If we get here we fell through and have an invalid/unknown escape sequence
- return ISO2022Modes.ModeInvalidEscape;
- }
-
- private byte DecrementEscapeBytes(ref byte[] bytes, ref int count)
- {
- Debug.Assert(count > 0, "[ISO2022Encoding.DecrementEscapeBytes]count > 0");
-
- // Decrement our count
- count--;
-
- // Remember the first one
- byte returnValue = bytes[0];
-
- // Move them down one.
- for (int i = 0; i < count; i++)
- {
- bytes[i] = bytes[i+1];
- }
-
- // Clear out the last byte
- bytes[count] = 0;
-
- // Return the old 1st byte
- return returnValue;
- }
-
- // Note that in DBCS mode mlang passed through ' ', '\t' and '\n' as SBCS characters
- // probably to allow mailer formatting without too much extra work.
- private unsafe int GetCharsCP50225KR(byte* bytes, int byteCount,
- char* chars, int charCount, ISO2022Decoder decoder)
- {
- // Get our info.
- Encoding.EncodingCharBuffer buffer = new Encoding.EncodingCharBuffer(
- this, decoder, chars, charCount, bytes, byteCount);
-
- // No mode information yet
- ISO2022Modes currentMode = ISO2022Modes.ModeASCII; // Our current Mode
-
- byte[] escapeBytes = new byte[4];
- int escapeCount = 0;
-
- if (decoder != null)
- {
- currentMode = decoder.currentMode;
-
- // See if we have leftover decoder buffer to use
- // Load our bytesLeftOver
- escapeCount = decoder.bytesLeftOverCount;
-
- // Don't want to mess up decoder if we're counting or throw an exception
- for (int i = 0; i < escapeCount; i++)
- escapeBytes[i] = decoder.bytesLeftOver[i];
- }
-
- // Do this until the end, just do '?' replacement because we don't have fallbacks for decodings.
- while (buffer.MoreData || escapeCount > 0)
- {
- byte ch;
-
- if (escapeCount > 0)
- {
- // Get more escape sequences if necessary
- if (escapeBytes[0] == ESCAPE)
- {
- // Stop if no more input
- if (!buffer.MoreData)
- {
- if (decoder != null && !decoder.MustFlush)
- break;
- }
- else
- {
- // Add it to the sequence we can check
- escapeBytes[escapeCount++] = buffer.GetNextByte();
-
- // We have an escape sequence
- ISO2022Modes modeReturn =
- CheckEscapeSequenceKR(escapeBytes, escapeCount);
-
- if (modeReturn != ISO2022Modes.ModeInvalidEscape)
- {
- if (modeReturn != ISO2022Modes.ModeIncompleteEscape)
- {
- // Processed escape correctly, no effect (we know about KR mode)
- escapeCount = 0;
- }
-
- // Either way, continue to get next escape or real byte
- continue;
- }
- }
-
- // If ModeInvalidEscape, or no input & must flush, then fall through to add escape.
- }
-
- // Still have something left over in escape buffer
- // Get it and move them down one
- ch = DecrementEscapeBytes(ref escapeBytes, ref escapeCount);
- }
- else
- {
- // Get our next byte
- ch = buffer.GetNextByte();
-
- if (ch == ESCAPE)
- {
- // We'll have an escape sequence, use it if we don't have one buffered already
- if (escapeCount == 0)
- {
- // Start this new escape sequence
- escapeBytes[0] = ch;
- escapeCount = 1;
- continue;
- }
-
- // Flush previous escape sequence, then reuse this escape byte
- buffer.AdjustBytes(-1);
- }
- }
-
- if (ch == SHIFT_OUT)
- {
- currentMode = ISO2022Modes.ModeKR;
- continue;
- }
- else if (ch == SHIFT_IN)
- {
- currentMode = ISO2022Modes.ModeASCII;
- continue;
- }
-
- // Get our full character
- ushort iBytes = ch;
- bool b2Bytes = false;
-
- // MLANG was passing through ' ', '\t' and '\n', so we do so as well, but I don't see that in the RFC.
- if (currentMode == ISO2022Modes.ModeKR && ch != ' ' && ch != '\t' && ch != '\n')
- {
- //
- // To handle errors, we need to check:
- // 1. if trailbyte is there
- // 2. if code is valid
- //
- if (escapeCount > 0)
- {
- // Let another escape fall through
- if (escapeBytes[0] != ESCAPE)
- {
- // Move them down one & get the next data
- iBytes <<= 8;
- iBytes |= DecrementEscapeBytes(ref escapeBytes, ref escapeCount);
- b2Bytes = true;
- }
- }
- else if (buffer.MoreData)
- {
- iBytes <<= 8;
- iBytes |= buffer.GetNextByte();
- b2Bytes = true;
- }
- else
- {
- // Not enough input, use decoder if possible
- if (decoder == null || decoder.MustFlush)
- {
- // No decoder, do fallback for lonely 1st byte
- buffer.Fallback(ch);
- break;
- }
-
- // Stick it in the decoder if we're not counting
- if (chars != null)
- {
- escapeBytes[0] = ch;
- escapeCount = 1;
- }
- break;
- }
- }
-
- // We have a iBytes to try to convert.
- char c = mapBytesToUnicode[iBytes];
-
- // See if it was unknown
- if (c == UNKNOWN_CHAR_FLAG && iBytes != 0)
- {
- // Have to do fallback
- if (b2Bytes)
- {
- if (!buffer.Fallback((byte)(iBytes >> 8), (byte)iBytes))
- break;
- }
- else
- {
- if (!buffer.Fallback(ch))
- break;
- }
- }
- else
- {
- if (!buffer.AddChar(c, b2Bytes ? 2:1))
- break;
- }
- }
-
- // Make sure our decoder state matches our mode, if not counting
- if (chars != null && decoder != null)
- {
- // Remember it if we don't flush
- if (!decoder.MustFlush || escapeCount != 0)
- {
- // Either not flushing or had state (from convert)
- Debug.Assert(!decoder.MustFlush || !decoder.m_throwOnOverflow,
- "[ISO2022Encoding.GetCharsCP50225KR]Expected no state or not converting or not flushing");
-
- decoder.currentMode = currentMode;
-
- // Remember escape buffer
- decoder.bytesLeftOverCount = escapeCount;
- decoder.bytesLeftOver = escapeBytes;
- }
- else
- {
- // We flush, clear buffer
- decoder.currentMode = ISO2022Modes.ModeASCII;
- decoder.shiftInOutMode = ISO2022Modes.ModeASCII;
- decoder.bytesLeftOverCount = 0;
- }
-
- decoder.m_bytesUsed = buffer.BytesUsed;
- }
-
- // Return # of characters we found
- return buffer.Count;
- }
-
- // We know we have an escape sequence, so check it starting with the byte after the escape
- private ISO2022Modes CheckEscapeSequenceKR( byte[] bytes, int escapeCount )
- {
- // Have an escape sequence
- if (bytes[0] != ESCAPE)
- return ISO2022Modes.ModeInvalidEscape;
-
- if (escapeCount < 4)
- return ISO2022Modes.ModeIncompleteEscape;
-
- if (bytes[1] == '$' && bytes[2] == ')' && bytes[3] == 'C') // <esc>$)C
- return ISO2022Modes.ModeKR;
-
- // If we get here we fell through and have an invalid/unknown escape sequence
- return ISO2022Modes.ModeInvalidEscape;
- }
-
- // CP52936 is HZ Encoding
- // HZ Encoding has 4 shift sequences:
- // ~~ '~' (\u7e)
- // ~} shift into 1 byte mode,
- // ~{ shift into 2 byte GB 2312-80
- // ~<NL> Maintain 2 byte mode across new lines (ignore both ~ and <NL> characters)
- // (This is for mailers that restrict to 70 or 80 or whatever character lines)
- //
- // According to comment in mlang, lead & trail byte ranges are described in RFC 1843
- // RFC 1843 => valid HZ code range: leading byte 0x21 - 0x77, 2nd byte 0x21 - 0x7e
- // Our 936 code points are or'd with 0x8080, so lead byte 0xa1 - 0xf7, trail byte 0xa1 - 0xfe
- //
- // This encoding is designed for transmission by e-mail and news. No bytes should have high bit set.
- // (all bytes <= 0x7f)
- private unsafe int GetCharsCP52936(byte* bytes, int byteCount,
- char* chars, int charCount, ISO2022Decoder decoder)
- {
- Debug.Assert(byteCount >=0, "[ISO2022Encoding.GetCharsCP52936]count >=0");
- Debug.Assert(bytes!=null, "[ISO2022Encoding.GetCharsCP52936]bytes!=null");
-
- // Get our info.
- Encoding.EncodingCharBuffer buffer = new Encoding.EncodingCharBuffer(
- this, decoder, chars, charCount, bytes, byteCount);
-
- // No mode information yet
- ISO2022Modes currentMode = ISO2022Modes.ModeASCII;
- int byteLeftOver = -1;
- bool bUsedDecoder = false;
-
- if (decoder != null)
- {
- currentMode = decoder.currentMode;
- // See if we have leftover decoder buffer to use
- // Don't want to mess up decoder if we're counting or throw an exception
- if (decoder.bytesLeftOverCount != 0 )
- {
- // Load our bytesLeftOver
- byteLeftOver = decoder.bytesLeftOver[0];
- }
- }
-
- // Do this until the end, just do '?' replacement because we don't have fallbacks for decodings.
- while (buffer.MoreData || byteLeftOver >= 0)
- {
- byte ch;
-
- // May have a left over byte
- if (byteLeftOver >= 0)
- {
- ch = (byte)byteLeftOver;
- byteLeftOver = -1;
- }
- else
- {
- ch = buffer.GetNextByte();
- }
-
- // We're in escape mode
- if (ch == '~')
- {
- // Next char is type of switch
- if (!buffer.MoreData)
- {
- // We don't have anything left, it'll be in decoder or a ?
- // don't fail if we are allowing overflows
- if (decoder == null || decoder.MustFlush)
- {
- // We'll be a '?'
- buffer.Fallback(ch);
- // break if we fail & break if we don't (because !MoreData)
- // Add succeeded, continue
- break;
- }
-
- // Stick it in decoder
- if (decoder != null)
- decoder.ClearMustFlush();
-
- if (chars != null)
- {
- decoder.bytesLeftOverCount = 1;
- decoder.bytesLeftOver[0] = (byte)'~';
- bUsedDecoder = true;
- }
- break;
- }
-
- // What type is it?, get 2nd byte
- ch = buffer.GetNextByte();
-
- if (ch == '~' && currentMode == ISO2022Modes.ModeASCII)
- {
- // Its just a ~~ replacement for ~, add it
- if (!buffer.AddChar((char)ch, 2))
- // Add failed, break for converting
- break;
-
- // Add succeeded, continue
- continue;
- }
- else if (ch == '{')
- {
- // Switching to Double Byte mode
- currentMode = ISO2022Modes.ModeHZ;
- continue;
- }
- else if (ch == '}')
- {
- // Switching to ASCII mode
- currentMode = ISO2022Modes.ModeASCII;
- continue;
- }
- else if (ch == '\n')
- {
- // Ignore ~\n sequence
- continue;
- }
- else
- {
- // Unknown escape, back up and try the '~' as a "normal" byte or lead byte
- buffer.AdjustBytes(-1);
- ch = (byte)'~';
- }
- }
-
- // go ahead and add our data
- if (currentMode != ISO2022Modes.ModeASCII)
- {
- // Should be ModeHZ
- Debug.Assert(currentMode == ISO2022Modes.ModeHZ, "[ISO2022Encoding.GetCharsCP52936]Expected ModeHZ");
- char cm;
-
- // Everett allowed characters < 0x20 to be passed as if they were ASCII
- if (ch < 0x20)
- {
- // Emit it as ASCII
- goto STOREASCII;
- }
-
- // Its multibyte, should have another byte
- if (!buffer.MoreData)
- {
- // No bytes left
- // don't fail if we are allowing overflows
- if (decoder == null || decoder.MustFlush)
- {
- // Not enough bytes, fallback lead byte
- buffer.Fallback(ch);
-
- // Break if we fail & break because !MoreData
- break;
- }
-
- if (decoder != null)
- decoder.ClearMustFlush();
-
- // Stick it in decoder
- if (chars != null)
- {
- decoder.bytesLeftOverCount = 1;
- decoder.bytesLeftOver[0] = ch;
- bUsedDecoder = true;
- }
- break;
- }
-
- // Everett uses space as an escape character for single SBCS bytes
- byte ch2 = buffer.GetNextByte();
- ushort iBytes = (ushort)(ch << 8 | ch2);
-
- if (ch == ' ' && ch2 != 0)
- {
- // Get next char and treat it like ASCII (Everett treated space like an escape
- // allowing the next char to be just ascii)
- cm = (char)ch2;
- goto STOREMULTIBYTE;
- }
-
- // Bytes should be in range: lead byte 0x21-0x77, trail byte: 0x21 - 0x7e
- if ((ch < 0x21 || ch > 0x77 || ch2 < 0x21 || ch2 > 0x7e) &&
- // Everett allowed high bit mappings for same characters (but only if both bits set)
- (ch < 0xa1 || ch > 0xf7 || ch2 < 0xa1 || ch2 > 0xfe))
- {
- // For some reason Everett allowed XX20 to become unicode 3000... (ideo sp)
- if (ch2 == 0x20 && 0x21 <= ch && ch <= 0x7d)
- {
- iBytes = 0x2121;
- goto MULTIBYTE;
- }
-
- // Illegal char, use fallback. If lead byte is 0 have to do it special and do it first
- if (!buffer.Fallback((byte)(iBytes>>8), (byte)(iBytes)))
- break;
- continue;
- }
-
- MULTIBYTE:
- iBytes |= 0x8080;
- // Look up the multibyte char to stick it in our data
-
- // We have a iBytes to try to convert.
- cm = mapBytesToUnicode[iBytes];
-
- STOREMULTIBYTE:
-
- // See if it was unknown
- if (cm == UNKNOWN_CHAR_FLAG && iBytes != 0)
- {
- // Fall back the unknown stuff
- if (!buffer.Fallback((byte)(iBytes>>8), (byte)(iBytes)))
- break;
- continue;
- }
-
- if (!buffer.AddChar(cm, 2))
- break; // convert ran out of buffer, stop
- continue;
- }
-
- // Just ASCII
- // We allow some chars > 7f because everett did, so we have to look them up.
- STOREASCII:
- char c = mapBytesToUnicode[ch];
-
- // Check if it was unknown
- if ((c == UNKNOWN_CHAR_FLAG || c == 0) && (ch != 0))
- {
- // fallback the unkown bytes
- if (!buffer.Fallback((byte)ch))
- break;
- continue;
- }
-
- // Go ahead and add our ASCII character
- if (!buffer.AddChar(c))
- break; // convert ran out of buffer, stop
- }
-
- // Need to remember our state, IF we're not counting
- if (chars != null && decoder != null)
- {
- if (!bUsedDecoder)
- {
- // If we didn't use it, clear the byte left over
- decoder.bytesLeftOverCount = 0;
- }
-
- if (decoder.MustFlush && decoder.bytesLeftOverCount == 0)
- {
- decoder.currentMode = ISO2022Modes.ModeASCII;
- }
- else
- {
- // Either not flushing or had state (from convert)
- Debug.Assert(!decoder.MustFlush || !decoder.m_throwOnOverflow,
- "[ISO2022Encoding.GetCharsCP52936]Expected no state or not converting or not flushing");
-
- decoder.currentMode = currentMode;
- }
- decoder.m_bytesUsed = buffer.BytesUsed;
- }
-
- // Return # of characters we found
- return buffer.Count;
- }
-
- // Note: These all end up with 1/2 bytes of average byte count, so unless we're 1 we're always
- // charCount/2 bytes too big.
- public override int GetMaxByteCount(int charCount)
- {
- if (charCount < 0)
- throw new ArgumentOutOfRangeException(nameof(charCount),
- Environment.GetResourceString("ArgumentOutOfRange_NeedNonNegNum"));
- Contract.EndContractBlock();
-
- // Characters would be # of characters + 1 in case high surrogate is ? * max fallback
- long byteCount = (long)charCount + 1;
-
- if (EncoderFallback.MaxCharCount > 1)
- byteCount *= EncoderFallback.MaxCharCount;
-
- // Start with just generic DBCS values (sort of).
- int perChar = 2;
- int extraStart = 0;
- int extraEnd = 0;
-
- switch (CodePage)
- {
- case 50220:
- case 50221:
- // 2 bytes per char + 3 bytes switch to JIS 0208 or 1 byte + 3 bytes switch to 1 byte CP
- perChar = 5; // 5 max (4.5 average)
- extraEnd = 3; // 3 bytes to shift back to ASCII
- break;
- case 50222:
- // 2 bytes per char + 3 bytes switch to JIS 0208 or 1 byte + 3 bytes switch to 1 byte CP
- perChar = 5; // 5 max (4.5 average)
- extraEnd = 4; // 1 byte to shift from Katakana -> DBCS, 3 bytes to shift back to ASCII from DBCS
- break;
- case 50225:
- // 2 bytes per char + 1 byte SO, or 1 byte per char + 1 byte SI.
- perChar = 3; // 3 max, (2.5 average)
- extraStart = 4; // EUC-KR marker appears at beginning of file.
- extraEnd = 1; // 1 byte to shift back to ascii if necessary.
- break;
- case 52936:
- // 2 bytes per char + 2 byte shift, or 1 byte + 1 byte shift
- // Worst case: left over surrogate with no low surrogate is extra ?, could have to switch to ASCII, then could have HZ and flush to ASCII mode
- perChar = 4; // 4 max, (3.5 average if every other char is HZ/ASCII)
- extraEnd = 2; // 2 if we have to shift back to ASCII
- break;
- }
-
- // Return our surrogate and End plus perChar for each char.
- byteCount *= perChar;
- byteCount += extraStart + extraEnd;
-
- if (byteCount > 0x7fffffff)
- throw new ArgumentOutOfRangeException(nameof(charCount), Environment.GetResourceString("ArgumentOutOfRange_GetByteCountOverflow"));
-
- return (int)byteCount;
- }
-
- public override int GetMaxCharCount(int byteCount)
- {
- if (byteCount < 0)
- throw new ArgumentOutOfRangeException(nameof(byteCount),
- Environment.GetResourceString("ArgumentOutOfRange_NeedNonNegNum"));
- Contract.EndContractBlock();
-
- int perChar = 1;
- int extraDecoder = 1;
-
- switch (CodePage)
- {
- case 50220:
- case 50221:
- case 50222:
- case 50225:
- perChar = 1; // Worst case all ASCII
- extraDecoder = 3; // Could have left over 3 chars of 4 char escape sequence, that all become ?
- break;
- case 52936:
- perChar = 1; // Worst case all ASCII
- extraDecoder = 1; // sequences are 2 chars, so if next one is illegal, then previous 1 could be ?
- break;
- }
-
- // Figure out our length, perchar * char + whatever extra our decoder could do to us.
- long charCount = ((long)byteCount * perChar) + extraDecoder;
-
- // Just in case we have to fall back unknown ones.
- if (DecoderFallback.MaxCharCount > 1)
- charCount *= DecoderFallback.MaxCharCount;
-
- if (charCount > 0x7fffffff)
- throw new ArgumentOutOfRangeException(nameof(byteCount), Environment.GetResourceString("ArgumentOutOfRange_GetCharCountOverflow"));
-
- return (int)charCount;
- }
-
- public override Encoder GetEncoder()
- {
- return new ISO2022Encoder(this);
- }
-
- public override Decoder GetDecoder()
- {
- return new ISO2022Decoder(this);
- }
-
- [Serializable]
- internal class ISO2022Encoder : System.Text.EncoderNLS
- {
- internal ISO2022Modes currentMode;
- internal ISO2022Modes shiftInOutMode;
-
- internal ISO2022Encoder(EncodingNLS encoding) : base(encoding)
- {
- // base calls reset
- }
-
- public override void Reset()
- {
- // Reset
- currentMode = ISO2022Modes.ModeASCII;
- shiftInOutMode = ISO2022Modes.ModeASCII;
- charLeftOver = (char)0;
- if (m_fallbackBuffer != null)
- m_fallbackBuffer.Reset();
- }
-
- // Anything left in our encoder?
- internal override bool HasState
- {
- get
- {
- // Don't check shift-out mode, it may be ascii (JP) or not (KR)
- return (this.charLeftOver != (char)0 ||
- currentMode != ISO2022Modes.ModeASCII);
- }
- }
- }
-
- [Serializable]
- internal class ISO2022Decoder : System.Text.DecoderNLS
- {
- internal byte[] bytesLeftOver;
- internal int bytesLeftOverCount;
- internal ISO2022Modes currentMode;
- internal ISO2022Modes shiftInOutMode;
-
- internal ISO2022Decoder(EncodingNLS encoding) : base(encoding)
- {
- // base calls reset
- }
-
- public override void Reset()
- {
- // Reset
- bytesLeftOverCount = 0;
- bytesLeftOver = new byte[4];
- currentMode = ISO2022Modes.ModeASCII;
- shiftInOutMode = ISO2022Modes.ModeASCII;
- if (m_fallbackBuffer != null)
- m_fallbackBuffer.Reset();
- }
-
- // Anything left in our decoder?
- internal override bool HasState
- {
- get
- {
- // If have bytes left over or not shifted back to ASCII then have problem
- return (this.bytesLeftOverCount != 0 ||
- currentMode != ISO2022Modes.ModeASCII);
- }
- }
- }
-
- static ushort[] HalfToFullWidthKanaTable =
- {
- 0xa1a3, // 0x8ea1 : Halfwidth Ideographic Period
- 0xa1d6, // 0x8ea2 : Halfwidth Opening Corner Bracket
- 0xa1d7, // 0x8ea3 : Halfwidth Closing Corner Bracket
- 0xa1a2, // 0x8ea4 : Halfwidth Ideographic Comma
- 0xa1a6, // 0x8ea5 : Halfwidth Katakana Middle Dot
- 0xa5f2, // 0x8ea6 : Halfwidth Katakana Wo
- 0xa5a1, // 0x8ea7 : Halfwidth Katakana Small A
- 0xa5a3, // 0x8ea8 : Halfwidth Katakana Small I
- 0xa5a5, // 0x8ea9 : Halfwidth Katakana Small U
- 0xa5a7, // 0x8eaa : Halfwidth Katakana Small E
- 0xa5a9, // 0x8eab : Halfwidth Katakana Small O
- 0xa5e3, // 0x8eac : Halfwidth Katakana Small Ya
- 0xa5e5, // 0x8ead : Halfwidth Katakana Small Yu
- 0xa5e7, // 0x8eae : Halfwidth Katakana Small Yo
- 0xa5c3, // 0x8eaf : Halfwidth Katakana Small Tu
- 0xa1bc, // 0x8eb0 : Halfwidth Katakana-Hiragana Prolonged Sound Mark
- 0xa5a2, // 0x8eb1 : Halfwidth Katakana A
- 0xa5a4, // 0x8eb2 : Halfwidth Katakana I
- 0xa5a6, // 0x8eb3 : Halfwidth Katakana U
- 0xa5a8, // 0x8eb4 : Halfwidth Katakana E
- 0xa5aa, // 0x8eb5 : Halfwidth Katakana O
- 0xa5ab, // 0x8eb6 : Halfwidth Katakana Ka
- 0xa5ad, // 0x8eb7 : Halfwidth Katakana Ki
- 0xa5af, // 0x8eb8 : Halfwidth Katakana Ku
- 0xa5b1, // 0x8eb9 : Halfwidth Katakana Ke
- 0xa5b3, // 0x8eba : Halfwidth Katakana Ko
- 0xa5b5, // 0x8ebb : Halfwidth Katakana Sa
- 0xa5b7, // 0x8ebc : Halfwidth Katakana Si
- 0xa5b9, // 0x8ebd : Halfwidth Katakana Su
- 0xa5bb, // 0x8ebe : Halfwidth Katakana Se
- 0xa5bd, // 0x8ebf : Halfwidth Katakana So
- 0xa5bf, // 0x8ec0 : Halfwidth Katakana Ta
- 0xa5c1, // 0x8ec1 : Halfwidth Katakana Ti
- 0xa5c4, // 0x8ec2 : Halfwidth Katakana Tu
- 0xa5c6, // 0x8ec3 : Halfwidth Katakana Te
- 0xa5c8, // 0x8ec4 : Halfwidth Katakana To
- 0xa5ca, // 0x8ec5 : Halfwidth Katakana Na
- 0xa5cb, // 0x8ec6 : Halfwidth Katakana Ni
- 0xa5cc, // 0x8ec7 : Halfwidth Katakana Nu
- 0xa5cd, // 0x8ec8 : Halfwidth Katakana Ne
- 0xa5ce, // 0x8ec9 : Halfwidth Katakana No
- 0xa5cf, // 0x8eca : Halfwidth Katakana Ha
- 0xa5d2, // 0x8ecb : Halfwidth Katakana Hi
- 0xa5d5, // 0x8ecc : Halfwidth Katakana Hu
- 0xa5d8, // 0x8ecd : Halfwidth Katakana He
- 0xa5db, // 0x8ece : Halfwidth Katakana Ho
- 0xa5de, // 0x8ecf : Halfwidth Katakana Ma
- 0xa5df, // 0x8ed0 : Halfwidth Katakana Mi
- 0xa5e0, // 0x8ed1 : Halfwidth Katakana Mu
- 0xa5e1, // 0x8ed2 : Halfwidth Katakana Me
- 0xa5e2, // 0x8ed3 : Halfwidth Katakana Mo
- 0xa5e4, // 0x8ed4 : Halfwidth Katakana Ya
- 0xa5e6, // 0x8ed5 : Halfwidth Katakana Yu
- 0xa5e8, // 0x8ed6 : Halfwidth Katakana Yo
- 0xa5e9, // 0x8ed7 : Halfwidth Katakana Ra
- 0xa5ea, // 0x8ed8 : Halfwidth Katakana Ri
- 0xa5eb, // 0x8ed9 : Halfwidth Katakana Ru
- 0xa5ec, // 0x8eda : Halfwidth Katakana Re
- 0xa5ed, // 0x8edb : Halfwidth Katakana Ro
- 0xa5ef, // 0x8edc : Halfwidth Katakana Wa
- 0xa5f3, // 0x8edd : Halfwidth Katakana N
- 0xa1ab, // 0x8ede : Halfwidth Katakana Voiced Sound Mark
- 0xa1ac // 0x8edf : Halfwidth Katakana Semi-Voiced Sound Mark
- };
- }
-}
-#endif // FEATURE_CODEPAGES_FILE
-
diff --git a/src/mscorlib/src/System/Text/Latin1Encoding.cs b/src/mscorlib/src/System/Text/Latin1Encoding.cs
index 56a6c1f949..26009bf6c0 100644
--- a/src/mscorlib/src/System/Text/Latin1Encoding.cs
+++ b/src/mscorlib/src/System/Text/Latin1Encoding.cs
@@ -13,7 +13,6 @@ namespace System.Text
using System.Collections;
using System.Runtime.CompilerServices;
using System.Runtime.Serialization;
- using System.Security.Permissions;
//
@@ -488,7 +487,6 @@ namespace System.Text
}
}
-#if !FEATURE_NORM_IDNA_ONLY
public override bool IsAlwaysNormalized(NormalizationForm form)
{
// Latin-1 contains precomposed characters, so normal for Form C.
@@ -498,7 +496,6 @@ namespace System.Text
// Only true for form C.
return (form == NormalizationForm.FormC);
}
-#endif // !FEATURE_NORM_IDNA_ONLY
// Since our best fit table is small we'll hard code it
internal override char[] GetBestFitUnicodeToBytesData()
{
diff --git a/src/mscorlib/src/System/Text/MLangCodePageEncoding.cs b/src/mscorlib/src/System/Text/MLangCodePageEncoding.cs
deleted file mode 100644
index a82db91b98..0000000000
--- a/src/mscorlib/src/System/Text/MLangCodePageEncoding.cs
+++ /dev/null
@@ -1,172 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-// See the LICENSE file in the project root for more information.
-
-
-// WARNING:
-//
-// This is just an IObjectReference proxy for the former MLang Encodings (V1.1)
-// We keep the old name now even for the Whidbey V2.0 IObjectReference because it also
-// works with the Everett V1.1 version.
-namespace System.Text
-{
- using System;
- using System.Runtime.Serialization;
- using System.Security.Permissions;
- using System.Diagnostics;
- using System.Diagnostics.Contracts;
-
- /*=================================MLangCodePageEncoding==================================
- ** This class is here only to deserialize the MLang classes from Everett (V1.1) into
- ** Appropriate Whidbey (V2.0) objects. We also serialize the Whidbey classes
- ** using this proxy since we pretty much need one anyway and that solves Whidbey
- ** to Everett compatibility as well.
- ==============================================================================*/
-
- [Serializable]
- internal sealed class MLangCodePageEncoding : IObjectReference, ISerializable
- {
- // Temp stuff
- [NonSerialized]
- private int m_codePage;
- [NonSerialized]
- private bool m_isReadOnly;
- [NonSerialized]
- private bool m_deserializedFromEverett = false;
-
- [NonSerialized]
- private EncoderFallback encoderFallback = null;
- [NonSerialized]
- private DecoderFallback decoderFallback = null;
-
- // Might need this when GetRealObjecting
- [NonSerialized]
- private Encoding realEncoding = null;
-
- // Constructor called by serialization.
- internal MLangCodePageEncoding(SerializationInfo info, StreamingContext context)
- {
- // Any info?
- if (info==null) throw new ArgumentNullException(nameof(info));
- Contract.EndContractBlock();
-
- // All versions have a code page
- this.m_codePage = (int)info.GetValue("m_codePage", typeof(int));
-
- // See if we have a code page
- try
- {
- //
- // Try Whidbey V2.0 Fields
- //
- this.m_isReadOnly = (bool)info.GetValue("m_isReadOnly", typeof(bool));
-
- this.encoderFallback = (EncoderFallback)info.GetValue("encoderFallback", typeof(EncoderFallback));
- this.decoderFallback = (DecoderFallback)info.GetValue("decoderFallback", typeof(DecoderFallback));
- }
- catch (SerializationException)
- {
- //
- // Didn't have Whidbey things, must be Everett
- //
- this.m_deserializedFromEverett = true;
-
- // May as well be read only
- this.m_isReadOnly = true;
- }
- }
-
- // Just get it from GetEncoding
- public Object GetRealObject(StreamingContext context)
- {
- // Get our encoding (Note: This has default fallbacks for readonly and everett cases)
- this.realEncoding = Encoding.GetEncoding(this.m_codePage);
-
- // If its read only then it uses default fallbacks, otherwise pick up the new ones
- // Otherwise we want to leave the new one read only
- if (!this.m_deserializedFromEverett && !this.m_isReadOnly)
- {
- this.realEncoding = (Encoding)this.realEncoding.Clone();
- this.realEncoding.EncoderFallback = this.encoderFallback;
- this.realEncoding.DecoderFallback = this.decoderFallback;
- }
-
- return this.realEncoding;
- }
-
- // ISerializable implementation
- void ISerializable.GetObjectData(SerializationInfo info, StreamingContext context)
- {
- // We cannot ever call this.
- Debug.Assert(false, "Didn't expect to make it to MLangCodePageEncoding ISerializable.GetObjectData");
- throw new ArgumentException(Environment.GetResourceString("Arg_ExecutionEngineException"));
- }
-
-// Same problem with the Encoder, this only happens with Everett Encoders
- [Serializable]
- internal sealed class MLangEncoder : IObjectReference, ISerializable
- {
- // Might need this when GetRealObjecting
- [NonSerialized]
- private Encoding realEncoding = null;
-
- // Constructor called by serialization, have to handle deserializing from Everett
- internal MLangEncoder(SerializationInfo info, StreamingContext context)
- {
- // Any info?
- if (info==null) throw new ArgumentNullException(nameof(info));
- Contract.EndContractBlock();
-
- this.realEncoding = (Encoding)info.GetValue("m_encoding", typeof(Encoding));
- }
-
- // Just get it from GetEncoder
- public Object GetRealObject(StreamingContext context)
- {
- return this.realEncoding.GetEncoder();
- }
-
- // ISerializable implementation, get data for this object
- void ISerializable.GetObjectData(SerializationInfo info, StreamingContext context)
- {
- // We cannot ever call this.
- Debug.Assert(false, "Didn't expect to make it to MLangCodePageEncoding.MLangEncoder.GetObjectData");
- throw new ArgumentException(Environment.GetResourceString("Arg_ExecutionEngineException"));
- }
- }
-
-
- // Same problem with the Decoder, this only happens with Everett Decoders
- [Serializable]
- internal sealed class MLangDecoder : IObjectReference, ISerializable
- {
- // Might need this when GetRealObjecting
- [NonSerialized]
- private Encoding realEncoding = null;
-
- // Constructor called by serialization, have to handle deserializing from Everett
- internal MLangDecoder(SerializationInfo info, StreamingContext context)
- {
- // Any info?
- if (info==null) throw new ArgumentNullException(nameof(info));
- Contract.EndContractBlock();
-
- this.realEncoding = (Encoding)info.GetValue("m_encoding", typeof(Encoding));
- }
-
- // Just get it from GetDecoder
- public Object GetRealObject(StreamingContext context)
- {
- return this.realEncoding.GetDecoder();
- }
-
- // ISerializable implementation, get data for this object
- void ISerializable.GetObjectData(SerializationInfo info, StreamingContext context)
- {
- // We cannot ever call this.
- Debug.Assert(false, "Didn't expect to make it to MLangCodePageEncoding.MLangDecoder.GetObjectData");
- throw new ArgumentException(Environment.GetResourceString("Arg_ExecutionEngineException"));
- }
- }
- }
-}
diff --git a/src/mscorlib/src/System/Text/Normalization.Windows.cs b/src/mscorlib/src/System/Text/Normalization.Windows.cs
index b2faf0db68..3bcba08934 100644
--- a/src/mscorlib/src/System/Text/Normalization.Windows.cs
+++ b/src/mscorlib/src/System/Text/Normalization.Windows.cs
@@ -21,19 +21,15 @@ namespace System.Text
//
// Flags that track whether given normalization form was initialized
//
-#if !FEATURE_NORM_IDNA_ONLY
private static volatile bool NFC;
private static volatile bool NFD;
private static volatile bool NFKC;
private static volatile bool NFKD;
-#endif // !FEATURE_NORM_IDNA_ONLY
private static volatile bool IDNA;
-#if !FEATURE_NORM_IDNA_ONLY
private static volatile bool NFCDisallowUnassigned;
private static volatile bool NFDDisallowUnassigned;
private static volatile bool NFKCDisallowUnassigned;
private static volatile bool NFKDDisallowUnassigned;
-#endif // !FEATURE_NORM_IDNA_ONLY
private static volatile bool IDNADisallowUnassigned;
private static volatile bool Other;
@@ -77,7 +73,6 @@ namespace System.Text
{
switch ((ExtendedNormalizationForms)form)
{
-#if !FEATURE_NORM_IDNA_ONLY
case ExtendedNormalizationForms.FormC:
if (NFC) return;
InitializeForm(form, "normnfc.nlp");
@@ -101,7 +96,6 @@ namespace System.Text
InitializeForm(form, "normnfkd.nlp");
NFKD = true;
break;
-#endif // !FEATURE_NORM_IDNA_ONLY
case ExtendedNormalizationForms.FormIdna:
if (IDNA) return;
@@ -109,7 +103,6 @@ namespace System.Text
IDNA = true;
break;
-#if !FEATURE_NORM_IDNA_ONLY
case ExtendedNormalizationForms.FormCDisallowUnassigned:
if (NFCDisallowUnassigned) return;
InitializeForm(form, "normnfc.nlp");
@@ -133,7 +126,6 @@ namespace System.Text
InitializeForm(form, "normnfkd.nlp");
NFKDDisallowUnassigned = true;
break;
-#endif // !FEATURE_NORM_IDNA_ONLY
case ExtendedNormalizationForms.FormIdnaDisallowUnassigned:
if (IDNADisallowUnassigned) return;
diff --git a/src/mscorlib/src/System/Text/Normalization.cs b/src/mscorlib/src/System/Text/Normalization.cs
index e7e733a587..c81149d59a 100644
--- a/src/mscorlib/src/System/Text/Normalization.cs
+++ b/src/mscorlib/src/System/Text/Normalization.cs
@@ -5,32 +5,25 @@
namespace System.Text
{
// This is the enumeration for Normalization Forms
-[System.Runtime.InteropServices.ComVisible(true)]
public enum NormalizationForm
{
-#if !FEATURE_NORM_IDNA_ONLY
FormC = 1,
FormD = 2,
FormKC = 5,
FormKD = 6
-#endif // !FEATURE_NORM_IDNA_ONLY
}
internal enum ExtendedNormalizationForms
{
-#if !FEATURE_NORM_IDNA_ONLY
FormC = 1,
FormD = 2,
FormKC = 5,
FormKD = 6,
-#endif // !FEATURE_NORM_IDNA_ONLY
FormIdna = 0xd,
-#if !FEATURE_NORM_IDNA_ONLY
FormCDisallowUnassigned = 0x101,
FormDDisallowUnassigned = 0x102,
FormKCDisallowUnassigned = 0x105,
FormKDDisallowUnassigned = 0x106,
-#endif // !FEATURE_NORM_IDNA_ONLY
FormIdnaDisallowUnassigned = 0x10d
}
}
diff --git a/src/mscorlib/src/System/Text/SBCSCodePageEncoding.cs b/src/mscorlib/src/System/Text/SBCSCodePageEncoding.cs
deleted file mode 100644
index 8b07149fb7..0000000000
--- a/src/mscorlib/src/System/Text/SBCSCodePageEncoding.cs
+++ /dev/null
@@ -1,1009 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-// See the LICENSE file in the project root for more information.
-
-#if FEATURE_CODEPAGES_FILE // requires BaseCodePageEncooding
-namespace System.Text
-{
- using System;
- using System.Diagnostics;
- using System.Diagnostics.Contracts;
- using System.Text;
- using System.Threading;
- using System.Globalization;
- using System.Runtime.Serialization;
- using System.Security;
- using System.Security.Permissions;
-
- // SBCSCodePageEncoding
- [Serializable]
- internal class SBCSCodePageEncoding : BaseCodePageEncoding, ISerializable
- {
- // Pointers to our memory section parts
- [NonSerialized]
- unsafe char* mapBytesToUnicode = null; // char 256
- [NonSerialized]
- unsafe byte* mapUnicodeToBytes = null; // byte 65536
- [NonSerialized]
- unsafe int* mapCodePageCached = null; // to remember which CP is cached
-
- const char UNKNOWN_CHAR=(char)0xFFFD;
-
- // byteUnknown is used for default fallback only
- [NonSerialized]
- byte byteUnknown;
- [NonSerialized]
- char charUnknown;
-
- public SBCSCodePageEncoding(int codePage) : this(codePage, codePage)
- {
- }
-
- internal SBCSCodePageEncoding(int codePage, int dataCodePage) : base(codePage, dataCodePage)
- {
- }
-
- // Constructor called by serialization.
- // Note: We use the base GetObjectData however
- internal SBCSCodePageEncoding(SerializationInfo info, StreamingContext context) : base(0)
- {
- // Actually this can't ever get called, CodePageEncoding is our proxy
- Debug.Assert(false, "Didn't expect to make it to SBCSCodePageEncoding serialization constructor");
- throw new ArgumentNullException("this");
- }
-
- // We have a managed code page entry, so load our tables
- // SBCS data section looks like:
- //
- // char[256] - what each byte maps to in unicode. No support for surrogates. 0 is undefined code point
- // (except 0 for byte 0 is expected to be a real 0)
- //
- // byte/char* - Data for best fit (unicode->bytes), again no best fit for Unicode
- // 1st WORD is Unicode // of 1st character position
- // Next bytes are best fit byte for that position. Position is incremented after each byte
- // byte < 0x20 means skip the next n positions. (Where n is the byte #)
- // byte == 1 means that next word is another unicode code point #
- // byte == 0 is unknown. (doesn't override initial WCHAR[256] table!
- protected override unsafe void LoadManagedCodePage()
- {
- // Should be loading OUR code page
- Debug.Assert(pCodePage->CodePage == this.dataTableCodePage,
- "[SBCSCodePageEncoding.LoadManagedCodePage]Expected to load data table code page");
-
- // Make sure we're really a 1 byte code page
- if (pCodePage->ByteCount != 1)
- throw new NotSupportedException(
- Environment.GetResourceString("NotSupported_NoCodepageData", CodePage));
-
- // Remember our unknown bytes & chars
- byteUnknown = (byte)pCodePage->ByteReplace;
- charUnknown = pCodePage->UnicodeReplace;
-
- // Get our mapped section 65536 bytes for unicode->bytes, 256 * 2 bytes for bytes->unicode
- // Plus 4 byte to remember CP # when done loading it. (Don't want to get IA64 or anything out of alignment)
- byte *pMemorySection = GetSharedMemory(65536*1 + 256*2 + 4 + iExtraBytes);
-
- mapBytesToUnicode = (char*)pMemorySection;
- mapUnicodeToBytes = (byte*)(pMemorySection + 256 * 2);
- mapCodePageCached = (int*)(pMemorySection + 256 * 2 + 65536 * 1 + iExtraBytes);
-
- // If its cached (& filled in) we don't have to do anything else
- if (*mapCodePageCached != 0)
- {
- Debug.Assert(*mapCodePageCached == this.dataTableCodePage,
- "[DBCSCodePageEncoding.LoadManagedCodePage]Expected mapped section cached page to be same as data table code page. Cached : " +
- *mapCodePageCached + " Expected:" + this.dataTableCodePage);
-
- if (*mapCodePageCached != this.dataTableCodePage)
- throw new OutOfMemoryException(
- Environment.GetResourceString("Arg_OutOfMemoryException"));
-
- // If its cached (& filled in) we don't have to do anything else
- return;
- }
-
- // Need to read our data file and fill in our section.
- // WARNING: Multiple code pieces could do this at once (so we don't have to lock machine-wide)
- // so be careful here. Only stick legal values in here, don't stick temporary values.
-
- // Read our data file and set mapBytesToUnicode and mapUnicodeToBytes appropriately
- // First table is just all 256 mappings
- char* pTemp = (char*)&(pCodePage->FirstDataWord);
- for (int b = 0; b < 256; b++)
- {
- // Don't want to force 0's to map Unicode wrong. 0 byte == 0 unicode already taken care of
- if (pTemp[b] != 0 || b == 0)
- {
- mapBytesToUnicode[b] = pTemp[b];
-
- if (pTemp[b] != UNKNOWN_CHAR)
- mapUnicodeToBytes[pTemp[b]] = (byte)b;
- }
- else
- {
- mapBytesToUnicode[b] = UNKNOWN_CHAR;
- }
- }
-
- // We're done with our mapped section, set our flag so others don't have to rebuild table.
- *mapCodePageCached = this.dataTableCodePage;
- }
-
- // Private object for locking instead of locking on a public type for SQL reliability work.
- private static Object s_InternalSyncObject;
- private static Object InternalSyncObject
- {
- get
- {
- if (s_InternalSyncObject == null)
- {
- Object o = new Object();
- Interlocked.CompareExchange<Object>(ref s_InternalSyncObject, o, null);
- }
- return s_InternalSyncObject;
- }
- }
-
- // Read in our best fit table
- protected unsafe override void ReadBestFitTable()
- {
- // Lock so we don't confuse ourselves.
- lock(InternalSyncObject)
- {
- // If we got a best fit array already, then don't do this
- if (arrayUnicodeBestFit == null)
- {
- //
- // Read in Best Fit table.
- //
-
- // First check the SBCS->Unicode best fit table, which starts right after the
- // 256 word data table. This table looks like word, word where 1st word is byte and 2nd
- // word is replacement for that word. It ends when byte == 0.
- byte* pData = (byte*)&(pCodePage->FirstDataWord);
- pData += 512;
-
- // Need new best fit array
- char[] arrayTemp = new char[256];
- for (int i = 0; i < 256; i++)
- arrayTemp[i] = mapBytesToUnicode[i];
-
- // See if our words are zero
- ushort byteTemp;
- while ((byteTemp = *((ushort*)pData)) != 0)
- {
-
- Debug.Assert(arrayTemp[byteTemp] == UNKNOWN_CHAR, String.Format(CultureInfo.InvariantCulture,
- "[SBCSCodePageEncoding::ReadBestFitTable] Expected unallocated byte (not 0x{2:X2}) for best fit byte at 0x{0:X2} for code page {1}",
- byteTemp, CodePage, (int)arrayTemp[byteTemp]));
- pData += 2;
-
- arrayTemp[byteTemp] = *((char*)pData);
- pData += 2;
- }
-
- // Remember our new array
- arrayBytesBestFit = arrayTemp;
-
- // It was on 0, it needs to be on next byte
- pData+=2;
- byte* pUnicodeToSBCS = pData;
-
- // Now count our characters from our Unicode->SBCS best fit table,
- // which is right after our 256 byte data table
- int iBestFitCount = 0;
-
- // Now do the UnicodeToBytes Best Fit mapping (this is the one we normally think of when we say "best fit")
- // pData should be pointing at the first data point for Bytes->Unicode table
- int unicodePosition = *((ushort*)pData);
- pData += 2;
-
- while (unicodePosition < 0x10000)
- {
- // Get the next byte
- byte input = *pData;
- pData++;
-
- // build our table:
- if (input == 1)
- {
- // Use next 2 bytes as our byte position
- unicodePosition = *((ushort*)pData);
- pData+=2;
- }
- else if (input < 0x20 && input > 0 && input != 0x1e)
- {
- // Advance input characters
- unicodePosition += input;
- }
- else
- {
- // Use this character if it isn't zero
- if (input > 0)
- iBestFitCount++;
-
- // skip this unicode position in any case
- unicodePosition++;
- }
- }
-
- // Make an array for our best fit data
- arrayTemp = new char[iBestFitCount*2];
-
- // Now actually read in the data
- // reset pData should be pointing at the first data point for Bytes->Unicode table
- pData = pUnicodeToSBCS;
- unicodePosition = *((ushort*)pData);
- pData += 2;
- iBestFitCount = 0;
-
- while (unicodePosition < 0x10000)
- {
- // Get the next byte
- byte input = *pData;
- pData++;
-
- // build our table:
- if (input == 1)
- {
- // Use next 2 bytes as our byte position
- unicodePosition = *((ushort*)pData);
- pData+=2;
- }
- else if (input < 0x20 && input > 0 && input != 0x1e)
- {
- // Advance input characters
- unicodePosition += input;
- }
- else
- {
- // Check for escape for glyph range
- if (input == 0x1e)
- {
- // Its an escape, so just read next byte directly
- input = *pData;
- pData++;
- }
-
- // 0 means just skip me
- if (input > 0)
- {
- // Use this character
- arrayTemp[iBestFitCount++] = (char)unicodePosition;
- // Have to map it to Unicode because best fit will need unicode value of best fit char.
- arrayTemp[iBestFitCount++] = mapBytesToUnicode[input];
-
- // This won't work if it won't round trip.
- Debug.Assert(arrayTemp[iBestFitCount-1] != (char)0,
- String.Format(CultureInfo.InvariantCulture,
- "[SBCSCodePageEncoding.ReadBestFitTable] No valid Unicode value {0:X4} for round trip bytes {1:X4}, encoding {2}",
- (int)mapBytesToUnicode[input], (int)input, CodePage));
- }
- unicodePosition++;
- }
- }
-
- // Remember it
- arrayUnicodeBestFit = arrayTemp;
- }
- }
- }
-
- // GetByteCount
- // Note: We start by assuming that the output will be the same as count. Having
- // an encoder or fallback may change that assumption
- internal override unsafe int GetByteCount(char* chars, int count, EncoderNLS encoder)
- {
- // Just need to ASSERT, this is called by something else internal that checked parameters already
- Debug.Assert(count >= 0, "[SBCSCodePageEncoding.GetByteCount]count is negative");
- Debug.Assert(chars != null, "[SBCSCodePageEncoding.GetByteCount]chars is null");
-
- // Assert because we shouldn't be able to have a null encoder.
- Debug.Assert(encoderFallback != null, "[SBCSCodePageEncoding.GetByteCount]Attempting to use null fallback");
-
- CheckMemorySection();
-
- // Need to test fallback
- EncoderReplacementFallback fallback = null;
-
- // Get any left over characters
- char charLeftOver = (char)0;
- if (encoder != null)
- {
- charLeftOver = encoder.charLeftOver;
- Debug.Assert(charLeftOver == 0 || Char.IsHighSurrogate(charLeftOver),
- "[SBCSCodePageEncoding.GetByteCount]leftover character should be high surrogate");
- fallback = encoder.Fallback as EncoderReplacementFallback;
-
- // Verify that we have no fallbackbuffer, actually for SBCS this is always empty, so just assert
- Debug.Assert(!encoder.m_throwOnOverflow || !encoder.InternalHasFallbackBuffer ||
- encoder.FallbackBuffer.Remaining == 0,
- "[SBCSCodePageEncoding.GetByteCount]Expected empty fallback buffer at start");
- }
- else
- {
- // If we aren't using default fallback then we may have a complicated count.
- fallback = this.EncoderFallback as EncoderReplacementFallback;
- }
-
- if ((fallback != null && fallback.MaxCharCount == 1)/* || bIsBestFit*/)
- {
- // Replacement fallback encodes surrogate pairs as two ?? (or two whatever), so return size is always
- // same as input size.
- // Note that no existing SBCS code pages map code points to supplimentary characters, so this is easy.
-
- // We could however have 1 extra byte if the last call had an encoder and a funky fallback and
- // if we don't use the funky fallback this time.
-
- // Do we have an extra char left over from last time?
- if (charLeftOver > 0)
- count++;
-
- return (count);
- }
-
- // It had a funky fallback, so its more complicated
- // Need buffer maybe later
- EncoderFallbackBuffer fallbackBuffer = null;
-
- // prepare our end
- int byteCount = 0;
- char* charEnd = chars + count;
-
- // We may have a left over character from last time, try and process it.
- if (charLeftOver > 0)
- {
- // Since left over char was a surrogate, it'll have to be fallen back.
- // Get Fallback
- Debug.Assert(encoder != null, "[SBCSCodePageEncoding.GetByteCount]Expect to have encoder if we have a charLeftOver");
- fallbackBuffer = encoder.FallbackBuffer;
- fallbackBuffer.InternalInitialize(chars, charEnd, encoder, false);
-
- // This will fallback a pair if *chars is a low surrogate
- fallbackBuffer.InternalFallback(charLeftOver, ref chars);
- }
-
- // Now we may have fallback char[] already from the encoder
-
- // Go ahead and do it, including the fallback.
- char ch;
- while ((ch = (fallbackBuffer == null) ? '\0' : fallbackBuffer.InternalGetNextChar()) != 0 ||
- chars < charEnd)
- {
- // First unwind any fallback
- if (ch == 0)
- {
- // No fallback, just get next char
- ch = *chars;
- chars++;
- }
-
- // get byte for this char
- byte bTemp = mapUnicodeToBytes[ch];
-
- // Check for fallback, this'll catch surrogate pairs too.
- if (bTemp == 0 && ch != (char)0)
- {
- if (fallbackBuffer == null)
- {
- // Create & init fallback buffer
- if (encoder == null)
- fallbackBuffer = this.encoderFallback.CreateFallbackBuffer();
- else
- fallbackBuffer = encoder.FallbackBuffer;
-
- // chars has moved so we need to remember figure it out so Exception fallback
- // index will be correct
- fallbackBuffer.InternalInitialize(charEnd - count, charEnd, encoder, false);
- }
-
- // Get Fallback
- fallbackBuffer.InternalFallback(ch, ref chars);
- continue;
- }
-
- // We'll use this one
- byteCount++;
- }
-
- Debug.Assert(fallbackBuffer == null || fallbackBuffer.Remaining == 0,
- "[SBCSEncoding.GetByteCount]Expected Empty fallback buffer at end");
-
- return (int)byteCount;
- }
-
- internal override unsafe int GetBytes(char* chars, int charCount,
- byte* bytes, int byteCount, EncoderNLS encoder)
- {
- // Just need to ASSERT, this is called by something else internal that checked parameters already
- Debug.Assert(bytes != null, "[SBCSCodePageEncoding.GetBytes]bytes is null");
- Debug.Assert(byteCount >= 0, "[SBCSCodePageEncoding.GetBytes]byteCount is negative");
- Debug.Assert(chars != null, "[SBCSCodePageEncoding.GetBytes]chars is null");
- Debug.Assert(charCount >= 0, "[SBCSCodePageEncoding.GetBytes]charCount is negative");
-
- // Assert because we shouldn't be able to have a null encoder.
- Debug.Assert(encoderFallback != null, "[SBCSCodePageEncoding.GetBytes]Attempting to use null encoder fallback");
-
- CheckMemorySection();
-
- // Need to test fallback
- EncoderReplacementFallback fallback = null;
-
- // Get any left over characters
- char charLeftOver = (char)0;
- if (encoder != null)
- {
- charLeftOver = encoder.charLeftOver;
- Debug.Assert(charLeftOver == 0 || Char.IsHighSurrogate(charLeftOver),
- "[SBCSCodePageEncoding.GetBytes]leftover character should be high surrogate");
- fallback = encoder.Fallback as EncoderReplacementFallback;
-
- // Verify that we have no fallbackbuffer, for SBCS its always empty, so just assert
- Debug.Assert(!encoder.m_throwOnOverflow || !encoder.InternalHasFallbackBuffer ||
- encoder.FallbackBuffer.Remaining == 0,
- "[SBCSCodePageEncoding.GetBytes]Expected empty fallback buffer at start");
-// if (encoder.m_throwOnOverflow && encoder.InternalHasFallbackBuffer &&
-// encoder.FallbackBuffer.Remaining > 0)
-// throw new ArgumentException(Environment.GetResourceString("Argument_EncoderFallbackNotEmpty",
-// this.EncodingName, encoder.Fallback.GetType()));
- }
- else
- {
- // If we aren't using default fallback then we may have a complicated count.
- fallback = this.EncoderFallback as EncoderReplacementFallback;
- }
-
- // prepare our end
- char* charEnd = chars + charCount;
- byte* byteStart = bytes;
- char* charStart = chars;
-
- // See if we do the fast default or slightly slower fallback
- if (fallback != null && fallback.MaxCharCount == 1)
- {
- // Make sure our fallback character is valid first
- byte bReplacement = mapUnicodeToBytes[fallback.DefaultString[0]];
-
- // Check for replacements in range, otherwise fall back to slow version.
- if (bReplacement != 0)
- {
- // We should have exactly as many output bytes as input bytes, unless there's a left
- // over character, in which case we may need one more.
-
- // If we had a left over character will have to add a ? (This happens if they had a funky
- // fallback last time, but not this time.) (We can't spit any out though
- // because with fallback encoder each surrogate is treated as a seperate code point)
- if (charLeftOver > 0)
- {
- // Have to have room
- // Throw even if doing no throw version because this is just 1 char,
- // so buffer will never be big enough
- if (byteCount == 0)
- ThrowBytesOverflow(encoder, true);
-
- // This'll make sure we still have more room and also make sure our return value is correct.
- *(bytes++) = bReplacement;
- byteCount--; // We used one of the ones we were counting.
- }
-
- // This keeps us from overrunning our output buffer
- if (byteCount < charCount)
- {
- // Throw or make buffer smaller?
- ThrowBytesOverflow(encoder, byteCount < 1);
-
- // Just use what we can
- charEnd = chars + byteCount;
- }
-
- // Simple way
- while (chars < charEnd)
- {
- char ch2 = *chars;
- chars++;
-
- byte bTemp = mapUnicodeToBytes[ch2];
-
- // Check for fallback
- if (bTemp == 0 && ch2 != (char)0)
- *bytes = bReplacement;
- else
- *bytes = bTemp;
-
- bytes++;
- }
-
- // Clear encoder
- if (encoder != null)
- {
- encoder.charLeftOver = (char)0;
- encoder.m_charsUsed = (int)(chars-charStart);
- }
- return (int)(bytes - byteStart);
- }
- }
-
- // Slower version, have to do real fallback.
-
- // For fallback we may need a fallback buffer, we know we aren't default fallback
- EncoderFallbackBuffer fallbackBuffer = null;
-
- // prepare our end
- byte* byteEnd = bytes + byteCount;
-
- // We may have a left over character from last time, try and process it.
- if (charLeftOver > 0)
- {
- // Since left over char was a surrogate, it'll have to be fallen back.
- // Get Fallback
- Debug.Assert(encoder != null, "[SBCSCodePageEncoding.GetBytes]Expect to have encoder if we have a charLeftOver");
- fallbackBuffer = encoder.FallbackBuffer;
- fallbackBuffer.InternalInitialize(chars, charEnd, encoder, true);
-
- // This will fallback a pair if *chars is a low surrogate
- fallbackBuffer.InternalFallback(charLeftOver, ref chars);
- if (fallbackBuffer.Remaining > byteEnd - bytes)
- {
- // Throw it, if we don't have enough for this we never will
- ThrowBytesOverflow(encoder, true);
- }
- }
-
- // Now we may have fallback char[] already from the encoder fallback above
-
- // Go ahead and do it, including the fallback.
- char ch;
- while ((ch = (fallbackBuffer == null) ? '\0' : fallbackBuffer.InternalGetNextChar()) != 0 ||
- chars < charEnd)
- {
- // First unwind any fallback
- if (ch == 0)
- {
- // No fallback, just get next char
- ch = *chars;
- chars++;
- }
-
- // get byte for this char
- byte bTemp = mapUnicodeToBytes[ch];
-
- // Check for fallback, this'll catch surrogate pairs too.
- if (bTemp == 0 && ch != (char)0)
- {
- // Get Fallback
- if ( fallbackBuffer == null )
- {
- // Create & init fallback buffer
- if (encoder == null)
- fallbackBuffer = this.encoderFallback.CreateFallbackBuffer();
- else
- fallbackBuffer = encoder.FallbackBuffer;
- // chars has moved so we need to remember figure it out so Exception fallback
- // index will be correct
- fallbackBuffer.InternalInitialize(charEnd - charCount, charEnd, encoder, true);
- }
-
- // Make sure we have enough room. Each fallback char will be 1 output char
- // (or recursion exception will be thrown)
- fallbackBuffer.InternalFallback(ch, ref chars);
- if (fallbackBuffer.Remaining > byteEnd - bytes)
- {
- // Didn't use this char, reset it
- Debug.Assert(chars > charStart,
- "[SBCSCodePageEncoding.GetBytes]Expected chars to have advanced (fallback)");
- chars--;
- fallbackBuffer.InternalReset();
-
- // Throw it & drop this data
- ThrowBytesOverflow(encoder, chars == charStart);
- break;
- }
- continue;
- }
-
- // We'll use this one
- // Bounds check
- if (bytes >= byteEnd)
- {
- // didn't use this char, we'll throw or use buffer
- Debug.Assert(fallbackBuffer == null || fallbackBuffer.bFallingBack == false,
- "[SBCSCodePageEncoding.GetBytes]Expected to NOT be falling back");
- if (fallbackBuffer == null || fallbackBuffer.bFallingBack == false)
- {
- Debug.Assert(chars > charStart,
- "[SBCSCodePageEncoding.GetBytes]Expected chars to have advanced (normal)");
- chars--; // don't use last char
- }
- ThrowBytesOverflow(encoder, chars == charStart); // throw ?
- break; // don't throw, stop
- }
-
- // Go ahead and add it
- *bytes = bTemp;
- bytes++;
- }
-
- // encoder stuff if we have one
- if (encoder != null)
- {
- // Fallback stuck it in encoder if necessary, but we have to clear MustFlush cases
- if (fallbackBuffer != null && !fallbackBuffer.bUsedEncoder)
- // Clear it in case of MustFlush
- encoder.charLeftOver = (char)0;
-
- // Set our chars used count
- encoder.m_charsUsed = (int)(chars - charStart);
- }
-
- // Expect Empty fallback buffer for SBCS
- Debug.Assert(fallbackBuffer == null || fallbackBuffer.Remaining == 0,
- "[SBCSEncoding.GetBytes]Expected Empty fallback buffer at end");
-
- return (int)(bytes - byteStart);
- }
-
- // This is internal and called by something else,
- internal override unsafe int GetCharCount(byte* bytes, int count, DecoderNLS decoder)
- {
- // Just assert, we're called internally so these should be safe, checked already
- Debug.Assert(bytes != null, "[SBCSCodePageEncoding.GetCharCount]bytes is null");
- Debug.Assert(count >= 0, "[SBCSCodePageEncoding.GetCharCount]byteCount is negative");
-
- CheckMemorySection();
-
- // See if we have best fit
- bool bUseBestFit = false;
-
- // Only need decoder fallback buffer if not using default replacement fallback or best fit fallback.
- DecoderReplacementFallback fallback = null;
-
- if (decoder == null)
- {
- fallback = this.DecoderFallback as DecoderReplacementFallback;
- bUseBestFit = this.DecoderFallback.IsMicrosoftBestFitFallback;
- }
- else
- {
- fallback = decoder.Fallback as DecoderReplacementFallback;
- bUseBestFit = decoder.Fallback.IsMicrosoftBestFitFallback;
- Debug.Assert(!decoder.m_throwOnOverflow || !decoder.InternalHasFallbackBuffer ||
- decoder.FallbackBuffer.Remaining == 0,
- "[SBCSCodePageEncoding.GetChars]Expected empty fallback buffer at start");
- }
-
- if (bUseBestFit || (fallback != null && fallback.MaxCharCount == 1))
- {
- // Just return length, SBCS stay the same length because they don't map to surrogate
- // pairs and we don't have a decoder fallback.
- return count;
- }
-
- // Might need one of these later
- DecoderFallbackBuffer fallbackBuffer = null;
-
- // Have to do it the hard way.
- // Assume charCount will be == count
- int charCount = count;
- byte[] byteBuffer = new byte[1];
-
- // Do it our fast way
- byte* byteEnd = bytes + count;
-
- // Quick loop
- while (bytes < byteEnd)
- {
- // Faster if don't use *bytes++;
- char c;
- c = mapBytesToUnicode[*bytes];
- bytes++;
-
- // If unknown we have to do fallback count
- if (c == UNKNOWN_CHAR)
- {
- // Must have a fallback buffer
- if (fallbackBuffer == null)
- {
- // Need to adjust count so we get real start
- if (decoder == null)
- fallbackBuffer = this.DecoderFallback.CreateFallbackBuffer();
- else
- fallbackBuffer = decoder.FallbackBuffer;
- fallbackBuffer.InternalInitialize(byteEnd - count, null);
- }
-
- // Use fallback buffer
- byteBuffer[0] = *(bytes - 1);
- charCount--; // We'd already reserved one for *(bytes-1)
- charCount += fallbackBuffer.InternalFallback(byteBuffer, bytes);
- }
- }
-
- // Fallback buffer must be empty
- Debug.Assert(fallbackBuffer == null || fallbackBuffer.Remaining == 0,
- "[SBCSEncoding.GetCharCount]Expected Empty fallback buffer at end");
-
- // Converted sequence is same length as input
- return charCount;
- }
-
- internal override unsafe int GetChars(byte* bytes, int byteCount,
- char* chars, int charCount, DecoderNLS decoder)
- {
- // Just need to ASSERT, this is called by something else internal that checked parameters already
- Debug.Assert(bytes != null, "[SBCSCodePageEncoding.GetChars]bytes is null");
- Debug.Assert(byteCount >= 0, "[SBCSCodePageEncoding.GetChars]byteCount is negative");
- Debug.Assert(chars != null, "[SBCSCodePageEncoding.GetChars]chars is null");
- Debug.Assert(charCount >= 0, "[SBCSCodePageEncoding.GetChars]charCount is negative");
-
- CheckMemorySection();
-
- // See if we have best fit
- bool bUseBestFit = false;
-
- // Do it fast way if using ? replacement or best fit fallbacks
- byte* byteEnd = bytes + byteCount;
- byte* byteStart = bytes;
- char* charStart = chars;
-
- // Only need decoder fallback buffer if not using default replacement fallback or best fit fallback.
- DecoderReplacementFallback fallback = null;
-
- if (decoder == null)
- {
- fallback = this.DecoderFallback as DecoderReplacementFallback;
- bUseBestFit = this.DecoderFallback.IsMicrosoftBestFitFallback;
- }
- else
- {
- fallback = decoder.Fallback as DecoderReplacementFallback;
- bUseBestFit = decoder.Fallback.IsMicrosoftBestFitFallback;
- Debug.Assert(!decoder.m_throwOnOverflow || !decoder.InternalHasFallbackBuffer ||
- decoder.FallbackBuffer.Remaining == 0,
- "[SBCSCodePageEncoding.GetChars]Expected empty fallback buffer at start");
- }
-
- if (bUseBestFit || (fallback != null && fallback.MaxCharCount == 1))
- {
- // Try it the fast way
- char replacementChar;
- if (fallback == null)
- replacementChar = '?'; // Best fit alwasy has ? for fallback for SBCS
- else
- replacementChar = fallback.DefaultString[0];
-
- // Need byteCount chars, otherwise too small buffer
- if (charCount < byteCount)
- {
- // Need at least 1 output byte, throw if must throw
- ThrowCharsOverflow(decoder, charCount < 1);
-
- // Not throwing, use what we can
- byteEnd = bytes + charCount;
- }
-
- // Quick loop, just do '?' replacement because we don't have fallbacks for decodings.
- while (bytes < byteEnd)
- {
- char c;
- if (bUseBestFit)
- {
- if (arrayBytesBestFit == null)
- {
- ReadBestFitTable();
- }
- c = arrayBytesBestFit[*bytes];
- }
- else
- c = mapBytesToUnicode[*bytes];
- bytes++;
-
- if (c == UNKNOWN_CHAR)
- // This is an invalid byte in the ASCII encoding.
- *chars = replacementChar;
- else
- *chars = c;
- chars++;
- }
-
- // bytes & chars used are the same
- if (decoder != null)
- decoder.m_bytesUsed = (int)(bytes - byteStart);
- return (int)(chars - charStart);
- }
-
- // Slower way's going to need a fallback buffer
- DecoderFallbackBuffer fallbackBuffer = null;
- byte[] byteBuffer = new byte[1];
- char* charEnd = chars + charCount;
-
- // Not quite so fast loop
- while (bytes < byteEnd)
- {
- // Faster if don't use *bytes++;
- char c = mapBytesToUnicode[*bytes];
- bytes++;
-
- // See if it was unknown
- if (c == UNKNOWN_CHAR)
- {
- // Make sure we have a fallback buffer
- if (fallbackBuffer == null)
- {
- if (decoder == null)
- fallbackBuffer = this.DecoderFallback.CreateFallbackBuffer();
- else
- fallbackBuffer = decoder.FallbackBuffer;
- fallbackBuffer.InternalInitialize(byteEnd - byteCount, charEnd);
- }
-
- // Use fallback buffer
- Debug.Assert(bytes > byteStart,
- "[SBCSCodePageEncoding.GetChars]Expected bytes to have advanced already (unknown byte)");
- byteBuffer[0] = *(bytes - 1);
- // Fallback adds fallback to chars, but doesn't increment chars unless the whole thing fits.
- if (!fallbackBuffer.InternalFallback(byteBuffer, bytes, ref chars))
- {
- // May or may not throw, but we didn't get this byte
- bytes--; // unused byte
- fallbackBuffer.InternalReset(); // Didn't fall this back
- ThrowCharsOverflow(decoder, bytes == byteStart); // throw?
- break; // don't throw, but stop loop
- }
- }
- else
- {
- // Make sure we have buffer space
- if (chars >= charEnd)
- {
- Debug.Assert(bytes > byteStart,
- "[SBCSCodePageEncoding.GetChars]Expected bytes to have advanced already (known byte)");
- bytes--; // unused byte
- ThrowCharsOverflow(decoder, bytes == byteStart); // throw?
- break; // don't throw, but stop loop
- }
-
- *(chars) = c;
- chars++;
- }
- }
-
- // Might have had decoder fallback stuff.
- if (decoder != null)
- decoder.m_bytesUsed = (int)(bytes - byteStart);
-
- // Expect Empty fallback buffer for GetChars
- Debug.Assert(fallbackBuffer == null || fallbackBuffer.Remaining == 0,
- "[SBCSEncoding.GetChars]Expected Empty fallback buffer at end");
-
- return (int)(chars - charStart);
- }
-
- public override int GetMaxByteCount(int charCount)
- {
- if (charCount < 0)
- throw new ArgumentOutOfRangeException(nameof(charCount),
- Environment.GetResourceString("ArgumentOutOfRange_NeedNonNegNum"));
- Contract.EndContractBlock();
-
- // Characters would be # of characters + 1 in case high surrogate is ? * max fallback
- long byteCount = (long)charCount + 1;
-
- if (EncoderFallback.MaxCharCount > 1)
- byteCount *= EncoderFallback.MaxCharCount;
-
- // 1 to 1 for most characters. Only surrogates with fallbacks have less.
-
- if (byteCount > 0x7fffffff)
- throw new ArgumentOutOfRangeException(nameof(charCount), Environment.GetResourceString("ArgumentOutOfRange_GetByteCountOverflow"));
- return (int)byteCount;
- }
-
- public override int GetMaxCharCount(int byteCount)
- {
- if (byteCount < 0)
- throw new ArgumentOutOfRangeException(nameof(byteCount),
- Environment.GetResourceString("ArgumentOutOfRange_NeedNonNegNum"));
- Contract.EndContractBlock();
-
- // Just return length, SBCS stay the same length because they don't map to surrogate
- long charCount = (long)byteCount;
-
- // 1 to 1 for most characters. Only surrogates with fallbacks have less, unknown fallbacks could be longer.
- if (DecoderFallback.MaxCharCount > 1)
- charCount *= DecoderFallback.MaxCharCount;
-
- if (charCount > 0x7fffffff)
- throw new ArgumentOutOfRangeException(nameof(byteCount), Environment.GetResourceString("ArgumentOutOfRange_GetCharCountOverflow"));
-
- return (int)charCount;
- }
-
- // True if and only if the encoding only uses single byte code points. (Ie, ASCII, 1252, etc)
- public override bool IsSingleByte
- {
- get
- {
- return true;
- }
- }
-
- [System.Runtime.InteropServices.ComVisible(false)]
- public override bool IsAlwaysNormalized(NormalizationForm form)
- {
- // Most of these code pages could be decomposed or have compatibility mappings for KC, KD, & D
- // additionally the allow unassigned forms and IDNA wouldn't work either, so C is our choice.
- if (form == NormalizationForm.FormC)
- {
- // Form C is only true for some code pages. They have to have all 256 code points assigned
- // and not map to unassigned or combinable code points.
- switch (CodePage)
- {
- // Return true for some code pages.
- case 1252: // (Latin I - ANSI)
- case 1250: // (Eastern Europe - ANSI)
- case 1251: // (Cyrillic - ANSI)
- case 1254: // (Turkish - ANSI)
- case 1256: // (Arabic - ANSI)
- case 28591: // (ISO 8859-1 Latin I)
- case 437: // (United States - OEM)
- case 737: // (Greek (aka 437G) - OEM)
- case 775: // (Baltic - OEM)
- case 850: // (Multilingual (Latin I) - OEM)
- case 852: // (Slovak (Latin II) - OEM)
- case 855: // (Cyrillic - OEM)
- case 858: // (Multilingual (Latin I) - OEM + Euro)
- case 860: // (Portuguese - OEM)
- case 861: // (Icelandic - OEM)
- case 862: // (Hebrew - OEM)
- case 863: // (Canadian French - OEM)
- case 865: // (Nordic - OEM)
- case 866: // (Russian - OEM)
- case 869: // (Modern Greek - OEM)
- case 10007: // (Cyrillic - MAC)
- case 10017: // (Ukraine - MAC)
- case 10029: // (Latin II - MAC)
- case 28592: // (ISO 8859-2 Eastern Europe)
- case 28594: // (ISO 8859-4 Baltic)
- case 28595: // (ISO 8859-5 Cyrillic)
- case 28599: // (ISO 8859-9 Latin Alphabet No.5)
- case 28603: // (ISO/IEC 8859-13:1998 (Lithuanian))
- case 28605: // (ISO 8859-15 Latin 9 (IBM923=IBM819+Euro))
- case 037: // (IBM EBCDIC U.S./Canada)
- case 500: // (IBM EBCDIC International)
- case 870: // (IBM EBCDIC Latin-2 Multilingual/ROECE)
- case 1026: // (IBM EBCDIC Latin-5 Turkey)
- case 1047: // (IBM Latin-1/Open System)
- case 1140: // (IBM EBCDIC U.S./Canada (037+Euro))
- case 1141: // (IBM EBCDIC Germany (20273(IBM273)+Euro))
- case 1142: // (IBM EBCDIC Denmark/Norway (20277(IBM277+Euro))
- case 1143: // (IBM EBCDIC Finland/Sweden (20278(IBM278)+Euro))
- case 1144: // (IBM EBCDIC Italy (20280(IBM280)+Euro))
- case 1145: // (IBM EBCDIC Latin America/Spain (20284(IBM284)+Euro))
- case 1146: // (IBM EBCDIC United Kingdom (20285(IBM285)+Euro))
- case 1147: // (IBM EBCDIC France (20297(IBM297+Euro))
- case 1148: // (IBM EBCDIC International (500+Euro))
- case 1149: // (IBM EBCDIC Icelandic (20871(IBM871+Euro))
- case 20273: // (IBM EBCDIC Germany)
- case 20277: // (IBM EBCDIC Denmark/Norway)
- case 20278: // (IBM EBCDIC Finland/Sweden)
- case 20280: // (IBM EBCDIC Italy)
- case 20284: // (IBM EBCDIC Latin America/Spain)
- case 20285: // (IBM EBCDIC United Kingdom)
- case 20297: // (IBM EBCDIC France)
- case 20871: // (IBM EBCDIC Icelandic)
- case 20880: // (IBM EBCDIC Cyrillic)
- case 20924: // (IBM Latin-1/Open System (IBM924=IBM1047+Euro))
- case 21025: // (IBM EBCDIC Cyrillic (Serbian, Bulgarian))
- case 720: // (Arabic - Transparent ASMO)
- case 20866: // (Russian - KOI8)
- case 21866: // (Ukrainian - KOI8-U)
- return true;
- }
- }
-
- // False for IDNA and unknown
- return false;
- }
- }
-}
-#endif // FEATURE_CODEPAGES_FILE
diff --git a/src/mscorlib/src/System/Text/StringBuilder.cs b/src/mscorlib/src/System/Text/StringBuilder.cs
index f20146fe00..72247c333e 100644
--- a/src/mscorlib/src/System/Text/StringBuilder.cs
+++ b/src/mscorlib/src/System/Text/StringBuilder.cs
@@ -41,7 +41,6 @@ namespace System.Text {
// Console.WriteLine(sb1);
// Console.WriteLine(sb2);
//
- [System.Runtime.InteropServices.ComVisible(true)]
[Serializable]
public sealed class StringBuilder : ISerializable
{
@@ -593,7 +592,7 @@ namespace System.Text {
throw new ArgumentOutOfRangeException(nameof(startIndex), Environment.GetResourceString("ArgumentOutOfRange_GenericPositive"));
}
if (charCount<0) {
- throw new ArgumentOutOfRangeException("count", Environment.GetResourceString("ArgumentOutOfRange_GenericPositive"));
+ throw new ArgumentOutOfRangeException(nameof(charCount), Environment.GetResourceString("ArgumentOutOfRange_GenericPositive"));
}
Contract.Ensures(Contract.Result<StringBuilder>() != null);
Contract.EndContractBlock();
@@ -605,7 +604,7 @@ namespace System.Text {
throw new ArgumentNullException(nameof(value));
}
if (charCount > value.Length - startIndex) {
- throw new ArgumentOutOfRangeException("count", Environment.GetResourceString("ArgumentOutOfRange_Index"));
+ throw new ArgumentOutOfRangeException(nameof(charCount), Environment.GetResourceString("ArgumentOutOfRange_Index"));
}
if (charCount==0) {
@@ -715,20 +714,17 @@ namespace System.Text {
}
}
- [System.Runtime.InteropServices.ComVisible(false)]
public StringBuilder AppendLine() {
Contract.Ensures(Contract.Result<StringBuilder>() != null);
return Append(Environment.NewLine);
}
- [System.Runtime.InteropServices.ComVisible(false)]
public StringBuilder AppendLine(string value) {
Contract.Ensures(Contract.Result<StringBuilder>() != null);
Append(value);
return Append(Environment.NewLine);
}
- [System.Runtime.InteropServices.ComVisible(false)]
public void CopyTo(int sourceIndex, char[] destination, int destinationIndex, int count) {
if (destination == null) {
throw new ArgumentNullException(nameof(destination));
@@ -848,7 +844,7 @@ namespace System.Text {
}
if (length > Length - startIndex) {
- throw new ArgumentOutOfRangeException("index", Environment.GetResourceString("ArgumentOutOfRange_Index"));
+ throw new ArgumentOutOfRangeException(nameof(length), Environment.GetResourceString("ArgumentOutOfRange_Index"));
}
Contract.Ensures(Contract.Result<StringBuilder>() != null);
Contract.EndContractBlock();
@@ -1205,7 +1201,7 @@ namespace System.Text {
}
if (charCount < 0) {
- throw new ArgumentOutOfRangeException("count", Environment.GetResourceString("ArgumentOutOfRange_GenericPositive"));
+ throw new ArgumentOutOfRangeException(nameof(charCount), Environment.GetResourceString("ArgumentOutOfRange_GenericPositive"));
}
if (startIndex > value.Length - charCount) {
diff --git a/src/mscorlib/src/System/Text/SurrogateEncoder.cs b/src/mscorlib/src/System/Text/SurrogateEncoder.cs
deleted file mode 100644
index bbfa180f29..0000000000
--- a/src/mscorlib/src/System/Text/SurrogateEncoder.cs
+++ /dev/null
@@ -1,57 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-// See the LICENSE file in the project root for more information.
-
-
-// WARNING:
-//
-// This is just an IObjectReference proxy for the former V1.1 Surrogate Encoder
-// All this does is make an encoder of the correct type, it DOES NOT maintain state.
-namespace System.Text
-{
- using System;
- using System.Runtime.Serialization;
- using System.Security.Permissions;
- using System.Diagnostics;
- using System.Diagnostics.Contracts;
-
- /*=================================SurrogateEncoder==================================
- ** This class is here only to deserialize the SurrogateEncoder class from Everett (V1.1) into
- ** Appropriate Whidbey (V2.0) objects.
- ==============================================================================*/
-
- [Serializable]
- internal sealed class SurrogateEncoder : IObjectReference, ISerializable
- {
- // Might need this when GetRealObjecting
- [NonSerialized]
- private Encoding realEncoding = null;
-
- // Constructor called by serialization.
- internal SurrogateEncoder(SerializationInfo info, StreamingContext context)
- {
- // Any info?
- if (info==null) throw new ArgumentNullException(nameof(info));
- Contract.EndContractBlock();
-
- // All versions have a code page
- this.realEncoding = (Encoding)info.GetValue("m_encoding", typeof(Encoding));
- }
-
- // Just get it from GetEncoding
- public Object GetRealObject(StreamingContext context)
- {
- // Need to get our Encoding's Encoder
- return this.realEncoding.GetEncoder();
- }
-
- // ISerializable implementation
- void ISerializable.GetObjectData(SerializationInfo info, StreamingContext context)
- {
- // We cannot ever call this.
- Debug.Assert(false, "Didn't expect to make it to SurrogateEncoder.GetObjectData");
- throw new ArgumentException(Environment.GetResourceString("Arg_ExecutionEngineException"));
- }
- }
-}
-
diff --git a/src/mscorlib/src/System/Text/UTF7Encoding.cs b/src/mscorlib/src/System/Text/UTF7Encoding.cs
index 624ca735f6..9418d2e768 100644
--- a/src/mscorlib/src/System/Text/UTF7Encoding.cs
+++ b/src/mscorlib/src/System/Text/UTF7Encoding.cs
@@ -10,13 +10,11 @@ namespace System.Text
{
using System;
using System.Runtime.Serialization;
- using System.Security.Permissions;
using System.Diagnostics;
using System.Diagnostics.Contracts;
[Serializable]
- [System.Runtime.InteropServices.ComVisible(true)]
public class UTF7Encoding : Encoding
{
private const String base64Chars =
@@ -127,7 +125,6 @@ namespace System.Text
- [System.Runtime.InteropServices.ComVisible(false)]
public override bool Equals(Object value)
{
UTF7Encoding that = value as UTF7Encoding;
@@ -142,7 +139,6 @@ namespace System.Text
// Compared to all the other encodings, variations of UTF7 are unlikely
- [System.Runtime.InteropServices.ComVisible(false)]
public override int GetHashCode()
{
return this.CodePage + this.EncoderFallback.GetHashCode() + this.DecoderFallback.GetHashCode();
@@ -170,20 +166,17 @@ namespace System.Text
return EncodingForwarder.GetByteCount(this, chars, index, count);
}
- [System.Runtime.InteropServices.ComVisible(false)]
public override int GetByteCount(String s)
{
return EncodingForwarder.GetByteCount(this, s);
}
[CLSCompliant(false)]
- [System.Runtime.InteropServices.ComVisible(false)]
public override unsafe int GetByteCount(char* chars, int count)
{
return EncodingForwarder.GetByteCount(this, chars, count);
}
- [System.Runtime.InteropServices.ComVisible(false)]
public override int GetBytes(String s, int charIndex, int charCount,
byte[] bytes, int byteIndex)
{
@@ -206,7 +199,6 @@ namespace System.Text
}
[CLSCompliant(false)]
- [System.Runtime.InteropServices.ComVisible(false)]
public override unsafe int GetBytes(char* chars, int charCount, byte* bytes, int byteCount)
{
return EncodingForwarder.GetBytes(this, chars, charCount, bytes, byteCount);
@@ -221,7 +213,6 @@ namespace System.Text
}
[CLSCompliant(false)]
- [System.Runtime.InteropServices.ComVisible(false)]
public override unsafe int GetCharCount(byte* bytes, int count)
{
return EncodingForwarder.GetCharCount(this, bytes, count);
@@ -234,7 +225,6 @@ namespace System.Text
}
[CLSCompliant(false)]
- [System.Runtime.InteropServices.ComVisible(false)]
public unsafe override int GetChars(byte* bytes, int byteCount, char* chars, int charCount)
{
return EncodingForwarder.GetChars(this, bytes, byteCount, chars, charCount);
@@ -243,7 +233,6 @@ namespace System.Text
// Returns a string containing the decoded representation of a range of
// bytes in a byte array.
- [System.Runtime.InteropServices.ComVisible(false)]
public override String GetString(byte[] bytes, int index, int count)
{
return EncodingForwarder.GetString(this, bytes, index, count);
diff --git a/src/mscorlib/src/System/Text/UTF8Encoding.cs b/src/mscorlib/src/System/Text/UTF8Encoding.cs
index ba19649b56..191bbfef56 100644
--- a/src/mscorlib/src/System/Text/UTF8Encoding.cs
+++ b/src/mscorlib/src/System/Text/UTF8Encoding.cs
@@ -20,7 +20,6 @@ namespace System.Text
using System;
using System.Globalization;
using System.Runtime.Serialization;
- using System.Security.Permissions;
using System.Diagnostics;
using System.Diagnostics.Contracts;
@@ -37,7 +36,6 @@ namespace System.Text
// switch the byte orderings.
[Serializable]
-[System.Runtime.InteropServices.ComVisible(true)]
public class UTF8Encoding : Encoding
{
/*
@@ -131,7 +129,6 @@ namespace System.Text
}
[CLSCompliant(false)]
- [System.Runtime.InteropServices.ComVisible(false)]
public override unsafe int GetByteCount(char* chars, int count)
{
return EncodingForwarder.GetByteCount(this, chars, count);
@@ -159,7 +156,6 @@ namespace System.Text
}
[CLSCompliant(false)]
- [System.Runtime.InteropServices.ComVisible(false)]
public override unsafe int GetBytes(char* chars, int charCount, byte* bytes, int byteCount)
{
return EncodingForwarder.GetBytes(this, chars, charCount, bytes, byteCount);
@@ -174,7 +170,6 @@ namespace System.Text
}
[CLSCompliant(false)]
- [System.Runtime.InteropServices.ComVisible(false)]
public override unsafe int GetCharCount(byte* bytes, int count)
{
return EncodingForwarder.GetCharCount(this, bytes, count);
@@ -187,7 +182,6 @@ namespace System.Text
}
[CLSCompliant(false)]
- [System.Runtime.InteropServices.ComVisible(false)]
public unsafe override int GetChars(byte* bytes, int byteCount, char* chars, int charCount)
{
return EncodingForwarder.GetChars(this, bytes, byteCount, chars, charCount);
@@ -196,7 +190,6 @@ namespace System.Text
// Returns a string containing the decoded representation of a range of
// bytes in a byte array.
- [System.Runtime.InteropServices.ComVisible(false)]
public override String GetString(byte[] bytes, int index, int count)
{
return EncodingForwarder.GetString(this, bytes, index, count);
diff --git a/src/mscorlib/src/System/Text/UnicodeEncoding.cs b/src/mscorlib/src/System/Text/UnicodeEncoding.cs
index 25255c3230..d8ef18ab05 100644
--- a/src/mscorlib/src/System/Text/UnicodeEncoding.cs
+++ b/src/mscorlib/src/System/Text/UnicodeEncoding.cs
@@ -11,13 +11,11 @@ namespace System.Text
using System;
using System.Globalization;
using System.Runtime.Serialization;
- using System.Security.Permissions;
using System.Diagnostics;
using System.Diagnostics.Contracts;
[Serializable]
- [System.Runtime.InteropServices.ComVisible(true)]
public class UnicodeEncoding : Encoding
{
// Used by Encoding.BigEndianUnicode/Unicode for lazy initialization
@@ -111,7 +109,6 @@ namespace System.Text
}
[CLSCompliant(false)]
- [System.Runtime.InteropServices.ComVisible(false)]
public override unsafe int GetByteCount(char* chars, int count)
{
return EncodingForwarder.GetByteCount(this, chars, count);
@@ -139,7 +136,6 @@ namespace System.Text
}
[CLSCompliant(false)]
- [System.Runtime.InteropServices.ComVisible(false)]
public override unsafe int GetBytes(char* chars, int charCount, byte* bytes, int byteCount)
{
return EncodingForwarder.GetBytes(this, chars, charCount, bytes, byteCount);
@@ -154,7 +150,6 @@ namespace System.Text
}
[CLSCompliant(false)]
- [System.Runtime.InteropServices.ComVisible(false)]
public override unsafe int GetCharCount(byte* bytes, int count)
{
return EncodingForwarder.GetCharCount(this, bytes, count);
@@ -167,7 +162,6 @@ namespace System.Text
}
[CLSCompliant(false)]
- [System.Runtime.InteropServices.ComVisible(false)]
public unsafe override int GetChars(byte* bytes, int byteCount, char* chars, int charCount)
{
return EncodingForwarder.GetChars(this, bytes, byteCount, chars, charCount);
@@ -176,7 +170,6 @@ namespace System.Text
// Returns a string containing the decoded representation of a range of
// bytes in a byte array.
- [System.Runtime.InteropServices.ComVisible(false)]
public override String GetString(byte[] bytes, int index, int count)
{
return EncodingForwarder.GetString(this, bytes, index, count);
@@ -1659,7 +1652,6 @@ namespace System.Text
}
- [System.Runtime.InteropServices.ComVisible(false)]
public override System.Text.Encoder GetEncoder()
{
return new EncoderNLS(this);