summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorMarek Safar <marek.safar@gmail.com>2019-01-04 02:34:03 +0100
committerJan Kotas <jkotas@microsoft.com>2019-01-03 15:34:03 -1000
commit8d2f4ed72a9d48164a4c8147fe7911e62a5d9594 (patch)
tree69aac796ea6d8d0a0013dbe92788ded6970893d5 /src
parent10c3e60c863d01f638daa240af957c3c5791cda5 (diff)
downloadcoreclr-8d2f4ed72a9d48164a4c8147fe7911e62a5d9594.tar.gz
coreclr-8d2f4ed72a9d48164a4c8147fe7911e62a5d9594.tar.bz2
coreclr-8d2f4ed72a9d48164a4c8147fe7911e62a5d9594.zip
Adds portable version of EncodingTable (#21735)
* Adds portable version of EncodingTable Most of the implementation is extracted from CoreRT * Use string comparer directly * Remove no longer used COMNlsInfo * Adds localization support * Removes FeatureCoreFxGlobalization configuration * Remove redundant encodings look up from GetEncoding * Keep Hashtable for nameToCodePage as it does not lock on read * Replace locked dictionary lookup with short switch * Include comment with msbuild task link used to generate the data file
Diffstat (limited to 'src')
-rw-r--r--src/System.Private.CoreLib/System.Private.CoreLib.csproj4
-rw-r--r--src/System.Private.CoreLib/shared/System.Private.CoreLib.Shared.projitems3
-rw-r--r--src/System.Private.CoreLib/shared/System/Globalization/DateTimeParse.cs2
-rw-r--r--src/System.Private.CoreLib/shared/System/Text/CodePageDataItem.cs35
-rw-r--r--src/System.Private.CoreLib/shared/System/Text/Encoding.cs71
-rw-r--r--src/System.Private.CoreLib/shared/System/Text/EncodingData.cs298
-rw-r--r--src/System.Private.CoreLib/shared/System/Text/EncodingInfo.cs51
-rw-r--r--src/System.Private.CoreLib/shared/System/Text/EncodingTable.cs194
-rw-r--r--src/System.Private.CoreLib/src/System/Text/CodePageDataItem.Unix.cs68
-rw-r--r--src/System.Private.CoreLib/src/System/Text/CodePageDataItem.cs123
-rw-r--r--src/System.Private.CoreLib/src/System/Text/EncodingTable.Unix.cs178
-rw-r--r--src/System.Private.CoreLib/src/System/Text/EncodingTable.cs250
-rw-r--r--src/classlibnative/CMakeLists.txt4
-rw-r--r--src/classlibnative/bcltype/stringnative.cpp31
-rw-r--r--src/classlibnative/bcltype/stringnative.h2
-rw-r--r--src/classlibnative/inc/nlsinfo.h75
-rw-r--r--src/classlibnative/nls/CMakeLists.txt6
-rw-r--r--src/classlibnative/nls/encodingdata.cpp103
-rw-r--r--src/classlibnative/nls/nlsinfo.cpp76
-rw-r--r--src/dlls/mscoree/coreclr/CMakeLists.txt1
-rw-r--r--src/vm/ceemain.cpp3
-rw-r--r--src/vm/ecalllist.h12
-rw-r--r--src/vm/mscorlib.cpp1
23 files changed, 549 insertions, 1042 deletions
diff --git a/src/System.Private.CoreLib/System.Private.CoreLib.csproj b/src/System.Private.CoreLib/System.Private.CoreLib.csproj
index 0541c4bba3..255d8c1c8b 100644
--- a/src/System.Private.CoreLib/System.Private.CoreLib.csproj
+++ b/src/System.Private.CoreLib/System.Private.CoreLib.csproj
@@ -307,10 +307,6 @@
<Compile Include="$(BclSourcesRoot)\System\StartupHookProvider.cs" />
<Compile Include="$(BclSourcesRoot)\System\String.CoreCLR.cs" />
<Compile Include="$(BclSourcesRoot)\System\StubHelpers.cs" />
- <Compile Include="$(BclSourcesRoot)\System\Text\CodePageDataItem.cs" Condition="'$(FeatureCoreFxGlobalization)' != 'true'" />
- <Compile Include="$(BclSourcesRoot)\System\Text\CodePageDataItem.Unix.cs" Condition="'$(FeatureCoreFxGlobalization)' == 'true'" />
- <Compile Include="$(BclSourcesRoot)\System\Text\EncodingTable.cs" Condition="'$(FeatureCoreFxGlobalization)' != 'true'" />
- <Compile Include="$(BclSourcesRoot)\System\Text\EncodingTable.Unix.cs" Condition="'$(FeatureCoreFxGlobalization)' == 'true'" />
<Compile Include="$(BclSourcesRoot)\System\Text\StringBuilder.CoreCLR.cs" />
<Compile Include="$(BclSourcesRoot)\System\Threading\ClrThreadPoolBoundHandle.cs" />
<Compile Include="$(BclSourcesRoot)\System\Threading\ClrThreadPoolBoundHandleOverlapped.cs" />
diff --git a/src/System.Private.CoreLib/shared/System.Private.CoreLib.Shared.projitems b/src/System.Private.CoreLib/shared/System.Private.CoreLib.Shared.projitems
index 1308424cfb..5df43dce88 100644
--- a/src/System.Private.CoreLib/shared/System.Private.CoreLib.Shared.projitems
+++ b/src/System.Private.CoreLib/shared/System.Private.CoreLib.Shared.projitems
@@ -648,6 +648,7 @@
<Compile Include="$(MSBuildThisFileDirectory)System\SystemException.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\Text\ASCIIEncoding.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\Text\StringBuilderCache.cs" />
+ <Compile Include="$(MSBuildThisFileDirectory)System\Text\CodePageDataItem.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\Text\Decoder.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\Text\DecoderNLS.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\Text\DecoderBestFitFallback.cs" />
@@ -661,9 +662,11 @@
<Compile Include="$(MSBuildThisFileDirectory)System\Text\EncoderFallback.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\Text\EncoderReplacementFallback.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\Text\Encoding.cs" />
+ <Compile Include="$(MSBuildThisFileDirectory)System\Text\EncodingData.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\Text\EncodingInfo.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\Text\EncodingNLS.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\Text\EncodingProvider.cs" />
+ <Compile Include="$(MSBuildThisFileDirectory)System\Text\EncodingTable.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\Text\Latin1Encoding.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\Text\NormalizationForm.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\Text\Rune.cs" />
diff --git a/src/System.Private.CoreLib/shared/System/Globalization/DateTimeParse.cs b/src/System.Private.CoreLib/shared/System/Globalization/DateTimeParse.cs
index d36ead3df0..9c0913e8d4 100644
--- a/src/System.Private.CoreLib/shared/System/Globalization/DateTimeParse.cs
+++ b/src/System.Private.CoreLib/shared/System/Globalization/DateTimeParse.cs
@@ -5149,9 +5149,7 @@ new DS[] { DS.ERROR, DS.TX_NNN, DS.TX_NNN, DS.TX_NNN, DS.ERROR, DS.ERROR,
return;
Trace("DateTimeFormatInfo Properties");
-#if !FEATURE_COREFX_GLOBALIZATION
Trace($" NativeCalendarName {Hex(dtfi.NativeCalendarName)}");
-#endif
Trace($" AMDesignator {Hex(dtfi.AMDesignator)}");
Trace($" PMDesignator {Hex(dtfi.PMDesignator)}");
Trace($" TimeSeparator {Hex(dtfi.TimeSeparator)}");
diff --git a/src/System.Private.CoreLib/shared/System/Text/CodePageDataItem.cs b/src/System.Private.CoreLib/shared/System/Text/CodePageDataItem.cs
new file mode 100644
index 0000000000..e4b8d4d730
--- /dev/null
+++ b/src/System.Private.CoreLib/shared/System/Text/CodePageDataItem.cs
@@ -0,0 +1,35 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+namespace System.Text
+{
+ internal class CodePageDataItem
+ {
+ public int CodePage { get; }
+ public int UIFamilyCodePage { get; }
+ public string WebName { get; }
+ public string HeaderName { get; }
+ public string BodyName { get; }
+ public string DisplayName { get; }
+ public uint Flags { get; }
+
+ internal CodePageDataItem(
+ int codePage,
+ int uiFamilyCodePage,
+ string webName,
+ string headerName,
+ string bodyName,
+ string displayName,
+ uint flags)
+ {
+ CodePage = codePage;
+ UIFamilyCodePage = uiFamilyCodePage;
+ WebName = webName;
+ HeaderName = headerName;
+ BodyName = bodyName;
+ DisplayName = displayName;
+ Flags = flags;
+ }
+ }
+}
diff --git a/src/System.Private.CoreLib/shared/System/Text/Encoding.cs b/src/System.Private.CoreLib/shared/System/Text/Encoding.cs
index 005f08afd7..ec8c9c33ff 100644
--- a/src/System.Private.CoreLib/shared/System/Text/Encoding.cs
+++ b/src/System.Private.CoreLib/shared/System/Text/Encoding.cs
@@ -261,19 +261,6 @@ namespace System.Text
if (result != null)
return result;
- //
- // NOTE: If you add a new encoding that can be retrieved by codepage, be sure to
- // add the corresponding item in EncodingTable.
- // Otherwise, the code below will throw exception when trying to call
- // EncodingTable.GetDataItem().
- //
- if (codepage < 0 || codepage > 65535)
- {
- throw new ArgumentOutOfRangeException(
- nameof(codepage), SR.Format(SR.ArgumentOutOfRange_Range, 0, 65535));
- }
-
-
switch (codepage)
{
case CodePageDefault: return Default; // 0
@@ -294,14 +281,13 @@ namespace System.Text
throw new ArgumentException(SR.Format(SR.Argument_CodepageNotSupported, codepage), nameof(codepage));
}
- // Is it a valid code page?
- if (EncodingTable.GetCodePageDataItem(codepage) == null)
+ if (codepage < 0 || codepage > 65535)
{
- throw new NotSupportedException(
- SR.Format(SR.NotSupported_NoCodepageData, codepage));
+ throw new ArgumentOutOfRangeException(
+ nameof(codepage), SR.Format(SR.ArgumentOutOfRange_Range, 0, 65535));
}
- return UTF8;
+ throw new NotSupportedException(SR.Format(SR.NotSupported_NoCodepageData, codepage));
}
public static Encoding GetEncoding(int codepage,
@@ -399,58 +385,19 @@ namespace System.Text
}
// Returns the human-readable description of the encoding ( e.g. Hebrew (DOS)).
-#if PROJECTN
- public virtual String EncodingName
+ public virtual string EncodingName
{
get
{
- string encodingName = GetLocalizedEncodingNameResource(this.CodePage);
- if (encodingName == null)
- {
- throw new NotSupportedException(SR.Format(SR.MissingEncodingNameResource, this.CodePage));
- }
-
- if (encodingName.StartsWith("Globalization_cp_", StringComparison.Ordinal))
+ if (_dataItem == null)
{
- // On ProjectN, resource strings are stripped from retail builds and replaced by
- // their identifier names. Since this property is meant to be a localized string,
- // but we don't localize ProjectN, we specifically need to do something reasonable
- // in this case. This currently returns the English name of the encoding from a
- // static data table.
- encodingName = EncodingTable.GetCodePageDataItem(this.CodePage).EnglishName;
- if (encodingName == null)
- {
- throw new NotSupportedException(SR.Format(SR.MissingEncodingNameResource, this.WebName, this.CodePage));
- }
+ GetDataItem();
}
- return encodingName;
+
+ return _dataItem.DisplayName;
}
}
- private static string GetLocalizedEncodingNameResource(int codePage)
- {
- switch (codePage)
- {
- case 1200: return SR.Globalization_cp_1200;
- case 1201: return SR.Globalization_cp_1201;
- case 12000: return SR.Globalization_cp_12000;
- case 12001: return SR.Globalization_cp_12001;
- case 20127: return SR.Globalization_cp_20127;
- case 28591: return SR.Globalization_cp_28591;
- case 65000: return SR.Globalization_cp_65000;
- case 65001: return SR.Globalization_cp_65001;
- default: return null;
- }
- }
-#else
- public virtual string EncodingName
- {
- get
- {
- return SR.GetResourceString("Globalization_cp_" + _codePage.ToString());
- }
- }
-#endif
// Returns the name for this encoding that can be used with mail agent header
// tags. If the encoding may not be used, the string is empty.
diff --git a/src/System.Private.CoreLib/shared/System/Text/EncodingData.cs b/src/System.Private.CoreLib/shared/System/Text/EncodingData.cs
new file mode 100644
index 0000000000..01837fc531
--- /dev/null
+++ b/src/System.Private.CoreLib/shared/System/Text/EncodingData.cs
@@ -0,0 +1,298 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+//
+// THIS IS AUTOGENERATED FILE CREATED BY
+// https://github.com/dotnet/buildtools/blob/6736870b84e06b75e7df32bb84d442db1b2afa10/src/Microsoft.DotNet.Build.Tasks/PackageFiles/encoding.targets
+//
+
+namespace System.Text
+{
+ internal static partial class EncodingTable
+ {
+ //
+ // s_encodingNames is the concatenation of all supported IANA names for each codepage.
+ // This is done rather than using a large readonly array of strings to avoid
+ // generating a large amount of code in the static constructor.
+ // Using indices from s_encodingNameIndices, we binary search this string when mapping
+ // an encoding name to a codepage. Note that these names are all lowercase and are
+ // sorted alphabetically.
+ //
+ private const string s_encodingNames =
+ "ansi_x3.4-1968" + // 20127
+ "ansi_x3.4-1986" + // 20127
+ "ascii" + // 20127
+ "cp367" + // 20127
+ "cp819" + // 28591
+ "csascii" + // 20127
+ "csisolatin1" + // 28591
+ "csunicode11utf7" + // 65000
+ "ibm367" + // 20127
+ "ibm819" + // 28591
+ "iso-10646-ucs-2" + // 1200
+ "iso-8859-1" + // 28591
+ "iso-ir-100" + // 28591
+ "iso-ir-6" + // 20127
+ "iso646-us" + // 20127
+ "iso8859-1" + // 28591
+ "iso_646.irv:1991" + // 20127
+ "iso_8859-1" + // 28591
+ "iso_8859-1:1987" + // 28591
+ "l1" + // 28591
+ "latin1" + // 28591
+ "ucs-2" + // 1200
+ "unicode" + // 1200
+ "unicode-1-1-utf-7" + // 65000
+ "unicode-1-1-utf-8" + // 65001
+ "unicode-2-0-utf-7" + // 65000
+ "unicode-2-0-utf-8" + // 65001
+ "unicodefffe" + // 1201
+ "us" + // 20127
+ "us-ascii" + // 20127
+ "utf-16" + // 1200
+ "utf-16be" + // 1201
+ "utf-16le" + // 1200
+ "utf-32" + // 12000
+ "utf-32be" + // 12001
+ "utf-32le" + // 12000
+ "utf-7" + // 65000
+ "utf-8" + // 65001
+ "x-unicode-1-1-utf-7" + // 65000
+ "x-unicode-1-1-utf-8" + // 65001
+ "x-unicode-2-0-utf-7" + // 65000
+ "x-unicode-2-0-utf-8"; // 65001
+
+ //
+ // s_encodingNameIndices contains the start index of every encoding name in the string
+ // s_encodingNames. We infer the length of each string by looking at the start index
+ // of the next string.
+ //
+ private static readonly int[] s_encodingNameIndices = new int[]
+ {
+ 0, // ansi_x3.4-1968 (20127)
+ 14, // ansi_x3.4-1986 (20127)
+ 28, // ascii (20127)
+ 33, // cp367 (20127)
+ 38, // cp819 (28591)
+ 43, // csascii (20127)
+ 50, // csisolatin1 (28591)
+ 61, // csunicode11utf7 (65000)
+ 76, // ibm367 (20127)
+ 82, // ibm819 (28591)
+ 88, // iso-10646-ucs-2 (1200)
+ 103, // iso-8859-1 (28591)
+ 113, // iso-ir-100 (28591)
+ 123, // iso-ir-6 (20127)
+ 131, // iso646-us (20127)
+ 140, // iso8859-1 (28591)
+ 149, // iso_646.irv:1991 (20127)
+ 165, // iso_8859-1 (28591)
+ 175, // iso_8859-1:1987 (28591)
+ 190, // l1 (28591)
+ 192, // latin1 (28591)
+ 198, // ucs-2 (1200)
+ 203, // unicode (1200)
+ 210, // unicode-1-1-utf-7 (65000)
+ 227, // unicode-1-1-utf-8 (65001)
+ 244, // unicode-2-0-utf-7 (65000)
+ 261, // unicode-2-0-utf-8 (65001)
+ 278, // unicodefffe (1201)
+ 289, // us (20127)
+ 291, // us-ascii (20127)
+ 299, // utf-16 (1200)
+ 305, // utf-16be (1201)
+ 313, // utf-16le (1200)
+ 321, // utf-32 (12000)
+ 327, // utf-32be (12001)
+ 335, // utf-32le (12000)
+ 343, // utf-7 (65000)
+ 348, // utf-8 (65001)
+ 353, // x-unicode-1-1-utf-7 (65000)
+ 372, // x-unicode-1-1-utf-8 (65001)
+ 391, // x-unicode-2-0-utf-7 (65000)
+ 410, // x-unicode-2-0-utf-8 (65001)
+ 429
+ };
+
+ //
+ // s_codePagesByName contains the list of supported codepages which match the encoding
+ // names listed in s_encodingNames. The way mapping works is we binary search
+ // s_encodingNames using s_encodingNameIndices until we find a match for a given name.
+ // The index of the entry in s_encodingNameIndices will be the index of codepage in
+ // s_codePagesByName.
+ //
+ private static readonly ushort[] s_codePagesByName = new ushort[]
+ {
+ 20127, // ansi_x3.4-1968
+ 20127, // ansi_x3.4-1986
+ 20127, // ascii
+ 20127, // cp367
+ 28591, // cp819
+ 20127, // csascii
+ 28591, // csisolatin1
+ 65000, // csunicode11utf7
+ 20127, // ibm367
+ 28591, // ibm819
+ 1200, // iso-10646-ucs-2
+ 28591, // iso-8859-1
+ 28591, // iso-ir-100
+ 20127, // iso-ir-6
+ 20127, // iso646-us
+ 28591, // iso8859-1
+ 20127, // iso_646.irv:1991
+ 28591, // iso_8859-1
+ 28591, // iso_8859-1:1987
+ 28591, // l1
+ 28591, // latin1
+ 1200, // ucs-2
+ 1200, // unicode
+ 65000, // unicode-1-1-utf-7
+ 65001, // unicode-1-1-utf-8
+ 65000, // unicode-2-0-utf-7
+ 65001, // unicode-2-0-utf-8
+ 1201, // unicodefffe
+ 20127, // us
+ 20127, // us-ascii
+ 1200, // utf-16
+ 1201, // utf-16be
+ 1200, // utf-16le
+ 12000, // utf-32
+ 12001, // utf-32be
+ 12000, // utf-32le
+ 65000, // utf-7
+ 65001, // utf-8
+ 65000, // x-unicode-1-1-utf-7
+ 65001, // x-unicode-1-1-utf-8
+ 65000, // x-unicode-2-0-utf-7
+ 65001 // x-unicode-2-0-utf-8
+ };
+
+ //
+ // When retrieving the value for System.Text.Encoding.WebName or
+ // System.Text.Encoding.EncodingName given System.Text.Encoding.CodePage,
+ // the codepage is first mapped to its index in s_mappedCodePages (see the
+ // switch in GetCodePageDataItem). That index is used to index s_webNameIndices
+ // to get the start index of the web name in the string s_webNames, and to index
+ // s_englishNameIndices to get the start of the English name in
+ // s_englishNames. In addition, this array's indices correspond to the indices
+ // into s_uiFamilyCodePages and s_flags.
+ //
+ private static readonly ushort[] s_mappedCodePages = new ushort[]
+ {
+ 1200, // utf-16
+ 1201, // utf-16be
+ 12000, // utf-32
+ 12001, // utf-32be
+ 20127, // us-ascii
+ 28591, // iso-8859-1
+ 65000, // utf-7
+ 65001 // utf-8
+ };
+
+ //
+ // s_uiFamilyCodePages is indexed by the corresponding index in s_mappedCodePages.
+ //
+ private static readonly int[] s_uiFamilyCodePages = new int[]
+ {
+ 1200,
+ 1200,
+ 1200,
+ 1200,
+ 1252,
+ 1252,
+ 1200,
+ 1200
+ };
+
+ //
+ // s_webNames is a concatenation of the default encoding names
+ // for each code page. It is used in retrieving the value for
+ // System.Text.Encoding.WebName given System.Text.Encoding.CodePage.
+ // This is done rather than using a large readonly array of strings to avoid
+ // generating a large amount of code in the static constructor.
+ //
+ private const string s_webNames =
+ "utf-16" + // 1200
+ "utf-16BE" + // 1201
+ "utf-32" + // 12000
+ "utf-32BE" + // 12001
+ "us-ascii" + // 20127
+ "iso-8859-1" + // 28591
+ "utf-7" + // 65000
+ "utf-8"; // 65001
+
+ //
+ // s_webNameIndices contains the start index of each code page's default
+ // web name in the string s_webNames. It is indexed by an index into
+ // s_mappedCodePages.
+ //
+ private static readonly int[] s_webNameIndices = new int[]
+ {
+ 0, // utf-16 (1200)
+ 6, // utf-16be (1201)
+ 14, // utf-32 (12000)
+ 20, // utf-32be (12001)
+ 28, // us-ascii (20127)
+ 36, // iso-8859-1 (28591)
+ 46, // utf-7 (65000)
+ 51, // utf-8 (65001)
+ 56
+ };
+
+ //
+ // s_englishNames is the concatenation of the English names for each codepage.
+ // It is used in retrieving the value for System.Text.Encoding.EncodingName
+ // given System.Text.Encoding.CodePage.
+ // This is done rather than using a large readonly array of strings to avoid
+ // generating a large amount of code in the static constructor.
+ //
+ private const string s_englishNames =
+ "Unicode" + // 1200
+ "Unicode (Big-Endian)" + // 1201
+ "Unicode (UTF-32)" + // 12000
+ "Unicode (UTF-32 Big-Endian)" + // 12001
+ "US-ASCII" + // 20127
+ "Western European (ISO)" + // 28591
+ "Unicode (UTF-7)" + // 65000
+ "Unicode (UTF-8)"; // 65001
+
+ //
+ // s_englishNameIndices contains the start index of each code page's English
+ // name in the string s_englishNames. It is indexed by an index into
+ // s_mappedCodePages.
+ //
+ private static readonly int[] s_englishNameIndices = new int[]
+ {
+ 0, // Unicode (1200)
+ 7, // Unicode (Big-Endian) (1201)
+ 27, // Unicode (UTF-32) (12000)
+ 43, // Unicode (UTF-32 Big-Endian) (12001)
+ 70, // US-ASCII (20127)
+ 78, // Western European (ISO) (28591)
+ 100, // Unicode (UTF-7) (65000)
+ 115, // Unicode (UTF-8) (65001)
+ 130
+ };
+
+ // redeclaring these constants here for readability below
+ private const uint MIMECONTF_MAILNEWS = Encoding.MIMECONTF_MAILNEWS;
+ private const uint MIMECONTF_BROWSER = Encoding.MIMECONTF_BROWSER;
+ private const uint MIMECONTF_SAVABLE_MAILNEWS = Encoding.MIMECONTF_SAVABLE_MAILNEWS;
+ private const uint MIMECONTF_SAVABLE_BROWSER = Encoding.MIMECONTF_SAVABLE_BROWSER;
+
+ //
+ //s_flags is indexed by the corresponding index in s_mappedCodePages.
+ //
+ private static readonly uint[] s_flags = new uint[]
+ {
+ MIMECONTF_SAVABLE_BROWSER,
+ 0,
+ 0,
+ 0,
+ MIMECONTF_MAILNEWS | MIMECONTF_SAVABLE_MAILNEWS,
+ MIMECONTF_MAILNEWS | MIMECONTF_BROWSER | MIMECONTF_SAVABLE_MAILNEWS | MIMECONTF_SAVABLE_BROWSER,
+ MIMECONTF_MAILNEWS | MIMECONTF_SAVABLE_MAILNEWS,
+ MIMECONTF_MAILNEWS | MIMECONTF_BROWSER | MIMECONTF_SAVABLE_MAILNEWS | MIMECONTF_SAVABLE_BROWSER
+ };
+ }
+}
diff --git a/src/System.Private.CoreLib/shared/System/Text/EncodingInfo.cs b/src/System.Private.CoreLib/shared/System/Text/EncodingInfo.cs
index 8e71e58fab..b8d634c22e 100644
--- a/src/System.Private.CoreLib/shared/System/Text/EncodingInfo.cs
+++ b/src/System.Private.CoreLib/shared/System/Text/EncodingInfo.cs
@@ -2,55 +2,24 @@
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
-using System;
-using System.Text;
-
namespace System.Text
{
public sealed class EncodingInfo
{
- private int iCodePage; // Code Page #
- private string strEncodingName; // Short name (web name)
- private string strDisplayName; // Full localized name
-
internal EncodingInfo(int codePage, string name, string displayName)
{
- iCodePage = codePage;
- strEncodingName = name;
- strDisplayName = displayName;
- }
-
-
- public int CodePage
- {
- get
- {
- return iCodePage;
- }
- }
-
-
- public string Name
- {
- get
- {
- return strEncodingName;
- }
- }
-
-
- public string DisplayName
- {
- get
- {
- return strDisplayName;
- }
+ CodePage = codePage;
+ Name = name;
+ DisplayName = displayName;
}
+ public int CodePage { get; }
+ public string Name { get; }
+ public string DisplayName { get; }
public Encoding GetEncoding()
{
- return Encoding.GetEncoding(iCodePage);
+ return Encoding.GetEncoding(CodePage);
}
public override bool Equals(object value)
@@ -58,14 +27,14 @@ namespace System.Text
EncodingInfo that = value as EncodingInfo;
if (that != null)
{
- return (this.CodePage == that.CodePage);
+ return this.CodePage == that.CodePage;
}
- return (false);
+ return false;
}
public override int GetHashCode()
{
- return this.CodePage;
+ return CodePage;
}
}
}
diff --git a/src/System.Private.CoreLib/shared/System/Text/EncodingTable.cs b/src/System.Private.CoreLib/shared/System/Text/EncodingTable.cs
new file mode 100644
index 0000000000..38f32e2636
--- /dev/null
+++ b/src/System.Private.CoreLib/shared/System/Text/EncodingTable.cs
@@ -0,0 +1,194 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+using System.Collections;
+using System.Diagnostics;
+using System.Threading;
+
+namespace System.Text
+{
+ //
+ // Data table for encoding classes. Used by System.Text.Encoding.
+ // This class keeps a name-to-codepage hashtable and a per-codepage array of cached data
+ // items, so System.Text.Encoding can resolve a codepage by name or a data item by codepage.
+ //
+ internal static partial class EncodingTable
+ {
+ private static readonly Hashtable s_nameToCodePage = Hashtable.Synchronized(new Hashtable(StringComparer.OrdinalIgnoreCase));
+ private static CodePageDataItem[] s_codePageToCodePageData;
+
+ /*=================================GetCodePageFromName==========================
+ **Action: Given an encoding name, return the correct code page number for this encoding.
+ **Returns: The code page for the encoding.
+ **Arguments:
+ ** name the name of the encoding
+ **Exceptions:
+ ** ArgumentNullException if name is null.
+ ** InternalGetCodePageFromName will throw ArgumentException if name is not a valid encoding name.
+ ============================================================================*/
+
+ internal static int GetCodePageFromName(string name)
+ {
+ if (name == null)
+ throw new ArgumentNullException(nameof(name));
+
+ object codePageObj;
+ codePageObj = s_nameToCodePage[name];
+
+ if (codePageObj != null)
+ {
+ return (int)codePageObj;
+ }
+
+ int codePage = InternalGetCodePageFromName(name);
+
+ s_nameToCodePage[name] = codePage;
+
+ return codePage;
+ }
+
+ // Find the data item by binary searching the table.
+ private static int InternalGetCodePageFromName(string name)
+ {
+ int left = 0;
+ int right = s_encodingNameIndices.Length - 2;
+ int index;
+ int result;
+
+ Debug.Assert(s_encodingNameIndices.Length == s_codePagesByName.Length + 1);
+ Debug.Assert(s_encodingNameIndices[s_encodingNameIndices.Length - 1] == s_encodingNames.Length);
+
+ ReadOnlySpan<char> invariantName = name.ToLowerInvariant().AsSpan();
+
+ //Binary search the array until we have only a couple of elements left and then
+ //just walk those elements.
+ while ((right - left) > 3)
+ {
+ index = ((right - left) / 2) + left;
+
+ Debug.Assert(index < s_encodingNameIndices.Length - 1);
+ result = string.CompareOrdinal(invariantName, s_encodingNames.AsSpan(s_encodingNameIndices[index], s_encodingNameIndices[index + 1] - s_encodingNameIndices[index]));
+
+ if (result == 0)
+ {
+ //We found the item, return the associated codePage.
+ return s_codePagesByName[index];
+ }
+ else if (result < 0)
+ {
+ //The name that we're looking for is less than our current index.
+ right = index;
+ }
+ else
+ {
+ //The name that we're looking for is greater than our current index
+ left = index;
+ }
+ }
+
+ //Walk the remaining elements (it'll be 4 or fewer, since left and right are both inclusive).
+ for (; left <= right; left++)
+ {
+ Debug.Assert(left < s_encodingNameIndices.Length - 1);
+ if (string.CompareOrdinal(invariantName, s_encodingNames.AsSpan(s_encodingNameIndices[left], s_encodingNameIndices[left + 1] - s_encodingNameIndices[left])) == 0)
+ {
+ return s_codePagesByName[left];
+ }
+ }
+
+ // The encoding name is not valid.
+ throw new ArgumentException(
+ SR.Format(SR.Argument_EncodingNotSupported, name),
+ nameof(name));
+ }
+
+ // Return a list of all EncodingInfo objects describing all of our encodings
+ internal static EncodingInfo[] GetEncodings()
+ {
+ EncodingInfo[] arrayEncodingInfo = new EncodingInfo[s_mappedCodePages.Length];
+
+ for (int i = 0; i < s_mappedCodePages.Length; i++)
+ {
+ arrayEncodingInfo[i] = new EncodingInfo(
+ s_mappedCodePages[i],
+ s_webNames.Substring(s_webNameIndices[i], s_webNameIndices[i + 1] - s_webNameIndices[i]),
+ GetDisplayName(s_mappedCodePages[i], i)
+ );
+ }
+
+ return arrayEncodingInfo;
+ }
+
+ internal static CodePageDataItem GetCodePageDataItem(int codePage)
+ {
+ if (s_codePageToCodePageData == null)
+ {
+ Interlocked.CompareExchange(ref s_codePageToCodePageData, new CodePageDataItem[s_mappedCodePages.Length], null);
+ }
+
+ // Keep in sync with s_mappedCodePages
+ int index;
+ switch (codePage)
+ {
+ case 1200: // utf-16
+ index = 0;
+ break;
+ case 1201: // utf-16be
+ index = 1;
+ break;
+ case 12000: // utf-32
+ index = 2;
+ break;
+ case 12001: // utf-32be
+ index = 3;
+ break;
+ case 20127: // us-ascii
+ index = 4;
+ break;
+ case 28591: // iso-8859-1
+ index = 5;
+ break;
+ case 65000: // utf-7
+ index = 6;
+ break;
+ case 65001: // utf-8
+ index = 7;
+ break;
+ default:
+ return null;
+ }
+
+ CodePageDataItem data = s_codePageToCodePageData[index];
+ if (data == null)
+ {
+ Interlocked.CompareExchange(ref s_codePageToCodePageData[index], InternalGetCodePageDataItem(codePage, index), null);
+ data = s_codePageToCodePageData[index];
+ }
+
+ return data;
+ }
+
+ private static CodePageDataItem InternalGetCodePageDataItem(int codePage, int index)
+ {
+ int uiFamilyCodePage = s_uiFamilyCodePages[index];
+ string webName = s_webNames.Substring(s_webNameIndices[index], s_webNameIndices[index + 1] - s_webNameIndices[index]);
+ // All supported code pages have identical header names, and body names.
+ string headerName = webName;
+ string bodyName = webName;
+ string displayName = GetDisplayName(codePage, index);
+ uint flags = s_flags[index];
+
+ return new CodePageDataItem(codePage, uiFamilyCodePage, webName, headerName, bodyName, displayName, flags);
+ }
+
+ private static string GetDisplayName(int codePage, int englishNameIndex)
+ {
+ string displayName = SR.GetResourceString("Globalization_cp_" + codePage.ToString());
+ if (string.IsNullOrEmpty(displayName))
+ displayName = s_englishNames.Substring(s_englishNameIndices[englishNameIndex], s_englishNameIndices[englishNameIndex + 1] - s_englishNameIndices[englishNameIndex]);
+
+ return displayName;
+ }
+ }
+}
diff --git a/src/System.Private.CoreLib/src/System/Text/CodePageDataItem.Unix.cs b/src/System.Private.CoreLib/src/System/Text/CodePageDataItem.Unix.cs
deleted file mode 100644
index 249457f0c1..0000000000
--- a/src/System.Private.CoreLib/src/System/Text/CodePageDataItem.Unix.cs
+++ /dev/null
@@ -1,68 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-// See the LICENSE file in the project root for more information.
-
-namespace System.Text
-{
- internal class CodePageDataItem
- {
- private readonly int _codePage;
- private readonly int _uiFamilyCodePage;
- private readonly string _webName;
- private readonly uint _flags;
- private string _displayNameResourceKey;
-
- internal CodePageDataItem(int codePage, int uiFamilyCodePage, string webName, uint flags)
- {
- _codePage = codePage;
- _uiFamilyCodePage = uiFamilyCodePage;
- _webName = webName;
- _flags = flags;
- }
-
- public int CodePage
- {
- get { return _codePage; }
- }
-
- public int UIFamilyCodePage
- {
- get { return _uiFamilyCodePage; }
- }
-
- public String WebName
- {
- get { return _webName; }
- }
-
- public String HeaderName
- {
- get { return _webName; } // all the code pages used on unix only have a single name
- }
-
- public String BodyName
- {
- get { return _webName; } // all the code pages used on unix only have a single name
- }
-
- public uint Flags
- {
- get { return _flags; }
- }
-
- // PAL ends here
-
- public string DisplayNameResourceKey
- {
- get
- {
- if (_displayNameResourceKey == null)
- {
- _displayNameResourceKey = "Globalization_cp_" + CodePage;
- }
-
- return _displayNameResourceKey;
- }
- }
- }
-}
diff --git a/src/System.Private.CoreLib/src/System/Text/CodePageDataItem.cs b/src/System.Private.CoreLib/src/System/Text/CodePageDataItem.cs
deleted file mode 100644
index 488537641a..0000000000
--- a/src/System.Private.CoreLib/src/System/Text/CodePageDataItem.cs
+++ /dev/null
@@ -1,123 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-// See the LICENSE file in the project root for more information.
-
-using System.Text;
-using System;
-using System.Security;
-
-namespace System.Text
-{
- //
- // Data item for EncodingTable. Along with EncodingTable, they are used by
- // System.Text.Encoding.
- //
- // This class stores a pointer to the internal data and the index into that data
- // where our required information is found. We load the code page, flags and uiFamilyCodePage
- // immediately because they don't require creating an object. Creating any of the string
- // names is delayed until somebody actually asks for them and the names are then cached.
-
- internal class CodePageDataItem
- {
- internal int m_dataIndex;
- internal int m_uiFamilyCodePage;
- internal string m_webName;
- internal string m_headerName;
- internal string m_bodyName;
- internal uint m_flags;
-
- internal unsafe CodePageDataItem(int dataIndex)
- {
- m_dataIndex = dataIndex;
- m_uiFamilyCodePage = EncodingTable.codePageDataPtr[dataIndex].uiFamilyCodePage;
- m_flags = EncodingTable.codePageDataPtr[dataIndex].flags;
- }
-
- internal static unsafe string CreateString(sbyte* pStrings, uint index)
- {
- if (pStrings[0] == '|') // |str1|str2|str3
- {
- int start = 1;
-
- for (int i = 1; true; i++)
- {
- sbyte ch = pStrings[i];
-
- if ((ch == '|') || (ch == 0))
- {
- if (index == 0)
- {
- return new string(pStrings, start, i - start);
- }
-
- index--;
- start = i + 1;
-
- if (ch == 0)
- {
- break;
- }
- }
- }
-
- throw new ArgumentException(null, nameof(pStrings));
- }
- else
- {
- return new string(pStrings);
- }
- }
-
- public unsafe string WebName
- {
- get
- {
- if (m_webName == null)
- {
- m_webName = CreateString(EncodingTable.codePageDataPtr[m_dataIndex].Names, 0);
- }
- return m_webName;
- }
- }
-
- public virtual int UIFamilyCodePage
- {
- get
- {
- return m_uiFamilyCodePage;
- }
- }
-
- public unsafe string HeaderName
- {
- get
- {
- if (m_headerName == null)
- {
- m_headerName = CreateString(EncodingTable.codePageDataPtr[m_dataIndex].Names, 1);
- }
- return m_headerName;
- }
- }
-
- public unsafe string BodyName
- {
- get
- {
- if (m_bodyName == null)
- {
- m_bodyName = CreateString(EncodingTable.codePageDataPtr[m_dataIndex].Names, 2);
- }
- return m_bodyName;
- }
- }
-
- public unsafe uint Flags
- {
- get
- {
- return (m_flags);
- }
- }
- }
-}
diff --git a/src/System.Private.CoreLib/src/System/Text/EncodingTable.Unix.cs b/src/System.Private.CoreLib/src/System/Text/EncodingTable.Unix.cs
deleted file mode 100644
index dda6d84a84..0000000000
--- a/src/System.Private.CoreLib/src/System/Text/EncodingTable.Unix.cs
+++ /dev/null
@@ -1,178 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-// See the LICENSE file in the project root for more information.
-
-using System.Collections.Generic;
-using System.Diagnostics;
-using System.Globalization;
-using System.Text;
-
-namespace System.Text
-{
- internal static class EncodingTable
- {
- // Return a list of all EncodingInfo objects describing all of our encodings
- internal static EncodingInfo[] GetEncodings()
- {
- EncodingInfo[] arrayEncodingInfo = new EncodingInfo[s_encodingDataTableItems.Length];
-
- for (int i = 0; i < s_encodingDataTableItems.Length; i++)
- {
- CodePageDataItem dataItem = s_encodingDataTableItems[i];
-
- arrayEncodingInfo[i] = new EncodingInfo(dataItem.CodePage, dataItem.WebName,
- SR.GetResourceString(dataItem.DisplayNameResourceKey));
- }
-
- return arrayEncodingInfo;
- }
-
- internal static int GetCodePageFromName(string name)
- {
- if (name == null)
- {
- throw new ArgumentNullException(nameof(name));
- }
-
- ushort codePage;
- if (s_encodingDataTable.TryGetValue(name, out codePage))
- {
- return codePage;
- }
-
- // The encoding name is not valid.
- throw new ArgumentException(
- string.Format(
- CultureInfo.CurrentCulture,
- SR.Argument_EncodingNotSupported, name), nameof(name));
- }
-
- internal static CodePageDataItem GetCodePageDataItem(int codepage)
- {
- CodePageDataItem item;
-
- switch (codepage)
- {
- case 1200:
- item = s_encodingDataTableItems[0];
- break;
- case 1201:
- item = s_encodingDataTableItems[1];
- break;
- case 12000:
- item = s_encodingDataTableItems[2];
- break;
- case 12001:
- item = s_encodingDataTableItems[3];
- break;
- case 20127:
- item = s_encodingDataTableItems[4];
- break;
- case 28591:
- item = s_encodingDataTableItems[5];
- break;
- case 65000:
- item = s_encodingDataTableItems[6];
- break;
- case 65001:
- item = s_encodingDataTableItems[7];
- break;
- default:
- item = null;
- break;
- }
-
- Debug.Assert(item == null || item.CodePage == codepage, "item.CodePage needs to equal the specified codepage");
- return item;
- }
-
- // PAL ends here.
-
-#if DEBUG
- static EncodingTable()
- {
- Debug.Assert(
- s_encodingDataTable.Count == EncodingTableCapacity,
- string.Format(CultureInfo.InvariantCulture,
- "EncodingTable s_encodingDataTable's initial capacity (EncodingTableCapacity) is incorrect.{0}Expected (s_encodingDataTable.Count): {1}, Actual (EncodingTableCapacity): {2}",
- Environment.NewLine,
- s_encodingDataTable.Count,
- EncodingTableCapacity));
- }
-#endif
-
- // NOTE: the following two lists were taken from ~\src\classlibnative\nls\encodingdata.cpp
- // and should be kept in sync with those lists
-
- private const int EncodingTableCapacity = 42;
- private readonly static Dictionary<string, ushort> s_encodingDataTable =
- new Dictionary<string, ushort>(EncodingTableCapacity, StringComparer.OrdinalIgnoreCase)
- {
- { "ANSI_X3.4-1968", 20127 },
- { "ANSI_X3.4-1986", 20127 },
- { "ascii", 20127 },
- { "cp367", 20127 },
- { "cp819", 28591 },
- { "csASCII", 20127 },
- { "csISOLatin1", 28591 },
- { "csUnicode11UTF7", 65000 },
- { "IBM367", 20127 },
- { "ibm819", 28591 },
- { "ISO-10646-UCS-2", 1200 },
- { "iso-8859-1", 28591 },
- { "iso-ir-100", 28591 },
- { "iso-ir-6", 20127 },
- { "ISO646-US", 20127 },
- { "iso8859-1", 28591 },
- { "ISO_646.irv:1991", 20127 },
- { "iso_8859-1", 28591 },
- { "iso_8859-1:1987", 28591 },
- { "l1", 28591 },
- { "latin1", 28591 },
- { "ucs-2", 1200 },
- { "unicode", 1200},
- { "unicode-1-1-utf-7", 65000 },
- { "unicode-1-1-utf-8", 65001 },
- { "unicode-2-0-utf-7", 65000 },
- { "unicode-2-0-utf-8", 65001 },
- // People get confused about the FFFE here. We can't change this because it'd break existing apps.
- // This has been this way for a long time, including in Mlang.
- // Big Endian, BOM seems backwards, think of the BOM in little endian order.
- { "unicodeFFFE", 1201},
- { "us", 20127 },
- { "us-ascii", 20127 },
- { "utf-16", 1200 },
- { "UTF-16BE", 1201},
- { "UTF-16LE", 1200},
- { "utf-32", 12000 },
- { "UTF-32BE", 12001 },
- { "UTF-32LE", 12000 },
- { "utf-7", 65000 },
- { "utf-8", 65001 },
- { "x-unicode-1-1-utf-7", 65000 },
- { "x-unicode-1-1-utf-8", 65001 },
- { "x-unicode-2-0-utf-7", 65000 },
- { "x-unicode-2-0-utf-8", 65001 },
- };
-
- // redeclaring these constants here for readability below
- private const uint MIMECONTF_MAILNEWS = Encoding.MIMECONTF_MAILNEWS;
- private const uint MIMECONTF_BROWSER = Encoding.MIMECONTF_BROWSER;
- private const uint MIMECONTF_SAVABLE_MAILNEWS = Encoding.MIMECONTF_SAVABLE_MAILNEWS;
- private const uint MIMECONTF_SAVABLE_BROWSER = Encoding.MIMECONTF_SAVABLE_BROWSER;
-
- // keep this array sorted by code page, so the order is consistent for GetEncodings()
- // Remember to update GetCodePageDataItem() if this list is updated
- private readonly static CodePageDataItem[] s_encodingDataTableItems = new[]
- {
- new CodePageDataItem(1200, 1200, "utf-16", MIMECONTF_SAVABLE_BROWSER), // "Unicode"
- new CodePageDataItem(1201, 1200, "utf-16BE", 0), // Big Endian, old FFFE BOM seems backwards, think of the BOM in little endian order.
- new CodePageDataItem(12000, 1200, "utf-32", 0), // "Unicode (UTF-32)"
- new CodePageDataItem(12001, 1200, "utf-32BE", 0), // "Unicode (UTF-32 Big Endian)"
- new CodePageDataItem(20127, 1252, "us-ascii", MIMECONTF_MAILNEWS | MIMECONTF_SAVABLE_MAILNEWS), // "US-ASCII"
- new CodePageDataItem(28591, 1252, "iso-8859-1", MIMECONTF_MAILNEWS | MIMECONTF_BROWSER | MIMECONTF_SAVABLE_MAILNEWS | MIMECONTF_SAVABLE_BROWSER), // "Western European (ISO)"
- new CodePageDataItem(65000, 1200, "utf-7", MIMECONTF_MAILNEWS | MIMECONTF_SAVABLE_MAILNEWS), // "Unicode (UTF-7)"
- new CodePageDataItem(65001, 1200, "utf-8", MIMECONTF_MAILNEWS | MIMECONTF_BROWSER | MIMECONTF_SAVABLE_MAILNEWS | MIMECONTF_SAVABLE_BROWSER), // "Unicode (UTF-8)"
- };
- }
-}
diff --git a/src/System.Private.CoreLib/src/System/Text/EncodingTable.cs b/src/System.Private.CoreLib/src/System/Text/EncodingTable.cs
deleted file mode 100644
index c1795ed5b0..0000000000
--- a/src/System.Private.CoreLib/src/System/Text/EncodingTable.cs
+++ /dev/null
@@ -1,250 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-// See the LICENSE file in the project root for more information.
-
-using System;
-using System.Text;
-using System.Collections;
-using System.Collections.Generic;
-using System.Globalization;
-using System.Runtime.CompilerServices;
-using System.Runtime.InteropServices;
-using System.Runtime.Versioning;
-using System.Security;
-using System.Threading;
-
-namespace System.Text
-{
- //
- // Data table for encoding classes. Used by System.Text.Encoding.
- // This class contains two hashtables to allow System.Text.Encoding
- // to retrieve the data item either by codepage value or by webName.
- //
-
- // Only statics, does not need to be marked with the serializable attribute
- internal static class EncodingTable
- {
- //This number is the size of the table in native. The value is retrieved by
- //calling the native GetNumEncodingItems().
- private static int lastEncodingItem = GetNumEncodingItems() - 1;
-
- //This number is the size of the code page table. Its generated when we walk the table the first time.
- private static volatile int lastCodePageItem;
-
- //
- // This points to a native data table which maps an encoding name to the correct code page.
- //
- internal static unsafe InternalEncodingDataItem* encodingDataPtr = GetEncodingData();
- //
- // This points to a native data table which stores the properties for the code page, and
- // the table is indexed by code page.
- //
- internal static unsafe InternalCodePageDataItem* codePageDataPtr = GetCodePageData();
- //
- // This caches the mapping of an encoding name to a code page.
- //
- private static Hashtable hashByName = Hashtable.Synchronized(new Hashtable(StringComparer.OrdinalIgnoreCase));
- //
- // THe caches the data item which is indexed by the code page value.
- //
- private static Hashtable hashByCodePage = Hashtable.Synchronized(new Hashtable());
-
- // Find the data item by binary searching the table that we have in native.
- // nativeCompareOrdinalWC is an internal-only function.
- private static unsafe int internalGetCodePageFromName(string name)
- {
- int left = 0;
- int right = lastEncodingItem;
- int index;
- int result;
-
- //Binary search the array until we have only a couple of elements left and then
- //just walk those elements.
- while ((right - left) > 3)
- {
- index = ((right - left) / 2) + left;
-
- result = nativeCompareOrdinalIgnoreCaseWC(name, encodingDataPtr[index].webName);
-
- if (result == 0)
- {
- //We found the item, return the associated codepage.
- return (encodingDataPtr[index].codePage);
- }
- else if (result < 0)
- {
- //The name that we're looking for is less than our current index.
- right = index;
- }
- else
- {
- //The name that we're looking for is greater than our current index
- left = index;
- }
- }
-
- //Walk the remaining elements (it'll be 3 or fewer).
- for (; left <= right; left++)
- {
- if (nativeCompareOrdinalIgnoreCaseWC(name, encodingDataPtr[left].webName) == 0)
- {
- return (encodingDataPtr[left].codePage);
- }
- }
- // The encoding name is not valid.
- throw new ArgumentException(
- string.Format(
- CultureInfo.CurrentCulture,
- SR.Argument_EncodingNotSupported, name), nameof(name));
- }
-
- // Return a list of all EncodingInfo objects describing all of our encodings
- internal static unsafe EncodingInfo[] GetEncodings()
- {
- if (lastCodePageItem == 0)
- {
- int count;
- for (count = 0; codePageDataPtr[count].codePage != 0; count++)
- {
- // Count them
- }
- lastCodePageItem = count;
- }
-
- EncodingInfo[] arrayEncodingInfo = new EncodingInfo[lastCodePageItem];
-
- int i;
- for (i = 0; i < lastCodePageItem; i++)
- {
- arrayEncodingInfo[i] = new EncodingInfo(codePageDataPtr[i].codePage, CodePageDataItem.CreateString(codePageDataPtr[i].Names, 0),
- SR.GetResourceString("Globalization_cp_" + codePageDataPtr[i].codePage));
- }
-
- return arrayEncodingInfo;
- }
-
- /*=================================GetCodePageFromName==========================
- **Action: Given a encoding name, return the correct code page number for this encoding.
- **Returns: The code page for the encoding.
- **Arguments:
- ** name the name of the encoding
- **Exceptions:
- ** ArgumentNullException if name is null.
- ** internalGetCodePageFromName will throw ArgumentException if name is not a valid encoding name.
- ============================================================================*/
-
- internal static int GetCodePageFromName(string name)
- {
- if (name == null)
- {
- throw new ArgumentNullException(nameof(name));
- }
-
- object codePageObj;
-
- //
- // The name is case-insensitive, but ToLower isn't free. Check for
- // the code page in the given capitalization first.
- //
- codePageObj = hashByName[name];
-
- if (codePageObj != null)
- {
- return ((int)codePageObj);
- }
-
- //Okay, we didn't find it in the hash table, try looking it up in the
- //unmanaged data.
- int codePage = internalGetCodePageFromName(name);
-
- hashByName[name] = codePage;
-
- return codePage;
- }
-
- internal static unsafe CodePageDataItem GetCodePageDataItem(int codepage)
- {
- CodePageDataItem dataItem;
-
- // We synchronize around dictionary gets/sets. There's still a possibility that two threads
- // will create a CodePageDataItem and the second will clobber the first in the dictionary.
- // However, that's acceptable because the contents are correct and we make no guarantees
- // other than that.
-
- //Look up the item in the hashtable.
- dataItem = (CodePageDataItem)hashByCodePage[codepage];
-
- //If we found it, return it.
- if (dataItem != null)
- {
- return dataItem;
- }
-
-
- //If we didn't find it, try looking it up now.
- //If we find it, add it to the hashtable.
- //This is a linear search, but we probably won't be doing it very often.
- //
- int i = 0;
- int data;
- while ((data = codePageDataPtr[i].codePage) != 0)
- {
- if (data == codepage)
- {
- dataItem = new CodePageDataItem(i);
- hashByCodePage[codepage] = dataItem;
- return (dataItem);
- }
- i++;
- }
-
- //Nope, we didn't find it.
- return null;
- }
-
- [MethodImplAttribute(MethodImplOptions.InternalCall)]
- private static extern unsafe InternalEncodingDataItem* GetEncodingData();
-
- //
- // Return the number of encoding data items.
- //
- [MethodImplAttribute(MethodImplOptions.InternalCall)]
- private static extern int GetNumEncodingItems();
-
- [MethodImplAttribute(MethodImplOptions.InternalCall)]
- private static extern unsafe InternalCodePageDataItem* GetCodePageData();
-
- //This will not work in case-insensitive mode for any character greater than 0x7F.
- //We'll throw an ArgumentException.
- [MethodImplAttribute(MethodImplOptions.InternalCall)]
- private static extern unsafe int nativeCompareOrdinalIgnoreCaseWC(string strA, sbyte* strBBytes);
- }
-
- /*=================================InternalEncodingDataItem==========================
- **Action: This is used to map a encoding name to a correct code page number. By doing this,
- ** we can get the properties of this encoding via the InternalCodePageDataItem.
- **
- ** We use this structure to access native data exposed by the native side.
- ============================================================================*/
-
- [System.Runtime.InteropServices.StructLayout(LayoutKind.Sequential)]
- internal unsafe struct InternalEncodingDataItem
- {
- internal sbyte* webName;
- internal ushort codePage;
- }
-
- /*=================================InternalCodePageDataItem==========================
- **Action: This is used to access the properties related to a code page.
- ** We use this structure to access native data exposed by the native side.
- ============================================================================*/
-
- [System.Runtime.InteropServices.StructLayout(LayoutKind.Sequential)]
- internal unsafe struct InternalCodePageDataItem
- {
- internal ushort codePage;
- internal ushort uiFamilyCodePage;
- internal uint flags;
- internal sbyte* Names;
- }
-}
diff --git a/src/classlibnative/CMakeLists.txt b/src/classlibnative/CMakeLists.txt
index 1c8d82566f..aeaba89b82 100644
--- a/src/classlibnative/CMakeLists.txt
+++ b/src/classlibnative/CMakeLists.txt
@@ -6,7 +6,3 @@ include_directories("../debug/inc/dump")
add_subdirectory(bcltype)
add_subdirectory(float)
-
-if(WIN32)
- add_subdirectory(nls)
-endif(WIN32)
diff --git a/src/classlibnative/bcltype/stringnative.cpp b/src/classlibnative/bcltype/stringnative.cpp
index f93c813b26..9462dd2a89 100644
--- a/src/classlibnative/bcltype/stringnative.cpp
+++ b/src/classlibnative/bcltype/stringnative.cpp
@@ -77,37 +77,6 @@ FCIMPL1(FC_BOOL_RET, COMString::IsAscii, StringObject* thisRef) {
FCIMPLEND
-
-//This function relies on the fact that we put a terminating null on the end of
-//all managed strings.
-FCIMPL2(INT32, COMString::FCCompareOrdinalIgnoreCaseWC, StringObject* strA, __in_z INT8 *strBChars) {
- FCALL_CONTRACT;
-
- VALIDATEOBJECT(strA);
- WCHAR *strAChars;
- WCHAR *strAStart;
- INT32 aLength;
- INT32 ret;
-
- _ASSERT(strA != NULL && strBChars != NULL);
-
- //Get our data.
- strA->RefInterpretGetStringValuesDangerousForGC((WCHAR **) &strAChars, &aLength);
-
- //Record the start pointer for some comparisons at the end.
- strAStart = strAChars;
-
- if (!StringObject::CaseInsensitiveCompHelper(strAChars, strBChars, aLength, -1, &ret)) {
- //This will happen if we have characters greater than 0x7F. This indicates that the function failed.
- // We don't throw an exception here. You can look at the success value returned to do something meaningful.
- ret = 1;
- }
-
- FC_GC_POLL_RET();
- return ret;
-}
-FCIMPLEND
-
/*==================================GETCHARAT===================================
**Returns the character at position index. Thows IndexOutOfRangeException as
**appropriate.
diff --git a/src/classlibnative/bcltype/stringnative.h b/src/classlibnative/bcltype/stringnative.h
index 86397436a3..bb3c3a803b 100644
--- a/src/classlibnative/bcltype/stringnative.h
+++ b/src/classlibnative/bcltype/stringnative.h
@@ -47,8 +47,6 @@ public:
static FCDECL1(FC_BOOL_RET, IsFastSort, StringObject* pThisRef);
static FCDECL1(FC_BOOL_RET, IsAscii, StringObject* pThisRef);
- static FCDECL2(INT32, FCCompareOrdinalIgnoreCaseWC, StringObject* strA, __in_z INT8 *strB);
-
static FCDECL6(INT32, CompareOrdinalEx, StringObject* strA, INT32 indexA, INT32 countA, StringObject* strB, INT32 indexB, INT32 countB);
static FCDECL2(FC_CHAR_RET, GetCharAt, StringObject* pThisRef, INT32 index);
diff --git a/src/classlibnative/inc/nlsinfo.h b/src/classlibnative/inc/nlsinfo.h
deleted file mode 100644
index ec06913f92..0000000000
--- a/src/classlibnative/inc/nlsinfo.h
+++ /dev/null
@@ -1,75 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-// See the LICENSE file in the project root for more information.
-////////////////////////////////////////////////////////////////////////////
-//
-// Class: NLSInfo
-//
-
-//
-// Purpose: This module defines the methods of the COMNlsInfo
-// class. These methods are the helper functions for the
-// managed NLS+ classes.
-//
-// Date: August 12, 1998
-//
-////////////////////////////////////////////////////////////////////////////
-
-#ifndef _NLSINFO_H_
-#define _NLSINFO_H_
-
-//
-//This structure must map 1-for-1 with the InternalDataItem structure in
-//System.Globalization.EncodingTable.
-//
-struct EncodingDataItem {
- const char * webName;
- unsigned short codePage;
- // free space here
-};
-
-//
-//This structure must map 1-for-1 with the InternalCodePageDataItem structure in
-//System.Globalization.EncodingTable.
-//
-struct CodePageDataItem {
- unsigned short codePage;
- unsigned short uiFamilyCodePage;
- DWORD dwFlags; // only 4-bit used now
- const char * names;
-};
-
-class COMNlsInfo
-{
-public:
-
- //
- // Native helper functions for methods in DateTimeFormatInfo
- //
- static FCDECL1(FC_BOOL_RET, nativeSetThreadLocale, StringObject* localeNameUNSAFE);
-
- //
- // Native helper functions for CultureData
- //
-
- static INT32 QCALLTYPE InternalGetGlobalizedHashCode(INT_PTR handle, LPCWSTR localeName, LPCWSTR pString, INT32 length, INT32 dwFlagsIn);
-
- //
- // Native helper function for methods in EncodingTable
- //
- static FCDECL0(INT32, nativeGetNumEncodingItems);
- static FCDECL0(EncodingDataItem *, nativeGetEncodingTableDataPointer);
- static FCDECL0(CodePageDataItem *, nativeGetCodePageTableDataPointer);
-
-private:
- //
- // Internal encoding data tables.
- //
- const static int m_nEncodingDataTableItems;
- const static EncodingDataItem EncodingDataTable[];
-
- const static int m_nCodePageTableItems;
- const static CodePageDataItem CodePageDataTable[];
-};
-
-#endif // _NLSINFO_H_ \ No newline at end of file
diff --git a/src/classlibnative/nls/CMakeLists.txt b/src/classlibnative/nls/CMakeLists.txt
deleted file mode 100644
index d6451b96b7..0000000000
--- a/src/classlibnative/nls/CMakeLists.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-set( COMMLS_WKS_SOURCES
- encodingdata.cpp
- nlsinfo.cpp
-)
-
-add_library_clr(comnls_wks ${COMMLS_WKS_SOURCES})
diff --git a/src/classlibnative/nls/encodingdata.cpp b/src/classlibnative/nls/encodingdata.cpp
deleted file mode 100644
index c6021256be..0000000000
--- a/src/classlibnative/nls/encodingdata.cpp
+++ /dev/null
@@ -1,103 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-// See the LICENSE file in the project root for more information.
-
-#include "common.h"
-
-#include <mlang.h>
-
-#include "nlsinfo.h"
-
-//
-// Encoding data tables
-//
-
-//
-// Index an encoding name into an codepage in CodePageDataTable.
-//
-// Please KEEP this table SORTED ALPHABETICALLY! We do a binary search on this array.
-const EncodingDataItem COMNlsInfo::EncodingDataTable[] = {
- // encoding name, codepage.
- {"ANSI_X3.4-1968", 20127 },
- {"ANSI_X3.4-1986", 20127 },
- {"ascii", 20127 },
- {"cp367", 20127 },
- {"cp819", 28591 },
- {"csASCII", 20127 },
- {"csISOLatin1", 28591 },
- {"csUnicode11UTF7", 65000 },
- {"IBM367", 20127 },
- {"ibm819", 28591 },
- {"ISO-10646-UCS-2", 1200 },
- {"iso-8859-1", 28591 },
- {"iso-ir-100", 28591 },
- {"iso-ir-6", 20127 },
- {"ISO646-US", 20127 },
- {"iso8859-1", 28591 },
- {"ISO_646.irv:1991", 20127 },
- {"iso_8859-1", 28591 },
- {"iso_8859-1:1987", 28591 },
- {"l1", 28591 },
- {"latin1", 28591 },
- {"ucs-2", 1200 },
- {"unicode", 1200},
- {"unicode-1-1-utf-7", 65000 },
- {"unicode-1-1-utf-8", 65001 },
- {"unicode-2-0-utf-7", 65000 },
- {"unicode-2-0-utf-8", 65001 },
- // People get confused about the FFFE here. We can't change this because it'd break existing apps.
- // This has been this way for a long time, including in Mlang.
- {"unicodeFFFE", 1201}, // Big Endian, BOM seems backwards, think of the BOM in little endian order.
- {"us", 20127 },
- {"us-ascii", 20127 },
- {"utf-16", 1200 },
- {"UTF-16BE", 1201},
- {"UTF-16LE", 1200},
- {"utf-32", 12000 },
- {"UTF-32BE", 12001 },
- {"UTF-32LE", 12000 },
- {"utf-7", 65000 },
- {"utf-8", 65001 },
- {"x-unicode-1-1-utf-7", 65000 },
- {"x-unicode-1-1-utf-8", 65001 },
- {"x-unicode-2-0-utf-7", 65000 },
- {"x-unicode-2-0-utf-8", 65001 },
-
-};
-
-const int COMNlsInfo::m_nEncodingDataTableItems =
- sizeof(COMNlsInfo::EncodingDataTable)/sizeof(EncodingDataItem);
-
-// Working set optimization:
-// 1. code page, family code page stored as unsigned short
-// 2. if web/header/body names are the same, only web name is stored; otherwise, we store "|webname|headername|bodyname"
-// 3. Move flags before names to fill gap on 64-bit platforms
-
-#define MapCodePageDataItem(cp, fcp, names, flags) { cp, fcp, flags, names }
-//
-// Information about codepages.
-//
-const CodePageDataItem COMNlsInfo::CodePageDataTable[] = {
-
-
-// Total Items:
-// code page, family code page, web name, header name, body name, flags
-
- MapCodePageDataItem( 1200, 1200, "utf-16", MIMECONTF_SAVABLE_BROWSER), // "Unicode"
- MapCodePageDataItem( 1201, 1200, "utf-16BE", 0), // Big Endian, old FFFE BOM seems backwards, think of the BOM in little endian order.
- MapCodePageDataItem( 12000, 1200, "utf-32", 0), // "Unicode (UTF-32)"
- MapCodePageDataItem( 12001, 1200, "utf-32BE", 0), // "Unicode (UTF-32 Big Endian)"
- MapCodePageDataItem( 20127, 1252, "us-ascii", MIMECONTF_MAILNEWS | MIMECONTF_SAVABLE_MAILNEWS), // "US-ASCII"
- MapCodePageDataItem( 28591, 1252, "iso-8859-1", MIMECONTF_MAILNEWS | MIMECONTF_BROWSER | MIMECONTF_SAVABLE_MAILNEWS | MIMECONTF_SAVABLE_BROWSER), // "Western European (ISO)"
- MapCodePageDataItem( 65000, 1200, "utf-7", MIMECONTF_MAILNEWS | MIMECONTF_SAVABLE_MAILNEWS), // "Unicode (UTF-7)"
- MapCodePageDataItem( 65001, 1200, "utf-8", MIMECONTF_MAILNEWS | MIMECONTF_BROWSER | MIMECONTF_SAVABLE_MAILNEWS | MIMECONTF_SAVABLE_BROWSER), // "Unicode (UTF-8)"
-
-
- // End of data.
- MapCodePageDataItem( 0, 0, NULL, 0),
-
-};
-
-const int COMNlsInfo::m_nCodePageTableItems =
- sizeof(COMNlsInfo::CodePageDataTable)/sizeof(CodePageDataItem);
-
diff --git a/src/classlibnative/nls/nlsinfo.cpp b/src/classlibnative/nls/nlsinfo.cpp
deleted file mode 100644
index 7699b4a231..0000000000
--- a/src/classlibnative/nls/nlsinfo.cpp
+++ /dev/null
@@ -1,76 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-// See the LICENSE file in the project root for more information.
-////////////////////////////////////////////////////////////////////////////
-//
-// Class: NLSInfo
-//
-
-//
-// Purpose: This module implements the methods of the COMNlsInfo
-// class. These methods are the helper functions for the
-// Locale class.
-//
-// Date: August 12, 1998
-//
-////////////////////////////////////////////////////////////////////////////
-
-//
-// Include Files.
-//
-#include "common.h"
-#include "object.h"
-#include "excep.h"
-#include "vars.hpp"
-#include "interoputil.h"
-#include "corhost.h"
-
-#include <winnls.h>
-
-#include "utilcode.h"
-#include "frames.h"
-#include "field.h"
-#include "metasig.h"
-#include "nls.h"
-#include "nlsinfo.h"
-
-/**
- * This function returns a pointer to this table that we use in System.Globalization.EncodingTable.
- * No error checking of any sort is performed. Range checking is entirely the responsibility of the managed
- * code.
- */
-FCIMPL0(EncodingDataItem *, COMNlsInfo::nativeGetEncodingTableDataPointer)
-{
- LIMITED_METHOD_CONTRACT;
- STATIC_CONTRACT_SO_TOLERANT;
-
- return (EncodingDataItem *)EncodingDataTable;
-}
-FCIMPLEND
-
-/**
- * This function returns a pointer to this table that we use in System.Globalization.EncodingTable.
- * No error checking of any sort is performed. Range checking is entirely the responsibility of the managed
- * code.
- */
-FCIMPL0(CodePageDataItem *, COMNlsInfo::nativeGetCodePageTableDataPointer)
-{
- LIMITED_METHOD_CONTRACT;
-
- STATIC_CONTRACT_SO_TOLERANT;
-
- return ((CodePageDataItem*) CodePageDataTable);
-}
-FCIMPLEND
-
-/**
- * This function returns the number of items in EncodingDataTable.
- */
-FCIMPL0(INT32, COMNlsInfo::nativeGetNumEncodingItems)
-{
- LIMITED_METHOD_CONTRACT;
- STATIC_CONTRACT_SO_TOLERANT;
-
- return (m_nEncodingDataTableItems);
-}
-FCIMPLEND
diff --git a/src/dlls/mscoree/coreclr/CMakeLists.txt b/src/dlls/mscoree/coreclr/CMakeLists.txt
index 1917ad39c7..2619cba6cc 100644
--- a/src/dlls/mscoree/coreclr/CMakeLists.txt
+++ b/src/dlls/mscoree/coreclr/CMakeLists.txt
@@ -105,7 +105,6 @@ if(WIN32)
${STATIC_MT_CRT_LIB}
${STATIC_MT_VCRT_LIB}
mdwinmd_wks
- comnls_wks
kernel32.lib
advapi32.lib
ole32.lib
diff --git a/src/vm/ceemain.cpp b/src/vm/ceemain.cpp
index 159c91b601..c7b330c72b 100644
--- a/src/vm/ceemain.cpp
+++ b/src/vm/ceemain.cpp
@@ -153,9 +153,6 @@
#include "apithreadstress.h"
#include "perflog.h"
#include "../dlls/mscorrc/resource.h"
-#ifdef FEATURE_USE_LCID
-#include "nlsinfo.h"
-#endif
#include "util.hpp"
#include "shimload.h"
#include "comthreadpool.h"
diff --git a/src/vm/ecalllist.h b/src/vm/ecalllist.h
index 0217052319..05ced0fc68 100644
--- a/src/vm/ecalllist.h
+++ b/src/vm/ecalllist.h
@@ -743,15 +743,6 @@ FCFuncStart(gClrConfig)
QCFuncElement("GetConfigBoolValue", ClrConfigNative::GetConfigBoolValue)
FCFuncEnd()
-#if !defined(FEATURE_COREFX_GLOBALIZATION)
-FCFuncStart(gEncodingTableFuncs)
- FCFuncElement("GetNumEncodingItems", COMNlsInfo::nativeGetNumEncodingItems)
- FCFuncElement("GetEncodingData", COMNlsInfo::nativeGetEncodingTableDataPointer)
- FCFuncElement("GetCodePageData", COMNlsInfo::nativeGetCodePageTableDataPointer)
- FCFuncElement("nativeCompareOrdinalIgnoreCaseWC", COMString::FCCompareOrdinalIgnoreCaseWC)
-FCFuncEnd()
-#endif // !defined(FEATURE_COREFX_GLOBALIZATION)
-
FCFuncStart(gArrayFuncs)
FCFuncElement("get_Rank", ArrayNative::GetRank)
FCFuncElement("GetLowerBound", ArrayNative::GetLowerBound)
@@ -1245,9 +1236,6 @@ FCClassElement("Debugger", "System.Diagnostics", gDiagnosticsDebugger)
FCClassElement("DefaultBinder", "System", gCOMDefaultBinderFuncs)
FCClassElement("Delegate", "System", gDelegateFuncs)
FCClassElement("DependentHandle", "System.Runtime.CompilerServices", gDependentHandleFuncs)
-#if !defined(FEATURE_COREFX_GLOBALIZATION)
-FCClassElement("EncodingTable", "System.Text", gEncodingTableFuncs)
-#endif // !defined(FEATURE_COREFX_GLOBALIZATION)
FCClassElement("Enum", "System", gEnumFuncs)
FCClassElement("Environment", "System", gEnvironmentFuncs)
#if defined(FEATURE_PERFTRACING)
diff --git a/src/vm/mscorlib.cpp b/src/vm/mscorlib.cpp
index ca7f6b6bb5..4fa94499db 100644
--- a/src/vm/mscorlib.cpp
+++ b/src/vm/mscorlib.cpp
@@ -32,7 +32,6 @@
#include "comdynamic.h"
#include "excep.h"
#include "fcall.h"
-#include "nlsinfo.h"
#include "clrconfignative.h"
#include "commodule.h"
#include "marshalnative.h"