1 files changed, 125 insertions, 235 deletions
diff --git a/src/mscorlib/src/System/Globalization/CharUnicodeInfo.cs b/src/mscorlib/src/System/Globalization/CharUnicodeInfo.cs
index 2822b418ef..8e3bb47424 100644
--- a/src/mscorlib/src/System/Globalization/CharUnicodeInfo.cs
+++ b/src/mscorlib/src/System/Globalization/CharUnicodeInfo.cs
@@ -12,22 +12,12 @@
 //
 ////////////////////////////////////////////////////////////////////////////
 
-namespace System.Globalization {
+using System.Diagnostics;
+using System.Diagnostics.Contracts;
 
-    //This class has only static members and therefore doesn't need to be serialized.
-
-    using System;
-    using System.Threading;
-    using System.Runtime.InteropServices;
-    using System.Runtime.CompilerServices;
-    using System.Runtime.Versioning;
-    using System.Reflection;
-    using System.Security;
-    using System.Diagnostics;
-    using System.Diagnostics.Contracts;
-
-
-    public static class CharUnicodeInfo
+namespace System.Globalization
+{
+    public static partial class CharUnicodeInfo
     {
         //--------------------------------------------------------------------//
         //                        Internal Information                        //
@@ -36,95 +26,18 @@ namespace System.Globalization {
         //
         // Native methods to access the Unicode category data tables in charinfo.nlp.
         //
-        internal const char  HIGH_SURROGATE_START  = '\ud800';
-        internal const char  HIGH_SURROGATE_END    = '\udbff';
-        internal const char  LOW_SURROGATE_START   = '\udc00';
-        internal const char  LOW_SURROGATE_END     = '\udfff';
+        internal const char HIGH_SURROGATE_START = '\ud800';
+        internal const char HIGH_SURROGATE_END = '\udbff';
+        internal const char LOW_SURROGATE_START = '\udc00';
+        internal const char LOW_SURROGATE_END = '\udfff';
 
         internal const int UNICODE_CATEGORY_OFFSET = 0;
         internal const int BIDI_CATEGORY_OFFSET = 1;
 
-        static bool s_initialized = InitTable();
-
-        // The native pointer to the 12:4:4 index table of the Unicode cateogry data.
-        unsafe static ushort* s_pCategoryLevel1Index;
-        unsafe static byte* s_pCategoriesValue;
-
-        // The native pointer to the 12:4:4 index table of the Unicode numeric data.
-        // The value of this index table is an index into the real value table stored in s_pNumericValues.
-        unsafe static ushort* s_pNumericLevel1Index;
-
-        // The numeric value table, which is indexed by s_pNumericLevel1Index.
-        // Every item contains the value for numeric value.
-        // unsafe static double* s_pNumericValues;
-        // To get around the IA64 alignment issue.  Our double data is aligned in 8-byte boundary, but loader loads the embeded table starting
-        // at 4-byte boundary.  This cause a alignment issue since double is 8-byte.
-        unsafe static byte* s_pNumericValues;
-
-        // The digit value table, which is indexed by s_pNumericLevel1Index.  It shares the same indice as s_pNumericValues.
-        // Every item contains the value for decimal digit/digit value.
-        unsafe static DigitValues* s_pDigitValues;
-
-        internal const String UNICODE_INFO_FILE_NAME = "charinfo.nlp";
         // The starting codepoint for Unicode plane 1.  Plane 1 contains 0x010000 ~ 0x01ffff.
         internal const int UNICODE_PLANE01_START = 0x10000;
 
 
-        //
-        // This is the header for the native data table that we load from UNICODE_INFO_FILE_NAME.
-        //
-        // Excplicit layout is used here since a syntax like char[16] can not be used in sequential layout.
-        [StructLayout(LayoutKind.Explicit)]
-        internal unsafe struct UnicodeDataHeader {
-            [FieldOffset(0)]
-            internal char TableName;    // WCHAR[16]
-            [FieldOffset(0x20)]
-            internal ushort version;    // WORD[4]
-            [FieldOffset(0x28)]
-            internal uint OffsetToCategoriesIndex; // DWORD
-            [FieldOffset(0x2c)]
-            internal uint OffsetToCategoriesValue; // DWORD
-            [FieldOffset(0x30)]
-            internal uint OffsetToNumbericIndex; // DWORD
-            [FieldOffset(0x34)]
-            internal uint OffsetToDigitValue; // DWORD
-            [FieldOffset(0x38)]
-            internal uint OffsetToNumbericValue; // DWORD
-
-        }
-
-        // NOTE: It's important to specify pack size here, since the size of the structure is 2 bytes.  Otherwise,
-        // the default pack size will be 4.
-
-        [StructLayout(LayoutKind.Sequential, Pack=2)]
-        internal struct DigitValues {
-            internal sbyte decimalDigit;
-            internal sbyte digit;
-        }
-
-
-        //We need to allocate the underlying table that provides us with the information that we
-        //use.  We allocate this once in the class initializer and then we don't need to worry
-        //about it again.
-        //
-        unsafe static bool InitTable() {
-
-            // Go to native side and get pointer to the native table
-            byte * pDataTable = GlobalizationAssembly.GetGlobalizationResourceBytePtr(typeof(CharUnicodeInfo).Assembly, UNICODE_INFO_FILE_NAME);
-
-            UnicodeDataHeader* mainHeader = (UnicodeDataHeader*)pDataTable;
-
-            // Set up the native pointer to different part of the tables.
-            s_pCategoryLevel1Index = (ushort*) (pDataTable + mainHeader->OffsetToCategoriesIndex);
-            s_pCategoriesValue = (byte*) (pDataTable + mainHeader->OffsetToCategoriesValue);
-            s_pNumericLevel1Index = (ushort*) (pDataTable + mainHeader->OffsetToNumbericIndex);
-            s_pNumericValues = (byte*) (pDataTable + mainHeader->OffsetToNumbericValue);
-            s_pDigitValues = (DigitValues*) (pDataTable + mainHeader->OffsetToDigitValue);
-
-            return true;
-        }
-
-
         ////////////////////////////////////////////////////////////////////////
         //
         // Actions:
@@ -137,14 +50,18 @@ namespace System.Globalization {
         //
         ////////////////////////////////////////////////////////////////////////
 
-        internal static int InternalConvertToUtf32(String s, int index) {
+        internal static int InternalConvertToUtf32(String s, int index)
+        {
             Debug.Assert(s != null, "s != null");
             Debug.Assert(index >= 0 && index < s.Length, "index < s.Length");
-            if (index < s.Length - 1) {
+            if (index < s.Length - 1)
+            {
                 int temp1 = (int)s[index] - HIGH_SURROGATE_START;
-                if (temp1 >= 0 && temp1 <= 0x3ff) {
-                    int temp2 = (int)s[index+1] - LOW_SURROGATE_START;
-                    if (temp2 >= 0 && temp2 <= 0x3ff) {
+                if (temp1 >= 0 && temp1 <= 0x3ff)
+                {
+                    int temp2 = (int)s[index + 1] - LOW_SURROGATE_START;
+                    if (temp2 >= 0 && temp2 <= 0x3ff)
+                    {
                         // Convert the surrogate to UTF32 and get the result.
                         return ((temp1 * 0x400) + temp2 + UNICODE_PLANE01_START);
                     }
@@ -152,7 +69,6 @@ namespace System.Globalization {
             }
             return ((int)s[index]);
         }
-
         ////////////////////////////////////////////////////////////////////////
         //
         // Convert a character or a surrogate pair starting at index of string s
@@ -175,16 +91,20 @@ namespace System.Globalization {
         //
         ////////////////////////////////////////////////////////////////////////
 
-        internal static int InternalConvertToUtf32(String s, int index, out int charLength) {
+        internal static int InternalConvertToUtf32(String s, int index, out int charLength)
+        {
             Debug.Assert(s != null, "s != null");
             Debug.Assert(s.Length > 0, "s.Length > 0");
             Debug.Assert(index >= 0 && index < s.Length, "index >= 0 && index < s.Length");
             charLength = 1;
-            if (index < s.Length - 1) {
+            if (index < s.Length - 1)
+            {
                 int temp1 = (int)s[index] - HIGH_SURROGATE_START;
-                if (temp1 >= 0 && temp1 <= 0x3ff) {
-                    int temp2 = (int)s[index+1] - LOW_SURROGATE_START;
-                    if (temp2 >= 0 && temp2 <= 0x3ff) {
+                if (temp1 >= 0 && temp1 <= 0x3ff)
+                {
+                    int temp2 = (int)s[index + 1] - LOW_SURROGATE_START;
+                    if (temp2 >= 0 && temp2 <= 0x3ff)
+                    {
                         // Convert the surrogate to UTF32 and get the result.
                         charLength++;
                         return ((temp1 * 0x400) + temp2 + UNICODE_PLANE01_START);
@@ -210,7 +130,8 @@ namespace System.Globalization {
             UnicodeCategory uc = GetUnicodeCategory(s, index);
             // In Unicode 3.0, U+2028 is the only character which is under the category "LineSeparator".
             // And U+2029 is th eonly character which is under the category "ParagraphSeparator".
-            switch (uc) {
+            switch (uc)
+            {
                 case (UnicodeCategory.SpaceSeparator):
                 case (UnicodeCategory.LineSeparator):
                 case (UnicodeCategory.ParagraphSeparator):
@@ -225,7 +146,8 @@ namespace System.Globalization {
             UnicodeCategory uc = GetUnicodeCategory(c);
             // In Unicode 3.0, U+2028 is the only character which is under the category "LineSeparator".
             // And U+2029 is th eonly character which is under the category "ParagraphSeparator".
-            switch (uc) {
+            switch (uc)
+            {
                 case (UnicodeCategory.SpaceSeparator):
                 case (UnicodeCategory.LineSeparator):
                 case (UnicodeCategory.ParagraphSeparator):
@@ -235,12 +157,14 @@ namespace System.Globalization {
             return (false);
         }
 
+
         //
         // This is called by the public char and string, index versions
         //
         // Note that for ch in the range D800-DFFF we just treat it as any other non-numeric character
         //
-        internal unsafe static double InternalGetNumericValue(int ch) {
+        internal static unsafe double InternalGetNumericValue(int ch)
+        {
             Debug.Assert(ch >= 0 && ch <= 0x10ffff, "ch is not in valid Unicode range.");
             // Get the level 2 item from the highest 12 bit (8 - 19) of ch.
             ushort index = s_pNumericLevel1Index[ch >> 8];
@@ -248,52 +172,34 @@ namespace System.Globalization {
             // The offset is referred to an float item in m_pNumericFloatData.
             // Note that & has the lower precedence than addition, so don't forget the parathesis.
             index = s_pNumericLevel1Index[index + ((ch >> 4) & 0x000f)];
-            byte* pBytePtr = (byte*)&(s_pNumericLevel1Index[index]);
-            // Get the result from the 0 -3 bit of ch.
-#if BIT64
-            // To get around the IA64 alignment issue.  Our double data is aligned in 8-byte boundary, but loader loads the embeded table starting
-            // at 4-byte boundary.  This cause a alignment issue since double is 8-byte.
-            byte* pSourcePtr = &(s_pNumericValues[pBytePtr[(ch & 0x000f)] * sizeof(double)]);
-            if (((long)pSourcePtr % 8) != 0) {
-                // We are not aligned in 8-byte boundary.  Do a copy.
-                double ret;
-                byte* retPtr = (byte*)&ret;
-                Buffer.Memcpy(retPtr, pSourcePtr, sizeof(double));
-                return (ret);
+
+            fixed (ushort* pUshortPtr = &(s_pNumericLevel1Index[index]))
+            {
+                byte* pBytePtr = (byte*)pUshortPtr;
+                fixed (byte* pByteNum = s_pNumericValues)
+                {
+                    double* pDouble = (double*)pByteNum;
+                    return pDouble[pBytePtr[(ch & 0x000f)]];
+                }
             }
-            return (((double*)s_pNumericValues)[pBytePtr[(ch & 0x000f)]]);
-#else
-            return (((double*)s_pNumericValues)[pBytePtr[(ch & 0x000f)]]);
-#endif
         }
 
-        //
-        // This is called by the public char and string, index versions
-        //
-        // Note that for ch in the range D800-DFFF we just treat it as any other non-numeric character
-        //        
-        internal unsafe static DigitValues* InternalGetDigitValues(int ch) {
+        internal static unsafe ushort InternalGetDigitValues(int ch)
+        {
             Debug.Assert(ch >= 0 && ch <= 0x10ffff, "ch is not in valid Unicode range.");
             // Get the level 2 item from the highest 12 bit (8 - 19) of ch.
             ushort index = s_pNumericLevel1Index[ch >> 8];
             // Get the level 2 WORD offset from the 4 - 7 bit of ch.  This provides the base offset of the level 3 table.
-            // The offset is referred to an float item in m_pNumericFloatData.
             // Note that & has the lower precedence than addition, so don't forget the parathesis.
             index = s_pNumericLevel1Index[index + ((ch >> 4) & 0x000f)];
-            byte* pBytePtr = (byte*)&(s_pNumericLevel1Index[index]);
-            // Get the result from the 0 -3 bit of ch.
-            return &(s_pDigitValues[pBytePtr[(ch & 0x000f)]]);
-        }
 
-        internal unsafe static sbyte InternalGetDecimalDigitValue(int ch) {
-            return (InternalGetDigitValues(ch)->decimalDigit);
-        }
-
-        internal unsafe static sbyte InternalGetDigitValue(int ch) {
-            return (InternalGetDigitValues(ch)->digit);
+            fixed (ushort* pUshortPtr = &(s_pNumericLevel1Index[index]))
+            {
+                byte* pBytePtr = (byte*)pUshortPtr;
+                return s_pDigitValues[pBytePtr[(ch & 0x000f)]];
+            }
         }
 
-
         ////////////////////////////////////////////////////////////////////////
         //
         //Returns the numeric value associated with the character c. If the character is a fraction,
@@ -310,114 +216,91 @@ namespace System.Globalization {
         ////////////////////////////////////////////////////////////////////////
 
 
-        public static double GetNumericValue(char ch) {
+        public static double GetNumericValue(char ch)
+        {
             return (InternalGetNumericValue(ch));
         }
 
 
-        public static double GetNumericValue(String s, int index) {
-            if (s == null) {
+        public static double GetNumericValue(String s, int index)
+        {
+            if (s == null)
+            {
                 throw new ArgumentNullException(nameof(s));
             }
-            if (index < 0 || index >= s.Length) {
-                throw new ArgumentOutOfRangeException(nameof(index), Environment.GetResourceString("ArgumentOutOfRange_Index"));
+            if (index < 0 || index >= s.Length)
+            {
+                throw new ArgumentOutOfRangeException(nameof(index), SR.ArgumentOutOfRange_Index);
             }
             Contract.EndContractBlock();
             return (InternalGetNumericValue(InternalConvertToUtf32(s, index)));
-
         }
 
-        ////////////////////////////////////////////////////////////////////////
-        //
-        //Returns the decimal digit value associated with the character c.
-        //
-        // The value should be from 0 ~ 9.
-        // If the character does not have a numeric value, the return value is -1.
-        // From Unicode.org: Decimal Digits. Digits that can be used to form decimal-radix numbers.
-        //Returns:
-        //  the decimal digit value for the specified Unicode character.  If the character does not have a decimal digit value, the return value is -1.
-        //Arguments:
-        //      ch  a Unicode character
-        //Exceptions:
-        //      ArgumentNullException
-        //      ArgumentOutOfRangeException
-        //
-        ////////////////////////////////////////////////////////////////////////
-
-
-        public static int GetDecimalDigitValue(char ch) {
-            return (InternalGetDecimalDigitValue(ch));
+        public static int GetDecimalDigitValue(char ch)
+        {
+            return (sbyte)(InternalGetDigitValues(ch) >> 8);
         }
 
-
-        public static int GetDecimalDigitValue(String s, int index) {
-            if (s == null) {
+        public static int GetDecimalDigitValue(String s, int index)
+        {
+            if (s == null)
+            {
                 throw new ArgumentNullException(nameof(s));
             }
-            if (index < 0 || index >= s.Length) {
-                throw new ArgumentOutOfRangeException(nameof(index), Environment.GetResourceString("ArgumentOutOfRange_Index"));
+
+            if (index < 0 || index >= s.Length)
+            {
+                throw new ArgumentOutOfRangeException(nameof(index), SR.ArgumentOutOfRange_Index);
             }
             Contract.EndContractBlock();
 
-            return (InternalGetDecimalDigitValue(InternalConvertToUtf32(s, index)));
+            return (sbyte)(InternalGetDigitValues(InternalConvertToUtf32(s, index)) >> 8);
         }
 
-        ////////////////////////////////////////////////////////////////////////
-        //
-        //Action: Returns the digit value associated with the character c.
-        // If the character does not have a numeric value, the return value is -1.
-        // From Unicode.org: If the character represents a digit, not necessarily a decimal digit,
-        // the value is here. This covers digits which do not form decimal radix forms, such as the compatibility superscript digits.
-        //
-        // An example is: U+2460 IRCLED DIGIT ONE. This character has digit value 1, but does not have associcated decimal digit value.
-        //
-        //Returns:
-        //  the digit value for the specified Unicode character.  If the character does not have a digit value, the return value is -1.
-        //Arguments:
-        //      ch  a Unicode character
-        //Exceptions:
-        //      ArgumentNullException
-        //      ArgumentOutOfRangeException
-        //
-        ////////////////////////////////////////////////////////////////////////
-
-
-        public static int GetDigitValue(char ch) {
-            return (InternalGetDigitValue(ch));
+        public static int GetDigitValue(char ch)
+        {
+            return (sbyte)(InternalGetDigitValues(ch) & 0x00FF);
         }
 
-
-        public static int GetDigitValue(String s, int index) {
-            if (s == null) {
+        public static int GetDigitValue(String s, int index)
+        {
+            if (s == null)
+            {
                 throw new ArgumentNullException(nameof(s));
             }
-            if (index < 0 || index >= s.Length) {
-                throw new ArgumentOutOfRangeException(nameof(index), Environment.GetResourceString("ArgumentOutOfRange_Index"));
+
+            if (index < 0 || index >= s.Length)
+            {
+                throw new ArgumentOutOfRangeException(nameof(index), SR.ArgumentOutOfRange_Index);
             }
+
             Contract.EndContractBlock();
-            return (InternalGetDigitValue(InternalConvertToUtf32(s, index)));
+            return (sbyte)(InternalGetDigitValues(InternalConvertToUtf32(s, index)) & 0x00FF);
         }
 
         public static UnicodeCategory GetUnicodeCategory(char ch)
         {
-            return (InternalGetUnicodeCategory(ch)) ;
+            return (InternalGetUnicodeCategory(ch));
         }
 
         public static UnicodeCategory GetUnicodeCategory(String s, int index)
         {
-            if (s==null)
+            if (s == null)
                 throw new ArgumentNullException(nameof(s));
-            if (((uint)index)>=((uint)s.Length)) {
+            if (((uint)index) >= ((uint)s.Length))
+            {
                 throw new ArgumentOutOfRangeException(nameof(index));
             }
             Contract.EndContractBlock();
             return InternalGetUnicodeCategory(s, index);
         }
 
-        internal unsafe static UnicodeCategory InternalGetUnicodeCategory(int ch) {
+        internal static unsafe UnicodeCategory InternalGetUnicodeCategory(int ch)
+        {
             return ((UnicodeCategory)InternalGetCategoryValue(ch, UNICODE_CATEGORY_OFFSET));
         }
 
+
         ////////////////////////////////////////////////////////////////////////
         //
         //Action: Returns the Unicode Category property for the character c.
@@ -432,37 +315,28 @@ namespace System.Globalization {
         //
         ////////////////////////////////////////////////////////////////////////
 
-        internal unsafe static byte InternalGetCategoryValue(int ch, int offset) {
+        internal static unsafe byte InternalGetCategoryValue(int ch, int offset)
+        {
             Debug.Assert(ch >= 0 && ch <= 0x10ffff, "ch is not in valid Unicode range.");
             // Get the level 2 item from the highest 12 bit (8 - 19) of ch.
             ushort index = s_pCategoryLevel1Index[ch >> 8];
             // Get the level 2 WORD offset from the 4 - 7 bit of ch.  This provides the base offset of the level 3 table.
             // Note that & has the lower precedence than addition, so don't forget the parathesis.
             index = s_pCategoryLevel1Index[index + ((ch >> 4) & 0x000f)];
-            byte* pBytePtr = (byte*)&(s_pCategoryLevel1Index[index]);
-            // Get the result from the 0 -3 bit of ch.
-            byte valueIndex = pBytePtr[(ch & 0x000f)];
-            byte uc = s_pCategoriesValue[valueIndex * 2 + offset];
-            //
-            // Make sure that OtherNotAssigned is the last category in UnicodeCategory.
-            // If that changes, change the following assertion as well.
-            //
-            //Debug.Assert(uc >= 0 && uc <= UnicodeCategory.OtherNotAssigned, "Table returns incorrect Unicode category");
-            return (uc);
-        }
 
-//      internal static BidiCategory GetBidiCategory(char ch) {
-//          return ((BidiCategory)InternalGetCategoryValue(c, BIDI_CATEGORY_OFFSET));
-//      }
-
-        internal static BidiCategory GetBidiCategory(String s, int index) {
-            if (s==null)
-                throw new ArgumentNullException(nameof(s));
-            if (((uint)index)>=((uint)s.Length)) {
-                throw new ArgumentOutOfRangeException(nameof(index));
+            fixed (ushort* pUshortPtr = &(s_pCategoryLevel1Index[index]))
+            {
+                byte* pBytePtr = (byte*)pUshortPtr;
+                // Get the result from bits 0-3 of ch.
+                byte valueIndex = pBytePtr[(ch & 0x000f)];
+                byte uc = s_pCategoriesValue[valueIndex * 2 + offset];
+                //
+                // Make sure that OtherNotAssigned is the last category in UnicodeCategory.
+                // If that changes, change the following assertion as well.
+                //
+                //Debug.Assert(uc >= 0 && uc <= UnicodeCategory.OtherNotAssigned, "Table returns incorrect Unicode category");
+                return (uc);
             }
-            Contract.EndContractBlock();
-            return ((BidiCategory)InternalGetCategoryValue(InternalConvertToUtf32(s, index), BIDI_CATEGORY_OFFSET));
         }
 
         ////////////////////////////////////////////////////////////////////////
@@ -478,13 +352,27 @@ namespace System.Globalization {
         //
         ////////////////////////////////////////////////////////////////////////
 
-        internal static UnicodeCategory InternalGetUnicodeCategory(String value, int index) {
+        internal static UnicodeCategory InternalGetUnicodeCategory(String value, int index)
+        {
             Debug.Assert(value != null, "value can not be null");
             Debug.Assert(index < value.Length, "index < value.Length");
 
             return (InternalGetUnicodeCategory(InternalConvertToUtf32(value, index)));
         }
 
+        internal static BidiCategory GetBidiCategory(String s, int index)
+        {
+            if (s == null)
+                throw new ArgumentNullException(nameof(s));
+
+            if (((uint)index) >= ((uint)s.Length))
+            {
+                throw new ArgumentOutOfRangeException(nameof(index));
+            }
+
+            return ((BidiCategory) InternalGetCategoryValue(InternalConvertToUtf32(s, index), BIDI_CATEGORY_OFFSET));
+        }
+
         ////////////////////////////////////////////////////////////////////////
         //
         // Get the Unicode category of the character starting at index.  If the character is in BMP, charLength will return 1.
@@ -492,15 +380,17 @@ namespace System.Globalization {
         //
         ////////////////////////////////////////////////////////////////////////
 
-        internal static UnicodeCategory InternalGetUnicodeCategory(String str, int index, out int charLength) {
+        internal static UnicodeCategory InternalGetUnicodeCategory(String str, int index, out int charLength)
+        {
             Debug.Assert(str != null, "str can not be null");
-            Debug.Assert(str.Length > 0, "str.Length > 0");;
+            Debug.Assert(str.Length > 0, "str.Length > 0");
             Debug.Assert(index >= 0 && index < str.Length, "index >= 0 && index < str.Length");
 
             return (InternalGetUnicodeCategory(InternalConvertToUtf32(str, index, out charLength)));
         }
 
-        internal static bool IsCombiningCategory(UnicodeCategory uc) {
+        internal static bool IsCombiningCategory(UnicodeCategory uc)
+        {
             Debug.Assert(uc >= 0, "uc >= 0");
             return (
                 uc == UnicodeCategory.NonSpacingMark ||