diff options
Diffstat (limited to 'src/mscorlib/corefx/System/Globalization/CharUnicodeInfo.cs')
-rw-r--r-- | src/mscorlib/corefx/System/Globalization/CharUnicodeInfo.cs | 391 |
1 files changed, 0 insertions, 391 deletions
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

// Provenance: src/mscorlib/corefx/System/Globalization/CharUnicodeInfo.cs, as removed by a
// "deleted file" diff (blob 38ce441a78, 391 lines). Reconstructed from the flattened diff text.

////////////////////////////////////////////////////////////////////////////
//
//
//  Purpose:  This class implements a set of methods for retrieving
//            character type information.  Character type information is
//            independent of culture and region.
//
//
////////////////////////////////////////////////////////////////////////////

using System.Diagnostics;
using System.Diagnostics.Contracts;

namespace System.Globalization
{
    public static partial class CharUnicodeInfo
    {
        //--------------------------------------------------------------------//
        //                        Internal Information                        //
        //--------------------------------------------------------------------//

        // Inclusive boundaries of the UTF-16 high and low surrogate ranges.
        internal const char HIGH_SURROGATE_START = '\ud800';
        internal const char HIGH_SURROGATE_END = '\udbff';
        internal const char LOW_SURROGATE_START = '\udc00';
        internal const char LOW_SURROGATE_END = '\udfff';

        // Byte offsets inside a two-byte category entry (see InternalGetCategoryValue).
        // Offset 0 is read as a UnicodeCategory; offset 1 is, judging by its name, the
        // bidi category -- no caller in this part of the class uses it.
        internal const int UNICODE_CATEGORY_OFFSET = 0;
        internal const int BIDI_CATEGORY_OFFSET = 1;

        // The starting codepoint for Unicode plane 1.  Plane 1 contains 0x010000 ~ 0x01ffff.
        internal const int UNICODE_PLANE01_START = 0x10000;

        ////////////////////////////////////////////////////////////////////////
        //
        // Converts the BMP character or surrogate pair starting at s[index] to a
        // UTF-32 value.  This is similar to Char.ConvertToUtf32, except that it
        // does not throw when an unpaired surrogate is encountered.
        //
        // WARNING: because it does not throw, it CAN return a value in the
        // surrogate range D800-DFFF, which is not a legal Unicode scalar value.
        //
        ////////////////////////////////////////////////////////////////////////

        internal static int InternalConvertToUtf32(String s, int index)
        {
            // Same decoding as the three-argument overload; the consumed length is discarded.
            int charLength;
            return InternalConvertToUtf32(s, index, out charLength);
        }

        ////////////////////////////////////////////////////////////////////////
        //
        // Converts the character or surrogate pair starting at s[index] to a
        // UTF-32 value and reports how many UTF-16 code units were consumed.
        //
        // Parameters:
        //      s           The string.  Must be non-null and non-empty.
        //      index       The starting index.  It can point to a BMP character
        //                  or to the first half of a surrogate pair.
        //      charLength  [out] 1 if a single code unit was read (BMP character
        //                  or unpaired surrogate), 2 if a valid surrogate pair
        //                  was read.
        //
        // WARNING: because it does not throw, it CAN return a value in the
        // surrogate range D800-DFFF, which is not a legal Unicode scalar value.
        //
        // Returns:
        //      The UTF-32 value.
        //
        ////////////////////////////////////////////////////////////////////////

        internal static int InternalConvertToUtf32(String s, int index, out int charLength)
        {
            Debug.Assert(s != null, "s != null");
            Debug.Assert(s.Length > 0, "s.Length > 0");
            Debug.Assert(index >= 0 && index < s.Length, "index >= 0 && index < s.Length");

            charLength = 1;

            // A pair needs a following code unit, so the last char can never start one.
            if (index < s.Length - 1)
            {
                int highBits = (int)s[index] - HIGH_SURROGATE_START;
                if (highBits >= 0 && highBits <= 0x3ff)
                {
                    int lowBits = (int)s[index + 1] - LOW_SURROGATE_START;
                    if (lowBits >= 0 && lowBits <= 0x3ff)
                    {
                        // Each surrogate half carries 10 bits of the supplementary code point.
                        charLength = 2;
                        return (highBits * 0x400) + lowBits + UNICODE_PLANE01_START;
                    }
                }
            }

            return (int)s[index];
        }

        ////////////////////////////////////////////////////////////////////////
        //
        // IsWhiteSpace
        //
        // Determines whether the character (or surrogate pair) at s[index] is in
        // one of the separator categories: SpaceSeparator, LineSeparator or
        // ParagraphSeparator.
        //
        ////////////////////////////////////////////////////////////////////////

        internal static bool IsWhiteSpace(String s, int index)
        {
            Debug.Assert(s != null, "s!=null");
            Debug.Assert(index >= 0 && index < s.Length, "index >= 0 && index < s.Length");

            return IsSeparatorCategory(GetUnicodeCategory(s, index));
        }

        // Determines whether c is in one of the separator categories (see the overload above).
        internal static bool IsWhiteSpace(char c)
        {
            return IsSeparatorCategory(GetUnicodeCategory(c));
        }

        // Shared category test behind both IsWhiteSpace overloads.
        private static bool IsSeparatorCategory(UnicodeCategory uc)
        {
            // In Unicode 3.0, U+2028 is the only character in the category "LineSeparator",
            // and U+2029 is the only character in the category "ParagraphSeparator".
            switch (uc)
            {
                case UnicodeCategory.SpaceSeparator:
                case UnicodeCategory.LineSeparator:
                case UnicodeCategory.ParagraphSeparator:
                    return true;
            }

            return false;
        }

        // Walks the multi-level index stored in levelIndex and returns the byte selected by ch.
        // The level 1 and level 2 entries are ushort offsets back into the same array; the
        // level 3 data is a run of bytes packed into the array's ushort storage.
        private static unsafe byte GetLevel3Value(ushort[] levelIndex, int ch)
        {
            // Level 1: indexed by bits 8 and above of ch.
            ushort offset = levelIndex[ch >> 8];
            // Level 2: indexed by bits 4 - 7 of ch; yields the base offset of the level 3 run.
            offset = levelIndex[offset + ((ch >> 4) & 0x000f)];

            fixed (ushort* pLevel3 = &levelIndex[offset])
            {
                // Level 3: indexed by bits 0 - 3 of ch, read byte-wise through the pointer.
                // NOTE(review): viewing ushort storage as bytes depends on host byte order;
                // presumably the tables are laid out for little-endian -- confirm before
                // relying on this on a big-endian target.
                return ((byte*)pLevel3)[ch & 0x000f];
            }
        }

        //
        // This is called by the public char and (string, index) versions.
        //
        // Note that for ch in the range D800-DFFF we just treat it as any other non-numeric character.
        //
        internal static unsafe double InternalGetNumericValue(int ch)
        {
            Debug.Assert(ch >= 0 && ch <= 0x10ffff, "ch is not in valid Unicode range.");

            // The level 3 byte is the position of a double within s_pNumericValues.
            byte valueIndex = GetLevel3Value(s_pNumericLevel1Index, ch);

            fixed (byte* pNumericBytes = s_pNumericValues)
            {
                return ((double*)pNumericBytes)[valueIndex];
            }
        }

        // Returns the packed digit entry for ch: the high byte is consumed by
        // GetDecimalDigitValue and the low byte by GetDigitValue.
        internal static ushort InternalGetDigitValues(int ch)
        {
            Debug.Assert(ch >= 0 && ch <= 0x10ffff, "ch is not in valid Unicode range.");

            return s_pDigitValues[GetLevel3Value(s_pNumericLevel1Index, ch)];
        }

        ////////////////////////////////////////////////////////////////////////
        //
        // Returns the numeric value associated with the character ch.  If the
        // character is a fraction, the return value will not be an integer.  If
        // the character does not have a numeric value, the return value is -1.
        //
        // Arguments:
        //      ch      a Unicode character
        //
        ////////////////////////////////////////////////////////////////////////

        public static double GetNumericValue(char ch)
        {
            return InternalGetNumericValue(ch);
        }

        ////////////////////////////////////////////////////////////////////////
        //
        // Returns the numeric value of the character or surrogate pair starting
        // at s[index] (see GetNumericValue(char)).
        //
        // Exceptions:
        //      ArgumentNullException           s is null
        //      ArgumentOutOfRangeException     index is outside [0, s.Length)
        //
        ////////////////////////////////////////////////////////////////////////

        public static double GetNumericValue(String s, int index)
        {
            if (s == null)
            {
                throw new ArgumentNullException(nameof(s));
            }
            if (index < 0 || index >= s.Length)
            {
                throw new ArgumentOutOfRangeException(nameof(index), SR.ArgumentOutOfRange_Index);
            }
            Contract.EndContractBlock();

            return InternalGetNumericValue(InternalConvertToUtf32(s, index));
        }

        // Returns the high byte of the digit entry for ch, reinterpreted as a signed byte
        // (so a stored 0xFF reads back as -1).
        public static int GetDecimalDigitValue(char ch)
        {
            // unchecked: the narrowing must wrap, not throw, even in a checked build.
            return unchecked((sbyte)(InternalGetDigitValues(ch) >> 8));
        }

        // Returns the high byte of the digit entry for the character or surrogate pair at
        // s[index], reinterpreted as a signed byte.
        //
        // Exceptions:
        //      ArgumentNullException           s is null
        //      ArgumentOutOfRangeException     index is outside [0, s.Length)
        public static int GetDecimalDigitValue(String s, int index)
        {
            if (s == null)
            {
                throw new ArgumentNullException(nameof(s));
            }
            if (index < 0 || index >= s.Length)
            {
                throw new ArgumentOutOfRangeException(nameof(index), SR.ArgumentOutOfRange_Index);
            }
            Contract.EndContractBlock();

            return unchecked((sbyte)(InternalGetDigitValues(InternalConvertToUtf32(s, index)) >> 8));
        }

        // Returns the low byte of the digit entry for ch, reinterpreted as a signed byte
        // (so a stored 0xFF reads back as -1).
        public static int GetDigitValue(char ch)
        {
            // unchecked: the narrowing must wrap, not throw, even in a checked build.
            return unchecked((sbyte)(InternalGetDigitValues(ch) & 0x00FF));
        }

        // Returns the low byte of the digit entry for the character or surrogate pair at
        // s[index], reinterpreted as a signed byte.
        //
        // Exceptions:
        //      ArgumentNullException           s is null
        //      ArgumentOutOfRangeException     index is outside [0, s.Length)
        public static int GetDigitValue(String s, int index)
        {
            if (s == null)
            {
                throw new ArgumentNullException(nameof(s));
            }
            if (index < 0 || index >= s.Length)
            {
                throw new ArgumentOutOfRangeException(nameof(index), SR.ArgumentOutOfRange_Index);
            }
            Contract.EndContractBlock();

            return unchecked((sbyte)(InternalGetDigitValues(InternalConvertToUtf32(s, index)) & 0x00FF));
        }

        // Returns the Unicode category of ch.
        public static UnicodeCategory GetUnicodeCategory(char ch)
        {
            return InternalGetUnicodeCategory(ch);
        }

        // Returns the Unicode category of the character or surrogate pair starting at s[index].
        //
        // Exceptions:
        //      ArgumentNullException           s is null
        //      ArgumentOutOfRangeException     index is outside [0, s.Length)
        public static UnicodeCategory GetUnicodeCategory(String s, int index)
        {
            if (s == null)
            {
                throw new ArgumentNullException(nameof(s));
            }
            // A single unsigned comparison rejects both negative and too-large indexes.
            if (((uint)index) >= ((uint)s.Length))
            {
                throw new ArgumentOutOfRangeException(nameof(index));
            }
            Contract.EndContractBlock();

            return InternalGetUnicodeCategory(s, index);
        }

        // Returns the Unicode category of the UTF-32 code point ch.
        internal static UnicodeCategory InternalGetUnicodeCategory(int ch)
        {
            return (UnicodeCategory)InternalGetCategoryValue(ch, UNICODE_CATEGORY_OFFSET);
        }

        ////////////////////////////////////////////////////////////////////////
        //
        // Action: Returns one byte of the category entry for the code point ch.
        // Returns:
        //      the byte at the requested offset of the entry
        // Arguments:
        //      ch      a Unicode code point (0 - 0x10ffff)
        //      offset  UNICODE_CATEGORY_OFFSET or BIDI_CATEGORY_OFFSET
        // Exceptions:
        //      None
        //
        // Note that this API will return values for D800-DFFF surrogate halves.
        //
        ////////////////////////////////////////////////////////////////////////

        internal static byte InternalGetCategoryValue(int ch, int offset)
        {
            Debug.Assert(ch >= 0 && ch <= 0x10ffff, "ch is not in valid Unicode range.");

            // Each entry in s_pCategoriesValue is two bytes wide; offset selects one of them.
            byte valueIndex = GetLevel3Value(s_pCategoryLevel1Index, ch);

            // No range assertion on the result: when offset is UNICODE_CATEGORY_OFFSET the
            // value is expected to be a valid UnicodeCategory (OtherNotAssigned being the
            // last member), but this method is also used for other offsets.
            return s_pCategoriesValue[valueIndex * 2 + offset];
        }

        ////////////////////////////////////////////////////////////////////////
        //
        // Action: Returns the Unicode category of the character or surrogate
        //         pair starting at value[index].
        // Returns:
        //      a value in the UnicodeCategory enum
        // Arguments:
        //      value   a Unicode string (non-null)
        //      index   index into the string, in [0, value.Length)
        // Exceptions:
        //      None
        //
        ////////////////////////////////////////////////////////////////////////

        internal static UnicodeCategory InternalGetUnicodeCategory(String value, int index)
        {
            Debug.Assert(value != null, "value can not be null");
            Debug.Assert(index >= 0 && index < value.Length, "index >= 0 && index < value.Length");

            return InternalGetUnicodeCategory(InternalConvertToUtf32(value, index));
        }

        ////////////////////////////////////////////////////////////////////////
        //
        // Get the Unicode category of the character starting at index.  If a single
        // code unit is read, charLength will return 1.  If the character is a valid
        // surrogate pair, charLength will return 2.
        //
        ////////////////////////////////////////////////////////////////////////

        internal static UnicodeCategory InternalGetUnicodeCategory(String str, int index, out int charLength)
        {
            Debug.Assert(str != null, "str can not be null");
            Debug.Assert(str.Length > 0, "str.Length > 0");
            Debug.Assert(index >= 0 && index < str.Length, "index >= 0 && index < str.Length");

            return InternalGetUnicodeCategory(InternalConvertToUtf32(str, index, out charLength));
        }

        // Returns true when uc is one of the three mark categories
        // (NonSpacingMark, SpacingCombiningMark, EnclosingMark).
        internal static bool IsCombiningCategory(UnicodeCategory uc)
        {
            Debug.Assert(uc >= 0, "uc >= 0");

            return uc == UnicodeCategory.NonSpacingMark
                || uc == UnicodeCategory.SpacingCombiningMark
                || uc == UnicodeCategory.EnclosingMark;
        }
    }
}