summaryrefslogtreecommitdiff
path: root/src/mscorlib/corefx/System/Globalization/CharUnicodeInfo.cs
diff options
context:
space:
mode:
Diffstat (limited to 'src/mscorlib/corefx/System/Globalization/CharUnicodeInfo.cs')
-rw-r--r--src/mscorlib/corefx/System/Globalization/CharUnicodeInfo.cs391
1 files changed, 0 insertions, 391 deletions
diff --git a/src/mscorlib/corefx/System/Globalization/CharUnicodeInfo.cs b/src/mscorlib/corefx/System/Globalization/CharUnicodeInfo.cs
deleted file mode 100644
index 38ce441a78..0000000000
--- a/src/mscorlib/corefx/System/Globalization/CharUnicodeInfo.cs
+++ /dev/null
@@ -1,391 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-// See the LICENSE file in the project root for more information.
-
-////////////////////////////////////////////////////////////////////////////
-//
-//
-// Purpose: This class implements a set of methods for retrieving
-// character type information. Character type information is
-// independent of culture and region.
-//
-//
-////////////////////////////////////////////////////////////////////////////
-
-using System.Diagnostics;
-using System.Diagnostics.Contracts;
-
-namespace System.Globalization
-{
- public static partial class CharUnicodeInfo
- {
- //--------------------------------------------------------------------//
- // Internal Information //
- //--------------------------------------------------------------------//
-
- //
- // Native methods to access the Unicode category data tables in charinfo.nlp.
- //
- internal const char HIGH_SURROGATE_START = '\ud800';
- internal const char HIGH_SURROGATE_END = '\udbff';
- internal const char LOW_SURROGATE_START = '\udc00';
- internal const char LOW_SURROGATE_END = '\udfff';
-
- internal const int UNICODE_CATEGORY_OFFSET = 0;
- internal const int BIDI_CATEGORY_OFFSET = 1;
-
-
-
- // The starting codepoint for Unicode plane 1. Plane 1 contains 0x010000 ~ 0x01ffff.
- internal const int UNICODE_PLANE01_START = 0x10000;
-
-
- ////////////////////////////////////////////////////////////////////////
- //
- // Actions:
- // Convert the BMP character or surrogate pointed to by index to a UTF32 value.
- // This is similar to Char.ConvertToUTF32, but the difference is that
- // it does not throw exceptions when invalid surrogate characters are passed in.
- //
- // WARNING: since it doesn't throw an exception it CAN return a value
- // in the surrogate range D800-DFFF, which are not legal unicode values.
- //
- ////////////////////////////////////////////////////////////////////////
-
- internal static int InternalConvertToUtf32(String s, int index)
- {
- Debug.Assert(s != null, "s != null");
- Debug.Assert(index >= 0 && index < s.Length, "index < s.Length");
- if (index < s.Length - 1)
- {
- int temp1 = (int)s[index] - HIGH_SURROGATE_START;
- if (temp1 >= 0 && temp1 <= 0x3ff)
- {
- int temp2 = (int)s[index + 1] - LOW_SURROGATE_START;
- if (temp2 >= 0 && temp2 <= 0x3ff)
- {
- // Convert the surrogate to UTF32 and get the result.
- return ((temp1 * 0x400) + temp2 + UNICODE_PLANE01_START);
- }
- }
- }
- return ((int)s[index]);
- }
- ////////////////////////////////////////////////////////////////////////
- //
- // Convert a character or a surrogate pair starting at index of string s
- // to UTF32 value.
- //
- // Parameters:
- // s The string
- // index The starting index. It can point to a BMP character or
- // a surrogate pair.
- // (There is no separate length parameter; the length is read from s.Length.)
- // charLength [out] If the index points to a BMP char, charLength
- // will be 1. If the index points to a surrogate pair,
- // charLength will be 2.
- //
- // WARNING: since it doesn't throw an exception it CAN return a value
- // in the surrogate range D800-DFFF, which are not legal unicode values.
- //
- // Returns:
- // The UTF32 value
- //
- ////////////////////////////////////////////////////////////////////////
-
- internal static int InternalConvertToUtf32(String s, int index, out int charLength)
- {
- Debug.Assert(s != null, "s != null");
- Debug.Assert(s.Length > 0, "s.Length > 0");
- Debug.Assert(index >= 0 && index < s.Length, "index >= 0 && index < s.Length");
- charLength = 1;
- if (index < s.Length - 1)
- {
- int temp1 = (int)s[index] - HIGH_SURROGATE_START;
- if (temp1 >= 0 && temp1 <= 0x3ff)
- {
- int temp2 = (int)s[index + 1] - LOW_SURROGATE_START;
- if (temp2 >= 0 && temp2 <= 0x3ff)
- {
- // Convert the surrogate to UTF32 and get the result.
- charLength++;
- return ((temp1 * 0x400) + temp2 + UNICODE_PLANE01_START);
- }
- }
- }
- return ((int)s[index]);
- }
-
- ////////////////////////////////////////////////////////////////////////
- //
- // IsWhiteSpace
- //
- // Determines if the given character is a white space character.
- //
- ////////////////////////////////////////////////////////////////////////
-
- internal static bool IsWhiteSpace(String s, int index)
- {
- Debug.Assert(s != null, "s!=null");
- Debug.Assert(index >= 0 && index < s.Length, "index >= 0 && index < s.Length");
-
- UnicodeCategory uc = GetUnicodeCategory(s, index);
- // In Unicode 3.0, U+2028 is the only character in the category "LineSeparator",
- // and U+2029 is the only character in the category "ParagraphSeparator".
- switch (uc)
- {
- case (UnicodeCategory.SpaceSeparator):
- case (UnicodeCategory.LineSeparator):
- case (UnicodeCategory.ParagraphSeparator):
- return (true);
- }
- return (false);
- }
-
-
- internal static bool IsWhiteSpace(char c)
- {
- UnicodeCategory uc = GetUnicodeCategory(c);
- // In Unicode 3.0, U+2028 is the only character in the category "LineSeparator",
- // and U+2029 is the only character in the category "ParagraphSeparator".
- switch (uc)
- {
- case (UnicodeCategory.SpaceSeparator):
- case (UnicodeCategory.LineSeparator):
- case (UnicodeCategory.ParagraphSeparator):
- return (true);
- }
-
- return (false);
- }
-
-
- //
- // This is called by the public char and string, index versions
- //
- // Note that for ch in the range D800-DFFF we just treat it as any other non-numeric character
- //
- internal static unsafe double InternalGetNumericValue(int ch)
- {
- Debug.Assert(ch >= 0 && ch <= 0x10ffff, "ch is not in valid Unicode range.");
- // Get the level 2 item from the highest 12 bit (8 - 19) of ch.
- ushort index = s_pNumericLevel1Index[ch >> 8];
- // Get the level 2 WORD offset from the 4 - 7 bit of ch. This provides the base offset of the level 3 table.
- // The byte read from the level 3 table is an index to a double item in s_pNumericValues.
- // Note that & has lower precedence than addition, so don't forget the parentheses.
- index = s_pNumericLevel1Index[index + ((ch >> 4) & 0x000f)];
-
- fixed (ushort* pUshortPtr = &(s_pNumericLevel1Index[index]))
- {
- byte* pBytePtr = (byte*)pUshortPtr;
- fixed (byte* pByteNum = s_pNumericValues)
- {
- double* pDouble = (double*)pByteNum;
- return pDouble[pBytePtr[(ch & 0x000f)]];
- }
- }
- }
-
- internal static unsafe ushort InternalGetDigitValues(int ch)
- {
- Debug.Assert(ch >= 0 && ch <= 0x10ffff, "ch is not in valid Unicode range.");
- // Get the level 2 item from the highest 12 bit (8 - 19) of ch.
- ushort index = s_pNumericLevel1Index[ch >> 8];
- // Get the level 2 WORD offset from the 4 - 7 bit of ch. This provides the base offset of the level 3 table.
- // Note that & has lower precedence than addition, so don't forget the parentheses.
- index = s_pNumericLevel1Index[index + ((ch >> 4) & 0x000f)];
-
- fixed (ushort* pUshortPtr = &(s_pNumericLevel1Index[index]))
- {
- byte* pBytePtr = (byte*)pUshortPtr;
- return s_pDigitValues[pBytePtr[(ch & 0x000f)]];
- }
- }
-
- ////////////////////////////////////////////////////////////////////////
- //
- //Returns the numeric value associated with the character c. If the character is a fraction,
- // the return value will not be an integer. If the character does not have a numeric value, the return value is -1.
- //
- //Returns:
- // the numeric value for the specified Unicode character. If the character does not have a numeric value, the return value is -1.
- //Arguments:
- // ch a Unicode character
- //Exceptions:
- // ArgumentNullException
- // ArgumentOutOfRangeException
- //
- ////////////////////////////////////////////////////////////////////////
-
-
- public static double GetNumericValue(char ch)
- {
- return (InternalGetNumericValue(ch));
- }
-
-
- public static double GetNumericValue(String s, int index)
- {
- if (s == null)
- {
- throw new ArgumentNullException(nameof(s));
- }
- if (index < 0 || index >= s.Length)
- {
- throw new ArgumentOutOfRangeException(nameof(index), SR.ArgumentOutOfRange_Index);
- }
- Contract.EndContractBlock();
- return (InternalGetNumericValue(InternalConvertToUtf32(s, index)));
- }
-
- public static int GetDecimalDigitValue(char ch)
- {
- return (sbyte)(InternalGetDigitValues(ch) >> 8);
- }
-
- public static int GetDecimalDigitValue(String s, int index)
- {
- if (s == null)
- {
- throw new ArgumentNullException(nameof(s));
- }
-
- if (index < 0 || index >= s.Length)
- {
- throw new ArgumentOutOfRangeException(nameof(index), SR.ArgumentOutOfRange_Index);
- }
- Contract.EndContractBlock();
-
- return (sbyte)(InternalGetDigitValues(InternalConvertToUtf32(s, index)) >> 8);
- }
-
- public static int GetDigitValue(char ch)
- {
- return (sbyte)(InternalGetDigitValues(ch) & 0x00FF);
- }
-
- public static int GetDigitValue(String s, int index)
- {
- if (s == null)
- {
- throw new ArgumentNullException(nameof(s));
- }
-
- if (index < 0 || index >= s.Length)
- {
- throw new ArgumentOutOfRangeException(nameof(index), SR.ArgumentOutOfRange_Index);
- }
-
- Contract.EndContractBlock();
- return (sbyte)(InternalGetDigitValues(InternalConvertToUtf32(s, index)) & 0x00FF);
- }
-
- public static UnicodeCategory GetUnicodeCategory(char ch)
- {
- return (InternalGetUnicodeCategory(ch));
- }
-
- public static UnicodeCategory GetUnicodeCategory(String s, int index)
- {
- if (s == null)
- throw new ArgumentNullException(nameof(s));
- if (((uint)index) >= ((uint)s.Length))
- {
- throw new ArgumentOutOfRangeException(nameof(index));
- }
- Contract.EndContractBlock();
- return InternalGetUnicodeCategory(s, index);
- }
-
- internal static unsafe UnicodeCategory InternalGetUnicodeCategory(int ch)
- {
- return ((UnicodeCategory)InternalGetCategoryValue(ch, UNICODE_CATEGORY_OFFSET));
- }
-
-
- ////////////////////////////////////////////////////////////////////////
- //
- //Action: Returns the Unicode Category property for the character c.
- //Returns:
- // a value in the UnicodeCategory enum
- //Arguments:
- // ch a Unicode character
- //Exceptions:
- // None
- //
- //Note that this API will return values for D800-DFFF surrogate halves.
- //
- ////////////////////////////////////////////////////////////////////////
-
- internal static unsafe byte InternalGetCategoryValue(int ch, int offset)
- {
- Debug.Assert(ch >= 0 && ch <= 0x10ffff, "ch is not in valid Unicode range.");
- // Get the level 2 item from the highest 12 bit (8 - 19) of ch.
- ushort index = s_pCategoryLevel1Index[ch >> 8];
- // Get the level 2 WORD offset from the 4 - 7 bit of ch. This provides the base offset of the level 3 table.
- // Note that & has lower precedence than addition, so don't forget the parentheses.
- index = s_pCategoryLevel1Index[index + ((ch >> 4) & 0x000f)];
-
- fixed (ushort* pUshortPtr = &(s_pCategoryLevel1Index[index]))
- {
- byte* pBytePtr = (byte*)pUshortPtr;
- // Get the result from the 0 -3 bit of ch.
- byte valueIndex = pBytePtr[(ch & 0x000f)];
- byte uc = s_pCategoriesValue[valueIndex * 2 + offset];
- //
- // Make sure that OtherNotAssigned is the last category in UnicodeCategory.
- // If that changes, change the following assertion as well.
- //
- //Debug.Assert(uc >= 0 && uc <= UnicodeCategory.OtherNotAssigned, "Table returns incorrect Unicode category");
- return (uc);
- }
- }
-
- ////////////////////////////////////////////////////////////////////////
- //
- //Action: Returns the Unicode Category property for the character c.
- //Returns:
- // a value in the UnicodeCategory enum
- //Arguments:
- // value a Unicode String
- // index Index for the specified string.
- //Exceptions:
- // None
- //
- ////////////////////////////////////////////////////////////////////////
-
- internal static UnicodeCategory InternalGetUnicodeCategory(String value, int index)
- {
- Debug.Assert(value != null, "value can not be null");
- Debug.Assert(index < value.Length, "index < value.Length");
-
- return (InternalGetUnicodeCategory(InternalConvertToUtf32(value, index)));
- }
-
- ////////////////////////////////////////////////////////////////////////
- //
- // Get the Unicode category of the character starting at index. If the character is in BMP, charLength will return 1.
- // If the character is a valid surrogate pair, charLength will return 2.
- //
- ////////////////////////////////////////////////////////////////////////
-
- internal static UnicodeCategory InternalGetUnicodeCategory(String str, int index, out int charLength)
- {
- Debug.Assert(str != null, "str can not be null");
- Debug.Assert(str.Length > 0, "str.Length > 0"); ;
- Debug.Assert(index >= 0 && index < str.Length, "index >= 0 && index < str.Length");
-
- return (InternalGetUnicodeCategory(InternalConvertToUtf32(str, index, out charLength)));
- }
-
- internal static bool IsCombiningCategory(UnicodeCategory uc)
- {
- Debug.Assert(uc >= 0, "uc >= 0");
- return (
- uc == UnicodeCategory.NonSpacingMark ||
- uc == UnicodeCategory.SpacingCombiningMark ||
- uc == UnicodeCategory.EnclosingMark
- );
- }
- }
-}