summaryrefslogtreecommitdiff
path: root/src/mscorlib/corefx/System/Globalization/TextInfo.cs
diff options
context:
space:
mode:
Diffstat (limited to 'src/mscorlib/corefx/System/Globalization/TextInfo.cs')
-rw-r--r--src/mscorlib/corefx/System/Globalization/TextInfo.cs746
1 files changed, 0 insertions, 746 deletions
diff --git a/src/mscorlib/corefx/System/Globalization/TextInfo.cs b/src/mscorlib/corefx/System/Globalization/TextInfo.cs
deleted file mode 100644
index 172bbd25b2..0000000000
--- a/src/mscorlib/corefx/System/Globalization/TextInfo.cs
+++ /dev/null
@@ -1,746 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-// See the LICENSE file in the project root for more information.
-
-////////////////////////////////////////////////////////////////////////////
-//
-//
-// Purpose: This Class defines behaviors specific to a writing system.
-// A writing system is the collection of scripts and
-// orthographic rules required to represent a language as text.
-//
-//
-////////////////////////////////////////////////////////////////////////////
-
-using System.Diagnostics;
-using System.Diagnostics.Contracts;
-using System.Runtime.Serialization;
-using System.Text;
-
-namespace System.Globalization
-{
- [Serializable]
- public partial class TextInfo : ICloneable, IDeserializationCallback
- {
- ////--------------------------------------------------------------------//
- //// Internal Information //
- ////--------------------------------------------------------------------//
-
- private enum Tristate : byte // three-state flag used to cache a lazily computed boolean
- {
- NotInitialized, // the answer has not been computed yet
- True,
- False,
- }
-
- ////
- //// Variables.
- ////
-
- [OptionalField(VersionAdded = 2)]
- private String _listSeparator;
- [OptionalField(VersionAdded = 2)]
- private bool _isReadOnly = false;
-
- //// _cultureName is the name of the creating culture. Note that we consider this authoritative,
- //// if the culture's textinfo changes when deserializing, then behavior may change.
- //// (ala Whidbey behavior). This is the only string Arrowhead needs to serialize.
- //// _cultureData is the data that backs this class.
- //// _textInfoName is the actual name of the textInfo (from cultureData.STEXTINFO)
- //// this can be the same as _cultureName on Silverlight since the OS knows
- //// how to do the sorting. However in the desktop, when we call the sorting dll, it doesn't
- //// know how to resolve custom locale names to sort ids so we have to have already resolved this.
- ////
-
- [OptionalField(VersionAdded = 3)]
- private String _cultureName; // Name of the culture that created this text info
- [NonSerialized]
- private CultureData _cultureData; // Data record for the culture that made us, not for this textinfo
- [NonSerialized]
- private String _textInfoName; // Name of the text info we're using (ie: _cultureData.STEXTINFO)
- [NonSerialized]
- private Tristate _isAsciiCasingSameAsInvariant = Tristate.NotInitialized;
-
- // TextInfo for the invariant culture, created on first use from CultureData.Invariant.
- internal static TextInfo Invariant
- {
-     get
-     {
-         // Fast path: the instance has already been published.
-         if (s_Invariant != null)
-         {
-             return s_Invariant;
-         }
-
-         s_Invariant = new TextInfo(CultureData.Invariant);
-         return s_Invariant;
-     }
- }
- internal volatile static TextInfo s_Invariant;
-
- [OnSerializing]
- private void OnSerializing(StreamingContext ctx)
- {
-     // Nothing to prepare before serialization.
- }
-
- [OnDeserializing]
- private void OnDeserializing(StreamingContext ctx)
- {
-     // Reset both fields so the post-deserialization step can tell
-     // whether they have been fixed up yet.
-     _cultureName = null;
-     _cultureData = null;
- }
-
- [OnDeserialized]
- private void OnDeserialized(StreamingContext ctx) => OnDeserialized(); // forwards to the parameterless fix-up
-
- // IDeserializationCallback entry point; forwards to the parameterless fix-up.
- void IDeserializationCallback.OnDeserialization(Object sender) => OnDeserialized();
-
- private void OnDeserialized()
- {
-     // Reached from both the [OnDeserialized] hook and IDeserializationCallback,
-     // so a second call finds _cultureData already set and has nothing to do.
-     if (_cultureData != null)
-     {
-         return;
-     }
-
-     // Look the culture up by name, then take the text info name from its data.
-     _cultureData = CultureInfo.GetCultureInfo(_cultureName).m_cultureData;
-     string textInfoName = _cultureData.STEXTINFO;
-     _textInfoName = textInfoName;
-     FinishInitialization(_textInfoName);
- }
-
- //
- // Internal ordinal comparison functions
- //
-
- internal static int GetHashCodeOrdinalIgnoreCase(String s)
- {
-     // Ordinal-ignore-case hashing is delegated to the invariant TextInfo's
-     // case-insensitive hash (not necessarily true for sorting, but OK for casing).
-     TextInfo invariant = Invariant;
-     return invariant.GetCaseInsensitiveHashCode(s);
- }
-
- // Currently we don't have native functions to do this, so we do it the hard way
- internal static int IndexOfStringOrdinalIgnoreCase(String source, String value, int startIndex, int count)
- {
-     // The search window [startIndex, startIndex + count) must lie inside source.
-     bool windowIsValid = count <= source.Length
-         && count >= 0
-         && startIndex >= 0
-         && startIndex < source.Length
-         && startIndex + count <= source.Length;
-
-     if (windowIsValid)
-     {
-         return CompareInfo.IndexOfOrdinal(source, value, startIndex, count, ignoreCase: true);
-     }
-
-     // An invalid window is reported as "not found" rather than thrown.
-     return -1;
- }
-
- // Currently we don't have native functions to do this, so we do it the hard way
- internal static int LastIndexOfStringOrdinalIgnoreCase(String source, String value, int startIndex, int count)
- {
-     // The backward window ends at startIndex and spans count characters;
-     // it must lie inside source.
-     bool windowIsValid = count <= source.Length
-         && count >= 0
-         && startIndex >= 0
-         && startIndex <= source.Length - 1
-         && (startIndex - count + 1 >= 0);
-
-     if (windowIsValid)
-     {
-         return CompareInfo.LastIndexOfOrdinal(source, value, startIndex, count, ignoreCase: true);
-     }
-
-     // An invalid window is reported as "not found" rather than thrown.
-     return -1;
- }
-
- ////////////////////////////////////////////////////////////////////////
- //
- // CodePage
- //
- // Returns the number of the code page used by this writing system.
- // The type parameter can be any of the following values:
- // ANSICodePage
- // OEMCodePage
- // MACCodePage
- //
- ////////////////////////////////////////////////////////////////////////
-
-
- // Reads IDEFAULTANSICODEPAGE from the backing culture data.
- public virtual int ANSICodePage => _cultureData.IDEFAULTANSICODEPAGE;
-
-
- // Reads IDEFAULTOEMCODEPAGE from the backing culture data.
- public virtual int OEMCodePage => _cultureData.IDEFAULTOEMCODEPAGE;
-
-
- // Reads IDEFAULTMACCODEPAGE from the backing culture data.
- public virtual int MacCodePage => _cultureData.IDEFAULTMACCODEPAGE;
-
-
- // Reads IDEFAULTEBCDICCODEPAGE from the backing culture data.
- public virtual int EBCDICCodePage => _cultureData.IDEFAULTEBCDICCODEPAGE;
-
- public int LCID
- {
-     get
-     {
-         // The LCID is taken from the culture named by our text info name.
-         CultureInfo textInfoCulture = CultureInfo.GetCultureInfo(_textInfoName);
-         return textInfoCulture.LCID;
-     }
- }
-
- //////////////////////////////////////////////////////////////////////////
- ////
- //// CultureName
- ////
- //// The name of the culture associated with the current TextInfo.
- ////
- //////////////////////////////////////////////////////////////////////////
- // Exposes the text info name held in _textInfoName.
- public string CultureName => _textInfoName;
-
- ////////////////////////////////////////////////////////////////////////
- //
- // IsReadOnly
- //
- // Detect if the object is readonly.
- //
- ////////////////////////////////////////////////////////////////////////
- // True when this instance has been marked read-only via SetReadOnlyState.
- public bool IsReadOnly => _isReadOnly;
-
- //////////////////////////////////////////////////////////////////////////
- ////
- //// Clone
- ////
- //// Is the implementation of ICloneable.
- ////
- //////////////////////////////////////////////////////////////////////////
- public virtual object Clone()
- {
-     // A shallow copy; the copy is always writable, whatever this instance's state.
-     TextInfo copy = (TextInfo)MemberwiseClone();
-     copy.SetReadOnlyState(false);
-     return copy;
- }
-
- ////////////////////////////////////////////////////////////////////////
- //
- // ReadOnly
- //
- // Create a cloned readonly instance or return the input one if it is
- // readonly.
- //
- ////////////////////////////////////////////////////////////////////////
- public static TextInfo ReadOnly(TextInfo textInfo)
- {
-     if (textInfo == null)
-     {
-         throw new ArgumentNullException(nameof(textInfo));
-     }
-     Contract.EndContractBlock();
-
-     if (!textInfo.IsReadOnly)
-     {
-         // Writable input: hand back a shallow copy that is marked read-only.
-         TextInfo readOnlyCopy = (TextInfo)textInfo.MemberwiseClone();
-         readOnlyCopy.SetReadOnlyState(true);
-         return readOnlyCopy;
-     }
-
-     // Already read-only: the same instance is returned.
-     return textInfo;
- }
-
- // Throws InvalidOperationException when this instance is read-only.
- private void VerifyWritable()
- {
-     if (!_isReadOnly)
-     {
-         return;
-     }
-
-     throw new InvalidOperationException(SR.InvalidOperation_ReadOnly);
- }
-
- // Stores the read-only flag that IsReadOnly reports and VerifyWritable checks.
- internal void SetReadOnlyState(bool readOnly) => _isReadOnly = readOnly;
-
-
- ////////////////////////////////////////////////////////////////////////
- //
- // ListSeparator
- //
- // Returns the string used to separate items in a list.
- //
- ////////////////////////////////////////////////////////////////////////
- public virtual String ListSeparator
- {
-     get
-     {
-         // Filled from the culture data's SLIST the first time it is read.
-         return _listSeparator ?? (_listSeparator = _cultureData.SLIST);
-     }
-
-     set
-     {
-         // The null check runs before the read-only check.
-         if (value != null)
-         {
-             VerifyWritable();
-             _listSeparator = value;
-             return;
-         }
-
-         throw new ArgumentNullException(nameof(value), SR.ArgumentNull_String);
-     }
- }
-
- ////////////////////////////////////////////////////////////////////////
- //
- // ToLower
- //
- // Converts the character or string to lower case. Certain locales
- // have different casing semantics from the file systems in Win32.
- //
- ////////////////////////////////////////////////////////////////////////
- public unsafe virtual char ToLower(char c)
- {
-     // Anything that is not ASCII, or any culture whose ASCII casing differs
-     // from invariant, goes through the general ChangeCase path.
-     if (!IsAscii(c) || !IsAsciiCasingSameAsInvariant)
-     {
-         return ChangeCase(c, toUpper: false);
-     }
-
-     return ToLowerAsciiInvariant(c);
- }
-
- public unsafe virtual String ToLower(String str)
- {
-     if (str == null)
-     {
-         throw new ArgumentNullException(nameof(str));
-     }
-
-     // Whole-string lowercasing is delegated to ChangeCase.
-     String lowered = ChangeCase(str, toUpper: false);
-     return lowered;
- }
-
- // Maps 'A'..'Z' to 'a'..'z' by setting bit 0x20; every other char is returned unchanged.
- private static Char ToLowerAsciiInvariant(Char c)
- {
-     // A single unsigned comparison covers both ends of the 'A'..'Z' range.
-     bool isUpperAscii = (uint)(c - 'A') <= (uint)('Z' - 'A');
-     return isUpperAscii ? (Char)(c | 0x20) : c;
- }
-
- ////////////////////////////////////////////////////////////////////////
- //
- // ToUpper
- //
- // Converts the character or string to upper case. Certain locales
- // have different casing semantics from the file systems in Win32.
- //
- ////////////////////////////////////////////////////////////////////////
- public unsafe virtual char ToUpper(char c)
- {
-     // Anything that is not ASCII, or any culture whose ASCII casing differs
-     // from invariant, goes through the general ChangeCase path.
-     if (!IsAscii(c) || !IsAsciiCasingSameAsInvariant)
-     {
-         return ChangeCase(c, toUpper: true);
-     }
-
-     return ToUpperAsciiInvariant(c);
- }
-
- public unsafe virtual String ToUpper(String str)
- {
-     if (str == null)
-     {
-         throw new ArgumentNullException(nameof(str));
-     }
-
-     // Whole-string uppercasing is delegated to ChangeCase.
-     String raised = ChangeCase(str, toUpper: true);
-     return raised;
- }
-
- // Maps 'a'..'z' to 'A'..'Z' by clearing bit 0x20; every other char is returned unchanged.
- private static Char ToUpperAsciiInvariant(Char c)
- {
-     // A single unsigned comparison covers both ends of the 'a'..'z' range.
-     bool isLowerAscii = (uint)(c - 'a') <= (uint)('z' - 'a');
-     return isLowerAscii ? (Char)(c & ~0x20) : c;
- }
-
- // True for chars below 0x80.
- private static bool IsAscii(Char c) => c < 0x80;
-
- private bool IsAsciiCasingSameAsInvariant
- {
-     get
-     {
-         // Computed once: does this culture compare the lowercase and uppercase
-         // ASCII alphabets as equal when case is ignored?
-         if (_isAsciiCasingSameAsInvariant == Tristate.NotInitialized)
-         {
-             CompareInfo comparer = CultureInfo.GetCultureInfo(_textInfoName).CompareInfo;
-             int order = comparer.Compare("abcdefghijklmnopqrstuvwxyz",
-                                          "ABCDEFGHIJKLMNOPQRSTUVWXYZ",
-                                          CompareOptions.IgnoreCase);
-             _isAsciiCasingSameAsInvariant = (order == 0) ? Tristate.True : Tristate.False;
-         }
-
-         return _isAsciiCasingSameAsInvariant == Tristate.True;
-     }
- }
-
- // IsRightToLeft
- //
- // Returns true if the dominant direction of text, and of UI layout such as the relative position of buttons and scroll bars, is right-to-left.
- //
- // Forwards the IsRightToLeft value of the backing culture data.
- public bool IsRightToLeft => _cultureData.IsRightToLeft;
-
- ////////////////////////////////////////////////////////////////////////
- //
- // Equals
- //
- // Implements Object.Equals(). Returns a boolean indicating whether
- // or not obj is a TextInfo with the same CultureName as the current instance.
- //
- ////////////////////////////////////////////////////////////////////////
- public override bool Equals(Object obj)
- {
-     // Only another TextInfo can be equal; equality is decided by CultureName.
-     TextInfo other = obj as TextInfo;
-     if (other == null)
-     {
-         return false;
-     }
-
-     return this.CultureName.Equals(other.CultureName);
- }
-
- ////////////////////////////////////////////////////////////////////////
- //
- // GetHashCode
- //
- // Implements Object.GetHashCode(). Returns the hash code of the
- // CultureName. The hash code is guaranteed to be the same for TextInfo A
- // and B where A.Equals(B) is true.
- //
- ////////////////////////////////////////////////////////////////////////
- // Hashes the same CultureName string that Equals compares.
- public override int GetHashCode() => this.CultureName.GetHashCode();
-
- ////////////////////////////////////////////////////////////////////////
- //
- // ToString
- //
- // Implements Object.ToString(). Returns a string describing the
- // TextInfo.
- //
- ////////////////////////////////////////////////////////////////////////
- // Produces "TextInfo - " followed by the culture name of the backing culture data.
- public override String ToString() => "TextInfo - " + _cultureData.CultureName;
-
- //
- // Titlecasing:
- // -----------
- // Titlecasing refers to a casing practice wherein the first letter of a word is an uppercase letter
- // and the rest of the letters are lowercase. The choice of which words to titlecase in headings
- // and titles is dependent on language and local conventions. For example, "The Merry Wives of Windsor"
- // is the appropriate titlecasing of that play's name in English, with the word "of" not titlecased.
- // In German, however, the title is "Die lustigen Weiber von Windsor," and both "lustigen" and "von"
- // are not titlecased. In French even fewer words are titlecased: "Les joyeuses commeres de Windsor."
- //
- // Moreover, the determination of what actually constitutes a word is language dependent, and this can
- // influence which letter or letters of a "word" are uppercased when titlecasing strings. For example
- // "l'arbre" is considered two words in French, whereas "can't" is considered one word in English.
- //
- public unsafe String ToTitleCase(String str) // title-cases each word of str; throws ArgumentNullException when str is null
- {
- if (str == null)
- {
- throw new ArgumentNullException(nameof(str));
- }
- Contract.EndContractBlock();
- if (str.Length == 0)
- {
- return (str); // the empty string is returned as-is
- }
-
- StringBuilder result = new StringBuilder();
- string lowercaseData = null; // ToLower(str), computed at most once and shared by every word that needs it
-
- for (int i = 0; i < str.Length; i++)
- {
- UnicodeCategory charType;
- int charLen;
-
- charType = CharUnicodeInfo.InternalGetUnicodeCategory(str, i, out charLen);
- if (Char.CheckLetter(charType))
- {
- // Do the titlecasing for the first character of the word; the helper returns the index of the last char it consumed.
- i = AddTitlecaseLetter(ref result, ref str, i, charLen) + 1;
-
- //
- // Convert the characters until the end of this word
- // to lowercase.
- //
- int lowercaseStart = i;
-
- //
- // Use hasLowerCase flag to prevent from lowercasing acronyms (like "URT", "USA", etc)
- // This is in line with Word 2000 behavior of titlecasing.
- //
- bool hasLowerCase = (charType == UnicodeCategory.LowercaseLetter);
- // Use a loop to find all of the other letters following this letter.
- while (i < str.Length)
- {
- charType = CharUnicodeInfo.InternalGetUnicodeCategory(str, i, out charLen);
- if (IsLetterCategory(charType))
- {
- if (charType == UnicodeCategory.LowercaseLetter)
- {
- hasLowerCase = true;
- }
- i += charLen;
- }
- else if (str[i] == '\'') // an apostrophe flushes the pending run but does not end the word
- {
- i++;
- if (hasLowerCase)
- {
- if (lowercaseData == null)
- {
- lowercaseData = this.ToLower(str);
- }
- result.Append(lowercaseData, lowercaseStart, i - lowercaseStart);
- }
- else
- {
- result.Append(str, lowercaseStart, i - lowercaseStart);
- }
- lowercaseStart = i;
- hasLowerCase = true; // from here on the rest of the word is appended from the lowercased copy
- }
- else if (!IsWordSeparator(charType))
- {
- // This category is considered to be part of the word.
- // This is any category whose bit is not set in c_wordSeparatorMask.
- i+= charLen;
- }
- else
- {
- // A word separator. Break out of the loop.
- break;
- }
- }
-
- int count = i - lowercaseStart; // chars of the word not yet appended
-
- if (count > 0)
- {
- if (hasLowerCase)
- {
- if (lowercaseData == null)
- {
- lowercaseData = this.ToLower(str);
- }
- result.Append(lowercaseData, lowercaseStart, count);
- }
- else
- {
- result.Append(str, lowercaseStart, count);
- }
- }
-
- if (i < str.Length)
- {
- // not a letter, just append it; the for loop's i++ then steps past the last char appended
- i = AddNonLetter(ref result, ref str, i, charLen);
- }
- }
- else
- {
- // not a letter, just append it; the for loop's i++ then steps past the last char appended
- i = AddNonLetter(ref result, ref str, i, charLen);
- }
- }
- return (result.ToString());
- }
-
- // Appends the one or two chars at inputIndex to result unchanged and returns the
- // index of the last char appended.
- private static int AddNonLetter(ref StringBuilder result, ref String input, int inputIndex, int charLen)
- {
-     Debug.Assert(charLen == 1 || charLen == 2, "[TextInfo.AddNonLetter] CharUnicodeInfo.InternalGetUnicodeCategory returned an unexpected charLen!");
-
-     if (charLen == 2)
-     {
-         // Surrogate pair: emit the first half here, then step onto the second half.
-         result.Append(input[inputIndex]);
-         inputIndex++;
-     }
-
-     // Single char, or the second half of a surrogate pair.
-     result.Append(input[inputIndex]);
-     return inputIndex;
- }
-
- // Appends the titlecase form of the one or two chars at inputIndex to result and
- // returns the index of the last char consumed.
- private int AddTitlecaseLetter(ref StringBuilder result, ref String input, int inputIndex, int charLen)
- {
-     Debug.Assert(charLen == 1 || charLen == 2, "[TextInfo.AddTitlecaseLetter] CharUnicodeInfo.InternalGetUnicodeCategory returned an unexpected charLen!");
-
-     if (charLen == 2)
-     {
-         // Surrogate pair: a plain ToUpper on the two-char substring is used.
-         result.Append(ToUpper(input.Substring(inputIndex, charLen)));
-         return inputIndex + 1;
-     }
-
-     char first = input[inputIndex];
-     char titled;
-     switch (first)
-     {
-         //
-         // For AppCompat, the Titlecase Case Mapping data from NDP 2.0 is used below.
-         case (char) 0x01C4: // DZ with Caron
-         case (char) 0x01C5: // Dz with Caron
-         case (char) 0x01C6: // dz with Caron
-             titled = (char) 0x01C5; // -> Dz with Caron
-             break;
-         case (char) 0x01C7: // LJ
-         case (char) 0x01C8: // Lj
-         case (char) 0x01C9: // lj
-             titled = (char) 0x01C8; // -> Lj
-             break;
-         case (char) 0x01CA: // NJ
-         case (char) 0x01CB: // Nj
-         case (char) 0x01CC: // nj
-             titled = (char) 0x01CB; // -> Nj
-             break;
-         case (char) 0x01F1: // DZ
-         case (char) 0x01F2: // Dz
-         case (char) 0x01F3: // dz
-             titled = (char) 0x01F2; // -> Dz
-             break;
-         default:
-             titled = ToUpper(first);
-             break;
-     }
-
-     result.Append(titled);
-     return inputIndex;
- }
-
- //
- // Used in ToTitleCase():
- // When we find a starting letter, the following bit mask decides if a category should be
- // considered as word separator or not.
- //
- // One bit per UnicodeCategory value; a set bit marks the category as a word separator.
- // Categories not listed here (letters, marks, numbers, Surrogate, PrivateUse,
- // OtherNotAssigned) have their bit clear.
- private const int c_wordSeparatorMask =
-     (1 << (int) UnicodeCategory.SpaceSeparator) |           // 11
-     (1 << (int) UnicodeCategory.LineSeparator) |            // 12
-     (1 << (int) UnicodeCategory.ParagraphSeparator) |       // 13
-     (1 << (int) UnicodeCategory.Control) |                  // 14
-     (1 << (int) UnicodeCategory.Format) |                   // 15
-     (1 << (int) UnicodeCategory.ConnectorPunctuation) |     // 18
-     (1 << (int) UnicodeCategory.DashPunctuation) |          // 19
-     (1 << (int) UnicodeCategory.OpenPunctuation) |          // 20
-     (1 << (int) UnicodeCategory.ClosePunctuation) |         // 21
-     (1 << (int) UnicodeCategory.InitialQuotePunctuation) |  // 22
-     (1 << (int) UnicodeCategory.FinalQuotePunctuation) |    // 23
-     (1 << (int) UnicodeCategory.OtherPunctuation) |         // 24
-     (1 << (int) UnicodeCategory.MathSymbol) |               // 25
-     (1 << (int) UnicodeCategory.CurrencySymbol) |           // 26
-     (1 << (int) UnicodeCategory.ModifierSymbol) |           // 27
-     (1 << (int) UnicodeCategory.OtherSymbol);               // 28
-
- // Tests the bit for the given category in c_wordSeparatorMask.
- private static bool IsWordSeparator(UnicodeCategory category)
- {
-     int categoryBit = 1 << (int) category;
-     return (c_wordSeparatorMask & categoryBit) != 0;
- }
-
- // True for the five letter categories: uppercase, lowercase, titlecase, modifier and other.
- private static bool IsLetterCategory(UnicodeCategory uc)
- {
-     switch (uc)
-     {
-         case UnicodeCategory.UppercaseLetter:
-         case UnicodeCategory.LowercaseLetter:
-         case UnicodeCategory.TitlecaseLetter:
-         case UnicodeCategory.ModifierLetter:
-         case UnicodeCategory.OtherLetter:
-             return true;
-         default:
-             return false;
-     }
- }
-
- //
- // Get case-insensitive hash code for the specified string.
- //
- internal unsafe int GetCaseInsensitiveHashCode(String str)
- {
-     // Validate inputs
-     if (str == null)
-     {
-         throw new ArgumentNullException(nameof(str));
-     }
-
-     // This code assumes that ASCII casing is safe for whatever context is passed in.
-     // That holds today because these methods are only ever called on Invariant.
-
-     uint hash = 5381;
-
-     // Optimistic ASCII pass: the first non-ASCII char abandons it and the whole
-     // string is rehashed by the slow path instead.
-     foreach (char ch in str)
-     {
-         if (ch >= 0x80)
-         {
-             return GetCaseInsensitiveHashCodeSlow(str);
-         }
-
-         uint folded = ch;
-         if (ch >= 'a' && ch <= 'z')
-         {
-             // Clearing bit 0x20 turns a lowercase ASCII letter into its uppercase form.
-             folded = (uint)(ch & ~0x20);
-         }
-
-         hash = ((hash << 5) + hash) ^ folded;
-     }
-
-     return (int)hash;
- }
-
- // Uppercases the whole string with ToUpper, then hashes it with the same
- // mixing step as the ASCII fast path.
- private unsafe int GetCaseInsensitiveHashCodeSlow(String str)
- {
-     Debug.Assert(str != null);
-
-     uint hash = 5381;
-
-     foreach (char ch in ToUpper(str))
-     {
-         hash = ((hash << 5) + hash) ^ ch;
-     }
-
-     return (int)hash;
- }
- }
-}