diff options
Diffstat (limited to 'src/mscorlib/corefx/System/Globalization/TextInfo.cs')
-rw-r--r-- | src/mscorlib/corefx/System/Globalization/TextInfo.cs | 324 |
1 files changed, 308 insertions, 16 deletions
diff --git a/src/mscorlib/corefx/System/Globalization/TextInfo.cs b/src/mscorlib/corefx/System/Globalization/TextInfo.cs index 6dadb5856a..5bb376f19c 100644 --- a/src/mscorlib/corefx/System/Globalization/TextInfo.cs +++ b/src/mscorlib/corefx/System/Globalization/TextInfo.cs @@ -12,15 +12,10 @@ // //////////////////////////////////////////////////////////////////////////// -using System; +using System.Diagnostics; using System.Diagnostics.Contracts; -using System.Runtime; -using System.Runtime.CompilerServices; -using System.Runtime.InteropServices; using System.Runtime.Serialization; -using System.Security; using System.Text; -using System.Threading; namespace System.Globalization { @@ -145,6 +140,64 @@ namespace System.Globalization return CompareInfo.LastIndexOfOrdinal(source, value, startIndex, count, ignoreCase: true); } + //////////////////////////////////////////////////////////////////////// + // + // CodePage + // + // Returns the number of the code page used by this writing system. + // The type parameter can be any of the following values: + // ANSICodePage + // OEMCodePage + // MACCodePage + // + //////////////////////////////////////////////////////////////////////// + + + public virtual int ANSICodePage + { + get + { + return (_cultureData.IDEFAULTANSICODEPAGE); + } + } + + + public virtual int OEMCodePage + { + get + { + return (_cultureData.IDEFAULTOEMCODEPAGE); + } + } + + + public virtual int MacCodePage + { + get + { + return (_cultureData.IDEFAULTMACCODEPAGE); + } + } + + + public virtual int EBCDICCodePage + { + get + { + return (_cultureData.IDEFAULTEBCDICCODEPAGE); + } + } + + [System.Runtime.InteropServices.ComVisible(false)] + public int LCID + { + get + { + // Just use the LCID from our text info name + return CultureInfo.GetCultureInfo(_textInfoName).LCID; + } + } + ////////////////////////////////////////////////////////////////////////// //// //// CultureName @@ -177,10 +230,10 @@ namespace System.Globalization //// //// Clone //// - //// Is the implementation of IColnable. + //// Is the implementation of ICloneable. //// ////////////////////////////////////////////////////////////////////////// - public virtual Object Clone() + public virtual object Clone() { object o = MemberwiseClone(); ((TextInfo)o).SetReadOnlyState(false); @@ -196,9 +249,9 @@ namespace System.Globalization // //////////////////////////////////////////////////////////////////////// [System.Runtime.InteropServices.ComVisible(false)] - internal static TextInfo ReadOnly(TextInfo textInfo) + public static TextInfo ReadOnly(TextInfo textInfo) { - if (textInfo == null) { throw new ArgumentNullException("textInfo"); } + if (textInfo == null) { throw new ArgumentNullException(nameof(textInfo)); } Contract.EndContractBlock(); if (textInfo.IsReadOnly) { return (textInfo); } @@ -244,7 +297,7 @@ namespace System.Globalization { if (value == null) { - throw new ArgumentNullException("value", SR.ArgumentNull_String); + throw new ArgumentNullException(nameof(value), SR.ArgumentNull_String); } VerifyWritable(); _listSeparator = value; @@ -270,7 +323,7 @@ namespace System.Globalization public unsafe virtual String ToLower(String str) { - if (str == null) { throw new ArgumentNullException("str"); } + if (str == null) { throw new ArgumentNullException(nameof(str)); } return ChangeCase(str, toUpper: false); } @@ -303,7 +356,7 @@ namespace System.Globalization public unsafe virtual String ToUpper(String str) { - if (str == null) { throw new ArgumentNullException("str"); } + if (str == null) { throw new ArgumentNullException(nameof(str)); } return ChangeCase(str, toUpper: true); } @@ -317,7 +370,7 @@ namespace System.Globalization return c; } - static private bool IsAscii(Char c) + private static bool IsAscii(Char c) { return c < 0x80; } @@ -396,6 +449,245 @@ namespace System.Globalization } // + // Titlecasing: + // ----------- + // Titlecasing refers to a casing practice wherein the first letter of a word is an uppercase letter + // and the rest of the letters are lowercase. The choice of which words to titlecase in headings + // and titles is dependent on language and local conventions. For example, "The Merry Wives of Windor" + // is the appropriate titlecasing of that play's name in English, with the word "of" not titlecased. + // In German, however, the title is "Die lustigen Weiber von Windsor," and both "lustigen" and "von" + // are not titlecased. In French even fewer words are titlecased: "Les joyeuses commeres de Windsor." + // + // Moreover, the determination of what actually constitutes a word is language dependent, and this can + // influence which letter or letters of a "word" are uppercased when titlecasing strings. For example + // "l'arbre" is considered two words in French, whereas "can't" is considered one word in English. + // + public unsafe String ToTitleCase(String str) + { + if (str == null) + { + throw new ArgumentNullException(nameof(str)); + } + Contract.EndContractBlock(); + if (str.Length == 0) + { + return (str); + } + + StringBuilder result = new StringBuilder(); + string lowercaseData = null; + + for (int i = 0; i < str.Length; i++) + { + UnicodeCategory charType; + int charLen; + + charType = CharUnicodeInfo.InternalGetUnicodeCategory(str, i, out charLen); + if (Char.CheckLetter(charType)) + { + // Do the titlecasing for the first character of the word. + i = AddTitlecaseLetter(ref result, ref str, i, charLen) + 1; + + // + // Convert the characters until the end of the this word + // to lowercase. + // + int lowercaseStart = i; + + // + // Use hasLowerCase flag to prevent from lowercasing acronyms (like "URT", "USA", etc) + // This is in line with Word 2000 behavior of titlecasing. + // + bool hasLowerCase = (charType == UnicodeCategory.LowercaseLetter); + // Use a loop to find all of the other letters following this letter. + while (i < str.Length) + { + charType = CharUnicodeInfo.InternalGetUnicodeCategory(str, i, out charLen); + if (IsLetterCategory(charType)) + { + if (charType == UnicodeCategory.LowercaseLetter) + { + hasLowerCase = true; + } + i += charLen; + } + else if (str[i] == '\'') + { + i++; + if (hasLowerCase) + { + if (lowercaseData == null) + { + lowercaseData = this.ToLower(str); + } + result.Append(lowercaseData, lowercaseStart, i - lowercaseStart); + } + else + { + result.Append(str, lowercaseStart, i - lowercaseStart); + } + lowercaseStart = i; + hasLowerCase = true; + } + else if (!IsWordSeparator(charType)) + { + // This category is considered to be part of the word. + // This is any category that is marked as false in wordSeprator array. + i+= charLen; + } + else + { + // A word separator. Break out of the loop. + break; + } + } + + int count = i - lowercaseStart; + + if (count > 0) + { + if (hasLowerCase) + { + if (lowercaseData == null) + { + lowercaseData = this.ToLower(str); + } + result.Append(lowercaseData, lowercaseStart, count); + } + else + { + result.Append(str, lowercaseStart, count); + } + } + + if (i < str.Length) + { + // not a letter, just append it + i = AddNonLetter(ref result, ref str, i, charLen); + } + } + else + { + // not a letter, just append it + i = AddNonLetter(ref result, ref str, i, charLen); + } + } + return (result.ToString()); + } + + private static int AddNonLetter(ref StringBuilder result, ref String input, int inputIndex, int charLen) + { + Debug.Assert(charLen == 1 || charLen == 2, "[TextInfo.AddNonLetter] CharUnicodeInfo.InternalGetUnicodeCategory returned an unexpected charLen!"); + if (charLen == 2) + { + // Surrogate pair + result.Append(input[inputIndex++]); + result.Append(input[inputIndex]); + } + else + { + result.Append(input[inputIndex]); + } + return inputIndex; + } + + private int AddTitlecaseLetter(ref StringBuilder result, ref String input, int inputIndex, int charLen) + { + Debug.Assert(charLen == 1 || charLen == 2, "[TextInfo.AddTitlecaseLetter] CharUnicodeInfo.InternalGetUnicodeCategory returned an unexpected charLen!"); + + // for surrogate pairs do a simple ToUpper operation on the substring + if (charLen == 2) + { + // Surrogate pair + result.Append(ToUpper(input.Substring(inputIndex, charLen))); + inputIndex++; + } + else + { + switch (input[inputIndex]) + { + // + // For AppCompat, the Titlecase Case Mapping data from NDP 2.0 is used below. + case (char) 0x01C4: // DZ with Caron -> Dz with Caron + case (char) 0x01C5: // Dz with Caron -> Dz with Caron + case (char) 0x01C6: // dz with Caron -> Dz with Caron + result.Append((char) 0x01C5); + break; + case (char) 0x01C7: // LJ -> Lj + case (char) 0x01C8: // Lj -> Lj + case (char) 0x01C9: // lj -> Lj + result.Append((char) 0x01C8); + break; + case (char) 0x01CA: // NJ -> Nj + case (char) 0x01CB: // Nj -> Nj + case (char) 0x01CC: // nj -> Nj + result.Append((char) 0x01CB); + break; + case (char) 0x01F1: // DZ -> Dz + case (char) 0x01F2: // Dz -> Dz + case (char) 0x01F3: // dz -> Dz + result.Append((char) 0x01F2); + break; + default: + result.Append(ToUpper(input[inputIndex])); + break; + } + } + return inputIndex; + } + + // + // Used in ToTitleCase(): + // When we find a starting letter, the following array decides if a category should be + // considered as word seprator or not. + // + private const int c_wordSeparatorMask = + /* false */ (0 << 0) | // UppercaseLetter = 0, + /* false */ (0 << 1) | // LowercaseLetter = 1, + /* false */ (0 << 2) | // TitlecaseLetter = 2, + /* false */ (0 << 3) | // ModifierLetter = 3, + /* false */ (0 << 4) | // OtherLetter = 4, + /* false */ (0 << 5) | // NonSpacingMark = 5, + /* false */ (0 << 6) | // SpacingCombiningMark = 6, + /* false */ (0 << 7) | // EnclosingMark = 7, + /* false */ (0 << 8) | // DecimalDigitNumber = 8, + /* false */ (0 << 9) | // LetterNumber = 9, + /* false */ (0 << 10) | // OtherNumber = 10, + /* true */ (1 << 11) | // SpaceSeparator = 11, + /* true */ (1 << 12) | // LineSeparator = 12, + /* true */ (1 << 13) | // ParagraphSeparator = 13, + /* true */ (1 << 14) | // Control = 14, + /* true */ (1 << 15) | // Format = 15, + /* false */ (0 << 16) | // Surrogate = 16, + /* false */ (0 << 17) | // PrivateUse = 17, + /* true */ (1 << 18) | // ConnectorPunctuation = 18, + /* true */ (1 << 19) | // DashPunctuation = 19, + /* true */ (1 << 20) | // OpenPunctuation = 20, + /* true */ (1 << 21) | // ClosePunctuation = 21, + /* true */ (1 << 22) | // InitialQuotePunctuation = 22, + /* true */ (1 << 23) | // FinalQuotePunctuation = 23, + /* true */ (1 << 24) | // OtherPunctuation = 24, + /* true */ (1 << 25) | // MathSymbol = 25, + /* true */ (1 << 26) | // CurrencySymbol = 26, + /* true */ (1 << 27) | // ModifierSymbol = 27, + /* true */ (1 << 28) | // OtherSymbol = 28, + /* false */ (0 << 29); // OtherNotAssigned = 29; + + private static bool IsWordSeparator(UnicodeCategory category) + { + return (c_wordSeparatorMask & (1 << (int) category)) != 0; + } + + private static bool IsLetterCategory(UnicodeCategory uc) + { + return (uc == UnicodeCategory.UppercaseLetter + || uc == UnicodeCategory.LowercaseLetter + || uc == UnicodeCategory.TitlecaseLetter + || uc == UnicodeCategory.ModifierLetter + || uc == UnicodeCategory.OtherLetter); + } + + // // Get case-insensitive hash code for the specified string. // internal unsafe int GetCaseInsensitiveHashCode(String str) @@ -403,7 +695,7 @@ namespace System.Globalization // Validate inputs if (str == null) { - throw new ArgumentNullException("str"); + throw new ArgumentNullException(nameof(str)); } // This code assumes that ASCII casing is safe for whatever context is passed in. @@ -438,7 +730,7 @@ namespace System.Globalization private unsafe int GetCaseInsensitiveHashCodeSlow(String str) { - Contract.Assert(str != null); + Debug.Assert(str != null); string upper = ToUpper(str); |