summaryrefslogtreecommitdiff
path: root/src/mscorlib/corefx/System/Globalization/TextInfo.cs
diff options
context:
space:
mode:
Diffstat (limited to 'src/mscorlib/corefx/System/Globalization/TextInfo.cs')
-rw-r--r--src/mscorlib/corefx/System/Globalization/TextInfo.cs324
1 files changed, 308 insertions, 16 deletions
diff --git a/src/mscorlib/corefx/System/Globalization/TextInfo.cs b/src/mscorlib/corefx/System/Globalization/TextInfo.cs
index 6dadb5856a..5bb376f19c 100644
--- a/src/mscorlib/corefx/System/Globalization/TextInfo.cs
+++ b/src/mscorlib/corefx/System/Globalization/TextInfo.cs
@@ -12,15 +12,10 @@
//
////////////////////////////////////////////////////////////////////////////
-using System;
+using System.Diagnostics;
using System.Diagnostics.Contracts;
-using System.Runtime;
-using System.Runtime.CompilerServices;
-using System.Runtime.InteropServices;
using System.Runtime.Serialization;
-using System.Security;
using System.Text;
-using System.Threading;
namespace System.Globalization
{
@@ -145,6 +140,64 @@ namespace System.Globalization
return CompareInfo.LastIndexOfOrdinal(source, value, startIndex, count, ignoreCase: true);
}
+ ////////////////////////////////////////////////////////////////////////
+ //
+ // CodePage
+ //
+ // Returns the number of the code page used by this writing system.
+ // One property is exposed for each kind of code page:
+ // ANSICodePage
+ // OEMCodePage
+ // MacCodePage
+ // EBCDICCodePage
+ ////////////////////////////////////////////////////////////////////////
+
+ // Gets the code page number stored in _cultureData.IDEFAULTANSICODEPAGE.
+ public virtual int ANSICodePage
+ {
+ get
+ {
+ return (_cultureData.IDEFAULTANSICODEPAGE);
+ }
+ }
+
+ // Gets the code page number stored in _cultureData.IDEFAULTOEMCODEPAGE.
+ public virtual int OEMCodePage
+ {
+ get
+ {
+ return (_cultureData.IDEFAULTOEMCODEPAGE);
+ }
+ }
+
+ // Gets the code page number stored in _cultureData.IDEFAULTMACCODEPAGE.
+ public virtual int MacCodePage
+ {
+ get
+ {
+ return (_cultureData.IDEFAULTMACCODEPAGE);
+ }
+ }
+
+ // Gets the code page number stored in _cultureData.IDEFAULTEBCDICCODEPAGE.
+ public virtual int EBCDICCodePage
+ {
+ get
+ {
+ return (_cultureData.IDEFAULTEBCDICCODEPAGE);
+ }
+ }
+ // Gets the LCID of the culture that CultureInfo.GetCultureInfo returns for _textInfoName.
+ [System.Runtime.InteropServices.ComVisible(false)]
+ public int LCID
+ {
+ get
+ {
+ // Just use the LCID from our text info name
+ return CultureInfo.GetCultureInfo(_textInfoName).LCID;
+ }
+ }
+
//////////////////////////////////////////////////////////////////////////
////
//// CultureName
@@ -177,10 +230,10 @@ namespace System.Globalization
////
//// Clone
////
- //// Is the implementation of IColnable.
+ //// Is the implementation of ICloneable.
////
//////////////////////////////////////////////////////////////////////////
- public virtual Object Clone()
+ public virtual object Clone()
{
object o = MemberwiseClone();
((TextInfo)o).SetReadOnlyState(false);
@@ -196,9 +249,9 @@ namespace System.Globalization
//
////////////////////////////////////////////////////////////////////////
[System.Runtime.InteropServices.ComVisible(false)]
- internal static TextInfo ReadOnly(TextInfo textInfo)
+ public static TextInfo ReadOnly(TextInfo textInfo)
{
- if (textInfo == null) { throw new ArgumentNullException("textInfo"); }
+ if (textInfo == null) { throw new ArgumentNullException(nameof(textInfo)); }
Contract.EndContractBlock();
if (textInfo.IsReadOnly) { return (textInfo); }
@@ -244,7 +297,7 @@ namespace System.Globalization
{
if (value == null)
{
- throw new ArgumentNullException("value", SR.ArgumentNull_String);
+ throw new ArgumentNullException(nameof(value), SR.ArgumentNull_String);
}
VerifyWritable();
_listSeparator = value;
@@ -270,7 +323,7 @@ namespace System.Globalization
public unsafe virtual String ToLower(String str)
{
- if (str == null) { throw new ArgumentNullException("str"); }
+ if (str == null) { throw new ArgumentNullException(nameof(str)); } // null is rejected here; the lowercasing itself is delegated to ChangeCase
return ChangeCase(str, toUpper: false);
}
@@ -303,7 +356,7 @@ namespace System.Globalization
public unsafe virtual String ToUpper(String str)
{
- if (str == null) { throw new ArgumentNullException("str"); }
+ if (str == null) { throw new ArgumentNullException(nameof(str)); } // null is rejected here; the uppercasing itself is delegated to ChangeCase
return ChangeCase(str, toUpper: true);
}
@@ -317,7 +370,7 @@ namespace System.Globalization
return c;
}
- static private bool IsAscii(Char c)
+ private static bool IsAscii(Char c) // true when c's code unit is below 0x80
{
return c < 0x80;
}
@@ -396,6 +449,245 @@ namespace System.Globalization
}
//
+ // Titlecasing:
+ // -----------
+ // Titlecasing refers to a casing practice wherein the first letter of a word is an uppercase letter
+ // and the rest of the letters are lowercase. The choice of which words to titlecase in headings
+ // and titles is dependent on language and local conventions. For example, "The Merry Wives of Windsor"
+ // is the appropriate titlecasing of that play's name in English, with the word "of" not titlecased.
+ // In German, however, the title is "Die lustigen Weiber von Windsor," and both "lustigen" and "von"
+ // are not titlecased. In French even fewer words are titlecased: "Les joyeuses commeres de Windsor."
+ //
+ // Moreover, the determination of what actually constitutes a word is language dependent, and this can
+ // influence which letter or letters of a "word" are uppercased when titlecasing strings. For example
+ // "l'arbre" is considered two words in French, whereas "can't" is considered one word in English.
+ // ToTitleCase(str): throws ArgumentNullException for null and returns an empty str as-is; otherwise the first letter of each word goes through AddTitlecaseLetter and the rest of the word is lowercased, except that a word with no lowercase letter is copied unchanged up to its first apostrophe.
+ public unsafe String ToTitleCase(String str)
+ {
+ if (str == null)
+ {
+ throw new ArgumentNullException(nameof(str));
+ }
+ Contract.EndContractBlock();
+ if (str.Length == 0)
+ {
+ return (str);
+ }
+ // result accumulates the output; lowercaseData caches this.ToLower(str) and is only computed on first use.
+ StringBuilder result = new StringBuilder();
+ string lowercaseData = null;
+ // Each pass handles either one non-letter character, or one whole word plus the separator that ends it (if any).
+ for (int i = 0; i < str.Length; i++)
+ {
+ UnicodeCategory charType;
+ int charLen;
+
+ charType = CharUnicodeInfo.InternalGetUnicodeCategory(str, i, out charLen);
+ if (Char.CheckLetter(charType))
+ {
+ // Do the titlecasing for the first character of the word.
+ i = AddTitlecaseLetter(ref result, ref str, i, charLen) + 1; // the helper returns the last index it consumed; +1 moves to the next char
+
+ //
+ // Convert the characters until the end of this word
+ // to lowercase.
+ //
+ int lowercaseStart = i;
+
+ //
+ // Use hasLowerCase flag to prevent from lowercasing acronyms (like "URT", "USA", etc)
+ // This is in line with Word 2000 behavior of titlecasing.
+ //
+ bool hasLowerCase = (charType == UnicodeCategory.LowercaseLetter);
+ // Use a loop to find all of the other letters following this letter.
+ while (i < str.Length)
+ {
+ charType = CharUnicodeInfo.InternalGetUnicodeCategory(str, i, out charLen);
+ if (IsLetterCategory(charType))
+ {
+ if (charType == UnicodeCategory.LowercaseLetter)
+ {
+ hasLowerCase = true;
+ }
+ i += charLen;
+ }
+ else if (str[i] == '\'')
+ {
+ i++; // step past the apostrophe so it is flushed together with the text before it
+ if (hasLowerCase)
+ {
+ if (lowercaseData == null)
+ {
+ lowercaseData = this.ToLower(str);
+ }
+ result.Append(lowercaseData, lowercaseStart, i - lowercaseStart);
+ }
+ else
+ {
+ result.Append(str, lowercaseStart, i - lowercaseStart);
+ }
+ lowercaseStart = i;
+ hasLowerCase = true; // everything after an apostrophe is taken from the lowercased copy
+ }
+ else if (!IsWordSeparator(charType))
+ {
+ // This category is considered to be part of the word.
+ // This is any category that is marked as false in c_wordSeparatorMask.
+ i+= charLen;
+ }
+ else
+ {
+ // A word separator. Break out of the loop.
+ break;
+ }
+ }
+ // Flush whatever is left of the word since lowercaseStart (the first letter or the last apostrophe).
+ int count = i - lowercaseStart;
+
+ if (count > 0)
+ {
+ if (hasLowerCase)
+ {
+ if (lowercaseData == null)
+ {
+ lowercaseData = this.ToLower(str);
+ }
+ result.Append(lowercaseData, lowercaseStart, count);
+ }
+ else
+ {
+ result.Append(str, lowercaseStart, count);
+ }
+ }
+
+ if (i < str.Length)
+ {
+ // the word ended on a separator (charLen is that separator's length); just append it
+ i = AddNonLetter(ref result, ref str, i, charLen);
+ }
+ }
+ else
+ {
+ // not a letter, just append it
+ i = AddNonLetter(ref result, ref str, i, charLen);
+ }
+ }
+ return (result.ToString());
+ }
+ // Appends the character at inputIndex to result (both chars when charLen == 2) and returns the index of the last char appended.
+ private static int AddNonLetter(ref StringBuilder result, ref String input, int inputIndex, int charLen)
+ {
+ Debug.Assert(charLen == 1 || charLen == 2, "[TextInfo.AddNonLetter] CharUnicodeInfo.InternalGetUnicodeCategory returned an unexpected charLen!");
+ if (charLen == 2)
+ {
+ // Surrogate pair
+ result.Append(input[inputIndex++]);
+ result.Append(input[inputIndex]);
+ }
+ else
+ {
+ result.Append(input[inputIndex]);
+ }
+ return inputIndex;
+ }
+ // Appends the titlecase form of the letter at inputIndex to result and returns the index of the last char consumed (inputIndex + 1 for a two-char letter).
+ private int AddTitlecaseLetter(ref StringBuilder result, ref String input, int inputIndex, int charLen)
+ {
+ Debug.Assert(charLen == 1 || charLen == 2, "[TextInfo.AddTitlecaseLetter] CharUnicodeInfo.InternalGetUnicodeCategory returned an unexpected charLen!");
+
+ // for surrogate pairs do a simple ToUpper operation on the substring
+ if (charLen == 2)
+ {
+ // Surrogate pair
+ result.Append(ToUpper(input.Substring(inputIndex, charLen)));
+ inputIndex++;
+ }
+ else
+ {
+ switch (input[inputIndex])
+ {
+ //
+ // For AppCompat, the Titlecase Case Mapping data from NDP 2.0 is used below.
+ case (char) 0x01C4: // DZ with Caron -> Dz with Caron
+ case (char) 0x01C5: // Dz with Caron -> Dz with Caron
+ case (char) 0x01C6: // dz with Caron -> Dz with Caron
+ result.Append((char) 0x01C5);
+ break;
+ case (char) 0x01C7: // LJ -> Lj
+ case (char) 0x01C8: // Lj -> Lj
+ case (char) 0x01C9: // lj -> Lj
+ result.Append((char) 0x01C8);
+ break;
+ case (char) 0x01CA: // NJ -> Nj
+ case (char) 0x01CB: // Nj -> Nj
+ case (char) 0x01CC: // nj -> Nj
+ result.Append((char) 0x01CB);
+ break;
+ case (char) 0x01F1: // DZ -> Dz
+ case (char) 0x01F2: // Dz -> Dz
+ case (char) 0x01F3: // dz -> Dz
+ result.Append((char) 0x01F2);
+ break;
+ default:
+ result.Append(ToUpper(input[inputIndex])); // every other letter: the single-char ToUpper result is used as its titlecase form
+ break;
+ }
+ }
+ return inputIndex;
+ }
+
+ //
+ // Used in ToTitleCase():
+ // When we find a starting letter, the following bit mask decides if a category should be
+ // considered as word separator or not. Bit n is set when the UnicodeCategory with value n is a separator.
+ //
+ private const int c_wordSeparatorMask =
+ /* false */ (0 << 0) | // UppercaseLetter = 0,
+ /* false */ (0 << 1) | // LowercaseLetter = 1,
+ /* false */ (0 << 2) | // TitlecaseLetter = 2,
+ /* false */ (0 << 3) | // ModifierLetter = 3,
+ /* false */ (0 << 4) | // OtherLetter = 4,
+ /* false */ (0 << 5) | // NonSpacingMark = 5,
+ /* false */ (0 << 6) | // SpacingCombiningMark = 6,
+ /* false */ (0 << 7) | // EnclosingMark = 7,
+ /* false */ (0 << 8) | // DecimalDigitNumber = 8,
+ /* false */ (0 << 9) | // LetterNumber = 9,
+ /* false */ (0 << 10) | // OtherNumber = 10,
+ /* true */ (1 << 11) | // SpaceSeparator = 11,
+ /* true */ (1 << 12) | // LineSeparator = 12,
+ /* true */ (1 << 13) | // ParagraphSeparator = 13,
+ /* true */ (1 << 14) | // Control = 14,
+ /* true */ (1 << 15) | // Format = 15,
+ /* false */ (0 << 16) | // Surrogate = 16,
+ /* false */ (0 << 17) | // PrivateUse = 17,
+ /* true */ (1 << 18) | // ConnectorPunctuation = 18,
+ /* true */ (1 << 19) | // DashPunctuation = 19,
+ /* true */ (1 << 20) | // OpenPunctuation = 20,
+ /* true */ (1 << 21) | // ClosePunctuation = 21,
+ /* true */ (1 << 22) | // InitialQuotePunctuation = 22,
+ /* true */ (1 << 23) | // FinalQuotePunctuation = 23,
+ /* true */ (1 << 24) | // OtherPunctuation = 24,
+ /* true */ (1 << 25) | // MathSymbol = 25,
+ /* true */ (1 << 26) | // CurrencySymbol = 26,
+ /* true */ (1 << 27) | // ModifierSymbol = 27,
+ /* true */ (1 << 28) | // OtherSymbol = 28,
+ /* false */ (0 << 29); // OtherNotAssigned = 29;
+ // Tests the bit for category in c_wordSeparatorMask; true means ToTitleCase() treats the category as ending a word.
+ private static bool IsWordSeparator(UnicodeCategory category)
+ {
+ return (c_wordSeparatorMask & (1 << (int) category)) != 0;
+ }
+ // True when uc is one of the five letter categories: Uppercase, Lowercase, Titlecase, Modifier or Other letter.
+ private static bool IsLetterCategory(UnicodeCategory uc)
+ {
+ return (uc == UnicodeCategory.UppercaseLetter
+ || uc == UnicodeCategory.LowercaseLetter
+ || uc == UnicodeCategory.TitlecaseLetter
+ || uc == UnicodeCategory.ModifierLetter
+ || uc == UnicodeCategory.OtherLetter);
+ }
+
+ //
// Get case-insensitive hash code for the specified string.
//
internal unsafe int GetCaseInsensitiveHashCode(String str)
@@ -403,7 +695,7 @@ namespace System.Globalization
// Validate inputs
if (str == null)
{
- throw new ArgumentNullException("str");
+ throw new ArgumentNullException(nameof(str));
}
// This code assumes that ASCII casing is safe for whatever context is passed in.
@@ -438,7 +730,7 @@ namespace System.Globalization
private unsafe int GetCaseInsensitiveHashCodeSlow(String str)
{
- Contract.Assert(str != null);
+ Debug.Assert(str != null);
string upper = ToUpper(str);