diff options
Diffstat (limited to 'src/mscorlib/corefx/System/Globalization/DateTimeFormatInfoScanner.cs')
-rw-r--r-- | src/mscorlib/corefx/System/Globalization/DateTimeFormatInfoScanner.cs | 742 |
1 files changed, 0 insertions, 742 deletions
diff --git a/src/mscorlib/corefx/System/Globalization/DateTimeFormatInfoScanner.cs b/src/mscorlib/corefx/System/Globalization/DateTimeFormatInfoScanner.cs deleted file mode 100644 index 9cbc19f385..0000000000 --- a/src/mscorlib/corefx/System/Globalization/DateTimeFormatInfoScanner.cs +++ /dev/null @@ -1,742 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. -// See the LICENSE file in the project root for more information. - -//////////////////////////////////////////////////////////////////////////// -// -// DateTimeFormatInfoScanner -// -// Scan a specified DateTimeFormatInfo to search for data used in DateTime.Parse() -// -// The data includes: -// -// DateWords: such as "de" used in es-ES (Spanish) LongDatePattern. -// Postfix: such as "ta" used in fi-FI after the month name. -// -// This class is shared among mscorlib.dll and sysglobl.dll. -// Use conditional CULTURE_AND_REGIONINFO_BUILDER_ONLY to differentiate between -// methods for mscorlib.dll and sysglobl.dll. -// -//////////////////////////////////////////////////////////////////////////// - -using System; -using System.Globalization; -using System.Collections; -using System.Collections.Generic; -using System.Text; - -namespace System.Globalization -{ - -#if INSIDE_CLR - using StringStringDictionary = Dictionary<string, string>; - using StringList = List<string>; -#else - using StringStringDictionary = LowLevelDictionary<string, string>; - using StringList = LowLevelList<string>; -#endif - - // - // from LocaleEx.txt header - // - //; IFORMATFLAGS - //; Parsing/formatting flags. - internal enum FORMATFLAGS - { - None = 0x00000000, - UseGenitiveMonth = 0x00000001, - UseLeapYearMonth = 0x00000002, - UseSpacesInMonthNames = 0x00000004, - UseHebrewParsing = 0x00000008, - UseSpacesInDayNames = 0x00000010, // Has spaces or non-breaking space in the day names. - UseDigitPrefixInTokens = 0x00000020, // Has token starting with numbers. - } - - internal enum CalendarId : ushort - { - UNINITIALIZED_VALUE = 0, - GREGORIAN = 1, // Gregorian (localized) calendar - GREGORIAN_US = 2, // Gregorian (U.S.) calendar - JAPAN = 3, // Japanese Emperor Era calendar - /* SSS_WARNINGS_OFF */ - TAIWAN = 4, // Taiwan Era calendar /* SSS_WARNINGS_ON */ - KOREA = 5, // Korean Tangun Era calendar - HIJRI = 6, // Hijri (Arabic Lunar) calendar - THAI = 7, // Thai calendar - HEBREW = 8, // Hebrew (Lunar) calendar - GREGORIAN_ME_FRENCH = 9, // Gregorian Middle East French calendar - GREGORIAN_ARABIC = 10, // Gregorian Arabic calendar - GREGORIAN_XLIT_ENGLISH = 11, // Gregorian Transliterated English calendar - GREGORIAN_XLIT_FRENCH = 12, - // Note that all calendars after this point are MANAGED ONLY for now. - JULIAN = 13, - JAPANESELUNISOLAR = 14, - CHINESELUNISOLAR = 15, - SAKA = 16, // reserved to match Office but not implemented in our code - LUNAR_ETO_CHN = 17, // reserved to match Office but not implemented in our code - LUNAR_ETO_KOR = 18, // reserved to match Office but not implemented in our code - LUNAR_ETO_ROKUYOU = 19, // reserved to match Office but not implemented in our code - KOREANLUNISOLAR = 20, - TAIWANLUNISOLAR = 21, - PERSIAN = 22, - UMALQURA = 23, - LAST_CALENDAR = 23 // Last calendar ID - } - - internal class DateTimeFormatInfoScanner - { - // Special prefix-like flag char in DateWord array. - - // Use char in PUA area since we won't be using them in real data. - // The char used to tell a read date word or a month postfix. A month postfix - // is "ta" in the long date pattern like "d. MMMM'ta 'yyyy" for fi-FI. - // In this case, it will be stored as "\xfffeta" in the date word array. - internal const char MonthPostfixChar = '\xe000'; - - // Add ignorable symbol in a DateWord array. - - // hu-HU has: - // shrot date pattern: yyyy. MM. dd.;yyyy-MM-dd;yy-MM-dd - // long date pattern: yyyy. MMMM d. - // Here, "." is the date separator (derived from short date pattern). However, - // "." also appear at the end of long date pattern. In this case, we just - // "." as ignorable symbol so that the DateTime.Parse() state machine will not - // treat the additional date separator at the end of y,m,d pattern as an error - // condition. - internal const char IgnorableSymbolChar = '\xe001'; - - // Known CJK suffix - internal const String CJKYearSuff = "\u5e74"; - internal const String CJKMonthSuff = "\u6708"; - internal const String CJKDaySuff = "\u65e5"; - - internal const String KoreanYearSuff = "\ub144"; - internal const String KoreanMonthSuff = "\uc6d4"; - internal const String KoreanDaySuff = "\uc77c"; - - internal const String KoreanHourSuff = "\uc2dc"; - internal const String KoreanMinuteSuff = "\ubd84"; - internal const String KoreanSecondSuff = "\ucd08"; - - internal const String CJKHourSuff = "\u6642"; - internal const String ChineseHourSuff = "\u65f6"; - - internal const String CJKMinuteSuff = "\u5206"; - internal const String CJKSecondSuff = "\u79d2"; - - // The collection fo date words & postfix. - internal StringList m_dateWords = new StringList(); - // Hashtable for the known words. - private static volatile StringStringDictionary s_knownWords; - - static StringStringDictionary KnownWords - { - get - { - if (s_knownWords == null) - { - StringStringDictionary temp = new StringStringDictionary(); - // Add known words into the hash table. - - // Skip these special symbols. - temp.Add("/", String.Empty); - temp.Add("-", String.Empty); - temp.Add(".", String.Empty); - // Skip known CJK suffixes. - temp.Add(CJKYearSuff, String.Empty); - temp.Add(CJKMonthSuff, String.Empty); - temp.Add(CJKDaySuff, String.Empty); - temp.Add(KoreanYearSuff, String.Empty); - temp.Add(KoreanMonthSuff, String.Empty); - temp.Add(KoreanDaySuff, String.Empty); - temp.Add(KoreanHourSuff, String.Empty); - temp.Add(KoreanMinuteSuff, String.Empty); - temp.Add(KoreanSecondSuff, String.Empty); - temp.Add(CJKHourSuff, String.Empty); - temp.Add(ChineseHourSuff, String.Empty); - temp.Add(CJKMinuteSuff, String.Empty); - temp.Add(CJKSecondSuff, String.Empty); - - s_knownWords = temp; - } - return (s_knownWords); - } - } - - //////////////////////////////////////////////////////////////////////////// - // - // Parameters: - // pattern: The pattern to be scanned. - // currentIndex: the current index to start the scan. - // - // Returns: - // Return the index with the first character that is a letter, which will - // be the start of a date word. - // Note that the index can be pattern.Length if we reach the end of the string. - // - //////////////////////////////////////////////////////////////////////////// - internal static int SkipWhiteSpacesAndNonLetter(String pattern, int currentIndex) - { - while (currentIndex < pattern.Length) - { - char ch = pattern[currentIndex]; - if (ch == '\\') - { - // Escaped character. Look ahead one character. - currentIndex++; - if (currentIndex < pattern.Length) - { - ch = pattern[currentIndex]; - if (ch == '\'') - { - // Skip the leading single quote. We will - // stop at the first letter. - continue; - } - // Fall thru to check if this is a letter. - } - else - { - // End of string - break; - } - } - if (Char.IsLetter(ch) || ch == '\'' || ch == '.') - { - break; - } - // Skip the current char since it is not a letter. - currentIndex++; - } - return (currentIndex); - } - - //////////////////////////////////////////////////////////////////////////// - // - // A helper to add the found date word or month postfix into ArrayList for date words. - // - // Parameters: - // formatPostfix: What kind of postfix this is. - // Possible values: - // null: This is a regular date word - // "MMMM": month postfix - // word: The date word or postfix to be added. - // - //////////////////////////////////////////////////////////////////////////// - internal void AddDateWordOrPostfix(String formatPostfix, String str) - { - if (str.Length > 0) - { - // Some cultures use . like an abbreviation - if (str.Equals(".")) - { - AddIgnorableSymbols("."); - return; - } - String words; - if (KnownWords.TryGetValue(str, out words) == false) - { - if (m_dateWords == null) - { - m_dateWords = new StringList(); - } - if (formatPostfix == "MMMM") - { - // Add the word into the ArrayList as "\xfffe" + real month postfix. - String temp = MonthPostfixChar + str; - if (!m_dateWords.Contains(temp)) - { - m_dateWords.Add(temp); - } - } - else - { - if (!m_dateWords.Contains(str)) - { - m_dateWords.Add(str); - } - if (str[str.Length - 1] == '.') - { - // Old version ignore the trialing dot in the date words. Support this as well. - String strWithoutDot = str.Substring(0, str.Length - 1); - if (!m_dateWords.Contains(strWithoutDot)) - { - m_dateWords.Add(strWithoutDot); - } - } - } - } - } - } - - //////////////////////////////////////////////////////////////////////////// - // - // Scan the pattern from the specified index and add the date word/postfix - // when appropriate. - // - // Parameters: - // pattern: The pattern to be scanned. - // index: The starting index to be scanned. - // formatPostfix: The kind of postfix to be scanned. - // Possible values: - // null: This is a regular date word - // "MMMM": month postfix - // - // - //////////////////////////////////////////////////////////////////////////// - internal int AddDateWords(String pattern, int index, String formatPostfix) - { - // Skip any whitespaces so we will start from a letter. - int newIndex = SkipWhiteSpacesAndNonLetter(pattern, index); - if (newIndex != index && formatPostfix != null) - { - // There are whitespaces. This will not be a postfix. - formatPostfix = null; - } - index = newIndex; - - // This is the first char added into dateWord. - // Skip all non-letter character. We will add the first letter into DateWord. - StringBuilder dateWord = new StringBuilder(); - // We assume that date words should start with a letter. - // Skip anything until we see a letter. - - while (index < pattern.Length) - { - char ch = pattern[index]; - if (ch == '\'') - { - // We have seen the end of quote. Add the word if we do not see it before, - // and break the while loop. - AddDateWordOrPostfix(formatPostfix, dateWord.ToString()); - index++; - break; - } - else if (ch == '\\') - { - // - // Escaped character. Look ahead one character - // - - // Skip escaped backslash. - index++; - if (index < pattern.Length) - { - dateWord.Append(pattern[index]); - index++; - } - } - else if (Char.IsWhiteSpace(ch)) - { - // Found a whitespace. We have to add the current date word/postfix. - AddDateWordOrPostfix(formatPostfix, dateWord.ToString()); - if (formatPostfix != null) - { - // Done with postfix. The rest will be regular date word. - formatPostfix = null; - } - // Reset the dateWord. - dateWord.Length = 0; - index++; - } - else - { - dateWord.Append(ch); - index++; - } - } - return (index); - } - - //////////////////////////////////////////////////////////////////////////// - // - // A simple helper to find the repeat count for a specified char. - // - //////////////////////////////////////////////////////////////////////////// - internal static int ScanRepeatChar(String pattern, char ch, int index, out int count) - { - count = 1; - while (++index < pattern.Length && pattern[index] == ch) - { - count++; - } - // Return the updated position. - return (index); - } - - //////////////////////////////////////////////////////////////////////////// - // - // Add the text that is a date separator but is treated like ignroable symbol. - // E.g. - // hu-HU has: - // shrot date pattern: yyyy. MM. dd.;yyyy-MM-dd;yy-MM-dd - // long date pattern: yyyy. MMMM d. - // Here, "." is the date separator (derived from short date pattern). However, - // "." also appear at the end of long date pattern. In this case, we just - // "." as ignorable symbol so that the DateTime.Parse() state machine will not - // treat the additional date separator at the end of y,m,d pattern as an error - // condition. - // - //////////////////////////////////////////////////////////////////////////// - - internal void AddIgnorableSymbols(String text) - { - if (m_dateWords == null) - { - // Create the date word array. - m_dateWords = new StringList(); - } - // Add the ignorable symbol into the ArrayList. - String temp = IgnorableSymbolChar + text; - if (!m_dateWords.Contains(temp)) - { - m_dateWords.Add(temp); - } - } - - - // - // Flag used to trace the date patterns (yy/yyyyy/M/MM/MMM/MMM/d/dd) that we have seen. - // - private enum FoundDatePattern - { - None = 0x0000, - FoundYearPatternFlag = 0x0001, - FoundMonthPatternFlag = 0x0002, - FoundDayPatternFlag = 0x0004, - FoundYMDPatternFlag = 0x0007, // FoundYearPatternFlag | FoundMonthPatternFlag | FoundDayPatternFlag; - } - - // Check if we have found all of the year/month/day pattern. - private FoundDatePattern _ymdFlags = FoundDatePattern.None; - - - //////////////////////////////////////////////////////////////////////////// - // - // Given a date format pattern, scan for date word or postfix. - // - // A date word should be always put in a single quoted string. And it will - // start from a letter, so whitespace and symbols will be ignored before - // the first letter. - // - // Examples of date word: - // 'de' in es-SP: dddd, dd' de 'MMMM' de 'yyyy - // "\x0443." in bg-BG: dd.M.yyyy '\x0433.' - // - // Example of postfix: - // month postfix: - // "ta" in fi-FI: d. MMMM'ta 'yyyy - // Currently, only month postfix is supported. - // - // Usage: - // Always call this with Framework-style pattern, instead of Windows style pattern. - // Windows style pattern uses '' for single quote, while .NET uses \' - // - //////////////////////////////////////////////////////////////////////////// - internal void ScanDateWord(String pattern) - { - // Check if we have found all of the year/month/day pattern. - _ymdFlags = FoundDatePattern.None; - - int i = 0; - while (i < pattern.Length) - { - char ch = pattern[i]; - int chCount; - - switch (ch) - { - case '\'': - // Find a beginning quote. Search until the end quote. - i = AddDateWords(pattern, i + 1, null); - break; - case 'M': - i = ScanRepeatChar(pattern, 'M', i, out chCount); - if (chCount >= 4) - { - if (i < pattern.Length && pattern[i] == '\'') - { - i = AddDateWords(pattern, i + 1, "MMMM"); - } - } - _ymdFlags |= FoundDatePattern.FoundMonthPatternFlag; - break; - case 'y': - i = ScanRepeatChar(pattern, 'y', i, out chCount); - _ymdFlags |= FoundDatePattern.FoundYearPatternFlag; - break; - case 'd': - i = ScanRepeatChar(pattern, 'd', i, out chCount); - if (chCount <= 2) - { - // Only count "d" & "dd". - // ddd, dddd are day names. Do not count them. - _ymdFlags |= FoundDatePattern.FoundDayPatternFlag; - } - break; - case '\\': - // Found a escaped char not in a quoted string. Skip the current backslash - // and its next character. - i += 2; - break; - case '.': - if (_ymdFlags == FoundDatePattern.FoundYMDPatternFlag) - { - // If we find a dot immediately after the we have seen all of the y, m, d pattern. - // treat it as a ignroable symbol. Check for comments in AddIgnorableSymbols for - // more details. - AddIgnorableSymbols("."); - _ymdFlags = FoundDatePattern.None; - } - i++; - break; - default: - if (_ymdFlags == FoundDatePattern.FoundYMDPatternFlag && !Char.IsWhiteSpace(ch)) - { - // We are not seeing "." after YMD. Clear the flag. - _ymdFlags = FoundDatePattern.None; - } - // We are not in quote. Skip the current character. - i++; - break; - } - } - } - - //////////////////////////////////////////////////////////////////////////// - // - // Given a DTFI, get all of the date words from date patterns and time patterns. - // - //////////////////////////////////////////////////////////////////////////// - - internal String[] GetDateWordsOfDTFI(DateTimeFormatInfo dtfi) - { - // Enumarate all LongDatePatterns, and get the DateWords and scan for month postfix. - String[] datePatterns = dtfi.GetAllDateTimePatterns('D'); - int i; - - // Scan the long date patterns - for (i = 0; i < datePatterns.Length; i++) - { - ScanDateWord(datePatterns[i]); - } - - // Scan the short date patterns - datePatterns = dtfi.GetAllDateTimePatterns('d'); - for (i = 0; i < datePatterns.Length; i++) - { - ScanDateWord(datePatterns[i]); - } - // Scan the YearMonth patterns. - datePatterns = dtfi.GetAllDateTimePatterns('y'); - for (i = 0; i < datePatterns.Length; i++) - { - ScanDateWord(datePatterns[i]); - } - - // Scan the month/day pattern - ScanDateWord(dtfi.MonthDayPattern); - - // Scan the long time patterns. - datePatterns = dtfi.GetAllDateTimePatterns('T'); - for (i = 0; i < datePatterns.Length; i++) - { - ScanDateWord(datePatterns[i]); - } - - // Scan the short time patterns. - datePatterns = dtfi.GetAllDateTimePatterns('t'); - for (i = 0; i < datePatterns.Length; i++) - { - ScanDateWord(datePatterns[i]); - } - - String[] result = null; - if (m_dateWords != null && m_dateWords.Count > 0) - { - result = new String[m_dateWords.Count]; - for (i = 0; i < m_dateWords.Count; i++) - { - result[i] = m_dateWords[i]; - } - } - return (result); - } - - - //////////////////////////////////////////////////////////////////////////// - // - // Scan the month names to see if genitive month names are used, and return - // the format flag. - // - //////////////////////////////////////////////////////////////////////////// - internal static FORMATFLAGS GetFormatFlagGenitiveMonth(String[] monthNames, String[] genitveMonthNames, String[] abbrevMonthNames, String[] genetiveAbbrevMonthNames) - { - // If we have different names in regular and genitive month names, use genitive month flag. - return ((!EqualStringArrays(monthNames, genitveMonthNames) || !EqualStringArrays(abbrevMonthNames, genetiveAbbrevMonthNames)) - ? FORMATFLAGS.UseGenitiveMonth : 0); - } - - //////////////////////////////////////////////////////////////////////////// - // - // Scan the month names to see if spaces are used or start with a digit, and return the format flag - // - //////////////////////////////////////////////////////////////////////////// - internal static FORMATFLAGS GetFormatFlagUseSpaceInMonthNames(String[] monthNames, String[] genitveMonthNames, String[] abbrevMonthNames, String[] genetiveAbbrevMonthNames) - { - FORMATFLAGS formatFlags = 0; - formatFlags |= (ArrayElementsBeginWithDigit(monthNames) || - ArrayElementsBeginWithDigit(genitveMonthNames) || - ArrayElementsBeginWithDigit(abbrevMonthNames) || - ArrayElementsBeginWithDigit(genetiveAbbrevMonthNames) - ? FORMATFLAGS.UseDigitPrefixInTokens : 0); - - formatFlags |= (ArrayElementsHaveSpace(monthNames) || - ArrayElementsHaveSpace(genitveMonthNames) || - ArrayElementsHaveSpace(abbrevMonthNames) || - ArrayElementsHaveSpace(genetiveAbbrevMonthNames) - ? FORMATFLAGS.UseSpacesInMonthNames : 0); - return (formatFlags); - } - - //////////////////////////////////////////////////////////////////////////// - // - // Scan the day names and set the correct format flag. - // - //////////////////////////////////////////////////////////////////////////// - internal static FORMATFLAGS GetFormatFlagUseSpaceInDayNames(String[] dayNames, String[] abbrevDayNames) - { - return ((ArrayElementsHaveSpace(dayNames) || - ArrayElementsHaveSpace(abbrevDayNames)) - ? FORMATFLAGS.UseSpacesInDayNames : 0); - } - - //////////////////////////////////////////////////////////////////////////// - // - // Check the calendar to see if it is HebrewCalendar and set the Hebrew format flag if necessary. - // - //////////////////////////////////////////////////////////////////////////// - internal static FORMATFLAGS GetFormatFlagUseHebrewCalendar(int calID) - { - return (calID == (int)CalendarId.HEBREW ? - FORMATFLAGS.UseHebrewParsing | FORMATFLAGS.UseLeapYearMonth : 0); - } - - - //----------------------------------------------------------------------------- - // EqualStringArrays - // compares two string arrays and return true if all elements of the first - // array equals to all elmentsof the second array. - // otherwise it returns false. - //----------------------------------------------------------------------------- - - private static bool EqualStringArrays(string[] array1, string[] array2) - { - // Shortcut if they're the same array - if (array1 == array2) - { - return true; - } - - // This is effectively impossible - if (array1.Length != array2.Length) - { - return false; - } - - // Check each string - for (int i = 0; i < array1.Length; i++) - { - if (!array1[i].Equals(array2[i])) - { - return false; - } - } - - return true; - } - - //----------------------------------------------------------------------------- - // ArrayElementsHaveSpace - // It checks all input array elements if any of them has space character - // returns true if found space character in one of the array elements. - // otherwise returns false. - //----------------------------------------------------------------------------- - - private static bool ArrayElementsHaveSpace(string[] array) - { - for (int i = 0; i < array.Length; i++) - { - // it is faster to check for space character manually instead of calling IndexOf - // so we don't have to go to native code side. - for (int j = 0; j < array[i].Length; j++) - { - if (Char.IsWhiteSpace(array[i][j])) - { - return true; - } - } - } - - return false; - } - - - //////////////////////////////////////////////////////////////////////////// - // - // Check if any element of the array start with a digit. - // - //////////////////////////////////////////////////////////////////////////// - private static bool ArrayElementsBeginWithDigit(string[] array) - { - for (int i = 0; i < array.Length; i++) - { - // it is faster to check for space character manually instead of calling IndexOf - // so we don't have to go to native code side. - if (array[i].Length > 0 && - array[i][0] >= '0' && array[i][0] <= '9') - { - int index = 1; - while (index < array[i].Length && array[i][index] >= '0' && array[i][index] <= '9') - { - // Skip other digits. - index++; - } - if (index == array[i].Length) - { - return (false); - } - - if (index == array[i].Length - 1) - { - // Skip known CJK month suffix. - // CJK uses month name like "1\x6708", since \x6708 is a known month suffix, - // we don't need the UseDigitPrefixInTokens since it is slower. - switch (array[i][index]) - { - case '\x6708': // CJKMonthSuff - case '\xc6d4': // KoreanMonthSuff - return (false); - } - } - - if (index == array[i].Length - 4) - { - // Skip known CJK month suffix. - // Starting with Windows 8, the CJK months for some cultures looks like: "1' \x6708'" - // instead of just "1\x6708" - if (array[i][index] == '\'' && array[i][index + 1] == ' ' && - array[i][index + 2] == '\x6708' && array[i][index + 3] == '\'') - { - return (false); - } - } - return (true); - } - } - - return false; - } - } -} - |