// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

////////////////////////////////////////////////////////////////////////////
//
// DateTimeFormatInfoScanner
//
// Scan a specified DateTimeFormatInfo to search for data used in DateTime.Parse()
//
// The data includes:
//
//      DateWords: such as "de" used in es-ES (Spanish) LongDatePattern.
//      Postfix: such as "ta" used in fi-FI after the month name.
//
// This class is shared among mscorlib.dll and sysglobl.dll.
// Use conditional CULTURE_AND_REGIONINFO_BUILDER_ONLY to differentiate between
// methods for mscorlib.dll and sysglobl.dll.
//
////////////////////////////////////////////////////////////////////////////

using System;
using System.Globalization;
using System.Collections;
using System.Collections.Generic;
using System.Text;

namespace System.Globalization
{
    // The scanner only ever stores string -> string pairs (known words) and
    // strings (date words), so the aliases are closed over string.
#if INSIDE_CLR
    using StringStringDictionary = Dictionary<string, string>;
    using StringList = List<string>;
#else
    using StringStringDictionary = LowLevelDictionary<string, string>;
    using StringList = LowLevelList<string>;
#endif

    //
    // from LocaleEx.txt header
    //
    //; IFORMATFLAGS
    //;       Parsing/formatting flags.
    internal enum FORMATFLAGS
    {
        None = 0x00000000,
        UseGenitiveMonth = 0x00000001,
        UseLeapYearMonth = 0x00000002,
        UseSpacesInMonthNames = 0x00000004,
        UseHebrewParsing = 0x00000008,
        UseSpacesInDayNames = 0x00000010,       // Has spaces or non-breaking space in the day names.
        UseDigitPrefixInTokens = 0x00000020,    // Has token starting with numbers.
    }

    internal enum CalendarId : ushort
    {
        UNINITIALIZED_VALUE = 0,
        GREGORIAN = 1,                  // Gregorian (localized) calendar
        GREGORIAN_US = 2,               // Gregorian (U.S.) calendar
        JAPAN = 3,                      // Japanese Emperor Era calendar
                                        /* SSS_WARNINGS_OFF */
        TAIWAN = 4,                     // Taiwan Era calendar /* SSS_WARNINGS_ON */
        KOREA = 5,                      // Korean Tangun Era calendar
        HIJRI = 6,                      // Hijri (Arabic Lunar) calendar
        THAI = 7,                       // Thai calendar
        HEBREW = 8,                     // Hebrew (Lunar) calendar
        GREGORIAN_ME_FRENCH = 9,        // Gregorian Middle East French calendar
        GREGORIAN_ARABIC = 10,          // Gregorian Arabic calendar
        GREGORIAN_XLIT_ENGLISH = 11,    // Gregorian Transliterated English calendar
        GREGORIAN_XLIT_FRENCH = 12,
        // Note that all calendars after this point are MANAGED ONLY for now.
        JULIAN = 13,
        JAPANESELUNISOLAR = 14,
        CHINESELUNISOLAR = 15,
        SAKA = 16,                      // reserved to match Office but not implemented in our code
        LUNAR_ETO_CHN = 17,             // reserved to match Office but not implemented in our code
        LUNAR_ETO_KOR = 18,             // reserved to match Office but not implemented in our code
        LUNAR_ETO_ROKUYOU = 19,         // reserved to match Office but not implemented in our code
        KOREANLUNISOLAR = 20,
        TAIWANLUNISOLAR = 21,
        PERSIAN = 22,
        UMALQURA = 23,
        LAST_CALENDAR = 23              // Last calendar ID
    }

    internal class DateTimeFormatInfoScanner
    {
        // Special prefix-like flag char in DateWord array.

        // Use char in PUA area since we won't be using them in real data.
        // The char used to tell a real date word from a month postfix. A month postfix
        // is "ta" in the long date pattern like "d. MMMM'ta 'yyyy" for fi-FI.
        // In this case, it will be stored as "\xe000ta" in the date word array
        // (MonthPostfixChar followed by the postfix text).
        internal const char MonthPostfixChar = '\xe000';

        // Add ignorable symbol in a DateWord array.

        // hu-HU has:
        //      short date pattern: yyyy. MM. dd.;yyyy-MM-dd;yy-MM-dd
        //      long date pattern: yyyy. MMMM d.
        // Here, "." is the date separator (derived from short date pattern). However,
        // "." also appears at the end of long date pattern. In this case, we just add
        // "." as ignorable symbol so that the DateTime.Parse() state machine will not
        // treat the additional date separator at the end of y,m,d pattern as an error
        // condition.
        internal const char IgnorableSymbolChar = '\xe001';

        // Known CJK suffix
        internal const String CJKYearSuff = "\u5e74";
        internal const String CJKMonthSuff = "\u6708";
        internal const String CJKDaySuff = "\u65e5";

        internal const String KoreanYearSuff = "\ub144";
        internal const String KoreanMonthSuff = "\uc6d4";
        internal const String KoreanDaySuff = "\uc77c";

        internal const String KoreanHourSuff = "\uc2dc";
        internal const String KoreanMinuteSuff = "\ubd84";
        internal const String KoreanSecondSuff = "\ucd08";

        internal const String CJKHourSuff = "\u6642";
        internal const String ChineseHourSuff = "\u65f6";

        internal const String CJKMinuteSuff = "\u5206";
        internal const String CJKSecondSuff = "\u79d2";

        // The collection of date words & postfixes found so far. Entries are either a
        // plain date word, MonthPostfixChar + postfix, or IgnorableSymbolChar + symbol.
        internal StringList m_dateWords = new StringList();

        // Lookup table of the words that must never be recorded as date words.
        // Only the keys matter; every value is String.Empty.
        private static volatile StringStringDictionary s_knownWords;

        // Lazily creates the known-word table on first use.
        // The table is filled through a local and only then stored in s_knownWords,
        // so the field never refers to a partially filled table.
        private static StringStringDictionary KnownWords
        {
            get
            {
                if (s_knownWords == null)
                {
                    StringStringDictionary temp = new StringStringDictionary();

                    // Add known words into the hash table.

                    // Skip these special symbols.
                    temp.Add("/", String.Empty);
                    temp.Add("-", String.Empty);
                    temp.Add(".", String.Empty);

                    // Skip known CJK suffixes.
                    temp.Add(CJKYearSuff, String.Empty);
                    temp.Add(CJKMonthSuff, String.Empty);
                    temp.Add(CJKDaySuff, String.Empty);
                    temp.Add(KoreanYearSuff, String.Empty);
                    temp.Add(KoreanMonthSuff, String.Empty);
                    temp.Add(KoreanDaySuff, String.Empty);
                    temp.Add(KoreanHourSuff, String.Empty);
                    temp.Add(KoreanMinuteSuff, String.Empty);
                    temp.Add(KoreanSecondSuff, String.Empty);
                    temp.Add(CJKHourSuff, String.Empty);
                    temp.Add(ChineseHourSuff, String.Empty);
                    temp.Add(CJKMinuteSuff, String.Empty);
                    temp.Add(CJKSecondSuff, String.Empty);

                    s_knownWords = temp;
                }
                return (s_knownWords);
            }
        }

        ////////////////////////////////////////////////////////////////////////////
        //
        //  Parameters:
        //      pattern: The pattern to be scanned.
        //      currentIndex: the current index to start the scan.
//
        //  Returns:
        //      The index of the first character that is a letter, a single quote or a
        //      period. A backslash-escaped character is examined without its backslash,
        //      so the scan also stops on an escaped quote.
        //      Note that the index can be pattern.Length if we reach the end of the string.
        //
        ////////////////////////////////////////////////////////////////////////////
        internal static int SkipWhiteSpacesAndNonLetter(String pattern, int currentIndex)
        {
            int length = pattern.Length;
            int pos = currentIndex;

            for (; pos < length; pos++)
            {
                char current = pattern[pos];

                if (current == '\\')
                {
                    // Escaped character: step over the backslash and examine what follows.
                    pos++;
                    if (pos >= length)
                    {
                        // The backslash was the last character of the pattern.
                        break;
                    }
                    current = pattern[pos];
                }

                // Stop on anything that can start (or terminate) a date word.
                if (current == '\'' || current == '.' || Char.IsLetter(current))
                {
                    break;
                }
            }

            return pos;
        }

        ////////////////////////////////////////////////////////////////////////////
        //
        // Record a date word or a month postfix in m_dateWords, unless it is empty,
        // already present, or one of the KnownWords.
        //
        //  Parameters:
        //      formatPostfix: What kind of postfix this is.
        //          Possible values:
        //              null: This is a regular date word
        //              "MMMM": month postfix
        //      str: The date word or postfix to be added.
        //
        ////////////////////////////////////////////////////////////////////////////
        internal void AddDateWordOrPostfix(String formatPostfix, String str)
        {
            if (str.Length == 0)
            {
                return;
            }

            // Some cultures use . like an abbreviation; a lone dot is only an ignorable symbol.
            if (str.Equals("."))
            {
                AddIgnorableSymbols(".");
                return;
            }

            String unused;
            if (KnownWords.TryGetValue(str, out unused))
            {
                // Separators and CJK suffixes are handled elsewhere; never store them.
                return;
            }

            if (m_dateWords == null)
            {
                m_dateWords = new StringList();
            }

            if (formatPostfix == "MMMM")
            {
                // A month postfix is stored as MonthPostfixChar + the postfix text.
                String postfixEntry = MonthPostfixChar + str;
                if (!m_dateWords.Contains(postfixEntry))
                {
                    m_dateWords.Add(postfixEntry);
                }
                return;
            }

            if (!m_dateWords.Contains(str))
            {
                m_dateWords.Add(str);
            }

            if (str[str.Length - 1] == '.')
            {
                // Older versions ignored the trailing dot in date words, so the
                // dot-less form is recorded as well.
                String withoutDot = str.Substring(0, str.Length - 1);
                if (!m_dateWords.Contains(withoutDot))
                {
                    m_dateWords.Add(withoutDot);
                }
            }
        }

        ////////////////////////////////////////////////////////////////////////////
        //
        // Scan the quoted text that starts at the specified index and add every
        // whitespace-separated word in it as a date word/postfix.
        //
        //  Parameters:
        //      pattern: The pattern to be scanned.
        //      index: The starting index to be scanned (just after the opening quote).
        //      formatPostfix: The kind of postfix to be scanned.
        //          Possible values:
        //              null: This is a regular date word
        //              "MMMM": month postfix
        //
        //  Returns:
        //      The index just after the closing quote, or pattern.Length when the
        //      pattern ends first (the pending word is then not added).
        //
        ////////////////////////////////////////////////////////////////////////////
        internal int AddDateWords(String pattern, int index, String formatPostfix)
        {
            int pos = SkipWhiteSpacesAndNonLetter(pattern, index);
            if (pos != index)
            {
                // Something was skipped before the word, so it cannot be a postfix
                // attached directly to the month name.
                formatPostfix = null;
            }

            StringBuilder word = new StringBuilder();

            while (pos < pattern.Length)
            {
                char current = pattern[pos++];

                if (current == '\'')
                {
                    // Closing quote: flush the pending word and stop just after the quote.
                    AddDateWordOrPostfix(formatPostfix, word.ToString());
                    return pos;
                }

                if (current == '\\')
                {
                    // Escaped character: drop the backslash, keep the character after it.
                    if (pos < pattern.Length)
                    {
                        word.Append(pattern[pos++]);
                    }
                    continue;
                }

                if (Char.IsWhiteSpace(current))
                {
                    // A whitespace ends the current word. Only the very first word can
                    // be a postfix; everything after it is a regular date word.
                    AddDateWordOrPostfix(formatPostfix, word.ToString());
                    formatPostfix = null;
                    word.Length = 0;
                    continue;
                }

                word.Append(current);
            }

            return pos;
        }

        ////////////////////////////////////////////////////////////////////////////
        //
        // Count how many times ch repeats starting at index. The character at index
        // itself is not inspected; it is assumed to be ch and counted as 1.
        //
        //  Returns:
        //      The index of the first character after the run; count receives the
        //      length of the run.
        //
        ////////////////////////////////////////////////////////////////////////////
        internal static int ScanRepeatChar(String pattern, char ch, int index, out int count)
        {
            int next = index + 1;
            while (next < pattern.Length && pattern[next] == ch)
            {
                next++;
            }

            count = next - index;
            return next;
        }

        ////////////////////////////////////////////////////////////////////////////
        //
        // Add text that is a date separator but is treated like an ignorable symbol.
        // E.g.
        // hu-HU has:
        //      short date pattern: yyyy. MM. dd.;yyyy-MM-dd;yy-MM-dd
        //      long date pattern: yyyy. MMMM d.
        // Here, "." is the date separator (derived from short date pattern). However,
        // "." also appears at the end of long date pattern. In this case, "." is added
        // as an ignorable symbol so that the DateTime.Parse() state machine will not
        // treat the additional date separator at the end of y,m,d pattern as an error
        // condition.
        //
        // The symbol is stored as IgnorableSymbolChar + text, once.
        //
        ////////////////////////////////////////////////////////////////////////////
        internal void AddIgnorableSymbols(String text)
        {
            if (m_dateWords == null)
            {
                m_dateWords = new StringList();
            }

            String entry = IgnorableSymbolChar + text;
            if (m_dateWords.Contains(entry))
            {
                return;
            }
            m_dateWords.Add(entry);
        }

        //
        // Flag used to trace the date patterns (yy/yyyy/M/MM/MMM/MMMM/d/dd) that we have seen.
//
        private enum FoundDatePattern
        {
            None = 0,
            FoundYearPatternFlag = 1,
            FoundMonthPatternFlag = 2,
            FoundDayPatternFlag = 4,
            // All three of the flags above.
            FoundYMDPatternFlag = FoundYearPatternFlag | FoundMonthPatternFlag | FoundDayPatternFlag,
        }

        // Which of the year/month/day patterns have been seen in the pattern being scanned.
        private FoundDatePattern _ymdFlags = FoundDatePattern.None;

        ////////////////////////////////////////////////////////////////////////////
        //
        // Given a date format pattern, scan for date word or postfix.
        //
        // A date word should be always put in a single quoted string.  And it will
        // start from a letter, so whitespace and symbols will be ignored before
        // the first letter.
        //
        // Examples of date word:
        //  'de' in es-SP: dddd, dd' de 'MMMM' de 'yyyy
        //  "\x0443." in bg-BG: dd.M.yyyy '\x0433.'
        //
        // Example of postfix:
        //  month postfix:
        //      "ta" in fi-FI: d. MMMM'ta 'yyyy
        //  Currently, only month postfix is supported.
        //
        // Usage:
        //  Always call this with Framework-style pattern, instead of Windows style pattern.
        //  Windows style pattern uses '' for single quote, while .NET uses \'
        //
        ////////////////////////////////////////////////////////////////////////////
        internal void ScanDateWord(String pattern)
        {
            // Every pattern starts with no year/month/day seen.
            _ymdFlags = FoundDatePattern.None;

            int pos = 0;
            while (pos < pattern.Length)
            {
                char current = pattern[pos];
                int repeat;

                if (current == '\'')
                {
                    // Opening quote: collect the date words up to the closing quote.
                    pos = AddDateWords(pattern, pos + 1, null);
                }
                else if (current == 'M')
                {
                    pos = ScanRepeatChar(pattern, 'M', pos, out repeat);
                    if (repeat >= 4 && pos < pattern.Length && pattern[pos] == '\'')
                    {
                        // Quoted text glued to a full month name may be a month postfix.
                        pos = AddDateWords(pattern, pos + 1, "MMMM");
                    }
                    _ymdFlags |= FoundDatePattern.FoundMonthPatternFlag;
                }
                else if (current == 'y')
                {
                    pos = ScanRepeatChar(pattern, 'y', pos, out repeat);
                    _ymdFlags |= FoundDatePattern.FoundYearPatternFlag;
                }
                else if (current == 'd')
                {
                    pos = ScanRepeatChar(pattern, 'd', pos, out repeat);
                    if (repeat <= 2)
                    {
                        // Only "d" and "dd" are day numbers; ddd/dddd are day names.
                        _ymdFlags |= FoundDatePattern.FoundDayPatternFlag;
                    }
                }
                else if (current == '\\')
                {
                    // Escaped char outside a quoted string: skip the backslash and
                    // the character it escapes.
                    pos += 2;
                }
                else if (current == '.')
                {
                    if (_ymdFlags == FoundDatePattern.FoundYMDPatternFlag)
                    {
                        // A dot right after all of y, m and d have been seen is a
                        // trailing separator; record it as an ignorable symbol.
                        AddIgnorableSymbols(".");
                        _ymdFlags = FoundDatePattern.None;
                    }
                    pos++;
                }
                else
                {
                    if (_ymdFlags == FoundDatePattern.FoundYMDPatternFlag && !Char.IsWhiteSpace(current))
                    {
                        // Something other than "." follows y/m/d. Clear the flag.
                        _ymdFlags = FoundDatePattern.None;
                    }
                    // Not in a quote. Skip the current character.
                    pos++;
                }
            }
        }

        // Scans every pattern the DTFI reports for one standard format character.
        private void ScanPatternsForFormat(DateTimeFormatInfo dtfi, char format)
        {
            foreach (String candidate in dtfi.GetAllDateTimePatterns(format))
            {
                ScanDateWord(candidate);
            }
        }

        ////////////////////////////////////////////////////////////////////////////
        //
        // Given a DTFI, get all of the date words from date patterns and time patterns.
        //
        //  Returns:
        //      The accumulated content of m_dateWords as an array, or null when no
        //      date word has been collected.
        //
        ////////////////////////////////////////////////////////////////////////////
        internal String[] GetDateWordsOfDTFI(DateTimeFormatInfo dtfi)
        {
            // The scan order determines the order of the resulting date words.
            ScanPatternsForFormat(dtfi, 'D');       // long date patterns
            ScanPatternsForFormat(dtfi, 'd');       // short date patterns
            ScanPatternsForFormat(dtfi, 'y');       // year/month patterns
            ScanDateWord(dtfi.MonthDayPattern);     // month/day pattern
            ScanPatternsForFormat(dtfi, 'T');       // long time patterns
            ScanPatternsForFormat(dtfi, 't');       // short time patterns

            if (m_dateWords == null || m_dateWords.Count == 0)
            {
                return null;
            }

            String[] words = new String[m_dateWords.Count];
            for (int k = 0; k < words.Length; k++)
            {
                words[k] = m_dateWords[k];
            }
            return words;
        }

        ////////////////////////////////////////////////////////////////////////////
        //
        // Scan the month names to see if genitive month names are used, and return
        // the format flag.
        //
        ////////////////////////////////////////////////////////////////////////////
        internal static FORMATFLAGS GetFormatFlagGenitiveMonth(String[] monthNames, String[] genitveMonthNames, String[] abbrevMonthNames, String[] genetiveAbbrevMonthNames)
        {
            // Genitive names are in use as soon as either pair of arrays differs.
            bool genitiveMatchesRegular =
                EqualStringArrays(monthNames, genitveMonthNames) &&
                EqualStringArrays(abbrevMonthNames, genetiveAbbrevMonthNames);

            if (genitiveMatchesRegular)
            {
                return FORMATFLAGS.None;
            }
            return FORMATFLAGS.UseGenitiveMonth;
        }

        ////////////////////////////////////////////////////////////////////////////
        //
        // Scan the month names to see if spaces are used or start with a digit, and return the format flag
        //
        ////////////////////////////////////////////////////////////////////////////
        internal static FORMATFLAGS GetFormatFlagUseSpaceInMonthNames(String[] monthNames, String[] genitveMonthNames, String[] abbrevMonthNames, String[] genetiveAbbrevMonthNames)
        {
            FORMATFLAGS flags = FORMATFLAGS.None;

            if (ArrayElementsBeginWithDigit(monthNames) ||
                ArrayElementsBeginWithDigit(genitveMonthNames) ||
                ArrayElementsBeginWithDigit(abbrevMonthNames) ||
                ArrayElementsBeginWithDigit(genetiveAbbrevMonthNames))
            {
                flags |= FORMATFLAGS.UseDigitPrefixInTokens;
            }

            if (ArrayElementsHaveSpace(monthNames) ||
                ArrayElementsHaveSpace(genitveMonthNames) ||
                ArrayElementsHaveSpace(abbrevMonthNames) ||
                ArrayElementsHaveSpace(genetiveAbbrevMonthNames))
            {
                flags |= FORMATFLAGS.UseSpacesInMonthNames;
            }

            return flags;
        }

        ////////////////////////////////////////////////////////////////////////////
        //
        // Scan the day names and set the correct format flag.
        //
        ////////////////////////////////////////////////////////////////////////////
        internal static FORMATFLAGS GetFormatFlagUseSpaceInDayNames(String[] dayNames, String[] abbrevDayNames)
        {
            if (ArrayElementsHaveSpace(dayNames) || ArrayElementsHaveSpace(abbrevDayNames))
            {
                return FORMATFLAGS.UseSpacesInDayNames;
            }
            return FORMATFLAGS.None;
        }

        ////////////////////////////////////////////////////////////////////////////
        //
        // Check the calendar to see if it is HebrewCalendar and set the Hebrew format flag if necessary.
        //
        ////////////////////////////////////////////////////////////////////////////
        internal static FORMATFLAGS GetFormatFlagUseHebrewCalendar(int calID)
        {
            if (calID != (int)CalendarId.HEBREW)
            {
                return FORMATFLAGS.None;
            }
            // The Hebrew calendar needs both Hebrew parsing and leap-year month handling.
            return FORMATFLAGS.UseHebrewParsing | FORMATFLAGS.UseLeapYearMonth;
        }

        //-----------------------------------------------------------------------------
        // EqualStringArrays
        //      Compares two string arrays and returns true if they are the same array
        //      or have the same length with equal elements at every position.
        //      Otherwise it returns false.
        //-----------------------------------------------------------------------------
        private static bool EqualStringArrays(string[] array1, string[] array2)
        {
            // Same instance (or both null): nothing to compare.
            if (Object.ReferenceEquals(array1, array2))
            {
                return true;
            }

            // This is effectively impossible
            if (array1.Length != array2.Length)
            {
                return false;
            }

            int position = 0;
            while (position < array1.Length)
            {
                if (!array1[position].Equals(array2[position]))
                {
                    return false;
                }
                position++;
            }

            return true;
        }

        //-----------------------------------------------------------------------------
        // ArrayElementsHaveSpace
        //      It checks all input array elements if any of them has space character
        //      returns true if found space character in one of the array elements.
        //      otherwise returns false.
//-----------------------------------------------------------------------------
        private static bool ArrayElementsHaveSpace(string[] array)
        {
            foreach (string element in array)
            {
                // Walk the characters directly rather than calling IndexOf, which the
                // original authors note would go to the native code side.
                foreach (char c in element)
                {
                    if (Char.IsWhiteSpace(c))
                    {
                        return true;
                    }
                }
            }

            return false;
        }

        ////////////////////////////////////////////////////////////////////////////
        //
        // Check whether the array uses tokens that start with a digit.
        //
        // Only the first element that starts with an ASCII digit is examined, and it
        // decides the result for the whole array:
        //      - digits only                                   -> false
        //      - digits + one known CJK/Korean month suffix    -> false
        //      - digits + "' \x6708'" (quoted CJK suffix)      -> false
        //      - anything else after the digits                -> true
        // If no element starts with a digit the result is false.
        //
        ////////////////////////////////////////////////////////////////////////////
        private static bool ArrayElementsBeginWithDigit(string[] array)
        {
            foreach (string name in array)
            {
                if (name.Length == 0 || name[0] < '0' || name[0] > '9')
                {
                    // Does not start with a digit; look at the next element.
                    continue;
                }

                // Find the end of the leading run of digits.
                int digitsEnd = 1;
                while (digitsEnd < name.Length && name[digitsEnd] >= '0' && name[digitsEnd] <= '9')
                {
                    digitsEnd++;
                }

                int tailLength = name.Length - digitsEnd;

                if (tailLength == 0)
                {
                    // The whole element is digits.
                    return false;
                }

                if (tailLength == 1)
                {
                    // Skip known CJK month suffix.
                    // CJK uses month name like "1\x6708", since \x6708 is a known month suffix,
                    // we don't need the UseDigitPrefixInTokens since it is slower.
                    char suffix = name[digitsEnd];
                    if (suffix == '\x6708' || suffix == '\xc6d4')   // CJKMonthSuff, KoreanMonthSuff
                    {
                        return false;
                    }
                }

                if (tailLength == 4)
                {
                    // Skip known CJK month suffix.
                    // Starting with Windows 8, the CJK months for some cultures looks like: "1' \x6708'"
                    // instead of just "1\x6708"
                    bool quotedCjkSuffix =
                        name[digitsEnd] == '\'' &&
                        name[digitsEnd + 1] == ' ' &&
                        name[digitsEnd + 2] == '\x6708' &&
                        name[digitsEnd + 3] == '\'';
                    if (quotedCjkSuffix)
                    {
                        return false;
                    }
                }

                return true;
            }

            return false;
        }
    }
}