diff options
Diffstat (limited to 'src/corefx/System.Globalization.Native')
-rw-r--r-- | src/corefx/System.Globalization.Native/CMakeLists.txt | 77 | ||||
-rw-r--r-- | src/corefx/System.Globalization.Native/calendarData.cpp | 669 | ||||
-rw-r--r-- | src/corefx/System.Globalization.Native/casing.cpp | 142 | ||||
-rw-r--r-- | src/corefx/System.Globalization.Native/collation.cpp | 701 | ||||
-rw-r--r-- | src/corefx/System.Globalization.Native/config.h.in | 4 | ||||
-rw-r--r-- | src/corefx/System.Globalization.Native/configure.cmake | 27 | ||||
-rw-r--r-- | src/corefx/System.Globalization.Native/errors.h | 36 | ||||
-rw-r--r-- | src/corefx/System.Globalization.Native/holders.h | 102 | ||||
-rw-r--r-- | src/corefx/System.Globalization.Native/idna.cpp | 80 | ||||
-rw-r--r-- | src/corefx/System.Globalization.Native/locale.cpp | 206 | ||||
-rw-r--r-- | src/corefx/System.Globalization.Native/locale.hpp | 52 | ||||
-rw-r--r-- | src/corefx/System.Globalization.Native/localeNumberData.cpp | 558 | ||||
-rw-r--r-- | src/corefx/System.Globalization.Native/localeStringData.cpp | 320 | ||||
-rw-r--r-- | src/corefx/System.Globalization.Native/normalization.cpp | 88 | ||||
-rw-r--r-- | src/corefx/System.Globalization.Native/timeZoneInfo.cpp | 59 |
15 files changed, 3121 insertions, 0 deletions
# diff --git a/src/corefx/System.Globalization.Native/CMakeLists.txt b/src/corefx/System.Globalization.Native/CMakeLists.txt new file mode 100644 index 0000000000..3d9e392132 --- /dev/null +++ b/src/corefx/System.Globalization.Native/CMakeLists.txt @@ -0,0 +1,77 @@

# Builds the System.Globalization.Native shared library and links it against ICU
# (libicuuc + libicui18n everywhere except Darwin, where libicucore is used).
project(System.Globalization.Native)

set(CMAKE_INCLUDE_CURRENT_DIR ON)

add_definitions(-DPIC=1)
add_definitions(-DBIT64=1)

# Homebrew installs the ICU headers outside the default search path.
set(ICU_HOMEBREW_INC_PATH "/usr/local/opt/icu4c/include")

find_path(UTYPES_H "unicode/utypes.h" PATHS ${ICU_HOMEBREW_INC_PATH})
if(UTYPES_H STREQUAL UTYPES_H-NOTFOUND)
    message(FATAL_ERROR "Cannot find utypes.h, try installing libicu-dev (or the appropriate package for your platform)")
    return()
endif()

if(NOT CLR_CMAKE_PLATFORM_DARWIN)
    find_library(ICUUC icuuc)
    if(ICUUC STREQUAL ICUUC-NOTFOUND)
        message(FATAL_ERROR "Cannot find libicuuc, try installing libicu-dev (or the appropriate package for your platform)")
        return()
    endif()

    find_library(ICUI18N icui18n)
    if(ICUI18N STREQUAL ICUI18N-NOTFOUND)
        message(FATAL_ERROR "Cannot find libicui18n, try installing libicu-dev (or the appropriate package for your platform)")
        return()
    endif()
else()
    find_library(ICUCORE icucore)
    # Check the variable that find_library() just populated. Testing ICUI18N here
    # (which is never set on Darwin) would let a missing libicucore go unnoticed.
    if(ICUCORE STREQUAL ICUCORE-NOTFOUND)
        message(FATAL_ERROR "Cannot find libicucore, skipping build for System.Globalization.Native. .NET globalization is not expected to function.")
        return()
    endif()
endif()

include(configure.cmake)

add_compile_options(-fPIC)

set(NATIVEGLOBALIZATION_SOURCES
    calendarData.cpp
    casing.cpp
    collation.cpp
    idna.cpp
    locale.cpp
    localeNumberData.cpp
    localeStringData.cpp
    normalization.cpp
    timeZoneInfo.cpp
)

include_directories(${UTYPES_H})

_add_library(System.Globalization.Native
    SHARED
    ${NATIVEGLOBALIZATION_SOURCES}
)

# Disable the "lib" prefix.
set_target_properties(System.Globalization.Native PROPERTIES PREFIX "")

if(NOT CLR_CMAKE_PLATFORM_DARWIN)
    target_link_libraries(System.Globalization.Native
        ${ICUUC}
        ${ICUI18N}
    )
else()
    target_link_libraries(System.Globalization.Native
        ${ICUCORE}
    )

    add_definitions(-DU_DISABLE_RENAMING=1)
endif()

# add the install targets
install_clr(System.Globalization.Native)
\ No newline at end of file diff --git a/src/corefx/System.Globalization.Native/calendarData.cpp b/src/corefx/System.Globalization.Native/calendarData.cpp new file mode 100644 index 0000000000..f91cc0cb57 --- /dev/null +++ b/src/corefx/System.Globalization.Native/calendarData.cpp @@ -0,0 +1,669 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +#include <assert.h> +#include <string.h> +#include <vector> + +#include "config.h" +#include "locale.hpp" +#include "holders.h" +#include "errors.h" + +#define GREGORIAN_NAME "gregorian" +#define JAPANESE_NAME "japanese" +#define BUDDHIST_NAME "buddhist" +#define HEBREW_NAME "hebrew" +#define DANGI_NAME "dangi" +#define PERSIAN_NAME "persian" +#define ISLAMIC_NAME "islamic" +#define ISLAMIC_UMALQURA_NAME "islamic-umalqura" +#define ROC_NAME "roc" + +#define JAPANESE_LOCALE_AND_CALENDAR "ja_JP@calendar=japanese" + +const UChar UDAT_MONTH_DAY_UCHAR[] = {'M', 'M', 'M', 'M', 'd', '\0'}; +const UChar UDAT_YEAR_NUM_MONTH_DAY_UCHAR[] = {'y', 'M', 'd', '\0'}; +const UChar UDAT_YEAR_MONTH_UCHAR[] = {'y', 'M', 'M', 'M', 'M', '\0'}; + +/* +* These values should be kept in sync with System.Globalization.CalendarId +*/ +enum CalendarId : int16_t +{ + UNINITIALIZED_VALUE = 0, + GREGORIAN = 1, // Gregorian (localized) calendar + GREGORIAN_US = 2, // Gregorian (U.S.) 
calendar + JAPAN = 3, // Japanese Emperor Era calendar + /* SSS_WARNINGS_OFF */ + TAIWAN = 4, // Taiwan Era calendar /* SSS_WARNINGS_ON */ + KOREA = 5, // Korean Tangun Era calendar + HIJRI = 6, // Hijri (Arabic Lunar) calendar + THAI = 7, // Thai calendar + HEBREW = 8, // Hebrew (Lunar) calendar + GREGORIAN_ME_FRENCH = 9, // Gregorian Middle East French calendar + GREGORIAN_ARABIC = 10, // Gregorian Arabic calendar + GREGORIAN_XLIT_ENGLISH = 11, // Gregorian Transliterated English calendar + GREGORIAN_XLIT_FRENCH = 12, + // Note that all calendars after this point are MANAGED ONLY for now. + JULIAN = 13, + JAPANESELUNISOLAR = 14, + CHINESELUNISOLAR = 15, + SAKA = 16, // reserved to match Office but not implemented in our code + LUNAR_ETO_CHN = 17, // reserved to match Office but not implemented in our code + LUNAR_ETO_KOR = 18, // reserved to match Office but not implemented in our code + LUNAR_ETO_ROKUYOU = 19, // reserved to match Office but not implemented in our code + KOREANLUNISOLAR = 20, + TAIWANLUNISOLAR = 21, + PERSIAN = 22, + UMALQURA = 23, + LAST_CALENDAR = 23 // Last calendar ID +}; + +/* +* These values should be kept in sync with System.Globalization.CalendarDataType +*/ +enum CalendarDataType : int32_t +{ + Uninitialized = 0, + NativeName = 1, + MonthDay = 2, + ShortDates = 3, + LongDates = 4, + YearMonths = 5, + DayNames = 6, + AbbrevDayNames = 7, + MonthNames = 8, + AbbrevMonthNames = 9, + SuperShortDayNames = 10, + MonthGenitiveNames = 11, + AbbrevMonthGenitiveNames = 12, + EraNames = 13, + AbbrevEraNames = 14, +}; + +// the function pointer definition for the callback used in EnumCalendarInfo +typedef void (*EnumCalendarInfoCallback)(const UChar*, const void*); + +/* +Function: +GetCalendarName + +Gets the associated ICU calendar name for the CalendarId. 
+*/ +const char* GetCalendarName(CalendarId calendarId) +{ + switch (calendarId) + { + case JAPAN: + return JAPANESE_NAME; + case THAI: + return BUDDHIST_NAME; + case HEBREW: + return HEBREW_NAME; + case KOREA: + return DANGI_NAME; + case PERSIAN: + return PERSIAN_NAME; + case HIJRI: + return ISLAMIC_NAME; + case UMALQURA: + return ISLAMIC_UMALQURA_NAME; + case TAIWAN: + return ROC_NAME; + case GREGORIAN: + case GREGORIAN_US: + case GREGORIAN_ARABIC: + case GREGORIAN_ME_FRENCH: + case GREGORIAN_XLIT_ENGLISH: + case GREGORIAN_XLIT_FRENCH: + case JULIAN: + case LUNAR_ETO_CHN: + case LUNAR_ETO_KOR: + case LUNAR_ETO_ROKUYOU: + case SAKA: + // don't support the lunisolar calendars until we have a solid understanding + // of how they map to the ICU/CLDR calendars + case CHINESELUNISOLAR: + case KOREANLUNISOLAR: + case JAPANESELUNISOLAR: + case TAIWANLUNISOLAR: + default: + return GREGORIAN_NAME; + } +} + +/* +Function: +GetCalendarId + +Gets the associated CalendarId for the ICU calendar name. +*/ +CalendarId GetCalendarId(const char* calendarName) +{ + if (strcasecmp(calendarName, GREGORIAN_NAME) == 0) + // TODO: what about the other gregorian types? + return GREGORIAN; + else if (strcasecmp(calendarName, JAPANESE_NAME) == 0) + return JAPAN; + else if (strcasecmp(calendarName, BUDDHIST_NAME) == 0) + return THAI; + else if (strcasecmp(calendarName, HEBREW_NAME) == 0) + return HEBREW; + else if (strcasecmp(calendarName, DANGI_NAME) == 0) + return KOREA; + else if (strcasecmp(calendarName, PERSIAN_NAME) == 0) + return PERSIAN; + else if (strcasecmp(calendarName, ISLAMIC_NAME) == 0) + return HIJRI; + else if (strcasecmp(calendarName, ISLAMIC_UMALQURA_NAME) == 0) + return UMALQURA; + else if (strcasecmp(calendarName, ROC_NAME) == 0) + return TAIWAN; + else + return UNINITIALIZED_VALUE; +} + +/* +Function: +GetCalendars + +Returns the list of CalendarIds that are available for the specified locale. 
+*/ +extern "C" int32_t GlobalizationNative_GetCalendars( + const UChar* localeName, CalendarId* calendars, int32_t calendarsCapacity) +{ + UErrorCode err = U_ZERO_ERROR; + char locale[ULOC_FULLNAME_CAPACITY]; + GetLocale(localeName, locale, ULOC_FULLNAME_CAPACITY, false, &err); + + if (U_FAILURE(err)) + return 0; + + UEnumeration* pEnum = ucal_getKeywordValuesForLocale("calendar", locale, TRUE, &err); + UEnumerationHolder enumHolder(pEnum, err); + + if (U_FAILURE(err)) + return 0; + + int stringEnumeratorCount = uenum_count(pEnum, &err); + if (U_FAILURE(err)) + return 0; + + int calendarsReturned = 0; + for (int i = 0; i < stringEnumeratorCount && calendarsReturned < calendarsCapacity; i++) + { + int32_t calendarNameLength = 0; + const char* calendarName = uenum_next(pEnum, &calendarNameLength, &err); + if (U_SUCCESS(err)) + { + CalendarId calendarId = GetCalendarId(calendarName); + if (calendarId != UNINITIALIZED_VALUE) + { + calendars[calendarsReturned] = calendarId; + calendarsReturned++; + } + } + } + + return calendarsReturned; +} + +/* +Function: +GetMonthDayPattern + +Gets the Month-Day DateTime pattern for the specified locale. +*/ +ResultCode GetMonthDayPattern(const char* locale, UChar* sMonthDay, int32_t stringCapacity) +{ + UErrorCode err = U_ZERO_ERROR; + UDateTimePatternGenerator* pGenerator = udatpg_open(locale, &err); + UDateTimePatternGeneratorHolder generatorHolder(pGenerator, err); + + if (U_FAILURE(err)) + return GetResultCode(err); + + udatpg_getBestPattern(pGenerator, UDAT_MONTH_DAY_UCHAR, -1, sMonthDay, stringCapacity, &err); + + return GetResultCode(err); +} + +/* +Function: +GetNativeCalendarName + +Gets the native calendar name. 
+*/ +ResultCode GetNativeCalendarName(const char* locale, CalendarId calendarId, UChar* nativeName, int32_t stringCapacity) +{ + UErrorCode err = U_ZERO_ERROR; + ULocaleDisplayNames* pDisplayNames = uldn_open(locale, ULDN_STANDARD_NAMES, &err); + ULocaleDisplayNamesHolder displayNamesHolder(pDisplayNames, err); + + uldn_keyValueDisplayName(pDisplayNames, "calendar", GetCalendarName(calendarId), nativeName, stringCapacity, &err); + + return GetResultCode(err); +} + +/* +Function: +GetCalendarInfo + +Gets a single string of calendar information by filling the result parameter +with the requested value. +*/ +extern "C" ResultCode GlobalizationNative_GetCalendarInfo( + const UChar* localeName, CalendarId calendarId, CalendarDataType dataType, UChar* result, int32_t resultCapacity) +{ + UErrorCode err = U_ZERO_ERROR; + char locale[ULOC_FULLNAME_CAPACITY]; + GetLocale(localeName, locale, ULOC_FULLNAME_CAPACITY, false, &err); + + if (U_FAILURE(err)) + return UnknownError; + + switch (dataType) + { + case NativeName: + return GetNativeCalendarName(locale, calendarId, result, resultCapacity); + case MonthDay: + return GetMonthDayPattern(locale, result, resultCapacity); + default: + assert(false); + return UnknownError; + } +} + +/* +Function: +InvokeCallbackForDatePattern + +Gets the ICU date pattern for the specified locale and EStyle and invokes the +callback with the result. 
+*/ +bool InvokeCallbackForDatePattern(const char* locale, + UDateFormatStyle style, + EnumCalendarInfoCallback callback, + const void* context) +{ + UErrorCode err = U_ZERO_ERROR; + UDateFormat* pFormat = udat_open(UDAT_NONE, style, locale, nullptr, 0, nullptr, 0, &err); + UDateFormatHolder formatHolder(pFormat, err); + + if (U_FAILURE(err)) + return false; + + UErrorCode ignore = U_ZERO_ERROR; + int32_t patternLen = udat_toPattern(pFormat, false, nullptr, 0, &ignore); + + std::vector<UChar> pattern(patternLen + 1, '\0'); + + udat_toPattern(pFormat, false, pattern.data(), patternLen + 1, &err); + + if (U_SUCCESS(err)) + { + callback(pattern.data(), context); + } + + return U_SUCCESS(err); +} + +/* +Function: +InvokeCallbackForDateTimePattern + +Gets the DateTime pattern for the specified skeleton and invokes the callback +with the retrieved value. +*/ +bool InvokeCallbackForDateTimePattern(const char* locale, + const UChar* patternSkeleton, + EnumCalendarInfoCallback callback, + const void* context) +{ + UErrorCode err = U_ZERO_ERROR; + UDateTimePatternGenerator* pGenerator = udatpg_open(locale, &err); + UDateTimePatternGeneratorHolder generatorHolder(pGenerator, err); + + if (U_FAILURE(err)) + return false; + + UErrorCode ignore = U_ZERO_ERROR; + int32_t patternLen = udatpg_getBestPattern(pGenerator, patternSkeleton, -1, nullptr, 0, &ignore); + + std::vector<UChar> bestPattern(patternLen + 1, '\0'); + + udatpg_getBestPattern(pGenerator, patternSkeleton, -1, bestPattern.data(), patternLen + 1, &err); + + if (U_SUCCESS(err)) + { + callback(bestPattern.data(), context); + } + + return U_SUCCESS(err); +} + +/* +Function: +EnumSymbols + +Enumerates all of the symbols of a type for a locale and calendar and invokes a callback +for each value. 
+*/ +bool EnumSymbols(const char* locale, + CalendarId calendarId, + UDateFormatSymbolType type, + int32_t startIndex, + EnumCalendarInfoCallback callback, + const void* context) +{ + UErrorCode err = U_ZERO_ERROR; + UDateFormat* pFormat = udat_open(UDAT_DEFAULT, UDAT_DEFAULT, locale, nullptr, 0, nullptr, 0, &err); + UDateFormatHolder formatHolder(pFormat, err); + + if (U_FAILURE(err)) + return false; + + char localeWithCalendarName[ULOC_FULLNAME_CAPACITY]; + strncpy(localeWithCalendarName, locale, ULOC_FULLNAME_CAPACITY); + uloc_setKeywordValue("calendar", GetCalendarName(calendarId), localeWithCalendarName, ULOC_FULLNAME_CAPACITY, &err); + + if (U_FAILURE(err)) + return false; + + UCalendar* pCalendar = ucal_open(nullptr, 0, localeWithCalendarName, UCAL_DEFAULT, &err); + UCalendarHolder calendarHolder(pCalendar, err); + + if (U_FAILURE(err)) + return false; + + udat_setCalendar(pFormat, pCalendar); + + int32_t symbolCount = udat_countSymbols(pFormat, type); + + for (int32_t i = startIndex; i < symbolCount; i++) + { + UErrorCode ignore = U_ZERO_ERROR; + int symbolLen = udat_getSymbols(pFormat, type, i, nullptr, 0, &ignore); + + std::vector<UChar> symbolBuf(symbolLen + 1, '\0'); + + udat_getSymbols(pFormat, type, i, symbolBuf.data(), symbolBuf.size(), &err); + + assert(U_SUCCESS(err)); + + if (U_FAILURE(err)) + return false; + + callback(symbolBuf.data(), context); + } + + return true; +} + +bool EnumUResourceBundle(const UResourceBundle* bundle, EnumCalendarInfoCallback callback, const void* context) +{ + int32_t eraNameCount = ures_getSize(bundle); + + for (int i = 0; i < eraNameCount; i++) + { + UErrorCode status = U_ZERO_ERROR; + int32_t ignore; // We don't care about the length of the string as it is null terminated. 
+ const UChar* eraName = ures_getStringByIndex(bundle, i, &ignore, &status); + + if (U_SUCCESS(status)) + { + callback(eraName, context); + } + } + + return true; +} + +/* +Function: +EnumAbbrevEraNames + +Enumerates all the abbreviated era names of the specified locale and calendar, invoking the +callback function for each era name. +*/ +bool EnumAbbrevEraNames(const char* locale, + CalendarId calendarId, + EnumCalendarInfoCallback callback, + const void* context) +{ + // The C-API for ICU provides no way to get at the abbreviated era names for a calendar (so we can't use EnumSymbols + // here). Instead we will try to walk the ICU resource tables directly and fall back to regular era names if can't + // find good data. + char localeNameBuf[ULOC_FULLNAME_CAPACITY]; + char parentNameBuf[ULOC_FULLNAME_CAPACITY]; + + char* localeNamePtr = localeNameBuf; + char* parentNamePtr = parentNameBuf; + + strncpy(localeNamePtr, locale, ULOC_FULLNAME_CAPACITY); + + while (true) + { + UErrorCode status = U_ZERO_ERROR; + + UResourceBundle* rootResBundle = ures_open(nullptr, localeNamePtr, &status); + UResourceBundleHolder rootResBundleHolder(rootResBundle, status); + + UResourceBundle* calResBundle = ures_getByKey(rootResBundle, "calendar", nullptr, &status); + UResourceBundleHolder calResBundleHolder(calResBundle, status); + + UResourceBundle* targetCalResBundle = + ures_getByKey(calResBundle, GetCalendarName(calendarId), nullptr, &status); + UResourceBundleHolder targetCalResBundleHolder(targetCalResBundle, status); + + UResourceBundle* erasColResBundle = ures_getByKey(targetCalResBundle, "eras", nullptr, &status); + UResourceBundleHolder erasColResBundleHolder(erasColResBundle, status); + + UResourceBundle* erasResBundle = ures_getByKey(erasColResBundle, "narrow", nullptr, &status); + UResourceBundleHolder erasResBundleHolder(erasResBundle, status); + + if (U_SUCCESS(status)) + { + EnumUResourceBundle(erasResBundle, callback, context); + return true; + } + + // Couldn't find 
the data we need for this locale, we should fallback. + if (localeNameBuf[0] == 0x0) + { + // We are already at the root locale so there is nothing to fall back to, just use the regular eras. + break; + } + + uloc_getParent(localeNamePtr, parentNamePtr, ULOC_FULLNAME_CAPACITY, &status); + + if (U_FAILURE(status)) + { + // Something bad happened getting the parent name, bail out. + break; + } + + // Swap localeNamePtr and parentNamePtr, parentNamePtr is what we want to use on the next iteration + // and we can use the current localeName as scratch space if we have to fall back on that + // iteration. + + char* temp = localeNamePtr; + localeNamePtr = parentNamePtr; + parentNamePtr = temp; + } + + // Walking the resource bundles didn't work, just use the regular eras. + return EnumSymbols(locale, calendarId, UDAT_ERAS, 0, callback, context); +} + +/* +Function: +EnumCalendarInfo + +Retrieves a collection of calendar string data specified by the locale, +calendar, and data type. +Allows for a collection of calendar string data to be retrieved by invoking +the callback for each value in the collection. +The context parameter is passed through to the callback along with each string. 
+*/ +extern "C" int32_t GlobalizationNative_EnumCalendarInfo( + EnumCalendarInfoCallback callback, + const UChar* localeName, + CalendarId calendarId, + CalendarDataType dataType, + const void* context) +{ + UErrorCode err = U_ZERO_ERROR; + char locale[ULOC_FULLNAME_CAPACITY]; + GetLocale(localeName, locale, ULOC_FULLNAME_CAPACITY, false, &err); + + if (U_FAILURE(err)) + return false; + + switch (dataType) + { + case ShortDates: + // ShortDates to map kShort and kMedium in ICU, but also adding the "yMd" + // skeleton as well, as this closely matches what is used on Windows + return InvokeCallbackForDatePattern(locale, UDAT_SHORT, callback, context) && + InvokeCallbackForDatePattern(locale, UDAT_MEDIUM, callback, context) && + InvokeCallbackForDateTimePattern(locale, UDAT_YEAR_NUM_MONTH_DAY_UCHAR, callback, context); + case LongDates: + // LongDates map to kFull and kLong in ICU. + return InvokeCallbackForDatePattern(locale, UDAT_FULL, callback, context) && + InvokeCallbackForDatePattern(locale, UDAT_LONG, callback, context); + case YearMonths: + return InvokeCallbackForDateTimePattern(locale, UDAT_YEAR_MONTH_UCHAR, callback, context); + case DayNames: + return EnumSymbols(locale, calendarId, UDAT_STANDALONE_WEEKDAYS, 1, callback, context); + case AbbrevDayNames: + return EnumSymbols(locale, calendarId, UDAT_STANDALONE_SHORT_WEEKDAYS, 1, callback, context); + case MonthNames: + return EnumSymbols(locale, calendarId, UDAT_STANDALONE_MONTHS, 0, callback, context); + case AbbrevMonthNames: + return EnumSymbols(locale, calendarId, UDAT_STANDALONE_SHORT_MONTHS, 0, callback, context); + case SuperShortDayNames: + // UDAT_STANDALONE_SHORTER_WEEKDAYS was added in ICU 51, and CentOS 7 currently uses ICU 50. + // fallback to UDAT_STANDALONE_NARROW_WEEKDAYS in that case. 
+#if HAVE_UDAT_STANDALONE_SHORTER_WEEKDAYS + return EnumSymbols(locale, calendarId, UDAT_STANDALONE_SHORTER_WEEKDAYS, 1, callback, context); +#else + return EnumSymbols(locale, calendarId, UDAT_STANDALONE_NARROW_WEEKDAYS, 1, callback, context); +#endif + case MonthGenitiveNames: + return EnumSymbols(locale, calendarId, UDAT_MONTHS, 0, callback, context); + case AbbrevMonthGenitiveNames: + return EnumSymbols(locale, calendarId, UDAT_SHORT_MONTHS, 0, callback, context); + case EraNames: + return EnumSymbols(locale, calendarId, UDAT_ERAS, 0, callback, context); + case AbbrevEraNames: + return EnumAbbrevEraNames(locale, calendarId, callback, context); + default: + assert(false); + return false; + } +} + +/* +Function: +GetLatestJapaneseEra + +Gets the latest era in the Japanese calendar. +*/ +extern "C" int32_t GlobalizationNative_GetLatestJapaneseEra() +{ + UErrorCode err = U_ZERO_ERROR; + UCalendar* pCal = ucal_open(nullptr, 0, JAPANESE_LOCALE_AND_CALENDAR, UCAL_TRADITIONAL, &err); + UCalendarHolder calHolder(pCal, err); + + if (U_FAILURE(err)) + return 0; + + int32_t ret = ucal_getLimit(pCal, UCAL_ERA, UCAL_MAXIMUM, &err); + + return U_SUCCESS(err) ? ret : 0; +} + +/* +Function: +GetJapaneseEraInfo + +Gets the starting Gregorian date of the specified Japanese Era. 
+*/ +extern "C" int32_t GlobalizationNative_GetJapaneseEraStartDate( + int32_t era, int32_t* startYear, int32_t* startMonth, int32_t* startDay) +{ + *startYear = -1; + *startMonth = -1; + *startDay = -1; + + UErrorCode err = U_ZERO_ERROR; + UCalendar* pCal = ucal_open(nullptr, 0, JAPANESE_LOCALE_AND_CALENDAR, UCAL_TRADITIONAL, &err); + UCalendarHolder calHolder(pCal, err); + + if (U_FAILURE(err)) + return false; + + ucal_set(pCal, UCAL_ERA, era); + ucal_set(pCal, UCAL_YEAR, 1); + + // UCAL_EXTENDED_YEAR is the gregorian year for the JapaneseCalendar + *startYear = ucal_get(pCal, UCAL_EXTENDED_YEAR, &err); + if (U_FAILURE(err)) + return false; + + // set the date to Jan 1 + ucal_set(pCal, UCAL_MONTH, 0); + ucal_set(pCal, UCAL_DATE, 1); + + int32_t currentEra; + for (int i = 0; i <= 12; i++) + { + currentEra = ucal_get(pCal, UCAL_ERA, &err); + if (U_FAILURE(err)) + return false; + + if (currentEra == era) + { + for (int i = 0; i < 31; i++) + { + // subtract 1 day at a time until we get out of the specified Era + ucal_add(pCal, UCAL_DATE, -1, &err); + if (U_FAILURE(err)) + return false; + + currentEra = ucal_get(pCal, UCAL_ERA, &err); + if (U_FAILURE(err)) + return false; + + if (currentEra != era) + { + // add back 1 day to get back into the specified Era + ucal_add(pCal, UCAL_DATE, 1, &err); + if (U_FAILURE(err)) + return false; + + *startMonth = + ucal_get(pCal, UCAL_MONTH, &err) + 1; // ICU Calendar months are 0-based, but .NET is 1-based + if (U_FAILURE(err)) + return false; + + *startDay = ucal_get(pCal, UCAL_DATE, &err); + if (U_FAILURE(err)) + return false; + + return true; + } + } + } + + // add 1 month at a time until we get into the specified Era + ucal_add(pCal, UCAL_MONTH, 1, &err); + if (U_FAILURE(err)) + return false; + } + + return false; +} diff --git a/src/corefx/System.Globalization.Native/casing.cpp b/src/corefx/System.Globalization.Native/casing.cpp new file mode 100644 index 0000000000..58b47fc810 --- /dev/null +++ 
b/src/corefx/System.Globalization.Native/casing.cpp @@ -0,0 +1,142 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. +// + +#include <assert.h> +#include <stdint.h> +#include <unicode/uchar.h> +#include <unicode/utf16.h> + +/* +Function: +ChangeCase + +Performs upper or lower casing of a string into a new buffer. +No special casing is performed beyond that provided by ICU. +*/ +extern "C" void GlobalizationNative_ChangeCase( + const UChar* lpSrc, int32_t cwSrcLength, UChar* lpDst, int32_t cwDstLength, int32_t bToUpper) +{ + // Iterate through the string, decoding the next one or two UTF-16 code units + // into a codepoint and updating srcIdx to point to the next UTF-16 code unit + // to decode. Then upper or lower case it, write dstCodepoint into lpDst at + // offset dstIdx, and update dstIdx. + + // (The loop here has been manually cloned for each of the four cases, rather + // than having a single loop that internally branched based on bToUpper as the + // compiler wasn't doing that optimization, and it results in an ~15-20% perf + // improvement on longer strings.) + + UBool isError = FALSE; + int32_t srcIdx = 0, dstIdx = 0; + UChar32 srcCodepoint, dstCodepoint; + + if (bToUpper) + { + while (srcIdx < cwSrcLength) + { + U16_NEXT(lpSrc, srcIdx, cwSrcLength, srcCodepoint); + dstCodepoint = u_toupper(srcCodepoint); + U16_APPEND(lpDst, dstIdx, cwDstLength, dstCodepoint, isError); + assert(isError == FALSE && srcIdx == dstIdx); + } + } + else + { + while (srcIdx < cwSrcLength) + { + U16_NEXT(lpSrc, srcIdx, cwSrcLength, srcCodepoint); + dstCodepoint = u_tolower(srcCodepoint); + U16_APPEND(lpDst, dstIdx, cwDstLength, dstCodepoint, isError); + assert(isError == FALSE && srcIdx == dstIdx); + } + } +} + +/* +Function: +ChangeCaseInvariant + +Performs upper or lower casing of a string into a new buffer. 
+Special casing is performed to ensure that invariant casing +matches that of Windows in certain situations, e.g. Turkish i's. +*/ +extern "C" void GlobalizationNative_ChangeCaseInvariant( + const UChar* lpSrc, int32_t cwSrcLength, UChar* lpDst, int32_t cwDstLength, int32_t bToUpper) +{ + // See algorithmic comment in ChangeCase. + + UBool isError = FALSE; + int32_t srcIdx = 0, dstIdx = 0; + UChar32 srcCodepoint, dstCodepoint; + + if (bToUpper) + { + while (srcIdx < cwSrcLength) + { + // On Windows with InvariantCulture, the LATIN SMALL LETTER DOTLESS I (U+0131) + // capitalizes to itself, whereas with ICU it capitalizes to LATIN CAPITAL LETTER I (U+0049). + // We special case it to match the Windows invariant behavior. + U16_NEXT(lpSrc, srcIdx, cwSrcLength, srcCodepoint); + dstCodepoint = ((srcCodepoint == (UChar32)0x0131) ? (UChar32)0x0131 : u_toupper(srcCodepoint)); + U16_APPEND(lpDst, dstIdx, cwDstLength, dstCodepoint, isError); + assert(isError == FALSE && srcIdx == dstIdx); + } + } + else + { + while (srcIdx < cwSrcLength) + { + // On Windows with InvariantCulture, the LATIN CAPITAL LETTER I WITH DOT ABOVE (U+0130) + // lower cases to itself, whereas with ICU it lower cases to LATIN SMALL LETTER I (U+0069). + // We special case it to match the Windows invariant behavior. + U16_NEXT(lpSrc, srcIdx, cwSrcLength, srcCodepoint); + dstCodepoint = ((srcCodepoint == (UChar32)0x0130) ? (UChar32)0x0130 : u_tolower(srcCodepoint)); + U16_APPEND(lpDst, dstIdx, cwDstLength, dstCodepoint, isError); + assert(isError == FALSE && srcIdx == dstIdx); + } + } +} + +/* +Function: +ChangeCaseTurkish + +Performs upper or lower casing of a string into a new buffer, performing special +casing for Turkish. +*/ +extern "C" void GlobalizationNative_ChangeCaseTurkish( + const UChar* lpSrc, int32_t cwSrcLength, UChar* lpDst, int32_t cwDstLength, int32_t bToUpper) +{ + // See algorithmic comment in ChangeCase. 
+ + UBool isError = FALSE; + int32_t srcIdx = 0, dstIdx = 0; + UChar32 srcCodepoint, dstCodepoint; + + if (bToUpper) + { + while (srcIdx < cwSrcLength) + { + // In turkish casing, LATIN SMALL LETTER I (U+0069) upper cases to LATIN + // CAPITAL LETTER I WITH DOT ABOVE (U+0130). + U16_NEXT(lpSrc, srcIdx, cwSrcLength, srcCodepoint); + dstCodepoint = ((srcCodepoint == (UChar32)0x0069) ? (UChar32)0x0130 : u_toupper(srcCodepoint)); + U16_APPEND(lpDst, dstIdx, cwDstLength, dstCodepoint, isError); + assert(isError == FALSE && srcIdx == dstIdx); + } + } + else + { + while (srcIdx < cwSrcLength) + { + // In turkish casing, LATIN CAPITAL LETTER I (U+0049) lower cases to + // LATIN SMALL LETTER DOTLESS I (U+0131). + U16_NEXT(lpSrc, srcIdx, cwSrcLength, srcCodepoint); + dstCodepoint = ((srcCodepoint == (UChar32)0x0049) ? (UChar32)0x0131 : u_tolower(srcCodepoint)); + U16_APPEND(lpDst, dstIdx, cwDstLength, dstCodepoint, isError); + assert(isError == FALSE && srcIdx == dstIdx); + } + } +} diff --git a/src/corefx/System.Globalization.Native/collation.cpp b/src/corefx/System.Globalization.Native/collation.cpp new file mode 100644 index 0000000000..6039a9ef39 --- /dev/null +++ b/src/corefx/System.Globalization.Native/collation.cpp @@ -0,0 +1,701 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. 
+// + +#include <assert.h> +#include <pthread.h> +#include <stdint.h> +#include <vector> +#include <map> +#include <unicode/uchar.h> +#include <unicode/ucol.h> +#include <unicode/usearch.h> +#include <unicode/utf16.h> + +#include "config.h" + +const int32_t CompareOptionsIgnoreCase = 0x1; +const int32_t CompareOptionsIgnoreNonSpace = 0x2; +const int32_t CompareOptionsIgnoreSymbols = 0x4; +const int32_t CompareOptionsIgnoreKanaType = 0x8; +const int32_t CompareOptionsIgnoreWidth = 0x10; +// const int32_t CompareOptionsStringSort = 0x20000000; +// ICU's default is to use "StringSort", i.e. nonalphanumeric symbols come before alphanumeric. +// When StringSort is not specified (.NET's default), the sort order will be different between +// Windows and Unix platforms. The nonalphanumeric symbols will come after alphanumeric +// characters on Windows, but before on Unix. +// Since locale - specific string sort order can change from one version of Windows to the next, +// there is no reason to guarantee string sort order between Windows and ICU. Thus trying to +// change ICU's default behavior here isn't really justified unless someone has a strong reason +// for !StringSort to behave differently. + +typedef std::map<int32_t, UCollator*> TCollatorMap; +typedef std::pair<int32_t, UCollator*> TCollatorMapPair; + +/* + * For increased performance, we cache the UCollator objects for a locale and + * share them across threads. This is safe (and supported in ICU) if we ensure + * multiple threads are only ever dealing with const UCollators. 
+ */ +typedef struct _sort_handle +{ + UCollator* regular; + TCollatorMap collatorsPerOption; + pthread_mutex_t collatorsLockObject; + + _sort_handle() : regular(nullptr) + { + int result = pthread_mutex_init(&collatorsLockObject, NULL); + if (result != 0) + { + assert(false && "Unexpected pthread_mutex_init return value."); + } + } + +} SortHandle; + +// Hiragana character range +const UChar hiraganaStart = 0x3041; +const UChar hiraganaEnd = 0x309e; +const UChar hiraganaToKatakanaOffset = 0x30a1 - 0x3041; + +// Mapping between half- and fullwidth characters. +// LowerChars are the characters that should sort lower than HigherChars +const UChar g_HalfFullLowerChars[] = { + // halfwidth characters + 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027, 0x0028, 0x0029, 0x002a, 0x002b, 0x002c, 0x002d, 0x002e, 0x002f, + 0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037, 0x0038, 0x0039, 0x003a, 0x003b, 0x003c, 0x003d, 0x003e, + 0x003f, 0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004a, 0x004b, 0x004c, 0x004d, + 0x004e, 0x004f, 0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005a, 0x005b, 0x005d, + 0x005e, 0x005f, 0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067, 0x0068, 0x0069, 0x006a, 0x006b, 0x006c, + 0x006d, 0x006e, 0x006f, 0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077, 0x0078, 0x0079, 0x007a, 0x007b, + 0x007c, 0x007d, 0x007e, 0x00a2, 0x00a3, 0x00ac, 0x00af, 0x00a6, 0x00a5, 0x20a9, + + // fullwidth characters + 0x3002, 0x300c, 0x300d, 0x3001, 0x30fb, 0x30f2, 0x30a1, 0x30a3, 0x30a5, 0x30a7, 0x30a9, 0x30e3, 0x30e5, 0x30e7, 0x30c3, + 0x30a2, 0x30a4, 0x30a6, 0x30a8, 0x30aa, 0x30ab, 0x30ad, 0x30af, 0x30b1, 0x30b3, 0x30b5, 0x30b7, 0x30b9, 0x30bb, 0x30bd, + 0x30bf, 0x30c1, 0x30c4, 0x30c6, 0x30c8, 0x30ca, 0x30cb, 0x30cc, 0x30cd, 0x30ce, 0x30cf, 0x30d2, 0x30d5, 0x30d8, 0x30db, + 0x30de, 0x30df, 0x30e0, 0x30e1, 0x30e2, 0x30e4, 0x30e6, 0x30e8, 0x30e9, 0x30ea, 
0x30eb, 0x30ec, 0x30ed, 0x30ef, 0x30f3, + 0x3164, 0x3131, 0x3132, 0x3133, 0x3134, 0x3135, 0x3136, 0x3137, 0x3138, 0x3139, 0x313a, 0x313b, 0x313c, 0x313d, 0x313e, + 0x313f, 0x3140, 0x3141, 0x3142, 0x3143, 0x3144, 0x3145, 0x3146, 0x3147, 0x3148, 0x3149, 0x314a, 0x314b, 0x314c, 0x314d, + 0x314e, 0x314f, 0x3150, 0x3151, 0x3152, 0x3153, 0x3154, 0x3155, 0x3156, 0x3157, 0x3158, 0x3159, 0x315a, 0x315b, 0x315c, + 0x315d, 0x315e, 0x315f, 0x3160, 0x3161, 0x3162, 0x3163 + +}; +const UChar g_HalfFullHigherChars[] = { + // fullwidth characters + 0xff01, 0xff02, 0xff03, 0xff04, 0xff05, 0xff06, 0xff07, 0xff08, 0xff09, 0xff0a, 0xff0b, 0xff0c, 0xff0d, 0xff0e, 0xff0f, + 0xff10, 0xff11, 0xff12, 0xff13, 0xff14, 0xff15, 0xff16, 0xff17, 0xff18, 0xff19, 0xff1a, 0xff1b, 0xff1c, 0xff1d, 0xff1e, + 0xff1f, 0xff20, 0xff21, 0xff22, 0xff23, 0xff24, 0xff25, 0xff26, 0xff27, 0xff28, 0xff29, 0xff2a, 0xff2b, 0xff2c, 0xff2d, + 0xff2e, 0xff2f, 0xff30, 0xff31, 0xff32, 0xff33, 0xff34, 0xff35, 0xff36, 0xff37, 0xff38, 0xff39, 0xff3a, 0xff3b, 0xff3d, + 0xff3e, 0xff3f, 0xff40, 0xff41, 0xff42, 0xff43, 0xff44, 0xff45, 0xff46, 0xff47, 0xff48, 0xff49, 0xff4a, 0xff4b, 0xff4c, + 0xff4d, 0xff4e, 0xff4f, 0xff50, 0xff51, 0xff52, 0xff53, 0xff54, 0xff55, 0xff56, 0xff57, 0xff58, 0xff59, 0xff5a, 0xff5b, + 0xff5c, 0xff5d, 0xff5e, 0xffe0, 0xffe1, 0xffe2, 0xffe3, 0xffe4, 0xffe5, 0xffe6, + + // halfwidth characters + 0xff61, 0xff62, 0xff63, 0xff64, 0xff65, 0xff66, 0xff67, 0xff68, 0xff69, 0xff6a, 0xff6b, 0xff6c, 0xff6d, 0xff6e, 0xff6f, + 0xff71, 0xff72, 0xff73, 0xff74, 0xff75, 0xff76, 0xff77, 0xff78, 0xff79, 0xff7a, 0xff7b, 0xff7c, 0xff7d, 0xff7e, 0xff7f, + 0xff80, 0xff81, 0xff82, 0xff83, 0xff84, 0xff85, 0xff86, 0xff87, 0xff88, 0xff89, 0xff8a, 0xff8b, 0xff8c, 0xff8d, 0xff8e, + 0xff8f, 0xff90, 0xff91, 0xff92, 0xff93, 0xff94, 0xff95, 0xff96, 0xff97, 0xff98, 0xff99, 0xff9a, 0xff9b, 0xff9c, 0xff9d, + 0xffa0, 0xffa1, 0xffa2, 0xffa3, 0xffa4, 0xffa5, 0xffa6, 0xffa7, 0xffa8, 0xffa9, 0xffaa, 0xffab, 0xffac, 0xffad, 0xffae, + 0xffaf, 
0xffb0, 0xffb1, 0xffb2, 0xffb3, 0xffb4, 0xffb5, 0xffb6, 0xffb7, 0xffb8, 0xffb9, 0xffba, 0xffbb, 0xffbc, 0xffbd, + 0xffbe, 0xffc2, 0xffc3, 0xffc4, 0xffc5, 0xffc6, 0xffc7, 0xffca, 0xffcb, 0xffcc, 0xffcd, 0xffce, 0xffcf, 0xffd2, 0xffd3, + 0xffd4, 0xffd5, 0xffd6, 0xffd7, 0xffda, 0xffdb, 0xffdc +}; +const int32_t g_HalfFullCharsLength = (sizeof(g_HalfFullHigherChars) / sizeof(UChar)); + +/* +ICU collation rules reserve any punctuation and whitespace characters for use in the syntax. +Thus, to use these characters in a rule, they need to be escaped. + +This rule was taken from http://www.unicode.org/reports/tr35/tr35-collation.html#Rules. +*/ +bool NeedsEscape(UChar character) +{ + return ((0x21 <= character && character <= 0x2f) + || (0x3a <= character && character <= 0x40) + || (0x5b <= character && character <= 0x60) + || (0x7b <= character && character <= 0x7e)); +} + +/* +Gets a value indicating whether the HalfFullHigher character is considered a symbol character. + +The ranges specified here are only checking for characters in the g_HalfFullHigherChars list and needs +to be combined with NeedsEscape above with the g_HalfFullLowerChars for all the IgnoreSymbols characters. +This is done so we can use range checks instead of comparing individual characters. + +These ranges were obtained by running the above characters through .NET CompareInfo.Compare +with CompareOptions.IgnoreSymbols on Windows. +*/ +bool IsHalfFullHigherSymbol(UChar character) +{ + return (0xffe0 <= character && character <= 0xffe6) + || (0xff61 <= character && character <= 0xff65); +} + +/* +Gets a string of custom collation rules, if necessary. + +Since the CompareOptions flags don't map 1:1 with ICU default functionality, we need to fall back to using +custom rules in order to support IgnoreKanaType and IgnoreWidth CompareOptions correctly. 
+*/ +std::vector<UChar> GetCustomRules(int32_t options, UColAttributeValue strength, bool isIgnoreSymbols) +{ + bool isIgnoreKanaType = (options & CompareOptionsIgnoreKanaType) == CompareOptionsIgnoreKanaType; + bool isIgnoreWidth = (options & CompareOptionsIgnoreWidth) == CompareOptionsIgnoreWidth; + + // kana differs at the tertiary level + bool needsIgnoreKanaTypeCustomRule = isIgnoreKanaType && strength >= UCOL_TERTIARY; + bool needsNotIgnoreKanaTypeCustomRule = !isIgnoreKanaType && strength < UCOL_TERTIARY; + + // character width differs at the tertiary level + bool needsIgnoreWidthCustomRule = isIgnoreWidth && strength >= UCOL_TERTIARY; + bool needsNotIgnoreWidthCustomRule = !isIgnoreWidth && strength < UCOL_TERTIARY; + + std::vector<UChar> customRules; + if (needsIgnoreKanaTypeCustomRule || needsNotIgnoreKanaTypeCustomRule || needsIgnoreWidthCustomRule || needsNotIgnoreWidthCustomRule) + { + // If we need to create customRules, the KanaType custom rule will be 88 kana characters * 4 = 352 chars long + // and the Width custom rule will be at least 215 halfwidth characters * 4 = 860 chars long. + // Use 512 as the starting size, so the customRules won't have to grow if we are just + // doing the KanaType custom rule. + customRules.reserve(512); + + if (needsIgnoreKanaTypeCustomRule || needsNotIgnoreKanaTypeCustomRule) + { + UChar compareChar = needsIgnoreKanaTypeCustomRule ? '=' : '<'; + + for (UChar hiraganaChar = hiraganaStart; hiraganaChar <= hiraganaEnd; hiraganaChar++) + { + // Hiragana is the range 3041 to 3096 & 309D & 309E + if (hiraganaChar <= 0x3096 || hiraganaChar >= 0x309D) // characters between 3096 and 309D are not mapped to katakana + { + customRules.push_back('&'); + customRules.push_back(hiraganaChar); + customRules.push_back(compareChar); + customRules.push_back(hiraganaChar + hiraganaToKatakanaOffset); + } + } + } + + if (needsIgnoreWidthCustomRule || needsNotIgnoreWidthCustomRule) + { + UChar compareChar = needsIgnoreWidthCustomRule ? 
'=' : '<'; + + UChar lowerChar; + UChar higherChar; + bool needsEscape; + for (int i = 0; i < g_HalfFullCharsLength; i++) + { + lowerChar = g_HalfFullLowerChars[i]; + higherChar = g_HalfFullHigherChars[i]; + // the lower chars need to be checked for escaping since they contain ASCII punctuation + needsEscape = NeedsEscape(lowerChar); + + // when isIgnoreSymbols is true and we are not ignoring width, check to see if + // this character is a symbol, and if so skip it + if (!(isIgnoreSymbols && needsNotIgnoreWidthCustomRule && (needsEscape || IsHalfFullHigherSymbol(higherChar)))) + { + customRules.push_back('&'); + + if (needsEscape) + { + customRules.push_back('\\'); + } + customRules.push_back(lowerChar); + + customRules.push_back(compareChar); + customRules.push_back(higherChar); + } + } + } + } + + return customRules; +} + +/* + * The collator returned by this function is owned by the callee and must be + * closed when this method returns with a U_SUCCESS UErrorCode. + * + * On error, the return value is undefined. 
+ */ +UCollator* CloneCollatorWithOptions(const UCollator* pCollator, int32_t options, UErrorCode* pErr) +{ + UColAttributeValue strength = ucol_getStrength(pCollator); + + bool isIgnoreCase = (options & CompareOptionsIgnoreCase) == CompareOptionsIgnoreCase; + bool isIgnoreNonSpace = (options & CompareOptionsIgnoreNonSpace) == CompareOptionsIgnoreNonSpace; + bool isIgnoreSymbols = (options & CompareOptionsIgnoreSymbols) == CompareOptionsIgnoreSymbols; + + if (isIgnoreCase) + { + strength = UCOL_SECONDARY; + } + + if (isIgnoreNonSpace) + { + strength = UCOL_PRIMARY; + } + + UCollator* pClonedCollator; + std::vector<UChar> customRules = GetCustomRules(options, strength, isIgnoreSymbols); + if (customRules.empty()) + { + pClonedCollator = ucol_safeClone(pCollator, nullptr, nullptr, pErr); + } + else + { + int32_t customRuleLength = customRules.size(); + + int32_t localeRulesLength; + const UChar* localeRules = ucol_getRules(pCollator, &localeRulesLength); + + std::vector<UChar> completeRules(localeRulesLength + customRuleLength + 1, '\0'); + for (int i = 0; i < localeRulesLength; i++) + { + completeRules[i] = localeRules[i]; + } + for (int i = 0; i < customRuleLength; i++) + { + completeRules[localeRulesLength + i] = customRules[i]; + } + + pClonedCollator = ucol_openRules(completeRules.data(), completeRules.size(), UCOL_DEFAULT, strength, NULL, pErr); + } + + if (isIgnoreSymbols) + { + ucol_setAttribute(pClonedCollator, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, pErr); + + // by default, ICU alternate shifted handling only ignores punctuation, but + // IgnoreSymbols needs symbols and currency as well, so change the "variable top" + // to include all symbols and currency +#if HAVE_SET_MAX_VARIABLE + ucol_setMaxVariable(pClonedCollator, UCOL_REORDER_CODE_CURRENCY, pErr); +#else + // 0xfdfc is the last currency character before the first digit character + // in http://source.icu-project.org/repos/icu/icu/tags/release-52-1/source/data/unidata/FractionalUCA.txt + const UChar 
ignoreSymbolsVariableTop[] = { 0xfdfc }; + ucol_setVariableTop(pClonedCollator, ignoreSymbolsVariableTop, 1, pErr); +#endif + } + + ucol_setAttribute(pClonedCollator, UCOL_STRENGTH, strength, pErr); + + // casing differs at the tertiary level. + // if strength is less than tertiary, but we are not ignoring case, then we need to flip CASE_LEVEL On + if (strength < UCOL_TERTIARY && !isIgnoreCase) + { + ucol_setAttribute(pClonedCollator, UCOL_CASE_LEVEL, UCOL_ON, pErr); + } + + return pClonedCollator; +} + +// Returns TRUE if all the collation elements in str are completely ignorable +bool CanIgnoreAllCollationElements(const UCollator* pColl, const UChar* lpStr, int32_t length) +{ + bool result = FALSE; + UErrorCode err = U_ZERO_ERROR; + UCollationElements* pCollElem = ucol_openElements(pColl, lpStr, length, &err); + + if (U_SUCCESS(err)) + { + int32_t curCollElem = UCOL_NULLORDER; + + result = TRUE; + + while ((curCollElem = ucol_next(pCollElem, &err)) != UCOL_NULLORDER) + { + if (curCollElem != 0) + { + result = FALSE; + break; + } + } + + if (U_FAILURE(err)) + { + result = FALSE; + } + + ucol_closeElements(pCollElem); + } + + return result; + +} + +extern "C" SortHandle* GlobalizationNative_GetSortHandle(const char* lpLocaleName) +{ + SortHandle* pSortHandle = new SortHandle(); + + UErrorCode err = U_ZERO_ERROR; + + pSortHandle->regular = ucol_open(lpLocaleName, &err); + + if (U_FAILURE(err)) + { + if (pSortHandle->regular != nullptr) + ucol_close(pSortHandle->regular); + + delete pSortHandle; + pSortHandle = nullptr; + } + + return pSortHandle; +} + +extern "C" void GlobalizationNative_CloseSortHandle(SortHandle* pSortHandle) +{ + ucol_close(pSortHandle->regular); + pSortHandle->regular = nullptr; + + TCollatorMap::iterator it; + for (it = pSortHandle->collatorsPerOption.begin(); it != pSortHandle->collatorsPerOption.end(); it++) + { + ucol_close(it->second); + } + + pthread_mutex_destroy(&pSortHandle->collatorsLockObject); + + delete pSortHandle; +} + +const 
UCollator* GetCollatorFromSortHandle(SortHandle* pSortHandle, int32_t options, UErrorCode* pErr) +{ + UCollator* pCollator; + if (options == 0) + { + pCollator = pSortHandle->regular; + } + else + { + int lockResult = pthread_mutex_lock(&pSortHandle->collatorsLockObject); + if (lockResult != 0) + { + assert(false && "Unexpected pthread_mutex_lock return value."); + } + + TCollatorMap::iterator entry = pSortHandle->collatorsPerOption.find(options); + if (entry == pSortHandle->collatorsPerOption.end()) + { + pCollator = CloneCollatorWithOptions(pSortHandle->regular, options, pErr); + pSortHandle->collatorsPerOption[options] = pCollator; + } + else + { + pCollator = entry->second; + } + + pthread_mutex_unlock(&pSortHandle->collatorsLockObject); + } + + return pCollator; +} + +/* +Function: +CompareString +*/ +extern "C" int32_t GlobalizationNative_CompareString( + SortHandle* pSortHandle, const UChar* lpStr1, int32_t cwStr1Length, const UChar* lpStr2, int32_t cwStr2Length, int32_t options) +{ + static_assert(UCOL_EQUAL == 0, "managed side requires 0 for equal strings"); + static_assert(UCOL_LESS < 0, "managed side requires less than zero for a < b"); + static_assert(UCOL_GREATER > 0, "managed side requires greater than zero for a > b"); + + UCollationResult result = UCOL_EQUAL; + UErrorCode err = U_ZERO_ERROR; + const UCollator* pColl = GetCollatorFromSortHandle(pSortHandle, options, &err); + + if (U_SUCCESS(err)) + { + result = ucol_strcoll(pColl, lpStr1, cwStr1Length, lpStr2, cwStr2Length); + } + + return result; +} + +/* +Function: +IndexOf +*/ +extern "C" int32_t GlobalizationNative_IndexOf( + SortHandle* pSortHandle, + const UChar* lpTarget, + int32_t cwTargetLength, + const UChar* lpSource, + int32_t cwSourceLength, + int32_t options) +{ + static_assert(USEARCH_DONE == -1, "managed side requires -1 for not found"); + + int32_t result = USEARCH_DONE; + UErrorCode err = U_ZERO_ERROR; + const UCollator* pColl = GetCollatorFromSortHandle(pSortHandle, options, &err); 
+ + if (U_SUCCESS(err)) + { + UStringSearch* pSearch = usearch_openFromCollator(lpTarget, cwTargetLength, lpSource, cwSourceLength, pColl, nullptr, &err); + + if (U_SUCCESS(err)) + { + result = usearch_first(pSearch, &err); + usearch_close(pSearch); + } + } + + return result; +} + +/* +Function: +LastIndexOf +*/ +extern "C" int32_t GlobalizationNative_LastIndexOf( + SortHandle* pSortHandle, + const UChar* lpTarget, + int32_t cwTargetLength, + const UChar* lpSource, + int32_t cwSourceLength, + int32_t options) +{ + static_assert(USEARCH_DONE == -1, "managed side requires -1 for not found"); + + int32_t result = USEARCH_DONE; + UErrorCode err = U_ZERO_ERROR; + const UCollator* pColl = GetCollatorFromSortHandle(pSortHandle, options, &err); + + if (U_SUCCESS(err)) + { + UStringSearch* pSearch = usearch_openFromCollator(lpTarget, cwTargetLength, lpSource, cwSourceLength, pColl, nullptr, &err); + + if (U_SUCCESS(err)) + { + result = usearch_last(pSearch, &err); + usearch_close(pSearch); + } + } + + return result; +} + +/* +Static Function: +AreEqualOrdinalIgnoreCase +*/ +static bool AreEqualOrdinalIgnoreCase(UChar32 one, UChar32 two) +{ + // Return whether the two characters are identical or would be identical if they were upper-cased. + + if (one == two) + { + return true; + } + + if (one == 0x0131 || two == 0x0131) + { + // On Windows with InvariantCulture, the LATIN SMALL LETTER DOTLESS I (U+0131) + // capitalizes to itself, whereas with ICU it capitalizes to LATIN CAPITAL LETTER I (U+0049). + // We special case it to match the Windows invariant behavior. 
+ return false; + } + + return u_toupper(one) == u_toupper(two); +} + +/* +Function: +IndexOfOrdinalIgnoreCase +*/ +extern "C" int32_t GlobalizationNative_IndexOfOrdinalIgnoreCase( + const UChar* lpTarget, int32_t cwTargetLength, const UChar* lpSource, int32_t cwSourceLength, int32_t findLast) +{ + int32_t result = -1; + + int32_t endIndex = cwSourceLength - cwTargetLength; + assert(endIndex >= 0); + + int32_t i = 0; + while (i <= endIndex) + { + int32_t srcIdx = i, trgIdx = 0; + const UChar *src = lpSource, *trg = lpTarget; + UChar32 srcCodepoint, trgCodepoint; + + bool match = true; + while (trgIdx < cwTargetLength) + { + U16_NEXT(src, srcIdx, cwSourceLength, srcCodepoint); + U16_NEXT(trg, trgIdx, cwTargetLength, trgCodepoint); + if (!AreEqualOrdinalIgnoreCase(srcCodepoint, trgCodepoint)) + { + match = false; + break; + } + } + + if (match) + { + result = i; + if (!findLast) + { + break; + } + } + + U16_FWD_1(lpSource, i, cwSourceLength); + } + + return result; +} + +/* + Return value is a "Win32 BOOL" (1 = true, 0 = false) + */ +extern "C" int32_t GlobalizationNative_StartsWith( + SortHandle* pSortHandle, + const UChar* lpTarget, + int32_t cwTargetLength, + const UChar* lpSource, + int32_t cwSourceLength, + int32_t options) +{ + int32_t result = FALSE; + UErrorCode err = U_ZERO_ERROR; + const UCollator* pColl = GetCollatorFromSortHandle(pSortHandle, options, &err); + + if (U_SUCCESS(err)) + { + UStringSearch* pSearch = usearch_openFromCollator(lpTarget, cwTargetLength, lpSource, cwSourceLength, pColl, nullptr, &err); + int32_t idx = USEARCH_DONE; + + if (U_SUCCESS(err)) + { + idx = usearch_first(pSearch, &err); + if (idx != USEARCH_DONE) + { + if (idx == 0) + { + result = TRUE; + } + else + { + result = CanIgnoreAllCollationElements(pColl, lpSource, idx); + } + } + + usearch_close(pSearch); + } + } + + return result; +} + +/* + Return value is a "Win32 BOOL" (1 = true, 0 = false) + */ +extern "C" int32_t GlobalizationNative_EndsWith( + SortHandle* pSortHandle, + 
const UChar* lpTarget, + int32_t cwTargetLength, + const UChar* lpSource, + int32_t cwSourceLength, + int32_t options) +{ + int32_t result = FALSE; + UErrorCode err = U_ZERO_ERROR; + const UCollator* pColl = GetCollatorFromSortHandle(pSortHandle, options, &err); + + if (U_SUCCESS(err)) + { + UStringSearch* pSearch = usearch_openFromCollator(lpTarget, cwTargetLength, lpSource, cwSourceLength, pColl, nullptr, &err); + int32_t idx = USEARCH_DONE; + + if (U_SUCCESS(err)) + { + idx = usearch_last(pSearch, &err); + + if (idx != USEARCH_DONE) + { + if ((idx + usearch_getMatchedLength(pSearch)) == cwSourceLength) + { + result = TRUE; + } + else + { + int32_t matchEnd = idx + usearch_getMatchedLength(pSearch); + int32_t remainingStringLength = cwSourceLength - matchEnd; + + result = CanIgnoreAllCollationElements(pColl, lpSource + matchEnd, remainingStringLength); + } + } + + usearch_close(pSearch); + } + } + + return result; +} + +extern "C" int32_t GlobalizationNative_GetSortKey( + SortHandle* pSortHandle, + const UChar* lpStr, + int32_t cwStrLength, + uint8_t* sortKey, + int32_t cbSortKeyLength, + int32_t options) +{ + UErrorCode err = U_ZERO_ERROR; + const UCollator* pColl = GetCollatorFromSortHandle(pSortHandle, options, &err); + int32_t result = 0; + + if (U_SUCCESS(err)) + { + result = ucol_getSortKey(pColl, lpStr, cwStrLength, sortKey, cbSortKeyLength); + } + + return result; +} + +extern "C" int32_t GlobalizationNative_CompareStringOrdinalIgnoreCase( + const UChar* lpStr1, int32_t cwStr1Length, const UChar* lpStr2, int32_t cwStr2Length) +{ + assert(lpStr1 != nullptr); + assert(cwStr1Length >= 0); + assert(lpStr2 != nullptr); + assert(cwStr2Length >= 0); + + int32_t str1Idx = 0; + int32_t str2Idx = 0; + + while (str1Idx < cwStr1Length && str2Idx < cwStr2Length) + { + UChar32 str1Codepoint; + UChar32 str2Codepoint; + + U16_NEXT(lpStr1, str1Idx, cwStr1Length, str1Codepoint); + U16_NEXT(lpStr2, str2Idx, cwStr2Length, str2Codepoint); + + if (str1Codepoint != 
str2Codepoint && u_toupper(str1Codepoint) != u_toupper(str2Codepoint)) + { + return str1Codepoint < str2Codepoint ? -1 : 1; + } + } + + if (cwStr1Length < cwStr2Length) + { + return -1; + } + + if (cwStr2Length < cwStr1Length) + { + return 1; + } + + return 0; +} diff --git a/src/corefx/System.Globalization.Native/config.h.in b/src/corefx/System.Globalization.Native/config.h.in new file mode 100644 index 0000000000..633bcfbd0f --- /dev/null +++ b/src/corefx/System.Globalization.Native/config.h.in @@ -0,0 +1,4 @@ +#pragma once + +#cmakedefine01 HAVE_UDAT_STANDALONE_SHORTER_WEEKDAYS +#cmakedefine01 HAVE_SET_MAX_VARIABLE diff --git a/src/corefx/System.Globalization.Native/configure.cmake b/src/corefx/System.Globalization.Native/configure.cmake new file mode 100644 index 0000000000..138127e003 --- /dev/null +++ b/src/corefx/System.Globalization.Native/configure.cmake @@ -0,0 +1,27 @@ +include(CheckCXXSourceCompiles) +include(CheckSymbolExists) + +set(CMAKE_REQUIRED_INCLUDES ${UTYPES_H} ${ICU_HOMEBREW_INC_PATH}) + +CHECK_CXX_SOURCE_COMPILES(" + #include <unicode/udat.h> + int main() { UDateFormatSymbolType e = UDAT_STANDALONE_SHORTER_WEEKDAYS; } +" HAVE_UDAT_STANDALONE_SHORTER_WEEKDAYS) + +if(NOT CLR_CMAKE_PLATFORM_DARWIN) + set(CMAKE_REQUIRED_LIBRARIES ${ICUUC} ${ICUI18N}) +else() + set(CMAKE_REQUIRED_LIBRARIES ${ICUCORE}) +endif() + +check_symbol_exists( + ucol_setMaxVariable + "unicode/ucol.h" + HAVE_SET_MAX_VARIABLE) + +unset(CMAKE_REQUIRED_LIBRARIES) +unset(CMAKE_REQUIRED_INCLUDES) + +configure_file( + ${CMAKE_CURRENT_SOURCE_DIR}/config.h.in + ${CMAKE_CURRENT_BINARY_DIR}/config.h) diff --git a/src/corefx/System.Globalization.Native/errors.h b/src/corefx/System.Globalization.Native/errors.h new file mode 100644 index 0000000000..2bfbdb2ba1 --- /dev/null +++ b/src/corefx/System.Globalization.Native/errors.h @@ -0,0 +1,36 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
+// See the LICENSE file in the project root for more information. + +#pragma once + +#include <unicode/utypes.h> + +/* +* These values should be kept in sync with +* Interop.GlobalizationInterop.ResultCode +*/ +enum ResultCode : int32_t +{ + Success = 0, + UnknownError = 1, + InsufficentBuffer = 2, +}; + +/* +Converts a UErrorCode to a ResultCode. +*/ +static ResultCode GetResultCode(UErrorCode err) +{ + if (err == U_BUFFER_OVERFLOW_ERROR || err == U_STRING_NOT_TERMINATED_WARNING) + { + return InsufficentBuffer; + } + + if (U_SUCCESS(err)) + { + return Success; + } + + return UnknownError; +} diff --git a/src/corefx/System.Globalization.Native/holders.h b/src/corefx/System.Globalization.Native/holders.h new file mode 100644 index 0000000000..529451f77c --- /dev/null +++ b/src/corefx/System.Globalization.Native/holders.h @@ -0,0 +1,102 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +#include <unicode/ucal.h> +#include <unicode/uenum.h> +#include <unicode/udatpg.h> +#include <unicode/udat.h> +#include <unicode/unum.h> +#include <unicode/uldnames.h> +#include <unicode/ures.h> + +// IcuHolder is a template that can manage the lifetime of a raw pointer to ensure that it is cleaned up at the correct +// time. The general usage pattern is to aquire some ICU resource via an _open call, then construct a holder using the +// pointer and UErrorCode to manage the lifetime. When the holder goes out of scope, the coresponding close method is +// called on the pointer. +template <typename T, typename Closer> +class IcuHolder +{ + public: + IcuHolder(T* p, UErrorCode err) + { + m_p = U_SUCCESS(err) ? 
p : nullptr; + } + + ~IcuHolder() + { + if (m_p != nullptr) + { + Closer()(m_p); + } + } + + private: + T* m_p; + IcuHolder(const IcuHolder&) = delete; + IcuHolder operator=(const IcuHolder&) = delete; +}; + +struct UCalendarCloser +{ + void operator()(UCalendar* pCal) const + { + ucal_close(pCal); + } +}; + +struct UEnumerationCloser +{ + void operator()(UEnumeration* pEnum) const + { + uenum_close(pEnum); + } +}; + +struct UDateTimePatternGeneratorCloser +{ + void operator()(UDateTimePatternGenerator* pGenerator) const + { + udatpg_close(pGenerator); + } +}; + +struct UDateFormatCloser +{ + void operator()(UDateFormat* pDateFormat) const + { + udat_close(pDateFormat); + } +}; + +struct UNumberFormatCloser +{ + void operator()(UNumberFormat* pNumberFormat) const + { + unum_close(pNumberFormat); + } +}; + +struct ULocaleDisplayNamesCloser +{ + void operator()(ULocaleDisplayNames* pLocaleDisplayNames) const + { + uldn_close(pLocaleDisplayNames); + } +}; + +struct UResourceBundleCloser +{ + void operator()(UResourceBundle* pResourceBundle) const + { + ures_close(pResourceBundle); + } +}; + +typedef IcuHolder<UCalendar, UCalendarCloser> UCalendarHolder; +typedef IcuHolder<UEnumeration, UEnumerationCloser> UEnumerationHolder; +typedef IcuHolder<UDateTimePatternGenerator, UDateTimePatternGeneratorCloser> UDateTimePatternGeneratorHolder; +typedef IcuHolder<UDateFormat, UDateFormatCloser> UDateFormatHolder; +typedef IcuHolder<UNumberFormat, UNumberFormatCloser> UNumberFormatHolder; +typedef IcuHolder<ULocaleDisplayNames, ULocaleDisplayNamesCloser> ULocaleDisplayNamesHolder; +typedef IcuHolder<UResourceBundle, UResourceBundleCloser> UResourceBundleHolder; diff --git a/src/corefx/System.Globalization.Native/idna.cpp b/src/corefx/System.Globalization.Native/idna.cpp new file mode 100644 index 0000000000..4820d2c3f2 --- /dev/null +++ b/src/corefx/System.Globalization.Native/idna.cpp @@ -0,0 +1,80 @@ +// Licensed to the .NET Foundation under one or more agreements. 
+// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. +// + +#include <stdint.h> +#include <unicode/uidna.h> + +const uint32_t AllowUnassigned = 0x1; +const uint32_t UseStd3AsciiRules = 0x2; + +uint32_t GetOptions(uint32_t flags) +{ + // Using Nontransitional to Unicode and Check ContextJ to match the current behavior of .NET on Windows + uint32_t options = UIDNA_NONTRANSITIONAL_TO_UNICODE | UIDNA_CHECK_CONTEXTJ; + + if ((flags & AllowUnassigned) == AllowUnassigned) + { + options |= UIDNA_ALLOW_UNASSIGNED; + } + + if ((flags & UseStd3AsciiRules) == UseStd3AsciiRules) + { + options |= UIDNA_USE_STD3_RULES; + } + + return options; +} + +/* +Function: +ToASCII + +Used by System.Globalization.IdnMapping.GetAsciiCore to convert an Unicode +domain name to ASCII + +Return values: +0: internal error during conversion. +>0: the length of the converted string (not including the null terminator). +*/ +extern "C" int32_t GlobalizationNative_ToAscii( + uint32_t flags, const UChar* lpSrc, int32_t cwSrcLength, UChar* lpDst, int32_t cwDstLength) +{ + UErrorCode err = U_ZERO_ERROR; + UIDNAInfo info = UIDNA_INFO_INITIALIZER; + + UIDNA* pIdna = uidna_openUTS46(GetOptions(flags), &err); + + int32_t asciiStrLen = uidna_nameToASCII(pIdna, lpSrc, cwSrcLength, lpDst, cwDstLength, &info, &err); + + uidna_close(pIdna); + + return ((U_SUCCESS(err) || (err == U_BUFFER_OVERFLOW_ERROR)) && (info.errors == 0)) ? asciiStrLen : 0; +} + +/* +Function: +ToUnicode + +Used by System.Globalization.IdnMapping.GetUnicodeCore to convert an ASCII name +to Unicode + +Return values: +0: internal error during conversion. +>0: the length of the converted string (not including the null terminator). 
+*/ +extern "C" int32_t GlobalizationNative_ToUnicode( + int32_t flags, const UChar* lpSrc, int32_t cwSrcLength, UChar* lpDst, int32_t cwDstLength) +{ + UErrorCode err = U_ZERO_ERROR; + UIDNAInfo info = UIDNA_INFO_INITIALIZER; + + UIDNA* pIdna = uidna_openUTS46(GetOptions(flags), &err); + + int32_t unicodeStrLen = uidna_nameToUnicode(pIdna, lpSrc, cwSrcLength, lpDst, cwDstLength, &info, &err); + + uidna_close(pIdna); + + return ((U_SUCCESS(err) || (err == U_BUFFER_OVERFLOW_ERROR)) && (info.errors == 0)) ? unicodeStrLen : 0; +} diff --git a/src/corefx/System.Globalization.Native/locale.cpp b/src/corefx/System.Globalization.Native/locale.cpp new file mode 100644 index 0000000000..1cb564a45a --- /dev/null +++ b/src/corefx/System.Globalization.Native/locale.cpp @@ -0,0 +1,206 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. +// + +#include <assert.h> +#include <stdint.h> +#include <string.h> +#include <stdlib.h> +#include <locale.h> + +#include "locale.hpp" + +int32_t UErrorCodeToBool(UErrorCode status) +{ + if (U_SUCCESS(status)) + { + return 1; + } + + // assert errors that should never occur + assert(status != U_BUFFER_OVERFLOW_ERROR); + assert(status != U_INTERNAL_PROGRAM_ERROR); + + // add possible SetLastError support here + + return 0; +} + +int32_t GetLocale( + const UChar* localeName, char* localeNameResult, int32_t localeNameResultLength, bool canonicalize, UErrorCode* err) +{ + char localeNameTemp[ULOC_FULLNAME_CAPACITY] = {0}; + int32_t localeLength; + + // Convert ourselves instead of doing u_UCharsToChars as that function considers '@' a variant and stops. 
+ for (int i = 0; i < ULOC_FULLNAME_CAPACITY - 1; i++) + { + UChar c = localeName[i]; + + if (c > (UChar)0x7F) + { + *err = U_ILLEGAL_ARGUMENT_ERROR; + return ULOC_FULLNAME_CAPACITY; + } + + localeNameTemp[i] = (char)c; + + if (c == (UChar)0x0) + { + break; + } + } + + if (canonicalize) + { + localeLength = uloc_canonicalize(localeNameTemp, localeNameResult, localeNameResultLength, err); + } + else + { + localeLength = uloc_getName(localeNameTemp, localeNameResult, localeNameResultLength, err); + } + + if (U_SUCCESS(*err)) + { + // Make sure the "language" part of the locale is reasonable (i.e. we can fetch it and it is within range). + // This mimics how the C++ ICU API determines if a locale is "bogus" or not. + + char language[ULOC_LANG_CAPACITY]; + uloc_getLanguage(localeNameTemp, language, ULOC_LANG_CAPACITY, err); + + if (*err == U_BUFFER_OVERFLOW_ERROR || *err == U_STRING_NOT_TERMINATED_WARNING) + { + // ULOC_LANG_CAPACITY includes the null terminator, so if we couldn't extract the language with the null + // terminator, the language must be invalid. + + *err = U_ILLEGAL_ARGUMENT_ERROR; + } + } + + return localeLength; +} + +UErrorCode u_charsToUChars_safe(const char* str, UChar* value, int32_t valueLength) +{ + int len = strlen(str); + + if (len >= valueLength) + { + return U_BUFFER_OVERFLOW_ERROR; + } + + u_charsToUChars(str, value, len + 1); + return U_ZERO_ERROR; +} + +int32_t FixupLocaleName(UChar* value, int32_t valueLength) +{ + int32_t i = 0; + for (; i < valueLength; i++) + { + if (value[i] == (UChar)'\0') + { + break; + } + else if (value[i] == (UChar)'_') + { + value[i] = (UChar)'-'; + } + } + + return i; +} + +bool IsEnvVarSet(const char* name) +{ + const char* value = getenv(name); + + return (value != nullptr) && (strcmp("", value) != 0); +} + +// The behavior of uloc_getDefault() on POSIX systems is to query +// setlocale(LC_MESSAGES) and use that value, unless it is C or +// POSIX. 
In that case it tries to read LC_ALL, LC_MESSAGES and LANG +// and then falls back to en_US_POSIX if none of them are set. +// +// en_US_POSIX is a weird locale since the collation rules treat 'a' +// and 'A' as different letters even when ignoring case. Furthermore +// it's common for LC_ALL, LC_MESSAGES and LANG to be unset when +// running under Docker. +// +// We'd rather default to invariant in this case. If any of these +// are set, we'll just call into ICU and let it do whatever +// normalization it would do. +const char* DetectDefaultLocaleName() +{ + char* loc = setlocale(LC_MESSAGES, nullptr); + + if (loc != nullptr && (strcmp("C", loc) == 0 || strcmp("POSIX", loc) == 0)) + { + if (!IsEnvVarSet("LC_ALL") && !IsEnvVarSet("LC_MESSAGES") && !IsEnvVarSet("LANG")) + { + return ""; + } + } + + return uloc_getDefault(); +} + +extern "C" int32_t GlobalizationNative_GetLocaleName(const UChar* localeName, UChar* value, int32_t valueLength) +{ + UErrorCode status = U_ZERO_ERROR; + + char localeNameBuffer[ULOC_FULLNAME_CAPACITY]; + GetLocale(localeName, localeNameBuffer, ULOC_FULLNAME_CAPACITY, true, &status); + + if (U_SUCCESS(status)) + { + status = u_charsToUChars_safe(localeNameBuffer, value, valueLength); + + if (U_SUCCESS(status)) + { + FixupLocaleName(value, valueLength); + } + } + + return UErrorCodeToBool(status); +} + +extern "C" int32_t GlobalizationNative_GetDefaultLocaleName(UChar* value, int32_t valueLength) +{ + char localeNameBuffer[ULOC_FULLNAME_CAPACITY]; + UErrorCode status = U_ZERO_ERROR; + + const char* defaultLocale = DetectDefaultLocaleName(); + + uloc_getBaseName(defaultLocale, localeNameBuffer, ULOC_FULLNAME_CAPACITY, &status); + + if (U_SUCCESS(status)) + { + status = u_charsToUChars_safe(localeNameBuffer, value, valueLength); + + if (U_SUCCESS(status)) + { + int localeNameLen = FixupLocaleName(value, valueLength); + + char collationValueTemp[ULOC_KEYWORDS_CAPACITY]; + int32_t collationLen = + uloc_getKeywordValue(defaultLocale, "collation", 
collationValueTemp, ULOC_KEYWORDS_CAPACITY, &status); + + if (U_SUCCESS(status) && collationLen > 0) + { + // copy the collation; managed uses a "_" to represent collation (not + // "@collation=") + status = u_charsToUChars_safe("_", &value[localeNameLen], valueLength - localeNameLen); + if (U_SUCCESS(status)) + { + status = u_charsToUChars_safe( + collationValueTemp, &value[localeNameLen + 1], valueLength - localeNameLen - 1); + } + } + } + } + + return UErrorCodeToBool(status); +} diff --git a/src/corefx/System.Globalization.Native/locale.hpp b/src/corefx/System.Globalization.Native/locale.hpp new file mode 100644 index 0000000000..4845859960 --- /dev/null +++ b/src/corefx/System.Globalization.Native/locale.hpp @@ -0,0 +1,52 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +#include "unicode/locid.h" + +/* +Function: +UErrorCodeToBool + +Convert an ICU UErrorCode to a Bool compatible with Win32 +Returns 1 for success, 0 otherwise +*/ +int32_t UErrorCodeToBool(UErrorCode code); + +/* +Function: +GetLocale + +Converts a managed localeName into something ICU understands and can use as a localeName. +*/ +int32_t GetLocale(const UChar* localeName, + char* localeNameResult, + int32_t localeNameResultLength, + bool canonicalize, + UErrorCode* err); + +/* +Function: +u_charsToUChars_safe + +Copies the given null terminated char* to UChar with error checking. Replacement for ICU u_charsToUChars +*/ +UErrorCode u_charsToUChars_safe(const char* str, UChar* value, int32_t valueLength); + +/* +Function: +FixupLocaleName + +Replace underscores with hyphens to interop with existing .NET code. +Returns the length of the string. 
+*/ +int FixupLocaleName(UChar* value, int32_t valueLength); + +/* +Function: +DetectDefaultLocaleName + +Detect the default locale for the machine, defaulting to Invaraint if +we can't compute one (different from uloc_getDefault()) would do. +*/ +const char* DetectDefaultLocaleName(); diff --git a/src/corefx/System.Globalization.Native/localeNumberData.cpp b/src/corefx/System.Globalization.Native/localeNumberData.cpp new file mode 100644 index 0000000000..595cb130b2 --- /dev/null +++ b/src/corefx/System.Globalization.Native/localeNumberData.cpp @@ -0,0 +1,558 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. +// + +#include <assert.h> +#include <string.h> +#include <vector> + +#include <unicode/ulocdata.h> + +#include "locale.hpp" +#include "holders.h" + +// invariant character definitions used by ICU +#define UCHAR_CURRENCY ((UChar)0x00A4) // international currency +#define UCHAR_SPACE ((UChar)0x0020) // space +#define UCHAR_NBSPACE ((UChar)0x00A0) // space +#define UCHAR_DIGIT ((UChar)0x0023) // '#' +#define UCHAR_SEMICOLON ((UChar)0x003B) // ';' +#define UCHAR_MINUS ((UChar)0x002D) // '-' +#define UCHAR_PERCENT ((UChar)0x0025) // '%' +#define UCHAR_OPENPAREN ((UChar)0x0028) // '(' +#define UCHAR_CLOSEPAREN ((UChar)0x0029) // ')' + +#define ARRAY_LENGTH(array) (sizeof(array) / sizeof(array[0])) + +// Enum that corresponds to managed enum CultureData.LocaleNumberData. +// The numeric values of the enum members match their Win32 counterparts. 
+enum LocaleNumberData : int32_t +{ + LanguageId = 0x00000001, + MeasurementSystem = 0x0000000D, + FractionalDigitsCount = 0x00000011, + NegativeNumberFormat = 0x00001010, + MonetaryFractionalDigitsCount = 0x00000019, + PositiveMonetaryNumberFormat = 0x0000001B, + NegativeMonetaryNumberFormat = 0x0000001C, + FirstDayofWeek = 0x0000100C, + FirstWeekOfYear = 0x0000100D, + ReadingLayout = 0x00000070, + NegativePercentFormat = 0x00000074, + PositivePercentFormat = 0x00000075, + Digit = 0x00000010, + Monetary = 0x00000018 +}; + +// Enum that corresponds to managed enum System.Globalization.CalendarWeekRule +enum CalendarWeekRule : int32_t +{ + FirstDay = 0, + FirstFullWeek = 1, + FirstFourDayWeek = 2 +}; + +/* +Function: +NormalizeNumericPattern + +Returns a numeric string pattern in a format that we can match against the +appropriate managed pattern. +*/ +std::string NormalizeNumericPattern(const UChar* srcPattern, bool isNegative) +{ + // A srcPattern example: "#,##0.00 C;(#,##0.00 C)" but where C is the + // international currency symbol (UCHAR_CURRENCY) + // The positive pattern comes first, then an optional negative pattern + // separated by a semicolon + // A destPattern example: "(C n)" where C represents the currency symbol, and + // n is the number + std::string destPattern; + + int iStart = 0; + int iEnd = u_strlen(srcPattern); + int32_t iNegativePatternStart = -1; + + for (int i = iStart; i < iEnd; i++) + { + if (srcPattern[i] == ';') + { + iNegativePatternStart = i; + } + } + + if (iNegativePatternStart >= 0) + { + if (isNegative) + { + iStart = iNegativePatternStart + 1; + } + else + { + iEnd = iNegativePatternStart - 1; + } + } + + bool minusAdded = false; + bool digitAdded = false; + bool currencyAdded = false; + bool spaceAdded = false; + + for (int i = iStart; i <= iEnd; i++) + { + UChar ch = srcPattern[i]; + switch (ch) + { + case UCHAR_DIGIT: + if (!digitAdded) + { + digitAdded = true; + destPattern.push_back('n'); + } + break; + + case 
UCHAR_CURRENCY: + if (!currencyAdded) + { + currencyAdded = true; + destPattern.push_back('C'); + } + break; + + case UCHAR_SPACE: + case UCHAR_NBSPACE: + if (!spaceAdded) + { + spaceAdded = true; + destPattern.push_back(' '); + } + else + { + assert(false); + } + break; + + case UCHAR_MINUS: + case UCHAR_OPENPAREN: + case UCHAR_CLOSEPAREN: + minusAdded = true; + destPattern.push_back(static_cast<char>(ch)); + break; + + case UCHAR_PERCENT: + destPattern.push_back('%'); + break; + } + } + + // if there is no negative subpattern, the ICU convention is to prefix the + // minus sign + if (isNegative && !minusAdded) + { + destPattern.insert(destPattern.begin(), '-'); + } + + return destPattern; +} + +/* +Function: +GetNumericPattern + +Determines the pattern from the decimalFormat and returns the matching pattern's +index from patterns[]. +Returns index -1 if no pattern is found. +*/ +int GetNumericPattern(const UNumberFormat* pNumberFormat, const char* patterns[], int patternsCount, bool isNegative) +{ + const int INVALID_FORMAT = -1; + const int MAX_DOTNET_NUMERIC_PATTERN_LENGTH = 6; // example: "(C n)" plus terminator + + UErrorCode ignore = U_ZERO_ERROR; + int32_t icuPatternLength = unum_toPattern(pNumberFormat, false, nullptr, 0, &ignore); + + std::vector<UChar> icuPattern(icuPatternLength + 1, '\0'); + + UErrorCode err = U_ZERO_ERROR; + + unum_toPattern(pNumberFormat, false, icuPattern.data(), icuPattern.size(), &err); + + assert(U_SUCCESS(err)); + + std::string normalizedPattern = NormalizeNumericPattern(icuPattern.data(), isNegative); + + assert(normalizedPattern.length() > 0); + assert(normalizedPattern.length() < MAX_DOTNET_NUMERIC_PATTERN_LENGTH); + + if (normalizedPattern.length() == 0 || normalizedPattern.length() >= MAX_DOTNET_NUMERIC_PATTERN_LENGTH) + { + return INVALID_FORMAT; + } + + for (int i = 0; i < patternsCount; i++) + { + if (strcmp(normalizedPattern.c_str(), patterns[i]) == 0) + { + return i; + } + }; + + assert(false); // should have found a 
valid pattern + return INVALID_FORMAT; +} + +/* +Function: +GetCurrencyNegativePattern + +Implementation of NumberFormatInfo.CurrencyNegativePattern. +Returns the pattern index. +*/ +int GetCurrencyNegativePattern(const char* locale) +{ + const int DEFAULT_VALUE = 0; + static const char* Patterns[] = {"(Cn)", + "-Cn", + "C-n", + "Cn-", + "(nC)", + "-nC", + "n-C", + "nC-", + "-n C", + "-C n", + "n C-", + "C n-", + "C -n", + "n- C", + "(C n)", + "(n C)"}; + UErrorCode status = U_ZERO_ERROR; + + UNumberFormat* pFormat = unum_open(UNUM_CURRENCY, nullptr, 0, locale, nullptr, &status); + UNumberFormatHolder formatHolder(pFormat, status); + + assert(U_SUCCESS(status)); + + if (U_SUCCESS(status)) + { + int value = GetNumericPattern(pFormat, Patterns, ARRAY_LENGTH(Patterns), true); + if (value >= 0) + { + return value; + } + } + + return DEFAULT_VALUE; +} + +/* +Function: +GetCurrencyPositivePattern + +Implementation of NumberFormatInfo.CurrencyPositivePattern. +Returns the pattern index. +*/ +int GetCurrencyPositivePattern(const char* locale) +{ + const int DEFAULT_VALUE = 0; + static const char* Patterns[] = {"Cn", "nC", "C n", "n C"}; + UErrorCode status = U_ZERO_ERROR; + + UNumberFormat* pFormat = unum_open(UNUM_CURRENCY, nullptr, 0, locale, nullptr, &status); + UNumberFormatHolder formatHolder(pFormat, status); + + assert(U_SUCCESS(status)); + + if (U_SUCCESS(status)) + { + int value = GetNumericPattern(pFormat, Patterns, ARRAY_LENGTH(Patterns), false); + if (value >= 0) + { + return value; + } + } + + return DEFAULT_VALUE; +} + +/* +Function: +GetNumberNegativePattern + +Implementation of NumberFormatInfo.NumberNegativePattern. +Returns the pattern index. 
+*/ +int GetNumberNegativePattern(const char* locale) +{ + const int DEFAULT_VALUE = 1; + static const char* Patterns[] = {"(n)", "-n", "- n", "n-", "n -"}; + UErrorCode status = U_ZERO_ERROR; + + UNumberFormat* pFormat = unum_open(UNUM_DECIMAL, nullptr, 0, locale, nullptr, &status); + UNumberFormatHolder formatHolder(pFormat, status); + + assert(U_SUCCESS(status)); + + if (U_SUCCESS(status)) + { + int value = GetNumericPattern(pFormat, Patterns, ARRAY_LENGTH(Patterns), true); + if (value >= 0) + { + return value; + } + } + + return DEFAULT_VALUE; +} + +/* +Function: +GetPercentNegativePattern + +Implementation of NumberFormatInfo.PercentNegativePattern. +Returns the pattern index. +*/ +int GetPercentNegativePattern(const char* locale) +{ + const int DEFAULT_VALUE = 0; + static const char* Patterns[] = { + "-n %", "-n%", "-%n", "%-n", "%n-", "n-%", "n%-", "-% n", "n %-", "% n-", "% -n", "n- %"}; + UErrorCode status = U_ZERO_ERROR; + + UNumberFormat* pFormat = unum_open(UNUM_PERCENT, nullptr, 0, locale, nullptr, &status); + UNumberFormatHolder formatHolder(pFormat, status); + + assert(U_SUCCESS(status)); + + if (U_SUCCESS(status)) + { + int value = GetNumericPattern(pFormat, Patterns, ARRAY_LENGTH(Patterns), true); + if (value >= 0) + { + return value; + } + } + + return DEFAULT_VALUE; +} + +/* +Function: +GetPercentPositivePattern + +Implementation of NumberFormatInfo.PercentPositivePattern. +Returns the pattern index. 
+*/ +int GetPercentPositivePattern(const char* locale) +{ + const int DEFAULT_VALUE = 0; + static const char* Patterns[] = {"n %", "n%", "%n", "% n"}; + UErrorCode status = U_ZERO_ERROR; + + UNumberFormat* pFormat = unum_open(UNUM_PERCENT, nullptr, 0, locale, nullptr, &status); + UNumberFormatHolder formatHolder(pFormat, status); + + assert(U_SUCCESS(status)); + + if (U_SUCCESS(status)) + { + int value = GetNumericPattern(pFormat, Patterns, ARRAY_LENGTH(Patterns), false); + if (value >= 0) + { + return value; + } + } + + return DEFAULT_VALUE; +} + +/* +Function: +GetMeasurementSystem + +Obtains the measurement system for the local, determining if US or metric. +Returns 1 for US, 0 otherwise. +*/ +UErrorCode GetMeasurementSystem(const char* locale, int32_t* value) +{ + UErrorCode status = U_ZERO_ERROR; + + UMeasurementSystem measurementSystem = ulocdata_getMeasurementSystem(locale, &status); + if (U_SUCCESS(status)) + { + *value = (measurementSystem == UMeasurementSystem::UMS_US) ? 1 : 0; + } + + return status; +} + +/* +PAL Function: +GetLocaleInfoInt + +Obtains integer locale information +Returns 1 for success, 0 otherwise +*/ +extern "C" int32_t GlobalizationNative_GetLocaleInfoInt( + const UChar* localeName, LocaleNumberData localeNumberData, int32_t* value) +{ + UErrorCode status = U_ZERO_ERROR; + char locale[ULOC_FULLNAME_CAPACITY]; + GetLocale(localeName, locale, ULOC_FULLNAME_CAPACITY, false, &status); + + if (U_FAILURE(status)) + { + return UErrorCodeToBool(U_ILLEGAL_ARGUMENT_ERROR); + } + + switch (localeNumberData) + { + case LanguageId: + *value = uloc_getLCID(locale); + break; + case MeasurementSystem: + status = GetMeasurementSystem(locale, value); + break; + case FractionalDigitsCount: + { + UNumberFormat* numformat = unum_open(UNUM_DECIMAL, NULL, 0, locale, NULL, &status); + if (U_SUCCESS(status)) + { + *value = unum_getAttribute(numformat, UNUM_MAX_FRACTION_DIGITS); + unum_close(numformat); + } + break; + } + case NegativeNumberFormat: + *value = 
GetNumberNegativePattern(locale); + break; + case MonetaryFractionalDigitsCount: + { + UNumberFormat* numformat = unum_open(UNUM_CURRENCY, NULL, 0, locale, NULL, &status); + if (U_SUCCESS(status)) + { + *value = unum_getAttribute(numformat, UNUM_MAX_FRACTION_DIGITS); + unum_close(numformat); + } + break; + } + case PositiveMonetaryNumberFormat: + *value = GetCurrencyPositivePattern(locale); + break; + case NegativeMonetaryNumberFormat: + *value = GetCurrencyNegativePattern(locale); + break; + case FirstWeekOfYear: + { + // corresponds to DateTimeFormat.CalendarWeekRule + UCalendar* pCal = ucal_open(nullptr, 0, locale, UCAL_TRADITIONAL, &status); + UCalendarHolder calHolder(pCal, status); + + if (U_SUCCESS(status)) + { + // values correspond to LOCALE_IFIRSTWEEKOFYEAR + int minDaysInWeek = ucal_getAttribute(pCal, UCAL_MINIMAL_DAYS_IN_FIRST_WEEK); + if (minDaysInWeek == 1) + { + *value = CalendarWeekRule::FirstDay; + } + else if (minDaysInWeek == 7) + { + *value = CalendarWeekRule::FirstFullWeek; + } + else if (minDaysInWeek >= 4) + { + *value = CalendarWeekRule::FirstFourDayWeek; + } + else + { + status = U_UNSUPPORTED_ERROR; + } + } + break; + } + case ReadingLayout: + { + // coresponds to values 0 and 1 in LOCALE_IREADINGLAYOUT (values 2 and 3 not + // used in coreclr) + // 0 - Left to right (such as en-US) + // 1 - Right to left (such as arabic locales) + ULayoutType orientation = uloc_getCharacterOrientation(locale, &status); + // alternative implementation in ICU 54+ is uloc_isRightToLeft() which + // also supports script tags in locale + if (U_SUCCESS(status)) + { + *value = (orientation == ULOC_LAYOUT_RTL) ? 
1 : 0; + } + break; + } + case FirstDayofWeek: + { + UCalendar* pCal = ucal_open(nullptr, 0, locale, UCAL_TRADITIONAL, &status); + UCalendarHolder calHolder(pCal, status); + + if (U_SUCCESS(status)) + { + *value = ucal_getAttribute(pCal, UCAL_FIRST_DAY_OF_WEEK) - 1; // .NET is 0-based and ICU is 1-based + } + break; + } + case NegativePercentFormat: + *value = GetPercentNegativePattern(locale); + break; + case PositivePercentFormat: + *value = GetPercentPositivePattern(locale); + break; + default: + status = U_UNSUPPORTED_ERROR; + assert(false); + break; + } + + return UErrorCodeToBool(status); +} + +/* +PAL Function: +GetLocaleInfoGroupingSizes + +Obtains grouping sizes for decimal and currency +Returns 1 for success, 0 otherwise +*/ +extern "C" int32_t GlobalizationNative_GetLocaleInfoGroupingSizes( + const UChar* localeName, LocaleNumberData localeGroupingData, int32_t* primaryGroupSize, int32_t* secondaryGroupSize) +{ + UErrorCode status = U_ZERO_ERROR; + char locale[ULOC_FULLNAME_CAPACITY]; + GetLocale(localeName, locale, ULOC_FULLNAME_CAPACITY, false, &status); + + if (U_FAILURE(status)) + { + return UErrorCodeToBool(U_ILLEGAL_ARGUMENT_ERROR); + } + + UNumberFormatStyle style; + switch (localeGroupingData) + { + case Digit: + style = UNUM_DECIMAL; + break; + case Monetary: + style = UNUM_CURRENCY; + break; + default: + return UErrorCodeToBool(U_UNSUPPORTED_ERROR); + } + + UNumberFormat* numformat = unum_open(style, NULL, 0, locale, NULL, &status); + if (U_SUCCESS(status)) + { + *primaryGroupSize = unum_getAttribute(numformat, UNUM_GROUPING_SIZE); + *secondaryGroupSize = unum_getAttribute(numformat, UNUM_SECONDARY_GROUPING_SIZE); + unum_close(numformat); + } + + return UErrorCodeToBool(status); +} diff --git a/src/corefx/System.Globalization.Native/localeStringData.cpp b/src/corefx/System.Globalization.Native/localeStringData.cpp new file mode 100644 index 0000000000..927da67095 --- /dev/null +++ b/src/corefx/System.Globalization.Native/localeStringData.cpp @@ 
-0,0 +1,320 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. +// + +#include <assert.h> +#include <string.h> +#include <vector> + +#include "locale.hpp" +#include "holders.h" + +// Enum that corresponds to managed enum CultureData.LocaleStringData. +// The numeric values of the enum members match their Win32 counterparts. +enum LocaleStringData : int32_t +{ + LocalizedDisplayName = 0x00000002, + EnglishDisplayName = 0x00000072, + NativeDisplayName = 0x00000073, + LocalizedLanguageName = 0x0000006f, + EnglishLanguageName = 0x00001001, + NativeLanguageName = 0x00000004, + EnglishCountryName = 0x00001002, + NativeCountryName = 0x00000008, + ListSeparator = 0x0000000C, + DecimalSeparator = 0x0000000E, + ThousandSeparator = 0x0000000F, + Digits = 0x00000013, + MonetarySymbol = 0x00000014, + Iso4217MonetarySymbol = 0x00000015, + MonetaryDecimalSeparator = 0x00000016, + MonetaryThousandSeparator = 0x00000017, + AMDesignator = 0x00000028, + PMDesignator = 0x00000029, + PositiveSign = 0x00000050, + NegativeSign = 0x00000051, + Iso639LanguageName = 0x00000059, + Iso3166CountryName = 0x0000005A, + NaNSymbol = 0x00000069, + PositiveInfinitySymbol = 0x0000006a, + ParentName = 0x0000006d, + PercentSymbol = 0x00000076, + PerMilleSymbol = 0x00000077 +}; + +/* +Function: +GetLocaleInfoDecimalFormatSymbol + +Obtains the value of a DecimalFormatSymbols +*/ +UErrorCode +GetLocaleInfoDecimalFormatSymbol(const char* locale, UNumberFormatSymbol symbol, UChar* value, int32_t valueLength) +{ + UErrorCode status = U_ZERO_ERROR; + UNumberFormat* pFormat = unum_open(UNUM_DECIMAL, nullptr, 0, locale, nullptr, &status); + UNumberFormatHolder formatHolder(pFormat, status); + + if (U_FAILURE(status)) + { + return status; + } + + unum_getSymbol(pFormat, symbol, value, valueLength, &status); + + return status; +} + +/* +Function: 
+GetDigitSymbol + +Obtains the value of a Digit DecimalFormatSymbols +*/ +UErrorCode GetDigitSymbol(const char* locale, + UErrorCode previousStatus, + UNumberFormatSymbol symbol, + int digit, + UChar* value, + int32_t valueLength) +{ + if (U_FAILURE(previousStatus)) + { + return previousStatus; + } + + return GetLocaleInfoDecimalFormatSymbol(locale, symbol, value + digit, valueLength - digit); +} + +/* +Function: +GetLocaleInfoAmPm + +Obtains the value of the AM or PM string for a locale. +*/ +UErrorCode GetLocaleInfoAmPm(const char* locale, bool am, UChar* value, int32_t valueLength) +{ + UErrorCode status = U_ZERO_ERROR; + UDateFormat* pFormat = udat_open(UDAT_DEFAULT, UDAT_DEFAULT, locale, nullptr, 0, nullptr, 0, &status); + UDateFormatHolder formatHolder(pFormat, status); + + if (U_FAILURE(status)) + { + return status; + } + + udat_getSymbols(pFormat, UDAT_AM_PMS, am ? 0 : 1, value, valueLength, &status); + + return status; +} + +/* +Function: +GetLocaleIso639LanguageName + +Gets the language name for a locale (via uloc_getLanguage) and converts the result to UChars +*/ +UErrorCode GetLocaleIso639LanguageName(const char* locale, UChar* value, int32_t valueLength) +{ + UErrorCode status = U_ZERO_ERROR; + int32_t length = uloc_getLanguage(locale, nullptr, 0, &status); + + std::vector<char> buf(length + 1, '\0'); + status = U_ZERO_ERROR; + + uloc_getLanguage(locale, buf.data(), length + 1, &status); + + if (U_SUCCESS(status)) + { + status = u_charsToUChars_safe(buf.data(), value, valueLength); + } + + return status; +} + +/* +Function: +GetLocaleIso3166CountryName + +Gets the country name for a locale (via uloc_getCountry) and converts the result to UChars +*/ +UErrorCode GetLocaleIso3166CountryName(const char* locale, UChar* value, int32_t valueLength) +{ + UErrorCode status = U_ZERO_ERROR; + int32_t length = uloc_getCountry(locale, nullptr, 0, &status); + + std::vector<char> buf(length + 1, '\0'); + status = U_ZERO_ERROR; + + uloc_getCountry(locale, buf.data(), 
length + 1, &status); + + if (U_SUCCESS(status)) + { + status = u_charsToUChars_safe(buf.data(), value, valueLength); + } + + return status; +} + +/* +PAL Function: +GetLocaleInfoString + +Obtains string locale information. +Returns 1 for success, 0 otherwise +*/ +extern "C" int32_t GlobalizationNative_GetLocaleInfoString( + const UChar* localeName, LocaleStringData localeStringData, UChar* value, int32_t valueLength) +{ + UErrorCode status = U_ZERO_ERROR; + char locale[ULOC_FULLNAME_CAPACITY]; + GetLocale(localeName, locale, ULOC_FULLNAME_CAPACITY, false, &status); + + if (U_FAILURE(status)) + { + return UErrorCodeToBool(U_ILLEGAL_ARGUMENT_ERROR); + } + + switch (localeStringData) + { + case LocalizedDisplayName: + uloc_getDisplayName(locale, DetectDefaultLocaleName(), value, valueLength, &status); + break; + case EnglishDisplayName: + uloc_getDisplayName(locale, ULOC_ENGLISH, value, valueLength, &status); + break; + case NativeDisplayName: + uloc_getDisplayName(locale, locale, value, valueLength, &status); + break; + case LocalizedLanguageName: + uloc_getDisplayLanguage(locale, DetectDefaultLocaleName(), value, valueLength, &status); + break; + case EnglishLanguageName: + uloc_getDisplayLanguage(locale, ULOC_ENGLISH, value, valueLength, &status); + break; + case NativeLanguageName: + uloc_getDisplayLanguage(locale, locale, value, valueLength, &status); + break; + case EnglishCountryName: + uloc_getDisplayCountry(locale, ULOC_ENGLISH, value, valueLength, &status); + break; + case NativeCountryName: + uloc_getDisplayCountry(locale, locale, value, valueLength, &status); + break; + case ListSeparator: + // fall through + case ThousandSeparator: + status = GetLocaleInfoDecimalFormatSymbol(locale, UNUM_GROUPING_SEPARATOR_SYMBOL, value, valueLength); + break; + case DecimalSeparator: + status = GetLocaleInfoDecimalFormatSymbol(locale, UNUM_DECIMAL_SEPARATOR_SYMBOL, value, valueLength); + break; + case Digits: + status = GetDigitSymbol(locale, status, 
UNUM_ZERO_DIGIT_SYMBOL, 0, value, valueLength); + // symbols UNUM_ONE_DIGIT to UNUM_NINE_DIGIT are contiguous + for (int32_t symbol = UNUM_ONE_DIGIT_SYMBOL; symbol <= UNUM_NINE_DIGIT_SYMBOL; symbol++) + { + int charIndex = symbol - UNUM_ONE_DIGIT_SYMBOL + 1; + status = GetDigitSymbol( + locale, status, static_cast<UNumberFormatSymbol>(symbol), charIndex, value, valueLength); + } + break; + case MonetarySymbol: + status = GetLocaleInfoDecimalFormatSymbol(locale, UNUM_CURRENCY_SYMBOL, value, valueLength); + break; + case Iso4217MonetarySymbol: + status = GetLocaleInfoDecimalFormatSymbol(locale, UNUM_INTL_CURRENCY_SYMBOL, value, valueLength); + break; + case MonetaryDecimalSeparator: + status = GetLocaleInfoDecimalFormatSymbol(locale, UNUM_MONETARY_SEPARATOR_SYMBOL, value, valueLength); + break; + case MonetaryThousandSeparator: + status = + GetLocaleInfoDecimalFormatSymbol(locale, UNUM_MONETARY_GROUPING_SEPARATOR_SYMBOL, value, valueLength); + break; + case AMDesignator: + status = GetLocaleInfoAmPm(locale, true, value, valueLength); + break; + case PMDesignator: + status = GetLocaleInfoAmPm(locale, false, value, valueLength); + break; + case PositiveSign: + status = GetLocaleInfoDecimalFormatSymbol(locale, UNUM_PLUS_SIGN_SYMBOL, value, valueLength); + break; + case NegativeSign: + status = GetLocaleInfoDecimalFormatSymbol(locale, UNUM_MINUS_SIGN_SYMBOL, value, valueLength); + break; + case Iso639LanguageName: + status = GetLocaleIso639LanguageName(locale, value, valueLength); + break; + case Iso3166CountryName: + status = GetLocaleIso3166CountryName(locale, value, valueLength); + break; + case NaNSymbol: + status = GetLocaleInfoDecimalFormatSymbol(locale, UNUM_NAN_SYMBOL, value, valueLength); + break; + case PositiveInfinitySymbol: + status = GetLocaleInfoDecimalFormatSymbol(locale, UNUM_INFINITY_SYMBOL, value, valueLength); + break; + case ParentName: + { + // ICU supports lang[-script][-region][-variant] so up to 4 parents + // including invariant locale + char 
localeNameTemp[ULOC_FULLNAME_CAPACITY]; + + uloc_getParent(locale, localeNameTemp, ULOC_FULLNAME_CAPACITY, &status); + if (U_SUCCESS(status)) + { + status = u_charsToUChars_safe(localeNameTemp, value, valueLength); + if (U_SUCCESS(status)) + { + FixupLocaleName(value, valueLength); + } + } + break; + } + case PercentSymbol: + status = GetLocaleInfoDecimalFormatSymbol(locale, UNUM_PERCENT_SYMBOL, value, valueLength); + break; + case PerMilleSymbol: + status = GetLocaleInfoDecimalFormatSymbol(locale, UNUM_PERMILL_SYMBOL, value, valueLength); + break; + default: + status = U_UNSUPPORTED_ERROR; + break; + }; + + return UErrorCodeToBool(status); +} + +/* +PAL Function: +GetLocaleTimeFormat + +Obtains time format information (in ICU format, it needs to be coverted to .NET Format). +Returns 1 for success, 0 otherwise +*/ +extern "C" int32_t GlobalizationNative_GetLocaleTimeFormat( + const UChar* localeName, int shortFormat, UChar* value, int32_t valueLength) +{ + UErrorCode err = U_ZERO_ERROR; + char locale[ULOC_FULLNAME_CAPACITY]; + GetLocale(localeName, locale, ULOC_FULLNAME_CAPACITY, false, &err); + + if (U_FAILURE(err)) + { + return UErrorCodeToBool(U_ILLEGAL_ARGUMENT_ERROR); + } + + UDateFormatStyle style = (shortFormat != 0) ? UDAT_SHORT : UDAT_MEDIUM; + UDateFormat* pFormat = udat_open(style, UDAT_NONE, locale, nullptr, 0, nullptr, 0, &err); + UDateFormatHolder formatHolder(pFormat, err); + + if (U_FAILURE(err)) + return UErrorCodeToBool(err); + + udat_toPattern(pFormat, false, value, valueLength, &err); + + return UErrorCodeToBool(err); +} diff --git a/src/corefx/System.Globalization.Native/normalization.cpp b/src/corefx/System.Globalization.Native/normalization.cpp new file mode 100644 index 0000000000..f96f5ee315 --- /dev/null +++ b/src/corefx/System.Globalization.Native/normalization.cpp @@ -0,0 +1,88 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
+// See the LICENSE file in the project root for more information. +// + +#include <stdint.h> +#include <unicode/unorm2.h> + +/* + * These values should be kept in sync with System.Text.NormalizationForm + */ +enum class NormalizationForm : int32_t +{ + C = 0x1, + D = 0x2, + KC = 0x5, + KD = 0x6 +}; + +const UNormalizer2* GetNormalizerForForm(NormalizationForm normalizationForm, UErrorCode* pErrorCode) +{ + switch (normalizationForm) + { + case NormalizationForm::C: + return unorm2_getNFCInstance(pErrorCode); + case NormalizationForm::D: + return unorm2_getNFDInstance(pErrorCode); + case NormalizationForm::KC: + return unorm2_getNFKCInstance(pErrorCode); + case NormalizationForm::KD: + return unorm2_getNFKDInstance(pErrorCode); + } + + *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR; + return nullptr; +} + +/* +Function: +IsNormalized + +Used by System.StringNormalizationExtensions.IsNormalized to detect if a string +is in a certain +Unicode Normalization Form. + +Return values: +0: lpStr is not normalized. +1: lpStr is normalized. +-1: internal error during normalization. +*/ +extern "C" int32_t GlobalizationNative_IsNormalized( + NormalizationForm normalizationForm, const UChar* lpStr, int32_t cwStrLength) +{ + UErrorCode err = U_ZERO_ERROR; + const UNormalizer2* pNormalizer = GetNormalizerForForm(normalizationForm, &err); + UBool isNormalized = unorm2_isNormalized(pNormalizer, lpStr, cwStrLength, &err); + + if (U_SUCCESS(err)) + { + return isNormalized == TRUE ? 1 : 0; + } + else + { + return -1; + } +} + +/* +Function: +NormalizeString + +Used by System.StringNormalizationExtensions.Normalize to normalize a string +into a certain +Unicode Normalization Form. + +Return values: +0: internal error during normalization. +>0: the length of the normalized string (not counting the null terminator). 
+*/ +extern "C" int32_t GlobalizationNative_NormalizeString( + NormalizationForm normalizationForm, const UChar* lpSrc, int32_t cwSrcLength, UChar* lpDst, int32_t cwDstLength) +{ + UErrorCode err = U_ZERO_ERROR; + const UNormalizer2* pNormalizer = GetNormalizerForForm(normalizationForm, &err); + int32_t normalizedLen = unorm2_normalize(pNormalizer, lpSrc, cwSrcLength, lpDst, cwDstLength, &err); + + return (U_SUCCESS(err) || (err == U_BUFFER_OVERFLOW_ERROR)) ? normalizedLen : 0; +} diff --git a/src/corefx/System.Globalization.Native/timeZoneInfo.cpp b/src/corefx/System.Globalization.Native/timeZoneInfo.cpp new file mode 100644 index 0000000000..d0e01e5ce6 --- /dev/null +++ b/src/corefx/System.Globalization.Native/timeZoneInfo.cpp @@ -0,0 +1,59 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. +// + +#include <stdint.h> +#include <unistd.h> +#include <unicode/ucal.h> + +#include "locale.hpp" +#include "holders.h" +#include "errors.h" + +/* +Gets the symlink value for the path. +*/ +extern "C" int32_t GlobalizationNative_ReadLink(const char* path, char* result, size_t resultCapacity) +{ + ssize_t r = readlink(path, result, resultCapacity - 1); // subtract one to make room for the NULL character + + if (r < 1 || r >= resultCapacity) + return false; + + result[r] = '\0'; + return true; +} + +/* +These values should be kept in sync with the managed Interop.GlobalizationInterop.TimeZoneDisplayNameType enum. +*/ +enum TimeZoneDisplayNameType : int32_t +{ + Generic = 0, + Standard = 1, + DaylightSavings = 2, +}; + +/* +Gets the localized display name for the specified time zone. 
+*/ +extern "C" ResultCode GlobalizationNative_GetTimeZoneDisplayName( + const UChar* localeName, const UChar* timeZoneId, TimeZoneDisplayNameType type, UChar* result, int32_t resultLength) +{ + UErrorCode err = U_ZERO_ERROR; + char locale[ULOC_FULLNAME_CAPACITY]; + GetLocale(localeName, locale, ULOC_FULLNAME_CAPACITY, false, &err); + + int32_t timeZoneIdLength = -1; // timeZoneId is NULL-terminated + UCalendar* calendar = ucal_open(timeZoneId, timeZoneIdLength, locale, UCAL_DEFAULT, &err); + UCalendarHolder calendarHolder(calendar, err); + + // TODO (https://github.com/dotnet/corefx/issues/5741): need to support Generic names, but ICU "C" api + // has no public option for this. For now, just use the ICU standard name for both Standard and Generic + // (which is the same behavior on Windows with the mincore TIME_ZONE_INFORMATION APIs). + ucal_getTimeZoneDisplayName( + calendar, type == DaylightSavings ? UCAL_DST : UCAL_STANDARD, locale, result, resultLength, &err); + + return GetResultCode(err); +} |