summaryrefslogtreecommitdiff
path: root/src/corefx/System.Globalization.Native/locale.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/corefx/System.Globalization.Native/locale.cpp')
-rw-r--r--src/corefx/System.Globalization.Native/locale.cpp206
1 files changed, 206 insertions, 0 deletions
diff --git a/src/corefx/System.Globalization.Native/locale.cpp b/src/corefx/System.Globalization.Native/locale.cpp
new file mode 100644
index 0000000000..1cb564a45a
--- /dev/null
+++ b/src/corefx/System.Globalization.Native/locale.cpp
@@ -0,0 +1,206 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+//
+
+#include <assert.h>
+#include <stdint.h>
+#include <string.h>
+#include <stdlib.h>
+#include <locale.h>
+
+#include "locale.hpp"
+
+// Collapses an ICU status code into a pass/fail flag.
+// Returns 1 when U_SUCCESS(status) holds and 0 otherwise.
+int32_t UErrorCodeToBool(UErrorCode status)
+{
+    if (!U_SUCCESS(status))
+    {
+        // These two codes are considered programming errors rather than
+        // ordinary runtime failures, so they trip an assert.
+        assert(status != U_BUFFER_OVERFLOW_ERROR);
+        assert(status != U_INTERNAL_PROGRAM_ERROR);
+
+        // add possible SetLastError support here
+
+        return 0;
+    }
+
+    return 1;
+}
+
+// Narrows a UTF-16 locale name to chars and resolves it through ICU.
+//
+// localeName              - NUL-terminated UTF-16 locale name; only code units <= 0x7F are accepted.
+//                           At most ULOC_FULLNAME_CAPACITY - 1 units are read; anything beyond is ignored.
+// localeNameResult        - receives the name produced by ICU.
+// localeNameResultLength  - capacity of localeNameResult, passed straight to ICU.
+// canonicalize            - true to use uloc_canonicalize, false to use uloc_getName.
+// err                     - in/out ICU status; set to U_ILLEGAL_ARGUMENT_ERROR for non-ASCII input or
+//                           when the language part cannot be extracted with its terminator.
+//
+// Returns the length reported by ICU, or ULOC_FULLNAME_CAPACITY when the input contains a
+// code unit above 0x7F.
+int32_t GetLocale(
+    const UChar* localeName, char* localeNameResult, int32_t localeNameResultLength, bool canonicalize, UErrorCode* err)
+{
+    // Zero-filled so the narrowed copy is always terminated, even when the loop runs to its bound.
+    char narrowName[ULOC_FULLNAME_CAPACITY] = {0};
+
+    // Convert ourselves instead of doing u_UCharsToChars as that function considers '@' a variant and stops.
+    int pos = 0;
+    while (pos < ULOC_FULLNAME_CAPACITY - 1)
+    {
+        const UChar unit = localeName[pos];
+
+        if (unit > (UChar)0x7F)
+        {
+            *err = U_ILLEGAL_ARGUMENT_ERROR;
+            return ULOC_FULLNAME_CAPACITY;
+        }
+
+        narrowName[pos] = (char)unit;
+
+        if (unit == (UChar)0x0)
+        {
+            break;
+        }
+
+        pos++;
+    }
+
+    const int32_t resultLength = canonicalize
+        ? uloc_canonicalize(narrowName, localeNameResult, localeNameResultLength, err)
+        : uloc_getName(narrowName, localeNameResult, localeNameResultLength, err);
+
+    if (!U_SUCCESS(*err))
+    {
+        return resultLength;
+    }
+
+    // Make sure the "language" part of the locale is reasonable (i.e. we can fetch it and it is within range).
+    // This mimics how the C++ ICU API determines if a locale is "bogus" or not.
+    char language[ULOC_LANG_CAPACITY];
+    uloc_getLanguage(narrowName, language, ULOC_LANG_CAPACITY, err);
+
+    const bool languageDidNotFit = (*err == U_BUFFER_OVERFLOW_ERROR) || (*err == U_STRING_NOT_TERMINATED_WARNING);
+    if (languageDidNotFit)
+    {
+        // ULOC_LANG_CAPACITY includes the null terminator, so a language that cannot be
+        // extracted together with its terminator must be invalid.
+        *err = U_ILLEGAL_ARGUMENT_ERROR;
+    }
+
+    return resultLength;
+}
+
+// Copies a NUL-terminated char string into a UChar buffer, including the terminator,
+// but only when the whole string fits.
+//
+// str          - NUL-terminated source string.
+// value        - destination buffer.
+// valueLength  - capacity of value, in UChars.
+//
+// Returns U_ZERO_ERROR after copying, or U_BUFFER_OVERFLOW_ERROR (nothing written) when
+// the string plus its terminator does not fit.
+UErrorCode u_charsToUChars_safe(const char* str, UChar* value, int32_t valueLength)
+{
+    // A non-positive capacity can never hold even the terminator. Rejecting it here also
+    // keeps the unsigned comparison below from seeing a wrapped negative value.
+    if (valueLength <= 0)
+    {
+        return U_BUFFER_OVERFLOW_ERROR;
+    }
+
+    // Keep the length as size_t: narrowing strlen()'s result to int could wrap for very
+    // long strings and let them slip past the capacity check.
+    const size_t len = strlen(str);
+
+    if (len >= (size_t)valueLength)
+    {
+        return U_BUFFER_OVERFLOW_ERROR;
+    }
+
+    // len < valueLength <= INT32_MAX, so len + 1 fits in int32_t.
+    u_charsToUChars(str, value, (int32_t)(len + 1));
+    return U_ZERO_ERROR;
+}
+
+// Rewrites every '_' in value to '-' in place, stopping at the first NUL or after
+// valueLength code units, whichever comes first.
+// Returns the number of code units scanned before stopping.
+int32_t FixupLocaleName(UChar* value, int32_t valueLength)
+{
+    int32_t scanned = 0;
+
+    while (scanned < valueLength && value[scanned] != (UChar)'\0')
+    {
+        if (value[scanned] == (UChar)'_')
+        {
+            value[scanned] = (UChar)'-';
+        }
+
+        scanned++;
+    }
+
+    return scanned;
+}
+
+// Reports whether the environment variable `name` exists and holds a non-empty value.
+bool IsEnvVarSet(const char* name)
+{
+    const char* value = getenv(name);
+
+    if (value == nullptr)
+    {
+        return false;
+    }
+
+    // An empty string counts as "not set".
+    return value[0] != '\0';
+}
+
+// On POSIX systems uloc_getDefault() queries setlocale(LC_MESSAGES) and uses
+// that value unless it is C or POSIX. In that case it tries to read LC_ALL,
+// LC_MESSAGES and LANG, and falls back to en_US_POSIX if none of them are set.
+//
+// en_US_POSIX is a weird locale: its collation rules treat 'a' and 'A' as
+// different letters even when ignoring case. It is also common for LC_ALL,
+// LC_MESSAGES and LANG to all be unset when running under Docker.
+//
+// We would rather default to invariant in that situation, so this returns ""
+// there. If any of the variables is set, we just call into ICU and let it do
+// whatever normalization it would do.
+const char* DetectDefaultLocaleName()
+{
+    const char* current = setlocale(LC_MESSAGES, nullptr);
+
+    if (current == nullptr)
+    {
+        return uloc_getDefault();
+    }
+
+    const bool isCOrPosix = (strcmp("C", current) == 0) || (strcmp("POSIX", current) == 0);
+    if (!isCOrPosix)
+    {
+        return uloc_getDefault();
+    }
+
+    const bool anyLocaleVarSet = IsEnvVarSet("LC_ALL") || IsEnvVarSet("LC_MESSAGES") || IsEnvVarSet("LANG");
+
+    return anyLocaleVarSet ? uloc_getDefault() : "";
+}
+
+// Canonicalizes a UTF-16 locale name and writes the result to value with '_' replaced by '-'.
+//
+// localeName   - NUL-terminated UTF-16 locale name to canonicalize.
+// value        - destination buffer for the resulting name.
+// valueLength  - capacity of value, in UChars.
+//
+// Returns 1 on success and 0 on failure.
+extern "C" int32_t GlobalizationNative_GetLocaleName(const UChar* localeName, UChar* value, int32_t valueLength)
+{
+    UErrorCode status = U_ZERO_ERROR;
+
+    char localeNameBuffer[ULOC_FULLNAME_CAPACITY];
+    int32_t localeLength = GetLocale(localeName, localeNameBuffer, ULOC_FULLNAME_CAPACITY, true, &status);
+
+    // ICU can fill the buffer completely without writing a terminator and still leave
+    // the status successful. The copy below runs strlen() on the buffer, so a name that
+    // leaves no room for the NUL must be rejected rather than read past the array.
+    if (U_SUCCESS(status) && localeLength >= ULOC_FULLNAME_CAPACITY)
+    {
+        status = U_ILLEGAL_ARGUMENT_ERROR;
+    }
+
+    if (U_SUCCESS(status))
+    {
+        status = u_charsToUChars_safe(localeNameBuffer, value, valueLength);
+
+        if (U_SUCCESS(status))
+        {
+            FixupLocaleName(value, valueLength);
+        }
+    }
+
+    return UErrorCodeToBool(status);
+}
+
+// Writes the default locale name to value, with '_' replaced by '-' in the base name.
+// When the default locale carries a "collation" keyword, its value is appended after a
+// single '_' separator.
+//
+// value        - destination buffer for the resulting name.
+// valueLength  - capacity of value, in UChars.
+//
+// Returns 1 on success and 0 on failure.
+extern "C" int32_t GlobalizationNative_GetDefaultLocaleName(UChar* value, int32_t valueLength)
+{
+    char localeNameBuffer[ULOC_FULLNAME_CAPACITY];
+    UErrorCode status = U_ZERO_ERROR;
+
+    const char* defaultLocale = DetectDefaultLocaleName();
+
+    int32_t baseNameLen = uloc_getBaseName(defaultLocale, localeNameBuffer, ULOC_FULLNAME_CAPACITY, &status);
+
+    // A result that fills the buffer exactly is left unterminated while the status still
+    // counts as success. The copy below runs strlen() on the buffer, so reject that case.
+    if (U_SUCCESS(status) && baseNameLen >= ULOC_FULLNAME_CAPACITY)
+    {
+        status = U_ILLEGAL_ARGUMENT_ERROR;
+    }
+
+    if (U_SUCCESS(status))
+    {
+        status = u_charsToUChars_safe(localeNameBuffer, value, valueLength);
+
+        if (U_SUCCESS(status))
+        {
+            int32_t localeNameLen = FixupLocaleName(value, valueLength);
+
+            char collationValueTemp[ULOC_KEYWORDS_CAPACITY];
+            int32_t collationLen =
+                uloc_getKeywordValue(defaultLocale, "collation", collationValueTemp, ULOC_KEYWORDS_CAPACITY, &status);
+
+            // Same unterminated-buffer hazard as above, for the keyword value.
+            if (U_SUCCESS(status) && collationLen >= ULOC_KEYWORDS_CAPACITY)
+            {
+                status = U_ILLEGAL_ARGUMENT_ERROR;
+            }
+
+            if (U_SUCCESS(status) && collationLen > 0)
+            {
+                // copy the collation; managed uses a "_" to represent collation (not
+                // "@collation=")
+                status = u_charsToUChars_safe("_", &value[localeNameLen], valueLength - localeNameLen);
+                if (U_SUCCESS(status))
+                {
+                    status = u_charsToUChars_safe(
+                        collationValueTemp, &value[localeNameLen + 1], valueLength - localeNameLen - 1);
+                }
+            }
+        }
+    }
+
+    return UErrorCodeToBool(status);
+}