Add wrappers for ICU Casing

Unlike ICU, the CLR only wants to perform simple casing, so we can't use the ICU APIs that operate on whole strings. Instead we have to case the string ourselves, one code point at a time.
author: Matt Ellis <matell@microsoft.com> 2015-07-16 13:33:03 -0700
committer: Matt Ellis <matell@microsoft.com> 2015-09-22 11:48:35 -0700
commit: f73c7f7a98efc372bbb35e9e07e3ae62e96bb135 (patch)
tree: 6cc8d8949dfc0e63f9270ce582543c2ef704fe73 /src/corefx
parent: 4cd931dabc412d92ac32b37f0b79111ca466eb34 (diff)
download: coreclr-f73c7f7a98efc372bbb35e9e07e3ae62e96bb135.tar.gz
coreclr-f73c7f7a98efc372bbb35e9e07e3ae62e96bb135.tar.bz2
coreclr-f73c7f7a98efc372bbb35e9e07e3ae62e96bb135.zip
2 files changed, 121 insertions, 0 deletions
diff --git a/src/corefx/System.Globalization.Native/CMakeLists.txt b/src/corefx/System.Globalization.Native/CMakeLists.txt
index 9fc28d3ad0..47b5dd5bd3 100644
--- a/src/corefx/System.Globalization.Native/CMakeLists.txt
+++ b/src/corefx/System.Globalization.Native/CMakeLists.txt
@@ -15,6 +15,7 @@ endif()
 add_compile_options(-fPIC)
 
 set(NATIVEGLOBALIZATION_SOURCES
+    casing.cpp
     idna.cpp
     normalization.cpp
 )
diff --git a/src/corefx/System.Globalization.Native/casing.cpp b/src/corefx/System.Globalization.Native/casing.cpp
new file mode 100644
index 0000000000..e7cd85b3f0
--- /dev/null
+++ b/src/corefx/System.Globalization.Native/casing.cpp
@@ -0,0 +1,120 @@
+//
+// Copyright (c) Microsoft. All rights reserved.
+// Licensed under the MIT license. See LICENSE file in the project root for full license information.
+//
+
+#include <assert.h>
+#include <stdint.h>
+#include <unicode/uchar.h>
+#include <unicode/utf16.h>
+
+/*
+Function:
+ToUpperSimple
+
+Upper cases lpSrc one code point at a time with ICU's u_toupper and writes the
+UTF-16 encoding of each result to lpDst.
+
+Parameters:
+lpSrc       - source buffer of UTF-16 code units
+cwSrcLength - number of code units to read from lpSrc
+lpDst       - destination buffer that receives the cased code units
+cwDstLength - capacity of lpDst, in code units
+
+Conversion stops early, instead of writing past the end of lpDst, when the
+destination has no room left for the next code point.
+*/
+extern "C" void ToUpperSimple(const UChar* lpSrc, int32_t cwSrcLength, UChar* lpDst, int32_t cwDstLength)
+{
+    int32_t srcIdx = 0;
+    int32_t dstIdx = 0;
+
+    UBool isError = FALSE;
+
+    // U16_APPEND only verifies the capacity when it has to write a surrogate pair; a single code
+    // unit is stored unconditionally, so the remaining space has to be checked here as well.
+    while (srcIdx < cwSrcLength && dstIdx < cwDstLength)
+    {
+        UChar32 srcCodepoint;
+        UChar32 dstCodepoint;
+
+        U16_NEXT(lpSrc, srcIdx, cwSrcLength, srcCodepoint);
+        dstCodepoint = u_toupper(srcCodepoint);
+
+        U16_APPEND(lpDst, dstIdx, cwDstLength, dstCodepoint, isError);
+
+        // Ensure that we wrote the data and the source codepoint when encoded in UTF16 is the same
+        // number of code units as the cased codepoint.
+        assert(isError == FALSE && srcIdx == dstIdx);
+
+        // The assert above is compiled out when NDEBUG is defined; stop rather than keep going
+        // after a failed append, which would leave the two indices out of step.
+        if (isError)
+        {
+            break;
+        }
+    }
+
+    // Reaching this point with unread input means the destination buffer was too small.
+    assert(srcIdx >= cwSrcLength);
+}
+
+/*
+Function:
+ToLowerSimple
+
+Lower cases lpSrc one code point at a time with ICU's u_tolower and writes the
+UTF-16 encoding of each result to lpDst.
+
+Parameters:
+lpSrc       - source buffer of UTF-16 code units
+cwSrcLength - number of code units to read from lpSrc
+lpDst       - destination buffer that receives the cased code units
+cwDstLength - capacity of lpDst, in code units
+
+Conversion stops early, instead of writing past the end of lpDst, when the
+destination has no room left for the next code point.
+*/
+extern "C" void ToLowerSimple(const UChar* lpSrc, int32_t cwSrcLength, UChar* lpDst, int32_t cwDstLength)
+{
+    int32_t srcIdx = 0;
+    int32_t dstIdx = 0;
+
+    UBool isError = FALSE;
+
+    // U16_APPEND only verifies the capacity when it has to write a surrogate pair; a single code
+    // unit is stored unconditionally, so the remaining space has to be checked here as well.
+    while (srcIdx < cwSrcLength && dstIdx < cwDstLength)
+    {
+        UChar32 srcCodepoint;
+        UChar32 dstCodepoint;
+
+        U16_NEXT(lpSrc, srcIdx, cwSrcLength, srcCodepoint);
+        dstCodepoint = u_tolower(srcCodepoint);
+
+        U16_APPEND(lpDst, dstIdx, cwDstLength, dstCodepoint, isError);
+
+        // Ensure that we wrote the data and the source codepoint when encoded in UTF16 is the same
+        // number of code units as the cased codepoint.
+        assert(isError == FALSE && srcIdx == dstIdx);
+
+        // The assert above is compiled out when NDEBUG is defined; stop rather than keep going
+        // after a failed append, which would leave the two indices out of step.
+        if (isError)
+        {
+            break;
+        }
+    }
+
+    // Reaching this point with unread input means the destination buffer was too small.
+    assert(srcIdx >= cwSrcLength);
+}
+
+/*
+Function:
+ToUpperSimpleTurkishAzeri
+
+Upper cases lpSrc one code point at a time and writes the UTF-16 encoding of
+each result to lpDst. U+0069 (LATIN SMALL LETTER I) is mapped to U+0130 (LATIN
+CAPITAL LETTER I WITH DOT ABOVE); every other code point goes through ICU's
+u_toupper.
+
+Parameters:
+lpSrc       - source buffer of UTF-16 code units
+cwSrcLength - number of code units to read from lpSrc
+lpDst       - destination buffer that receives the cased code units
+cwDstLength - capacity of lpDst, in code units
+
+Conversion stops early, instead of writing past the end of lpDst, when the
+destination has no room left for the next code point.
+*/
+extern "C" void ToUpperSimpleTurkishAzeri(const UChar* lpSrc, int32_t cwSrcLength, UChar* lpDst, int32_t cwDstLength)
+{
+    int32_t srcIdx = 0;
+    int32_t dstIdx = 0;
+
+    UBool isError = FALSE;
+
+    // U16_APPEND only verifies the capacity when it has to write a surrogate pair; a single code
+    // unit is stored unconditionally, so the remaining space has to be checked here as well.
+    while (srcIdx < cwSrcLength && dstIdx < cwDstLength)
+    {
+        UChar32 srcCodepoint;
+        UChar32 dstCodepoint;
+
+        U16_NEXT(lpSrc, srcIdx, cwSrcLength, srcCodepoint);
+
+        // Dotted small 'i' upper cases to dotted capital 'I' instead of plain 'I'.
+        dstCodepoint = ((srcCodepoint == (UChar32)0x0069) ? (UChar32)0x0130 : u_toupper(srcCodepoint));
+
+        U16_APPEND(lpDst, dstIdx, cwDstLength, dstCodepoint, isError);
+
+        // Ensure that we wrote the data and the source codepoint when encoded in UTF16 is the same
+        // number of code units as the cased codepoint.
+        assert(isError == FALSE && srcIdx == dstIdx);
+
+        // The assert above is compiled out when NDEBUG is defined; stop rather than keep going
+        // after a failed append, which would leave the two indices out of step.
+        if (isError)
+        {
+            break;
+        }
+    }
+
+    // Reaching this point with unread input means the destination buffer was too small.
+    assert(srcIdx >= cwSrcLength);
+}
+
+/*
+Function:
+ToLowerSimpleTurkishAzeri
+
+Lower cases lpSrc one code point at a time and writes the UTF-16 encoding of
+each result to lpDst. U+0049 (LATIN CAPITAL LETTER I) is mapped to U+0131
+(LATIN SMALL LETTER DOTLESS I); every other code point goes through ICU's
+u_tolower.
+
+Parameters:
+lpSrc       - source buffer of UTF-16 code units
+cwSrcLength - number of code units to read from lpSrc
+lpDst       - destination buffer that receives the cased code units
+cwDstLength - capacity of lpDst, in code units
+
+Conversion stops early, instead of writing past the end of lpDst, when the
+destination has no room left for the next code point.
+*/
+extern "C" void ToLowerSimpleTurkishAzeri(const UChar* lpSrc, int32_t cwSrcLength, UChar* lpDst, int32_t cwDstLength)
+{
+    int32_t srcIdx = 0;
+    int32_t dstIdx = 0;
+
+    UBool isError = FALSE;
+
+    // U16_APPEND only verifies the capacity when it has to write a surrogate pair; a single code
+    // unit is stored unconditionally, so the remaining space has to be checked here as well.
+    while (srcIdx < cwSrcLength && dstIdx < cwDstLength)
+    {
+        UChar32 srcCodepoint;
+        UChar32 dstCodepoint;
+
+        U16_NEXT(lpSrc, srcIdx, cwSrcLength, srcCodepoint);
+
+        // Plain capital 'I' lower cases to dotless small 'i' instead of dotted 'i'.
+        dstCodepoint = ((srcCodepoint == (UChar32)0x0049) ? (UChar32)0x0131 : u_tolower(srcCodepoint));
+
+        U16_APPEND(lpDst, dstIdx, cwDstLength, dstCodepoint, isError);
+
+        // Ensure that we wrote the data and the source codepoint when encoded in UTF16 is the same
+        // number of code units as the cased codepoint.
+        assert(isError == FALSE && srcIdx == dstIdx);
+
+        // The assert above is compiled out when NDEBUG is defined; stop rather than keep going
+        // after a failed append, which would leave the two indices out of step.
+        if (isError)
+        {
+            break;
+        }
+    }
+
+    // Reaching this point with unread input means the destination buffer was too small.
+    assert(srcIdx >= cwSrcLength);
+}
+
author	Matt Ellis <matell@microsoft.com>	2015-07-16 13:33:03 -0700
committer	Matt Ellis <matell@microsoft.com>	2015-09-22 11:48:35 -0700
commit	f73c7f7a98efc372bbb35e9e07e3ae62e96bb135 (patch)
tree	6cc8d8949dfc0e63f9270ce582543c2ef704fe73 /src/corefx
parent	4cd931dabc412d92ac32b37f0b79111ca466eb34 (diff)
download	coreclr-f73c7f7a98efc372bbb35e9e07e3ae62e96bb135.tar.gz coreclr-f73c7f7a98efc372bbb35e9e07e3ae62e96bb135.tar.bz2 coreclr-f73c7f7a98efc372bbb35e9e07e3ae62e96bb135.zip