diff options
author | Matt Ellis <matell@microsoft.com> | 2015-07-16 13:33:03 -0700 |
---|---|---|
committer | Matt Ellis <matell@microsoft.com> | 2015-09-22 11:48:35 -0700 |
commit | f73c7f7a98efc372bbb35e9e07e3ae62e96bb135 (patch) | |
tree | 6cc8d8949dfc0e63f9270ce582543c2ef704fe73 /src/corefx | |
parent | 4cd931dabc412d92ac32b37f0b79111ca466eb34 (diff) | |
download | coreclr-f73c7f7a98efc372bbb35e9e07e3ae62e96bb135.tar.gz coreclr-f73c7f7a98efc372bbb35e9e07e3ae62e96bb135.tar.bz2 coreclr-f73c7f7a98efc372bbb35e9e07e3ae62e96bb135.zip |
Add wrappers for ICU Casing
Unlike ICU, the CLR only wants to perform simple casing, so we can't
use the ICU APIs that work over strings. Instead we have to do
codepoint by codepoint casing ourselves.
Diffstat (limited to 'src/corefx')
-rw-r--r-- | src/corefx/System.Globalization.Native/CMakeLists.txt | 1 | ||||
-rw-r--r-- | src/corefx/System.Globalization.Native/casing.cpp | 120 |
2 files changed, 121 insertions, 0 deletions
diff --git a/src/corefx/System.Globalization.Native/CMakeLists.txt b/src/corefx/System.Globalization.Native/CMakeLists.txt index 9fc28d3ad0..47b5dd5bd3 100644 --- a/src/corefx/System.Globalization.Native/CMakeLists.txt +++ b/src/corefx/System.Globalization.Native/CMakeLists.txt @@ -15,6 +15,7 @@ endif() add_compile_options(-fPIC) set(NATIVEGLOBALIZATION_SOURCES + casing.cpp idna.cpp normalization.cpp ) diff --git a/src/corefx/System.Globalization.Native/casing.cpp b/src/corefx/System.Globalization.Native/casing.cpp new file mode 100644 index 0000000000..e7cd85b3f0 --- /dev/null +++ b/src/corefx/System.Globalization.Native/casing.cpp @@ -0,0 +1,120 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// Licensed under the MIT license. See LICENSE file in the project root for full license information. +// + +#include <assert.h> +#include <stdint.h> +#include <unicode/uchar.h> +#include <unicode/utf16.h> + +/* +Function: +ToUpperSimple +*/ +extern "C" void ToUpperSimple(const UChar* lpSrc, int32_t cwSrcLength, UChar* lpDst, int32_t cwDstLength) +{ + int32_t srcIdx = 0; + int32_t dstIdx = 0; + + UBool isError = FALSE; + + while (srcIdx < cwSrcLength) + { + UChar32 srcCodepoint; + UChar32 dstCodepoint; + + U16_NEXT(lpSrc, srcIdx, cwSrcLength, srcCodepoint); + dstCodepoint = u_toupper(srcCodepoint); + + U16_APPEND(lpDst, dstIdx, cwDstLength, dstCodepoint, isError); + + // Ensure that we wrote the data and the source codepoint when encoded in UTF16 is the same + // number of code units as the cased codepoint. 
+ assert(isError == FALSE && srcIdx == dstIdx); + } +} + +/* +Function: +ToLowerSimple +*/ +extern "C" void ToLowerSimple(const UChar* lpSrc, int32_t cwSrcLength, UChar* lpDst, int32_t cwDstLength) +{ + int32_t srcIdx = 0; + int32_t dstIdx = 0; + + UBool isError = FALSE; + + while (srcIdx < cwSrcLength) + { + UChar32 srcCodepoint; + UChar32 dstCodepoint; + + U16_NEXT(lpSrc, srcIdx, cwSrcLength, srcCodepoint); + dstCodepoint = u_tolower(srcCodepoint); + + U16_APPEND(lpDst, dstIdx, cwDstLength, dstCodepoint, isError); + + // Ensure that we wrote the data and the source codepoint when encoded in UTF16 is the same + // number of code units as the cased codepoint. + assert(isError == FALSE && srcIdx == dstIdx); + } +} + +/* +Function: +ToUpperSimpleTurkishAzeri +*/ +extern "C" void ToUpperSimpleTurkishAzeri(const UChar* lpSrc, int32_t cwSrcLength, UChar* lpDst, int32_t cwDstLength) +{ + int32_t srcIdx = 0; + int32_t dstIdx = 0; + + UBool isError = FALSE; + + while (srcIdx < cwSrcLength) + { + UChar32 srcCodepoint; + UChar32 dstCodepoint; + + U16_NEXT(lpSrc, srcIdx, cwSrcLength, srcCodepoint); + + dstCodepoint = ((srcCodepoint == (UChar32)0x0069) ? (UChar32)0x0130 : u_toupper(srcCodepoint)); + + U16_APPEND(lpDst, dstIdx, cwDstLength, dstCodepoint, isError); + + // Ensure that we wrote the data and the source codepoint when encoded in UTF16 is the same + // number of code units as the cased codepoint. + assert(isError == FALSE && srcIdx == dstIdx); + } +} + +/* +Function: +ToLowerSimpleTurkishAzeri +*/ +extern "C" void ToLowerSimpleTurkishAzeri(const UChar* lpSrc, int32_t cwSrcLength, UChar* lpDst, int32_t cwDstLength) +{ + int32_t srcIdx = 0; + int32_t dstIdx = 0; + + UBool isError = FALSE; + + while (srcIdx < cwSrcLength) + { + UChar32 srcCodepoint; + UChar32 dstCodepoint; + + U16_NEXT(lpSrc, srcIdx, cwSrcLength, srcCodepoint); + + dstCodepoint = ((srcCodepoint == (UChar32)0x0049) ? 
(UChar32)0x0131 : u_tolower(srcCodepoint)); + + U16_APPEND(lpDst, dstIdx, cwDstLength, dstCodepoint, isError); + + // Ensure that we wrote the data and the source codepoint when encoded in UTF16 is the same + // number of code units as the cased codepoint. + assert(isError == FALSE && srcIdx == dstIdx); + } +} + |