diff options
author | Matt Ellis <matell@microsoft.com> | 2015-11-19 15:00:20 -0800 |
---|---|---|
committer | Matt Ellis <matell@microsoft.com> | 2015-11-20 13:33:34 -0800 |
commit | 6e2b263b0b1925f1e1d99c652afef460b63c620d (patch) | |
tree | 4728b85db89da95c33270b3ed534d905b2df506b /src/corefx | |
parent | daa24b15ea785252397bf105a6788d01b7fb936e (diff) | |
download | coreclr-6e2b263b0b1925f1e1d99c652afef460b63c620d.tar.gz coreclr-6e2b263b0b1925f1e1d99c652afef460b63c620d.tar.bz2 coreclr-6e2b263b0b1925f1e1d99c652afef460b63c620d.zip |
Cache UCollators in CompareInfo
Creating a UCollator is an expensive operation and we are presently
doing it on every collation operation. We can improve this by caching
the UCollators we use for collation on the CompareInfo object itself.
This change introduces a new method GetSortHandle which gives back an
opaque wrapper which can be used in collation operations instead of a
culture name.
Internally we represent this as a struct holding the two types of
UCollators we care about (if we add additional collators per locale with
different options to handle other types of CompareOption flags, we can
cache these as well). Collation methods can get a `const UCollator*`
reference from the sort handle which is safe to share across
threads (per the ICU Design Guidelines[1]).
Unfortunately, tracking the lifetime of the SortHandle itself is not as
straightforward as I would like. Right now, we use a SafeHandle to wrap
the internal handle and rely on the finalizer of the class to clean up
the native resources. However this means that the following code sample
will create two finalizable objects:
```csharp
var c1 = new CultureInfo("en-US").CompareInfo;
var c2 = new CultureInfo("en-US").CompareInfo;
```
If this ends up being an issue, we could explore an approach where we
keep a cache of SortHandles in managed code and pass out references to
that SortHandle which would let us share a single SortHandle for a given
locale across more than one CompareInfo object.
Wins are seen in places where we previously did lots of string
comparisons in a tight loop (for example: dotnet/corefx#3811), moving
these operations down to ~6ms per iteration vs ~330ms on my local machine.
[1]: http://userguide.icu-project.org/design
Diffstat (limited to 'src/corefx')
-rw-r--r-- | src/corefx/System.Globalization.Native/collation.cpp | 109 |
1 files changed, 78 insertions, 31 deletions
diff --git a/src/corefx/System.Globalization.Native/collation.cpp b/src/corefx/System.Globalization.Native/collation.cpp index fadaa73eac..fd6e038ea7 100644 --- a/src/corefx/System.Globalization.Native/collation.cpp +++ b/src/corefx/System.Globalization.Native/collation.cpp @@ -19,31 +19,89 @@ const int32_t CompareOptionsIgnoreCase = 1; // const int32_t CompareOptionsStringSort = 0x20000000; /* + * For increased performance, we cache the UCollator objects for a locale and + * share them across threads. This is safe (and supported in ICU) if we ensure + * multiple threads are only ever dealing with const UCollators. + */ +typedef struct _sort_handle +{ + UCollator* regular; + UCollator* ignoreCase; + + _sort_handle() : regular(nullptr), ignoreCase(nullptr) + { + } + +} SortHandle; + +/* * To collator returned by this function is owned by the callee and must be - *closed when this method returns - * with a U_SUCCESS UErrorCode. + * closed when this method returns with a U_SUCCESS UErrorCode. * * On error, the return value is undefined. 
*/ -UCollator* GetCollatorForLocaleAndOptions(const char* lpLocaleName, int32_t options, UErrorCode* pErr) +UCollator* CloneCollatorWithOptions(const UCollator* pCollator, int32_t options, UErrorCode* pErr) +{ + UCollator* pClonedCollator = ucol_safeClone(pCollator, nullptr, nullptr, pErr); + + if ((options & CompareOptionsIgnoreCase) == CompareOptionsIgnoreCase) + { + ucol_setAttribute(pClonedCollator, UCOL_STRENGTH, UCOL_SECONDARY, pErr); + } + + return pClonedCollator; +} + +extern "C" SortHandle* GetSortHandle(const char* lpLocaleName) +{ + SortHandle* pSortHandle = new SortHandle(); + + UErrorCode err = U_ZERO_ERROR; + + pSortHandle->regular = ucol_open(lpLocaleName, &err); + pSortHandle->ignoreCase = CloneCollatorWithOptions(pSortHandle->regular, CompareOptionsIgnoreCase, &err); + + if (U_FAILURE(err)) + { + if (pSortHandle->regular != nullptr) + ucol_close(pSortHandle->regular); + + if (pSortHandle->ignoreCase != nullptr) + ucol_close(pSortHandle->ignoreCase); + + delete pSortHandle; + pSortHandle = nullptr; + } + + return pSortHandle; +} + +extern "C" void CloseSortHandle(SortHandle* pSortHandle) { - UCollator* pColl = nullptr; + ucol_close(pSortHandle->regular); + ucol_close(pSortHandle->ignoreCase); + + pSortHandle->regular = nullptr; + pSortHandle->ignoreCase = nullptr; - pColl = ucol_open(lpLocaleName, pErr); + delete pSortHandle; +} +const UCollator* GetCollatorFromSortHandle(const SortHandle* pSortHandle, int32_t options, UErrorCode* pErr) +{ if ((options & CompareOptionsIgnoreCase) == CompareOptionsIgnoreCase) { - ucol_setAttribute(pColl, UCOL_STRENGTH, UCOL_SECONDARY, pErr); + return pSortHandle->ignoreCase; } - return pColl; + return pSortHandle->regular; } /* Function: CompareString */ -extern "C" int32_t CompareString(const char* lpLocaleName, +extern "C" int32_t CompareString(const SortHandle* pSortHandle, const UChar* lpStr1, int32_t cwStr1Length, const UChar* lpStr2, @@ -56,12 +114,11 @@ extern "C" int32_t CompareString(const char* 
lpLocaleName, UCollationResult result = UCOL_EQUAL; UErrorCode err = U_ZERO_ERROR; - UCollator* pColl = GetCollatorForLocaleAndOptions(lpLocaleName, options, &err); + const UCollator* pColl = GetCollatorFromSortHandle(pSortHandle, options, &err); if (U_SUCCESS(err)) { result = ucol_strcoll(pColl, lpStr1, cwStr1Length, lpStr2, cwStr2Length); - ucol_close(pColl); } return result; @@ -72,13 +129,13 @@ Function: IndexOf */ extern "C" int32_t -IndexOf(const char* lpLocaleName, const UChar* lpTarget, int32_t cwTargetLength, const UChar* lpSource, int32_t cwSourceLength, int32_t options) +IndexOf(const SortHandle* pSortHandle, const UChar* lpTarget, int32_t cwTargetLength, const UChar* lpSource, int32_t cwSourceLength, int32_t options) { static_assert(USEARCH_DONE == -1, "managed side requires -1 for not found"); int32_t result = USEARCH_DONE; UErrorCode err = U_ZERO_ERROR; - UCollator* pColl = GetCollatorForLocaleAndOptions(lpLocaleName, options, &err); + const UCollator* pColl = GetCollatorFromSortHandle(pSortHandle, options, &err); if (U_SUCCESS(err)) { @@ -89,8 +146,6 @@ IndexOf(const char* lpLocaleName, const UChar* lpTarget, int32_t cwTargetLength, result = usearch_first(pSearch, &err); usearch_close(pSearch); } - - ucol_close(pColl); } return result; @@ -101,13 +156,13 @@ Function: LastIndexOf */ extern "C" int32_t LastIndexOf( - const char* lpLocaleName, const UChar* lpTarget, int32_t cwTargetLength, const UChar* lpSource, int32_t cwSourceLength, int32_t options) + const SortHandle* pSortHandle, const UChar* lpTarget, int32_t cwTargetLength, const UChar* lpSource, int32_t cwSourceLength, int32_t options) { static_assert(USEARCH_DONE == -1, "managed side requires -1 for not found"); int32_t result = USEARCH_DONE; UErrorCode err = U_ZERO_ERROR; - UCollator* pColl = GetCollatorForLocaleAndOptions(lpLocaleName, options, &err); + const UCollator* pColl = GetCollatorFromSortHandle(pSortHandle, options, &err); if (U_SUCCESS(err)) { @@ -118,8 +173,6 @@ extern "C" int32_t 
LastIndexOf( result = usearch_last(pSearch, &err); usearch_close(pSearch); } - - ucol_close(pColl); } return result; @@ -202,11 +255,11 @@ IndexOfOrdinalIgnoreCase( Return value is a "Win32 BOOL" (1 = true, 0 = false) */ extern "C" int32_t StartsWith( - const char* lpLocaleName, const UChar* lpTarget, int32_t cwTargetLength, const UChar* lpSource, int32_t cwSourceLength, int32_t options) + const SortHandle* pSortHandle, const UChar* lpTarget, int32_t cwTargetLength, const UChar* lpSource, int32_t cwSourceLength, int32_t options) { int32_t result = FALSE; UErrorCode err = U_ZERO_ERROR; - UCollator* pColl = GetCollatorForLocaleAndOptions(lpLocaleName, options, &err); + const UCollator* pColl = GetCollatorFromSortHandle(pSortHandle, options, &err); if (U_SUCCESS(err)) { @@ -255,8 +308,6 @@ extern "C" int32_t StartsWith( usearch_close(pSearch); } - - ucol_close(pColl); } return result; @@ -266,11 +317,11 @@ extern "C" int32_t StartsWith( Return value is a "Win32 BOOL" (1 = true, 0 = false) */ extern "C" int32_t EndsWith( - const char* lpLocaleName, const UChar* lpTarget, int32_t cwTargetLength, const UChar* lpSource, int32_t cwSourceLength, int32_t options) + const SortHandle* pSortHandle, const UChar* lpTarget, int32_t cwTargetLength, const UChar* lpSource, int32_t cwSourceLength, int32_t options) { int32_t result = FALSE; UErrorCode err = U_ZERO_ERROR; - UCollator* pColl = GetCollatorForLocaleAndOptions(lpLocaleName, options, &err); + const UCollator* pColl = GetCollatorFromSortHandle(pSortHandle, options, &err); if (U_SUCCESS(err)) { @@ -290,19 +341,17 @@ extern "C" int32_t EndsWith( // TODO (dotnet/corefx#3467): We should do something similar to what // StartsWith does where we can ignore - // some collation elements at the end of te string if they are zero. + // some collation elements at the end of the string if they are zero. 
} usearch_close(pSearch); } - - ucol_close(pColl); } return result; } -extern "C" int32_t GetSortKey(const char* lpLocaleName, +extern "C" int32_t GetSortKey(const SortHandle* pSortHandle, const UChar* lpStr, int32_t cwStrLength, uint8_t* sortKey, @@ -310,14 +359,12 @@ extern "C" int32_t GetSortKey(const char* lpLocaleName, int32_t options) { UErrorCode err = U_ZERO_ERROR; - UCollator* pColl = GetCollatorForLocaleAndOptions(lpLocaleName, options, &err); + const UCollator* pColl = GetCollatorFromSortHandle(pSortHandle, options, &err); int32_t result = 0; if (U_SUCCESS(err)) { result = ucol_getSortKey(pColl, lpStr, cwStrLength, sortKey, cbSortKeyLength); - - ucol_close(pColl); } return result; |