summaryrefslogtreecommitdiff
path: root/src/corefx
diff options
context:
space:
mode:
authorMatt Ellis <matell@microsoft.com>2015-11-19 15:00:20 -0800
committerMatt Ellis <matell@microsoft.com>2015-11-20 13:33:34 -0800
commit6e2b263b0b1925f1e1d99c652afef460b63c620d (patch)
tree4728b85db89da95c33270b3ed534d905b2df506b /src/corefx
parentdaa24b15ea785252397bf105a6788d01b7fb936e (diff)
downloadcoreclr-6e2b263b0b1925f1e1d99c652afef460b63c620d.tar.gz
coreclr-6e2b263b0b1925f1e1d99c652afef460b63c620d.tar.bz2
coreclr-6e2b263b0b1925f1e1d99c652afef460b63c620d.zip
Cache UCollators in CompareInfo
Creating a UCollator is an expensive operation and we are presently doing it on every collation operation. We can improve this by caching the UCollators we use for collation on the CompareInfo object itself. This change introduces a new method GetSortHandle which gives back an opaque wrapper which can be used in collation operations instead of a culture name. Internally we represent this as a struct holding the two types of UCollators we care about (if we add additional collators per locale with different options to handle other types of CompareOption flags, we can cache these as well). Collation methods can get a `const UCollator*` reference from the sort handle which is safe to share across threads (per the ICU Design Guidelines[1]). Unfortunately, tracking the lifetime of the SortHandle itself is not as straightforward as I would like. Right now, we use a SafeHandle to wrap the internal handle and rely on the finalizer of the class to clean up the native resources. However, this means that the following code sample will create two finalizable objects: ```csharp var c1 = new CultureInfo("en-US").CompareInfo; var c2 = new CultureInfo("en-US").CompareInfo; ``` If this ends up being an issue, we could explore an approach where we keep a cache of SortHandles in managed code and pass out references to that SortHandle which would let us share a single SortHandle for a given locale across more than one CompareInfo object. Wins are seen in places where we previously did lots of string comparisons in a tight loop (for example: dotnet/corefx#3811) moving these operations down to ~6ms per iteration vs ~330ms on my local machine. [1]: http://userguide.icu-project.org/design
Diffstat (limited to 'src/corefx')
-rw-r--r--src/corefx/System.Globalization.Native/collation.cpp109
1 files changed, 78 insertions, 31 deletions
diff --git a/src/corefx/System.Globalization.Native/collation.cpp b/src/corefx/System.Globalization.Native/collation.cpp
index fadaa73eac..fd6e038ea7 100644
--- a/src/corefx/System.Globalization.Native/collation.cpp
+++ b/src/corefx/System.Globalization.Native/collation.cpp
@@ -19,31 +19,89 @@ const int32_t CompareOptionsIgnoreCase = 1;
// const int32_t CompareOptionsStringSort = 0x20000000;
/*
+ * For increased performance, we cache the UCollator objects for a locale and
+ * share them across threads. This is safe (and supported in ICU) if we ensure
+ * multiple threads are only ever dealing with const UCollators.
+ */
+typedef struct _sort_handle
+{
+ UCollator* regular;
+ UCollator* ignoreCase;
+
+ _sort_handle() : regular(nullptr), ignoreCase(nullptr)
+ {
+ }
+
+} SortHandle;
+
+/*
* To collator returned by this function is owned by the callee and must be
- *closed when this method returns
- * with a U_SUCCESS UErrorCode.
+ * closed when this method returns with a U_SUCCESS UErrorCode.
*
* On error, the return value is undefined.
*/
-UCollator* GetCollatorForLocaleAndOptions(const char* lpLocaleName, int32_t options, UErrorCode* pErr)
+UCollator* CloneCollatorWithOptions(const UCollator* pCollator, int32_t options, UErrorCode* pErr)
+{
+ UCollator* pClonedCollator = ucol_safeClone(pCollator, nullptr, nullptr, pErr);
+
+ if ((options & CompareOptionsIgnoreCase) == CompareOptionsIgnoreCase)
+ {
+ ucol_setAttribute(pClonedCollator, UCOL_STRENGTH, UCOL_SECONDARY, pErr);
+ }
+
+ return pClonedCollator;
+}
+
+extern "C" SortHandle* GetSortHandle(const char* lpLocaleName)
+{
+ SortHandle* pSortHandle = new SortHandle();
+
+ UErrorCode err = U_ZERO_ERROR;
+
+ pSortHandle->regular = ucol_open(lpLocaleName, &err);
+ pSortHandle->ignoreCase = CloneCollatorWithOptions(pSortHandle->regular, CompareOptionsIgnoreCase, &err);
+
+ if (U_FAILURE(err))
+ {
+ if (pSortHandle->regular != nullptr)
+ ucol_close(pSortHandle->regular);
+
+ if (pSortHandle->ignoreCase != nullptr)
+ ucol_close(pSortHandle->ignoreCase);
+
+ delete pSortHandle;
+ pSortHandle = nullptr;
+ }
+
+ return pSortHandle;
+}
+
+extern "C" void CloseSortHandle(SortHandle* pSortHandle)
{
- UCollator* pColl = nullptr;
+ ucol_close(pSortHandle->regular);
+ ucol_close(pSortHandle->ignoreCase);
+
+ pSortHandle->regular = nullptr;
+ pSortHandle->ignoreCase = nullptr;
- pColl = ucol_open(lpLocaleName, pErr);
+ delete pSortHandle;
+}
+const UCollator* GetCollatorFromSortHandle(const SortHandle* pSortHandle, int32_t options, UErrorCode* pErr)
+{
if ((options & CompareOptionsIgnoreCase) == CompareOptionsIgnoreCase)
{
- ucol_setAttribute(pColl, UCOL_STRENGTH, UCOL_SECONDARY, pErr);
+ return pSortHandle->ignoreCase;
}
- return pColl;
+ return pSortHandle->regular;
}
/*
Function:
CompareString
*/
-extern "C" int32_t CompareString(const char* lpLocaleName,
+extern "C" int32_t CompareString(const SortHandle* pSortHandle,
const UChar* lpStr1,
int32_t cwStr1Length,
const UChar* lpStr2,
@@ -56,12 +114,11 @@ extern "C" int32_t CompareString(const char* lpLocaleName,
UCollationResult result = UCOL_EQUAL;
UErrorCode err = U_ZERO_ERROR;
- UCollator* pColl = GetCollatorForLocaleAndOptions(lpLocaleName, options, &err);
+ const UCollator* pColl = GetCollatorFromSortHandle(pSortHandle, options, &err);
if (U_SUCCESS(err))
{
result = ucol_strcoll(pColl, lpStr1, cwStr1Length, lpStr2, cwStr2Length);
- ucol_close(pColl);
}
return result;
@@ -72,13 +129,13 @@ Function:
IndexOf
*/
extern "C" int32_t
-IndexOf(const char* lpLocaleName, const UChar* lpTarget, int32_t cwTargetLength, const UChar* lpSource, int32_t cwSourceLength, int32_t options)
+IndexOf(const SortHandle* pSortHandle, const UChar* lpTarget, int32_t cwTargetLength, const UChar* lpSource, int32_t cwSourceLength, int32_t options)
{
static_assert(USEARCH_DONE == -1, "managed side requires -1 for not found");
int32_t result = USEARCH_DONE;
UErrorCode err = U_ZERO_ERROR;
- UCollator* pColl = GetCollatorForLocaleAndOptions(lpLocaleName, options, &err);
+ const UCollator* pColl = GetCollatorFromSortHandle(pSortHandle, options, &err);
if (U_SUCCESS(err))
{
@@ -89,8 +146,6 @@ IndexOf(const char* lpLocaleName, const UChar* lpTarget, int32_t cwTargetLength,
result = usearch_first(pSearch, &err);
usearch_close(pSearch);
}
-
- ucol_close(pColl);
}
return result;
@@ -101,13 +156,13 @@ Function:
LastIndexOf
*/
extern "C" int32_t LastIndexOf(
- const char* lpLocaleName, const UChar* lpTarget, int32_t cwTargetLength, const UChar* lpSource, int32_t cwSourceLength, int32_t options)
+ const SortHandle* pSortHandle, const UChar* lpTarget, int32_t cwTargetLength, const UChar* lpSource, int32_t cwSourceLength, int32_t options)
{
static_assert(USEARCH_DONE == -1, "managed side requires -1 for not found");
int32_t result = USEARCH_DONE;
UErrorCode err = U_ZERO_ERROR;
- UCollator* pColl = GetCollatorForLocaleAndOptions(lpLocaleName, options, &err);
+ const UCollator* pColl = GetCollatorFromSortHandle(pSortHandle, options, &err);
if (U_SUCCESS(err))
{
@@ -118,8 +173,6 @@ extern "C" int32_t LastIndexOf(
result = usearch_last(pSearch, &err);
usearch_close(pSearch);
}
-
- ucol_close(pColl);
}
return result;
@@ -202,11 +255,11 @@ IndexOfOrdinalIgnoreCase(
Return value is a "Win32 BOOL" (1 = true, 0 = false)
*/
extern "C" int32_t StartsWith(
- const char* lpLocaleName, const UChar* lpTarget, int32_t cwTargetLength, const UChar* lpSource, int32_t cwSourceLength, int32_t options)
+ const SortHandle* pSortHandle, const UChar* lpTarget, int32_t cwTargetLength, const UChar* lpSource, int32_t cwSourceLength, int32_t options)
{
int32_t result = FALSE;
UErrorCode err = U_ZERO_ERROR;
- UCollator* pColl = GetCollatorForLocaleAndOptions(lpLocaleName, options, &err);
+ const UCollator* pColl = GetCollatorFromSortHandle(pSortHandle, options, &err);
if (U_SUCCESS(err))
{
@@ -255,8 +308,6 @@ extern "C" int32_t StartsWith(
usearch_close(pSearch);
}
-
- ucol_close(pColl);
}
return result;
@@ -266,11 +317,11 @@ extern "C" int32_t StartsWith(
Return value is a "Win32 BOOL" (1 = true, 0 = false)
*/
extern "C" int32_t EndsWith(
- const char* lpLocaleName, const UChar* lpTarget, int32_t cwTargetLength, const UChar* lpSource, int32_t cwSourceLength, int32_t options)
+ const SortHandle* pSortHandle, const UChar* lpTarget, int32_t cwTargetLength, const UChar* lpSource, int32_t cwSourceLength, int32_t options)
{
int32_t result = FALSE;
UErrorCode err = U_ZERO_ERROR;
- UCollator* pColl = GetCollatorForLocaleAndOptions(lpLocaleName, options, &err);
+ const UCollator* pColl = GetCollatorFromSortHandle(pSortHandle, options, &err);
if (U_SUCCESS(err))
{
@@ -290,19 +341,17 @@ extern "C" int32_t EndsWith(
// TODO (dotnet/corefx#3467): We should do something similar to what
// StartsWith does where we can ignore
- // some collation elements at the end of te string if they are zero.
+ // some collation elements at the end of the string if they are zero.
}
usearch_close(pSearch);
}
-
- ucol_close(pColl);
}
return result;
}
-extern "C" int32_t GetSortKey(const char* lpLocaleName,
+extern "C" int32_t GetSortKey(const SortHandle* pSortHandle,
const UChar* lpStr,
int32_t cwStrLength,
uint8_t* sortKey,
@@ -310,14 +359,12 @@ extern "C" int32_t GetSortKey(const char* lpLocaleName,
int32_t options)
{
UErrorCode err = U_ZERO_ERROR;
- UCollator* pColl = GetCollatorForLocaleAndOptions(lpLocaleName, options, &err);
+ const UCollator* pColl = GetCollatorFromSortHandle(pSortHandle, options, &err);
int32_t result = 0;
if (U_SUCCESS(err))
{
result = ucol_getSortKey(pColl, lpStr, cwStrLength, sortKey, cbSortKeyLength);
-
- ucol_close(pColl);
}
return result;