summaryrefslogtreecommitdiff
path: root/src/mscorlib
diff options
context:
space:
mode:
authorstephentoub <stoub@microsoft.com>2015-10-20 15:34:38 -0400
committerstephentoub <stoub@microsoft.com>2015-10-22 14:31:58 -0400
commit80800eb0ead17dc724275d426904755b3bc64139 (patch)
tree319fdadf2945c08ff72f3bd0b43955abb64e8455 /src/mscorlib
parent3ddea17fc4e5b0c9ff2cb0651be82594a0eddffb (diff)
downloadcoreclr-80800eb0ead17dc724275d426904755b3bc64139.tar.gz
coreclr-80800eb0ead17dc724275d426904755b3bc64139.tar.bz2
coreclr-80800eb0ead17dc724275d426904755b3bc64139.zip
Improve string.{Last}IndexOf perf on Unix for Ordinal/OrdinalIgnoreCase
Our current implementation of IndexOfOrdinal for strings on Unix uses Substring to get the piece of the source string we care about; this results in an unnecessary allocation / string copy. When using OrdinalIgnoreCase, we also convert both the source and search strings to upper-case using ToUpperInvariant, resulting in more allocations. And our LastIndexOfOrdinal implementation delegates to IndexOfOrdinal repeatedly, incurring such allocations potentially multiple times. This change reimplements Ordinal searching in managed code to not use Substring, and it implements OrdinalIgnoreCase searching via new functions exposed in the native globalization shim, so as to use ICU without having to make managed/native transitions for each character. With the changes, {Last}IndexOf with Ordinal/OrdinalIgnoreCase are now allocateion-free (as you'd expect), and throughput when startIndex/count and/or OrdinalIgnoreCase are used is increased significantly, on my machine anywhere from 20% to 3x, depending on the inputs.
Diffstat (limited to 'src/mscorlib')
-rw-r--r--src/mscorlib/corefx/Interop/Unix/System.Globalization.Native/Interop.Collation.cs6
-rw-r--r--src/mscorlib/corefx/System/Globalization/CompareInfo.Unix.cs80
2 files changed, 57 insertions, 29 deletions
diff --git a/src/mscorlib/corefx/Interop/Unix/System.Globalization.Native/Interop.Collation.cs b/src/mscorlib/corefx/Interop/Unix/System.Globalization.Native/Interop.Collation.cs
index b600fa59ef..1a3654ee29 100644
--- a/src/mscorlib/corefx/Interop/Unix/System.Globalization.Native/Interop.Collation.cs
+++ b/src/mscorlib/corefx/Interop/Unix/System.Globalization.Native/Interop.Collation.cs
@@ -19,6 +19,12 @@ internal static partial class Interop
internal unsafe static extern int LastIndexOf(byte[] localeName, string target, char* pSource, int cwSourceLength, CompareOptions options);
[DllImport(Libraries.GlobalizationInterop, CharSet = CharSet.Unicode)]
+ internal unsafe static extern int IndexOfOrdinalIgnoreCase(string target, int cwTargetLength, char* pSource, int cwSourceLength);
+
+ [DllImport(Libraries.GlobalizationInterop, CharSet = CharSet.Unicode)]
+ internal unsafe static extern int LastIndexOfOrdinalIgnoreCase(string target, int cwTargetLength, char* pSource, int cwSourceLength);
+
+ [DllImport(Libraries.GlobalizationInterop, CharSet = CharSet.Unicode)]
[return: MarshalAs(UnmanagedType.Bool)]
internal unsafe static extern bool StartsWith(byte[] localeName, string target, string source, int cwSourceLength, CompareOptions options);
diff --git a/src/mscorlib/corefx/System/Globalization/CompareInfo.Unix.cs b/src/mscorlib/corefx/System/Globalization/CompareInfo.Unix.cs
index ba658d0f5e..8df1d79612 100644
--- a/src/mscorlib/corefx/System/Globalization/CompareInfo.Unix.cs
+++ b/src/mscorlib/corefx/System/Globalization/CompareInfo.Unix.cs
@@ -20,7 +20,7 @@ namespace System.Globalization
m_sortNameAsUtf8 = System.Text.Encoding.UTF8.GetBytes(m_sortName);
}
- internal static int IndexOfOrdinal(string source, string value, int startIndex, int count, bool ignoreCase)
+ internal static unsafe int IndexOfOrdinal(string source, string value, int startIndex, int count, bool ignoreCase)
{
Contract.Assert(source != null);
Contract.Assert(value != null);
@@ -30,33 +30,41 @@ namespace System.Globalization
return startIndex;
}
- // TODO (dotnet/corefx#3468): Move this into the shim so we don't have to do the ToUpper or call substring.
+ if (count < value.Length)
+ {
+ return -1;
+ }
if (ignoreCase)
{
- source = source.ToUpper(CultureInfo.InvariantCulture);
- value = value.ToUpper(CultureInfo.InvariantCulture);
+ fixed (char* pSource = source)
+ {
+ int index = Interop.GlobalizationInterop.IndexOfOrdinalIgnoreCase(value, value.Length, pSource + startIndex, count);
+ return index != -1 ?
+ startIndex + index :
+ -1;
+ }
}
- source = source.Substring(startIndex, count);
-
- for (int i = 0; i + value.Length <= source.Length; i++)
+ int endIndex = startIndex + (count - value.Length);
+ for (int i = startIndex; i <= endIndex; i++)
{
- for (int j = 0; j < value.Length; j++) {
- if (source[i + j] != value[j]) {
- break;
- }
+ int valueIndex, sourceIndex;
+
+ for (valueIndex = 0, sourceIndex = i;
+ valueIndex < value.Length && source[sourceIndex] == value[valueIndex];
+ valueIndex++, sourceIndex++) ;
- if (j == value.Length - 1) {
- return i + startIndex;
- }
+ if (valueIndex == value.Length)
+ {
+ return i;
}
}
return -1;
}
- internal static int LastIndexOfOrdinal(string source, string value, int startIndex, int count, bool ignoreCase)
+ internal static unsafe int LastIndexOfOrdinal(string source, string value, int startIndex, int count, bool ignoreCase)
{
Contract.Assert(source != null);
Contract.Assert(value != null);
@@ -66,27 +74,41 @@ namespace System.Globalization
return startIndex;
}
- // TODO (dotnet/corefx#3468): Move this into the shim so we don't have to do the ToUpper or call substring.
+ if (count < value.Length)
+ {
+ return -1;
+ }
+
+ // startIndex is the index into source where we start search backwards from.
+ // leftStartIndex is the index into source of the start of the string that is
+ // count characters away from startIndex.
+ int leftStartIndex = startIndex - count + 1;
if (ignoreCase)
{
- source = source.ToUpper(CultureInfo.InvariantCulture);
- value = value.ToUpper(CultureInfo.InvariantCulture);
+ fixed (char* pSource = source)
+ {
+ int lastIndex = Interop.GlobalizationInterop.LastIndexOfOrdinalIgnoreCase(value, value.Length, pSource + leftStartIndex, count);
+ return lastIndex != -1 ?
+ leftStartIndex + lastIndex :
+ -1;
+ }
}
- source = source.Substring(startIndex - count + 1, count);
+ for (int i = startIndex - value.Length + 1; i >= leftStartIndex; i--)
+ {
+ int valueIndex, sourceIndex;
- int last = -1;
+ for (valueIndex = 0, sourceIndex = i;
+ valueIndex < value.Length && source[sourceIndex] == value[valueIndex];
+ valueIndex++, sourceIndex++) ;
- int cur = 0;
- while ((cur = IndexOfOrdinal(source, value, last + 1, source.Length - last - 1, false)) != -1)
- {
- last = cur;
+ if (valueIndex == value.Length) {
+ return i;
+ }
}
- return last >= 0 ?
- last + startIndex - count + 1 :
- -1;
+ return -1;
}
private unsafe int GetHashCodeOfStringCore(string source, CompareOptions options)
@@ -138,9 +160,9 @@ namespace System.Globalization
fixed (char* pSource = source)
{
- int lastIndex = Interop.GlobalizationInterop.IndexOf(m_sortNameAsUtf8, target, pSource + startIndex, count, options);
+ int index = Interop.GlobalizationInterop.IndexOf(m_sortNameAsUtf8, target, pSource + startIndex, count, options);
- return lastIndex != -1 ? lastIndex + startIndex : -1;
+ return index != -1 ? index + startIndex : -1;
}
}