summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLevi Broderick <GrabYourPitchforks@users.noreply.github.com>2018-10-11 16:25:20 -0700
committerGitHub <noreply@github.com>2018-10-11 16:25:20 -0700
commite2bcca7d9d0e36510eaba9b1028e16a5de39cee9 (patch)
tree137065ea7bf2f8af53e71e3249557812824a041c
parent907c013a7f5bf6ef4e37519ca27b7ea6fb998153 (diff)
downloadcoreclr-e2bcca7d9d0e36510eaba9b1028e16a5de39cee9.tar.gz
coreclr-e2bcca7d9d0e36510eaba9b1028e16a5de39cee9.tar.bz2
coreclr-e2bcca7d9d0e36510eaba9b1028e16a5de39cee9.zip
Improve performance of OrdinalIgnoreCase hash code calculation (#20309)
-rw-r--r--src/System.Private.CoreLib/shared/System.Private.CoreLib.Shared.projitems1
-rw-r--r--src/System.Private.CoreLib/shared/System/Globalization/CompareInfo.cs33
-rw-r--r--src/System.Private.CoreLib/shared/System/Marvin.OrdinalIgnoreCase.cs96
-rw-r--r--src/System.Private.CoreLib/shared/System/Marvin.cs13
-rw-r--r--src/System.Private.CoreLib/shared/System/String.Comparison.cs11
-rw-r--r--src/System.Private.CoreLib/shared/System/StringComparer.cs4
6 files changed, 116 insertions, 42 deletions
diff --git a/src/System.Private.CoreLib/shared/System.Private.CoreLib.Shared.projitems b/src/System.Private.CoreLib/shared/System.Private.CoreLib.Shared.projitems
index ee2d2e69e7..e8520f213e 100644
--- a/src/System.Private.CoreLib/shared/System.Private.CoreLib.Shared.projitems
+++ b/src/System.Private.CoreLib/shared/System.Private.CoreLib.Shared.projitems
@@ -263,6 +263,7 @@
<Compile Include="$(MSBuildThisFileDirectory)System\IntPtr.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\Lazy.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\Marvin.cs" />
+ <Compile Include="$(MSBuildThisFileDirectory)System\Marvin.OrdinalIgnoreCase.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\Math.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\MathF.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\MarshalByRefObject.cs" />
diff --git a/src/System.Private.CoreLib/shared/System/Globalization/CompareInfo.cs b/src/System.Private.CoreLib/shared/System/Globalization/CompareInfo.cs
index 66a4023ffc..a23de3a133 100644
--- a/src/System.Private.CoreLib/shared/System/Globalization/CompareInfo.cs
+++ b/src/System.Private.CoreLib/shared/System/Globalization/CompareInfo.cs
@@ -1281,35 +1281,6 @@ namespace System.Globalization
return (this.Name.GetHashCode());
}
- internal static unsafe int GetIgnoreCaseHash(string source)
- {
- Debug.Assert(source != null, "source must not be null");
-
- // Do not allocate on the stack if string is empty
- if (source.Length == 0)
- {
- return source.GetHashCode();
- }
-
- char[] borrowedArr = null;
- Span<char> span = source.Length <= 255 ?
- stackalloc char[255] :
- (borrowedArr = ArrayPool<char>.Shared.Rent(source.Length));
-
- int charsWritten = source.AsSpan().ToUpperInvariant(span);
-
- // Slice the array to the size returned by ToUpperInvariant.
- int hash = Marvin.ComputeHash32(MemoryMarshal.AsBytes(span.Slice(0, charsWritten)), Marvin.DefaultSeed);
-
- // Return the borrowed array if necessary.
- if (borrowedArr != null)
- {
- ArrayPool<char>.Shared.Return(borrowedArr);
- }
-
- return hash;
- }
-
////////////////////////////////////////////////////////////////////////
//
// GetHashCodeOfString
@@ -1351,7 +1322,7 @@ namespace System.Globalization
if (_invariantMode)
{
- return ((options & CompareOptions.IgnoreCase) != 0) ? GetIgnoreCaseHash(source) : source.GetHashCode();
+ return ((options & CompareOptions.IgnoreCase) != 0) ? source.GetHashCodeOrdinalIgnoreCase() : source.GetHashCode();
}
return GetHashCodeOfStringCore(source, options);
@@ -1371,7 +1342,7 @@ namespace System.Globalization
if (options == CompareOptions.OrdinalIgnoreCase)
{
- return GetIgnoreCaseHash(source);
+ return source.GetHashCodeOrdinalIgnoreCase();
}
//
diff --git a/src/System.Private.CoreLib/shared/System/Marvin.OrdinalIgnoreCase.cs b/src/System.Private.CoreLib/shared/System/Marvin.OrdinalIgnoreCase.cs
new file mode 100644
index 0000000000..f55a9f515c
--- /dev/null
+++ b/src/System.Private.CoreLib/shared/System/Marvin.OrdinalIgnoreCase.cs
@@ -0,0 +1,96 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+using System.Buffers;
+using System.Diagnostics;
+using System.Runtime.InteropServices;
+using System.Text;
+using Internal.Runtime.CompilerServices;
+
+#if BIT64
+using nuint = System.UInt64;
+#else
+using nuint = System.UInt32;
+#endif
+
+namespace System
+{
+ internal static partial class Marvin
+ {
+ /// <summary>
+ /// Compute a Marvin OrdinalIgnoreCase hash and collapse it into a 32-bit hash.
+ /// n.b. <paramref name="count"/> is specified as char count, not byte count.
+ /// </summary>
+ public static int ComputeHash32OrdinalIgnoreCase(ref char data, int count, uint p0, uint p1)
+ {
+ uint ucount = (uint)count; // in chars
+ nuint byteOffset = 0; // in bytes
+ uint tempValue;
+
+ // We operate on 32-bit integers (two chars) at a time.
+
+ while (ucount >= 2)
+ {
+ tempValue = Unsafe.ReadUnaligned<uint>(ref Unsafe.As<char, byte>(ref Unsafe.AddByteOffset(ref data, byteOffset)));
+ if (!Utf16Utility.AllCharsInUInt32AreAscii(tempValue))
+ {
+ goto NotAscii;
+ }
+ p0 += Utf16Utility.ConvertAllAsciiCharsInUInt32ToUppercase(tempValue);
+ Block(ref p0, ref p1);
+
+ byteOffset += 4;
+ ucount -= 2;
+ }
+
+ // We have either one char (16 bits) or zero chars left over.
+ Debug.Assert(ucount < 2);
+
+ if (ucount > 0)
+ {
+ tempValue = Unsafe.AddByteOffset(ref data, byteOffset);
+ if (tempValue > 0x7Fu)
+ {
+ goto NotAscii;
+ }
+
+ // addition is written with -0x80u to allow fall-through to next statement rather than jmp past it
+ p0 += Utf16Utility.ConvertAllAsciiCharsInUInt32ToUppercase(tempValue) + (0x800000u - 0x80u);
+ }
+ p0 += 0x80u;
+
+ Block(ref p0, ref p1);
+ Block(ref p0, ref p1);
+
+ return (int)(p1 ^ p0);
+
+ NotAscii:
+ Debug.Assert(0 <= ucount && ucount <= Int32.MaxValue); // this should fit into a signed int
+ return ComputeHash32OrdinalIgnoreCaseSlow(ref Unsafe.AddByteOffset(ref data, byteOffset), (int)ucount, p0, p1);
+ }
+
+ private static int ComputeHash32OrdinalIgnoreCaseSlow(ref char data, int count, uint p0, uint p1)
+ {
+ Debug.Assert(count > 0);
+
+ char[] borrowedArr = null;
+ Span<char> scratch = (uint)count <= 64 ? stackalloc char[64] : (borrowedArr = ArrayPool<char>.Shared.Rent(count));
+
+ int charsWritten = new ReadOnlySpan<char>(ref data, count).ToUpperInvariant(scratch);
+ Debug.Assert(charsWritten == count); // invariant case conversion should involve simple folding; preserve code unit count
+
+ // Slice the array to the size returned by ToUpperInvariant.
+ // Multiplication below may overflow, that's fine since it's going to an unsigned integer.
+ int hash = ComputeHash32(ref Unsafe.As<char, byte>(ref MemoryMarshal.GetReference(scratch)), charsWritten * 2, p0, p1);
+
+ // Return the borrowed array if necessary.
+ if (borrowedArr != null)
+ {
+ ArrayPool<char>.Shared.Return(borrowedArr);
+ }
+
+ return hash;
+ }
+ }
+}
diff --git a/src/System.Private.CoreLib/shared/System/Marvin.cs b/src/System.Private.CoreLib/shared/System/Marvin.cs
index e51ad9c552..dd8ac6c279 100644
--- a/src/System.Private.CoreLib/shared/System/Marvin.cs
+++ b/src/System.Private.CoreLib/shared/System/Marvin.cs
@@ -15,23 +15,20 @@ using nuint = System.UInt32;
namespace System
{
- internal static class Marvin
+ internal static partial class Marvin
{
/// <summary>
/// Compute a Marvin hash and collapse it into a 32-bit hash.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static int ComputeHash32(ReadOnlySpan<byte> data, ulong seed) => ComputeHash32(ref MemoryMarshal.GetReference(data), data.Length, seed);
+ public static int ComputeHash32(ReadOnlySpan<byte> data, ulong seed) => ComputeHash32(ref MemoryMarshal.GetReference(data), data.Length, (uint)seed, (uint)(seed >> 32));
/// <summary>
/// Compute a Marvin hash and collapse it into a 32-bit hash.
/// </summary>
- public static int ComputeHash32(ref byte data, int count, ulong seed)
+ public static int ComputeHash32(ref byte data, int count, uint p0, uint p1)
{
nuint ucount = (nuint)count;
- uint p0 = (uint)seed;
- uint p1 = (uint)(seed >> 32);
-
nuint byteOffset = 0;
while (ucount >= 8)
@@ -84,7 +81,7 @@ namespace System
goto case 3;
case 3:
- p0 += 0x80000000u | (((uint)(Unsafe.AddByteOffset(ref data, byteOffset + 2))) << 16)| (uint)(Unsafe.ReadUnaligned<ushort>(ref Unsafe.AddByteOffset(ref data, byteOffset)));
+ p0 += 0x80000000u | (((uint)(Unsafe.AddByteOffset(ref data, byteOffset + 2))) << 16) | (uint)(Unsafe.ReadUnaligned<ushort>(ref Unsafe.AddByteOffset(ref data, byteOffset)));
break;
default:
@@ -97,7 +94,7 @@ namespace System
return (int)(p1 ^ p0);
}
-
+
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static void Block(ref uint rp0, ref uint rp1)
{
diff --git a/src/System.Private.CoreLib/shared/System/String.Comparison.cs b/src/System.Private.CoreLib/shared/System/String.Comparison.cs
index 8ac31796bb..f20f6c3c94 100644
--- a/src/System.Private.CoreLib/shared/System/String.Comparison.cs
+++ b/src/System.Private.CoreLib/shared/System/String.Comparison.cs
@@ -744,15 +744,24 @@ namespace System
// Gets a hash code for this string. If strings A and B are such that A.Equals(B), then
// they will return the same hash code.
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
public override int GetHashCode()
{
- return Marvin.ComputeHash32(ref Unsafe.As<char, byte>(ref _firstChar), _stringLength * 2, Marvin.DefaultSeed);
+ ulong seed = Marvin.DefaultSeed;
+ return Marvin.ComputeHash32(ref Unsafe.As<char, byte>(ref _firstChar), _stringLength * 2, (uint)seed, (uint)(seed >> 32));
}
// Gets a hash code for this string and this comparison. If strings A and B and comparison C are such
// that string.Equals(A, B, C), then they will return the same hash code with this comparison C.
public int GetHashCode(StringComparison comparisonType) => StringComparer.FromComparison(comparisonType).GetHashCode(this);
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ internal int GetHashCodeOrdinalIgnoreCase()
+ {
+ ulong seed = Marvin.DefaultSeed;
+ return Marvin.ComputeHash32OrdinalIgnoreCase(ref _firstChar, _stringLength, (uint)seed, (uint)(seed >> 32));
+ }
+
// Use this if and only if 'Denial of Service' attacks are not a concern (i.e. never used for free-form user input),
// or are otherwise mitigated
internal unsafe int GetNonRandomizedHashCode()
diff --git a/src/System.Private.CoreLib/shared/System/StringComparer.cs b/src/System.Private.CoreLib/shared/System/StringComparer.cs
index 47731cb782..cec4b894e0 100644
--- a/src/System.Private.CoreLib/shared/System/StringComparer.cs
+++ b/src/System.Private.CoreLib/shared/System/StringComparer.cs
@@ -307,7 +307,7 @@ namespace System
if (_ignoreCase)
{
- return CompareInfo.GetIgnoreCaseHash(obj);
+ return obj.GetHashCodeOrdinalIgnoreCase();
}
return obj.GetHashCode();
@@ -375,7 +375,7 @@ namespace System
{
ThrowHelper.ThrowArgumentNullException(ExceptionArgument.obj);
}
- return CompareInfo.GetIgnoreCaseHash(obj);
+ return obj.GetHashCodeOrdinalIgnoreCase();
}
public void GetObjectData(SerializationInfo info, StreamingContext context)