diff options
author | Levi Broderick <GrabYourPitchforks@users.noreply.github.com> | 2018-10-11 16:25:20 -0700 |
---|---|---|
committer | GitHub <noreply@github.com> | 2018-10-11 16:25:20 -0700 |
commit | e2bcca7d9d0e36510eaba9b1028e16a5de39cee9 (patch) | |
tree | 137065ea7bf2f8af53e71e3249557812824a041c | |
parent | 907c013a7f5bf6ef4e37519ca27b7ea6fb998153 (diff) | |
download | coreclr-e2bcca7d9d0e36510eaba9b1028e16a5de39cee9.tar.gz coreclr-e2bcca7d9d0e36510eaba9b1028e16a5de39cee9.tar.bz2 coreclr-e2bcca7d9d0e36510eaba9b1028e16a5de39cee9.zip |
Improve performance of OrdinalIgnoreCase hash code calculation (#20309)
6 files changed, 116 insertions, 42 deletions
diff --git a/src/System.Private.CoreLib/shared/System.Private.CoreLib.Shared.projitems b/src/System.Private.CoreLib/shared/System.Private.CoreLib.Shared.projitems index ee2d2e69e7..e8520f213e 100644 --- a/src/System.Private.CoreLib/shared/System.Private.CoreLib.Shared.projitems +++ b/src/System.Private.CoreLib/shared/System.Private.CoreLib.Shared.projitems @@ -263,6 +263,7 @@ <Compile Include="$(MSBuildThisFileDirectory)System\IntPtr.cs" /> <Compile Include="$(MSBuildThisFileDirectory)System\Lazy.cs" /> <Compile Include="$(MSBuildThisFileDirectory)System\Marvin.cs" /> + <Compile Include="$(MSBuildThisFileDirectory)System\Marvin.OrdinalIgnoreCase.cs" /> <Compile Include="$(MSBuildThisFileDirectory)System\Math.cs" /> <Compile Include="$(MSBuildThisFileDirectory)System\MathF.cs" /> <Compile Include="$(MSBuildThisFileDirectory)System\MarshalByRefObject.cs" /> diff --git a/src/System.Private.CoreLib/shared/System/Globalization/CompareInfo.cs b/src/System.Private.CoreLib/shared/System/Globalization/CompareInfo.cs index 66a4023ffc..a23de3a133 100644 --- a/src/System.Private.CoreLib/shared/System/Globalization/CompareInfo.cs +++ b/src/System.Private.CoreLib/shared/System/Globalization/CompareInfo.cs @@ -1281,35 +1281,6 @@ namespace System.Globalization return (this.Name.GetHashCode()); } - internal static unsafe int GetIgnoreCaseHash(string source) - { - Debug.Assert(source != null, "source must not be null"); - - // Do not allocate on the stack if string is empty - if (source.Length == 0) - { - return source.GetHashCode(); - } - - char[] borrowedArr = null; - Span<char> span = source.Length <= 255 ? - stackalloc char[255] : - (borrowedArr = ArrayPool<char>.Shared.Rent(source.Length)); - - int charsWritten = source.AsSpan().ToUpperInvariant(span); - - // Slice the array to the size returned by ToUpperInvariant. - int hash = Marvin.ComputeHash32(MemoryMarshal.AsBytes(span.Slice(0, charsWritten)), Marvin.DefaultSeed); - - // Return the borrowed array if necessary. - if (borrowedArr != null) - { - ArrayPool<char>.Shared.Return(borrowedArr); - } - - return hash; - } - //////////////////////////////////////////////////////////////////////// // // GetHashCodeOfString @@ -1351,7 +1322,7 @@ namespace System.Globalization if (_invariantMode) { - return ((options & CompareOptions.IgnoreCase) != 0) ? GetIgnoreCaseHash(source) : source.GetHashCode(); + return ((options & CompareOptions.IgnoreCase) != 0) ? source.GetHashCodeOrdinalIgnoreCase() : source.GetHashCode(); } return GetHashCodeOfStringCore(source, options); @@ -1371,7 +1342,7 @@ namespace System.Globalization if (options == CompareOptions.OrdinalIgnoreCase) { - return GetIgnoreCaseHash(source); + return source.GetHashCodeOrdinalIgnoreCase(); } // diff --git a/src/System.Private.CoreLib/shared/System/Marvin.OrdinalIgnoreCase.cs b/src/System.Private.CoreLib/shared/System/Marvin.OrdinalIgnoreCase.cs new file mode 100644 index 0000000000..f55a9f515c --- /dev/null +++ b/src/System.Private.CoreLib/shared/System/Marvin.OrdinalIgnoreCase.cs @@ -0,0 +1,96 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using System.Buffers; +using System.Diagnostics; +using System.Runtime.InteropServices; +using System.Text; +using Internal.Runtime.CompilerServices; + +#if BIT64 +using nuint = System.UInt64; +#else +using nuint = System.UInt32; +#endif + +namespace System +{ + internal static partial class Marvin + { + /// <summary> + /// Compute a Marvin OrdinalIgnoreCase hash and collapse it into a 32-bit hash. + /// n.b. <paramref name="count"/> is specified as char count, not byte count. + /// </summary> + public static int ComputeHash32OrdinalIgnoreCase(ref char data, int count, uint p0, uint p1) + { + uint ucount = (uint)count; // in chars + nuint byteOffset = 0; // in bytes + uint tempValue; + + // We operate on 32-bit integers (two chars) at a time. + + while (ucount >= 2) + { + tempValue = Unsafe.ReadUnaligned<uint>(ref Unsafe.As<char, byte>(ref Unsafe.AddByteOffset(ref data, byteOffset))); + if (!Utf16Utility.AllCharsInUInt32AreAscii(tempValue)) + { + goto NotAscii; + } + p0 += Utf16Utility.ConvertAllAsciiCharsInUInt32ToUppercase(tempValue); + Block(ref p0, ref p1); + + byteOffset += 4; + ucount -= 2; + } + + // We have either one char (16 bits) or zero chars left over. + Debug.Assert(ucount < 2); + + if (ucount > 0) + { + tempValue = Unsafe.AddByteOffset(ref data, byteOffset); + if (tempValue > 0x7Fu) + { + goto NotAscii; + } + + // addition is written with -0x80u to allow fall-through to next statement rather than jmp past it + p0 += Utf16Utility.ConvertAllAsciiCharsInUInt32ToUppercase(tempValue) + (0x800000u - 0x80u); + } + p0 += 0x80u; + + Block(ref p0, ref p1); + Block(ref p0, ref p1); + + return (int)(p1 ^ p0); + + NotAscii: + Debug.Assert(0 <= ucount && ucount <= Int32.MaxValue); // this should fit into a signed int + return ComputeHash32OrdinalIgnoreCaseSlow(ref Unsafe.AddByteOffset(ref data, byteOffset), (int)ucount, p0, p1); + } + + private static int ComputeHash32OrdinalIgnoreCaseSlow(ref char data, int count, uint p0, uint p1) + { + Debug.Assert(count > 0); + + char[] borrowedArr = null; + Span<char> scratch = (uint)count <= 64 ? stackalloc char[64] : (borrowedArr = ArrayPool<char>.Shared.Rent(count)); + + int charsWritten = new ReadOnlySpan<char>(ref data, count).ToUpperInvariant(scratch); + Debug.Assert(charsWritten == count); // invariant case conversion should involve simple folding; preserve code unit count + + // Slice the array to the size returned by ToUpperInvariant. + // Multiplication below may overflow, that's fine since it's going to an unsigned integer. + int hash = ComputeHash32(ref Unsafe.As<char, byte>(ref MemoryMarshal.GetReference(scratch)), charsWritten * 2, p0, p1); + + // Return the borrowed array if necessary. + if (borrowedArr != null) + { + ArrayPool<char>.Shared.Return(borrowedArr); + } + + return hash; + } + } +} diff --git a/src/System.Private.CoreLib/shared/System/Marvin.cs b/src/System.Private.CoreLib/shared/System/Marvin.cs index e51ad9c552..dd8ac6c279 100644 --- a/src/System.Private.CoreLib/shared/System/Marvin.cs +++ b/src/System.Private.CoreLib/shared/System/Marvin.cs @@ -15,23 +15,20 @@ using nuint = System.UInt32; namespace System { - internal static class Marvin + internal static partial class Marvin { /// <summary> /// Compute a Marvin hash and collapse it into a 32-bit hash. /// </summary> [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static int ComputeHash32(ReadOnlySpan<byte> data, ulong seed) => ComputeHash32(ref MemoryMarshal.GetReference(data), data.Length, seed); + public static int ComputeHash32(ReadOnlySpan<byte> data, ulong seed) => ComputeHash32(ref MemoryMarshal.GetReference(data), data.Length, (uint)seed, (uint)(seed >> 32)); /// <summary> /// Compute a Marvin hash and collapse it into a 32-bit hash. /// </summary> - public static int ComputeHash32(ref byte data, int count, ulong seed) + public static int ComputeHash32(ref byte data, int count, uint p0, uint p1) { nuint ucount = (nuint)count; - uint p0 = (uint)seed; - uint p1 = (uint)(seed >> 32); - nuint byteOffset = 0; while (ucount >= 8) @@ -84,7 +81,7 @@ namespace System goto case 3; case 3: - p0 += 0x80000000u | (((uint)(Unsafe.AddByteOffset(ref data, byteOffset + 2))) << 16)| (uint)(Unsafe.ReadUnaligned<ushort>(ref Unsafe.AddByteOffset(ref data, byteOffset))); + p0 += 0x80000000u | (((uint)(Unsafe.AddByteOffset(ref data, byteOffset + 2))) << 16) | (uint)(Unsafe.ReadUnaligned<ushort>(ref Unsafe.AddByteOffset(ref data, byteOffset))); break; default: @@ -97,7 +94,7 @@ namespace System return (int)(p1 ^ p0); } - + [MethodImpl(MethodImplOptions.AggressiveInlining)] private static void Block(ref uint rp0, ref uint rp1) { diff --git a/src/System.Private.CoreLib/shared/System/String.Comparison.cs b/src/System.Private.CoreLib/shared/System/String.Comparison.cs index 8ac31796bb..f20f6c3c94 100644 --- a/src/System.Private.CoreLib/shared/System/String.Comparison.cs +++ b/src/System.Private.CoreLib/shared/System/String.Comparison.cs @@ -744,15 +744,24 @@ namespace System // Gets a hash code for this string. If strings A and B are such that A.Equals(B), then // they will return the same hash code. + [MethodImpl(MethodImplOptions.AggressiveInlining)] public override int GetHashCode() { - return Marvin.ComputeHash32(ref Unsafe.As<char, byte>(ref _firstChar), _stringLength * 2, Marvin.DefaultSeed); + ulong seed = Marvin.DefaultSeed; + return Marvin.ComputeHash32(ref Unsafe.As<char, byte>(ref _firstChar), _stringLength * 2, (uint)seed, (uint)(seed >> 32)); } // Gets a hash code for this string and this comparison. If strings A and B and comparison C are such // that string.Equals(A, B, C), then they will return the same hash code with this comparison C. public int GetHashCode(StringComparison comparisonType) => StringComparer.FromComparison(comparisonType).GetHashCode(this); + [MethodImpl(MethodImplOptions.AggressiveInlining)] + internal int GetHashCodeOrdinalIgnoreCase() + { + ulong seed = Marvin.DefaultSeed; + return Marvin.ComputeHash32OrdinalIgnoreCase(ref _firstChar, _stringLength, (uint)seed, (uint)(seed >> 32)); + } + // Use this if and only if 'Denial of Service' attacks are not a concern (i.e. never used for free-form user input), // or are otherwise mitigated internal unsafe int GetNonRandomizedHashCode() diff --git a/src/System.Private.CoreLib/shared/System/StringComparer.cs b/src/System.Private.CoreLib/shared/System/StringComparer.cs index 47731cb782..cec4b894e0 100644 --- a/src/System.Private.CoreLib/shared/System/StringComparer.cs +++ b/src/System.Private.CoreLib/shared/System/StringComparer.cs @@ -307,7 +307,7 @@ namespace System if (_ignoreCase) { - return CompareInfo.GetIgnoreCaseHash(obj); + return obj.GetHashCodeOrdinalIgnoreCase(); } return obj.GetHashCode(); @@ -375,7 +375,7 @@ namespace System { ThrowHelper.ThrowArgumentNullException(ExceptionArgument.obj); } - return CompareInfo.GetIgnoreCaseHash(obj); + return obj.GetHashCodeOrdinalIgnoreCase(); } public void GetObjectData(SerializationInfo info, StreamingContext context) |