diff options
author | Grant <grant@jesanna.com> | 2019-02-14 13:51:03 -0800 |
---|---|---|
committer | Jan Kotas <jkotas@microsoft.com> | 2019-02-14 13:51:03 -0800 |
commit | b4f99f2d85956cbf20610b283eab24e9dabcf3be (patch) | |
tree | 06ccaa7d1fe77076e95627a55849b7428f6ebe8d /src/System.Private.CoreLib | |
parent | 87c28ebac2739d38a1de417d9b90b3f0f1c2b104 (diff) | |
download | coreclr-b4f99f2d85956cbf20610b283eab24e9dabcf3be.tar.gz coreclr-b4f99f2d85956cbf20610b283eab24e9dabcf3be.tar.bz2 coreclr-b4f99f2d85956cbf20610b283eab24e9dabcf3be.zip |
Consolidate implementation of Rotate and PopCount (#22584)
* Perf: BitOps.LeadingZeroCount
* Remove redundant MSIL cast, conv.u8
* Use local functions for SoftwareFallback
* Target BIT32/64
Diffstat (limited to 'src/System.Private.CoreLib')
8 files changed, 155 insertions, 72 deletions
diff --git a/src/System.Private.CoreLib/shared/System/BitOps.cs b/src/System.Private.CoreLib/shared/System/BitOps.cs index cb54cf4fd7..021ee0bfc3 100644 --- a/src/System.Private.CoreLib/shared/System/BitOps.cs +++ b/src/System.Private.CoreLib/shared/System/BitOps.cs @@ -13,6 +13,11 @@ using Internal.Runtime.CompilerServices; namespace System { + /// <summary> + /// Utility methods for intrinsic bit-twiddling operations. + /// The methods use hardware intrinsics when available on the underlying platform, + /// otherwise they use optimized software fallbacks. + /// </summary> internal static class BitOps { // C# no-alloc optimization that directly wraps the data section of the dll (similar to string constants) @@ -53,11 +58,11 @@ namespace System { if (Bmi1.IsSupported) { - // Note that TZCNT contract specifies 0->32 + // TZCNT contract is 0->32 return (int)Bmi1.TrailingZeroCount(value); } - // Software fallback has behavior 0->0, so special-case to match intrinsic path 0->32 + // Unguarded fallback contract is 0->0 if (value == 0) { return 32; @@ -67,8 +72,8 @@ namespace System return Unsafe.AddByteOffset( // Using deBruijn sequence, k=2, n=5 (2^5=32) : 0b_0000_0111_0111_1100_1011_0101_0011_0001u ref MemoryMarshal.GetReference(s_TrailingZeroCountDeBruijn), - // long -> IntPtr cast on 32-bit platforms is expensive - it does overflow checks not needed here - (IntPtr)(int)(((uint)((value & -value) * 0x077CB531u)) >> 27)); // shift over long also expensive on 32-bit + // uint|long -> IntPtr cast on 32-bit platforms does expensive overflow checks not needed here + (IntPtr)(int)(((value & (uint)-(int)value) * 0x077CB531u) >> 27)); // Multi-cast mitigates redundant conv.u8 } /// <summary> @@ -90,7 +95,7 @@ namespace System { if (Bmi1.X64.IsSupported) { - // Note that TZCNT contract specifies 0->64 + // TZCNT contract is 0->64 return (int)Bmi1.X64.TrailingZeroCount(value); } @@ -114,17 +119,17 @@ namespace System { if (Lzcnt.IsSupported) { - // Note that LZCNT contract specifies 0->32 + // LZCNT contract is 0->32 return (int)Lzcnt.LeadingZeroCount(value); } - // Software fallback has behavior 0->0, so special-case to match intrinsic path 0->32 + // Unguarded fallback contract is 0->31 if (value == 0) { return 32; } - return 31 - Log2(value); + return 31 - Log2SoftwareFallback(value); } /// <summary> @@ -137,7 +142,7 @@ namespace System { if (Lzcnt.X64.IsSupported) { - // Note that LZCNT contract specifies 0->64 + // LZCNT contract is 0->64 return (int)Lzcnt.X64.LeadingZeroCount(value); } @@ -168,30 +173,32 @@ namespace System // 1000.. 0 31-0 31 if (Lzcnt.IsSupported) { - // Enforce conventional contract 0->0 (since Log(0) is undefined) + // Enforce conventional contract 0->0 (Log(0) is undefined) if (value == 0) { return 0; } - // Note that LZCNT contract specifies 0->32 + // LZCNT contract is 0->32 return 31 - (int)Lzcnt.LeadingZeroCount(value); } - // Already has contract 0->0, without branching + // Fallback contract is 0->0 return Log2SoftwareFallback(value); } /// <summary> /// Returns the integer (floor) log of the specified value, base 2. /// Note that by convention, input value 0 returns 0 since Log(0) is undefined. - /// Does not incur branching. + /// Does not directly use any hardware intrinsics, nor does it incur branching. /// </summary> /// <param name="value">The value.</param> private static int Log2SoftwareFallback(uint value) { // No AggressiveInlining due to large method size + // Has conventional contract 0->0 (Log(0) is undefined) + // Fill trailing zeros with ones, eg 00010010 becomes 00011111 value |= value >> 01; value |= value >> 02; value |= value >> 04; @@ -202,7 +209,7 @@ namespace System return Unsafe.AddByteOffset( // Using deBruijn sequence, k=2, n=5 (2^5=32) : 0b_0000_0111_1100_0100_1010_1100_1101_1101u ref MemoryMarshal.GetReference(s_Log2DeBruijn), - // long -> IntPtr cast on 32-bit platforms is expensive - it does overflow checks not needed here + // uint|long -> IntPtr cast on 32-bit platforms does expensive overflow checks not needed here (IntPtr)(int)((value * 0x07C4ACDDu) >> 27)); } @@ -216,13 +223,13 @@ namespace System { if (Lzcnt.X64.IsSupported) { - // Enforce conventional contract 0->0 (since Log(0) is undefined) + // Enforce conventional contract 0->0 (Log(0) is undefined) if (value == 0) { return 0; } - // Note that LZCNT contract specifies 0->64 + // LZCNT contract is 0->64 return 63 - (int)Lzcnt.X64.LeadingZeroCount(value); } @@ -235,5 +242,118 @@ namespace System return 32 + Log2(hi); } + + /// <summary> + /// Rotates the specified value left by the specified number of bits. + /// Similar in behavior to the x86 instruction ROL. + /// </summary> + /// <param name="value">The value to rotate.</param> + /// <param name="offset">The number of bits to rotate by. + /// Any value outside the range [0..31] is treated as congruent mod 32.</param> + /// <returns>The rotated value.</returns> + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static uint RotateLeft(uint value, int offset) + => (value << offset) | (value >> (32 - offset)); + + /// <summary> + /// Rotates the specified value left by the specified number of bits. + /// Similar in behavior to the x86 instruction ROL. + /// </summary> + /// <param name="value">The value to rotate.</param> + /// <param name="offset">The number of bits to rotate by. + /// Any value outside the range [0..63] is treated as congruent mod 64.</param> + /// <returns>The rotated value.</returns> + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static ulong RotateLeft(ulong value, int offset) + => (value << offset) | (value >> (64 - offset)); + + /// <summary> + /// Rotates the specified value right by the specified number of bits. + /// Similar in behavior to the x86 instruction ROR. + /// </summary> + /// <param name="value">The value to rotate.</param> + /// <param name="offset">The number of bits to rotate by. + /// Any value outside the range [0..31] is treated as congruent mod 32.</param> + /// <returns>The rotated value.</returns> + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static uint RotateRight(uint value, int offset) + => (value >> offset) | (value << (32 - offset)); + + /// <summary> + /// Rotates the specified value right by the specified number of bits. + /// Similar in behavior to the x86 instruction ROR. + /// </summary> + /// <param name="value">The value to rotate.</param> + /// <param name="offset">The number of bits to rotate by. + /// Any value outside the range [0..63] is treated as congruent mod 64.</param> + /// <returns>The rotated value.</returns> + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static ulong RotateRight(ulong value, int offset) + => (value >> offset) | (value << (64 - offset)); + + /// <summary> + /// Returns the population count (number of bits set) of a mask. + /// Similar in behavior to the x86 instruction POPCNT. + /// </summary> + /// <param name="value">The value.</param> + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static int PopCount(uint value) + { + if (Popcnt.IsSupported) + { + return (int)Popcnt.PopCount(value); + } + + return SoftwareFallback(value); + + int SoftwareFallback(uint v) + { + const uint c1 = 0x_55555555u; + const uint c2 = 0x_33333333u; + const uint c3 = 0x_0F0F0F0Fu; + const uint c4 = 0x_01010101u; + + v = v - ((v >> 1) & c1); + v = (v & c2) + ((v >> 2) & c2); + v = (((v + (v >> 4)) & c3) * c4) >> 24; + + return (int)v; + } + } + + /// <summary> + /// Returns the population count (number of bits set) of a mask. + /// Similar in behavior to the x86 instruction POPCNT. + /// </summary> + /// <param name="value">The value.</param> + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static int PopCount(ulong value) + { + if (Popcnt.X64.IsSupported) + { + return (int)Popcnt.X64.PopCount(value); + } + +#if BIT32 + return PopCount((uint)value) // lo + + PopCount((uint)(value >> 32)); // hi +#else + return SoftwareFallback(value); + + int SoftwareFallback(ulong v) + { + const ulong c1 = 0x_55555555_55555555ul; + const ulong c2 = 0x_33333333_33333333ul; + const ulong c3 = 0x_0F0F0F0F_0F0F0F0Ful; + const ulong c4 = 0x_01010101_01010101ul; + + v = v - ((v >> 1) & c1); + v = (v & c2) + ((v >> 2) & c2); + v = (((v + (v >> 4)) & c3) * c4) >> 56; + + return (int)v; + } +#endif + } } } diff --git a/src/System.Private.CoreLib/shared/System/Buffers/Binary/Reader.cs b/src/System.Private.CoreLib/shared/System/Buffers/Binary/Reader.cs index d76fbd8dae..b72d765a87 100644 --- a/src/System.Private.CoreLib/shared/System/Buffers/Binary/Reader.cs +++ b/src/System.Private.CoreLib/shared/System/Buffers/Binary/Reader.cs @@ -105,10 +105,8 @@ namespace System.Buffers.Binary // Testing shows that throughput increases if the AND // is performed before the ROL / ROR. - uint mask_xx_zz = (value & 0x00FF00FFU); - uint mask_ww_yy = (value & 0xFF00FF00U); - return ((mask_xx_zz >> 8) | (mask_xx_zz << 24)) - + ((mask_ww_yy << 8) | (mask_ww_yy >> 24)); + return BitOps.RotateRight(value & 0x00FF00FFu, 8) // xx zz + + BitOps.RotateLeft(value & 0xFF00FF00u, 8); // ww yy } /// <summary> diff --git a/src/System.Private.CoreLib/shared/System/Buffers/Text/FormattingHelpers.CountDigits.cs b/src/System.Private.CoreLib/shared/System/Buffers/Text/FormattingHelpers.CountDigits.cs index 387e08f4f6..16e737a0c9 100644 --- a/src/System.Private.CoreLib/shared/System/Buffers/Text/FormattingHelpers.CountDigits.cs +++ b/src/System.Private.CoreLib/shared/System/Buffers/Text/FormattingHelpers.CountDigits.cs @@ -103,11 +103,9 @@ namespace System.Buffers.Text [MethodImpl(MethodImplOptions.AggressiveInlining)] public static int CountHexDigits(ulong value) { - int right = 64 - BitOps.LeadingZeroCount(value | 1); - return (right + 3) >> 2; + return (64 - BitOps.LeadingZeroCount(value | 1) + 3) >> 2; } - // Counts the number of trailing '0' digits in a decimal number. // e.g., value = 0 => retVal = 0, valueWithoutTrailingZeros = 0 // value = 1234 => retVal = 0, valueWithoutTrailingZeros = 1234 diff --git a/src/System.Private.CoreLib/shared/System/Diagnostics/Tracing/EventSource.cs b/src/System.Private.CoreLib/shared/System/Diagnostics/Tracing/EventSource.cs index e4247fef5b..389ae10edd 100644 --- a/src/System.Private.CoreLib/shared/System/Diagnostics/Tracing/EventSource.cs +++ b/src/System.Private.CoreLib/shared/System/Diagnostics/Tracing/EventSource.cs @@ -1579,7 +1579,7 @@ namespace System.Diagnostics.Tracing { for (int i = 16; i != 80; i++) { - this.w[i] = Rol1((this.w[i - 3] ^ this.w[i - 8] ^ this.w[i - 14] ^ this.w[i - 16])); + this.w[i] = BitOps.RotateLeft((this.w[i - 3] ^ this.w[i - 8] ^ this.w[i - 14] ^ this.w[i - 16]), 1); } unchecked @@ -1594,28 +1594,28 @@ namespace System.Diagnostics.Tracing { const uint k = 0x5A827999; uint f = (b & c) | ((~b) & d); - uint temp = Rol5(a) + f + e + k + this.w[i]; e = d; d = c; c = Rol30(b); b = a; a = temp; + uint temp = BitOps.RotateLeft(a, 5) + f + e + k + this.w[i]; e = d; d = c; c = BitOps.RotateLeft(b, 30); b = a; a = temp; } for (int i = 20; i != 40; i++) { uint f = b ^ c ^ d; const uint k = 0x6ED9EBA1; - uint temp = Rol5(a) + f + e + k + this.w[i]; e = d; d = c; c = Rol30(b); b = a; a = temp; + uint temp = BitOps.RotateLeft(a, 5) + f + e + k + this.w[i]; e = d; d = c; c = BitOps.RotateLeft(b, 30); b = a; a = temp; } for (int i = 40; i != 60; i++) { uint f = (b & c) | (b & d) | (c & d); const uint k = 0x8F1BBCDC; - uint temp = Rol5(a) + f + e + k + this.w[i]; e = d; d = c; c = Rol30(b); b = a; a = temp; + uint temp = BitOps.RotateLeft(a, 5) + f + e + k + this.w[i]; e = d; d = c; c = BitOps.RotateLeft(b, 30); b = a; a = temp; } for (int i = 60; i != 80; i++) { uint f = b ^ c ^ d; const uint k = 0xCA62C1D6; - uint temp = Rol5(a) + f + e + k + this.w[i]; e = d; d = c; c = Rol30(b); b = a; a = temp; + uint temp = BitOps.RotateLeft(a, 5) + f + e + k + this.w[i]; e = d; d = c; c = BitOps.RotateLeft(b, 30); b = a; a = temp; } this.w[80] += a; @@ -1628,21 +1628,6 @@ namespace System.Diagnostics.Tracing this.length += 512; // 64 bytes == 512 bits this.pos = 0; } - - private static uint Rol1(uint input) - { - return (input << 1) | (input >> 31); - } - - private static uint Rol5(uint input) - { - return (input << 5) | (input >> 27); - } - - private static uint Rol30(uint input) - { - return (input << 30) | (input >> 2); - } } private static Guid GenerateGuidFromName(string name) diff --git a/src/System.Private.CoreLib/shared/System/HashCode.cs b/src/System.Private.CoreLib/shared/System/HashCode.cs index 4be57be02a..a534232330 100644 --- a/src/System.Private.CoreLib/shared/System/HashCode.cs +++ b/src/System.Private.CoreLib/shared/System/HashCode.cs @@ -253,10 +253,6 @@ namespace System } [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static uint Rol(uint value, int count) - => (value << count) | (value >> (32 - count)); - - [MethodImpl(MethodImplOptions.AggressiveInlining)] private static void Initialize(out uint v1, out uint v2, out uint v3, out uint v4) { v1 = s_seed + Prime1 + Prime2; @@ -268,23 +264,19 @@ namespace System [MethodImpl(MethodImplOptions.AggressiveInlining)] private static uint Round(uint hash, uint input) { - hash += input * Prime2; - hash = Rol(hash, 13); - hash *= Prime1; - return hash; + return BitOps.RotateLeft(hash + input * Prime2, 13) * Prime1; } [MethodImpl(MethodImplOptions.AggressiveInlining)] private static uint QueueRound(uint hash, uint queuedValue) { - hash += queuedValue * Prime3; - return Rol(hash, 17) * Prime4; + return BitOps.RotateLeft(hash + queuedValue * Prime3, 17) * Prime4; } [MethodImpl(MethodImplOptions.AggressiveInlining)] private static uint MixState(uint v1, uint v2, uint v3, uint v4) { - return Rol(v1, 1) + Rol(v2, 7) + Rol(v3, 12) + Rol(v4, 18); + return BitOps.RotateLeft(v1, 1) + BitOps.RotateLeft(v2, 7) + BitOps.RotateLeft(v3, 12) + BitOps.RotateLeft(v4, 18); } private static uint MixEmptyState() diff --git a/src/System.Private.CoreLib/shared/System/Marvin.cs b/src/System.Private.CoreLib/shared/System/Marvin.cs index dd8ac6c279..b9f6792c9a 100644 --- a/src/System.Private.CoreLib/shared/System/Marvin.cs +++ b/src/System.Private.CoreLib/shared/System/Marvin.cs @@ -102,28 +102,21 @@ namespace System uint p1 = rp1; p1 ^= p0; - p0 = _rotl(p0, 20); + p0 = BitOps.RotateLeft(p0, 20); p0 += p1; - p1 = _rotl(p1, 9); + p1 = BitOps.RotateLeft(p1, 9); p1 ^= p0; - p0 = _rotl(p0, 27); + p0 = BitOps.RotateLeft(p0, 27); p0 += p1; - p1 = _rotl(p1, 19); + p1 = BitOps.RotateLeft(p1, 19); rp0 = p0; rp1 = p1; } - [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static uint _rotl(uint value, int shift) - { - // This is expected to be optimized into a single rol (or ror with negated shift value) instruction - return (value << shift) | (value >> (32 - shift)); - } - public static ulong DefaultSeed { get; } = GenerateSeed(); private static unsafe ulong GenerateSeed() diff --git a/src/System.Private.CoreLib/shared/System/Numerics/Hashing/HashHelpers.cs b/src/System.Private.CoreLib/shared/System/Numerics/Hashing/HashHelpers.cs index fd9d708d36..29f8a561f2 100644 --- a/src/System.Private.CoreLib/shared/System/Numerics/Hashing/HashHelpers.cs +++ b/src/System.Private.CoreLib/shared/System/Numerics/Hashing/HashHelpers.cs @@ -10,10 +10,7 @@ namespace System.Numerics.Hashing public static int Combine(int h1, int h2) { - // RyuJIT optimizes this to use the ROL instruction - // Related GitHub pull request: dotnet/coreclr#1830 - uint rol5 = ((uint)h1 << 5) | ((uint)h1 >> 27); - return ((int)rol5 + h1) ^ h2; + return ((int)BitOps.RotateLeft((uint)h1, 5) + h1) ^ h2; } } } diff --git a/src/System.Private.CoreLib/shared/System/String.Comparison.cs b/src/System.Private.CoreLib/shared/System/String.Comparison.cs index 4b8a160d76..733319eec9 100644 --- a/src/System.Private.CoreLib/shared/System/String.Comparison.cs +++ b/src/System.Private.CoreLib/shared/System/String.Comparison.cs @@ -820,15 +820,15 @@ namespace System { length -= 4; // Where length is 4n-1 (e.g. 3,7,11,15,19) this additionally consumes the null terminator - hash1 = (((hash1 << 5) | (hash1 >> 27)) + hash1) ^ ptr[0]; - hash2 = (((hash2 << 5) | (hash2 >> 27)) + hash2) ^ ptr[1]; + hash1 = (BitOps.RotateLeft(hash1, 5) + hash1) ^ ptr[0]; + hash2 = (BitOps.RotateLeft(hash2, 5) + hash2) ^ ptr[1]; ptr += 2; } if (length > 0) { // Where length is 4n-3 (e.g. 1,5,9,13,17) this additionally consumes the null terminator - hash2 = (((hash2 << 5) | (hash2 >> 27)) + hash2) ^ ptr[0]; + hash2 = (BitOps.RotateLeft(hash2, 5) + hash2) ^ ptr[0]; } return (int)(hash1 + (hash2 * 1566083941)); |