summaryrefslogtreecommitdiff
path: root/src/System.Private.CoreLib
diff options
context:
space:
mode:
authorGrant <grant@jesanna.com>2019-02-14 13:51:03 -0800
committerJan Kotas <jkotas@microsoft.com>2019-02-14 13:51:03 -0800
commitb4f99f2d85956cbf20610b283eab24e9dabcf3be (patch)
tree06ccaa7d1fe77076e95627a55849b7428f6ebe8d /src/System.Private.CoreLib
parent87c28ebac2739d38a1de417d9b90b3f0f1c2b104 (diff)
downloadcoreclr-b4f99f2d85956cbf20610b283eab24e9dabcf3be.tar.gz
coreclr-b4f99f2d85956cbf20610b283eab24e9dabcf3be.tar.bz2
coreclr-b4f99f2d85956cbf20610b283eab24e9dabcf3be.zip
Consolidate implementation of Rotate and PopCount (#22584)
* Perf: BitOps.LeadingZeroCount * Remove redundant MSIL cast, conv.u8 * Use local functions for SoftwareFallback * Target BIT32/64
Diffstat (limited to 'src/System.Private.CoreLib')
-rw-r--r--src/System.Private.CoreLib/shared/System/BitOps.cs152
-rw-r--r--src/System.Private.CoreLib/shared/System/Buffers/Binary/Reader.cs6
-rw-r--r--src/System.Private.CoreLib/shared/System/Buffers/Text/FormattingHelpers.CountDigits.cs4
-rw-r--r--src/System.Private.CoreLib/shared/System/Diagnostics/Tracing/EventSource.cs25
-rw-r--r--src/System.Private.CoreLib/shared/System/HashCode.cs14
-rw-r--r--src/System.Private.CoreLib/shared/System/Marvin.cs15
-rw-r--r--src/System.Private.CoreLib/shared/System/Numerics/Hashing/HashHelpers.cs5
-rw-r--r--src/System.Private.CoreLib/shared/System/String.Comparison.cs6
8 files changed, 155 insertions, 72 deletions
diff --git a/src/System.Private.CoreLib/shared/System/BitOps.cs b/src/System.Private.CoreLib/shared/System/BitOps.cs
index cb54cf4fd7..021ee0bfc3 100644
--- a/src/System.Private.CoreLib/shared/System/BitOps.cs
+++ b/src/System.Private.CoreLib/shared/System/BitOps.cs
@@ -13,6 +13,11 @@ using Internal.Runtime.CompilerServices;
namespace System
{
+ /// <summary>
+ /// Utility methods for intrinsic bit-twiddling operations.
+ /// The methods use hardware intrinsics when available on the underlying platform,
+ /// otherwise they use optimized software fallbacks.
+ /// </summary>
internal static class BitOps
{
// C# no-alloc optimization that directly wraps the data section of the dll (similar to string constants)
@@ -53,11 +58,11 @@ namespace System
{
if (Bmi1.IsSupported)
{
- // Note that TZCNT contract specifies 0->32
+ // TZCNT contract is 0->32
return (int)Bmi1.TrailingZeroCount(value);
}
- // Software fallback has behavior 0->0, so special-case to match intrinsic path 0->32
+ // Unguarded fallback contract is 0->0
if (value == 0)
{
return 32;
@@ -67,8 +72,8 @@ namespace System
return Unsafe.AddByteOffset(
// Using deBruijn sequence, k=2, n=5 (2^5=32) : 0b_0000_0111_0111_1100_1011_0101_0011_0001u
ref MemoryMarshal.GetReference(s_TrailingZeroCountDeBruijn),
- // long -> IntPtr cast on 32-bit platforms is expensive - it does overflow checks not needed here
- (IntPtr)(int)(((uint)((value & -value) * 0x077CB531u)) >> 27)); // shift over long also expensive on 32-bit
+ // uint|long -> IntPtr cast on 32-bit platforms does expensive overflow checks not needed here
+ (IntPtr)(int)(((value & (uint)-(int)value) * 0x077CB531u) >> 27)); // Multi-cast mitigates redundant conv.u8
}
/// <summary>
@@ -90,7 +95,7 @@ namespace System
{
if (Bmi1.X64.IsSupported)
{
- // Note that TZCNT contract specifies 0->64
+ // TZCNT contract is 0->64
return (int)Bmi1.X64.TrailingZeroCount(value);
}
@@ -114,17 +119,17 @@ namespace System
{
if (Lzcnt.IsSupported)
{
- // Note that LZCNT contract specifies 0->32
+ // LZCNT contract is 0->32
return (int)Lzcnt.LeadingZeroCount(value);
}
- // Software fallback has behavior 0->0, so special-case to match intrinsic path 0->32
+ // Unguarded fallback contract is 0->31
if (value == 0)
{
return 32;
}
- return 31 - Log2(value);
+ return 31 - Log2SoftwareFallback(value);
}
/// <summary>
@@ -137,7 +142,7 @@ namespace System
{
if (Lzcnt.X64.IsSupported)
{
- // Note that LZCNT contract specifies 0->64
+ // LZCNT contract is 0->64
return (int)Lzcnt.X64.LeadingZeroCount(value);
}
@@ -168,30 +173,32 @@ namespace System
// 1000.. 0 31-0 31
if (Lzcnt.IsSupported)
{
- // Enforce conventional contract 0->0 (since Log(0) is undefined)
+ // Enforce conventional contract 0->0 (Log(0) is undefined)
if (value == 0)
{
return 0;
}
- // Note that LZCNT contract specifies 0->32
+ // LZCNT contract is 0->32
return 31 - (int)Lzcnt.LeadingZeroCount(value);
}
- // Already has contract 0->0, without branching
+ // Fallback contract is 0->0
return Log2SoftwareFallback(value);
}
/// <summary>
/// Returns the integer (floor) log of the specified value, base 2.
/// Note that by convention, input value 0 returns 0 since Log(0) is undefined.
- /// Does not incur branching.
+ /// Does not directly use any hardware intrinsics, nor does it incur branching.
/// </summary>
/// <param name="value">The value.</param>
private static int Log2SoftwareFallback(uint value)
{
// No AggressiveInlining due to large method size
+ // Has conventional contract 0->0 (Log(0) is undefined)
+ // Fill trailing zeros with ones, eg 00010010 becomes 00011111
value |= value >> 01;
value |= value >> 02;
value |= value >> 04;
@@ -202,7 +209,7 @@ namespace System
return Unsafe.AddByteOffset(
// Using deBruijn sequence, k=2, n=5 (2^5=32) : 0b_0000_0111_1100_0100_1010_1100_1101_1101u
ref MemoryMarshal.GetReference(s_Log2DeBruijn),
- // long -> IntPtr cast on 32-bit platforms is expensive - it does overflow checks not needed here
+ // uint|long -> IntPtr cast on 32-bit platforms does expensive overflow checks not needed here
(IntPtr)(int)((value * 0x07C4ACDDu) >> 27));
}
@@ -216,13 +223,13 @@ namespace System
{
if (Lzcnt.X64.IsSupported)
{
- // Enforce conventional contract 0->0 (since Log(0) is undefined)
+ // Enforce conventional contract 0->0 (Log(0) is undefined)
if (value == 0)
{
return 0;
}
- // Note that LZCNT contract specifies 0->64
+ // LZCNT contract is 0->64
return 63 - (int)Lzcnt.X64.LeadingZeroCount(value);
}
@@ -235,5 +242,118 @@ namespace System
return 32 + Log2(hi);
}
+
+ /// <summary>
+ /// Rotates the specified value left by the specified number of bits.
+ /// Similar in behavior to the x86 instruction ROL.
+ /// </summary>
+ /// <param name="value">The value to rotate.</param>
+ /// <param name="offset">The number of bits to rotate by.
+ /// Any value outside the range [0..31] is treated as congruent mod 32.</param>
+ /// <returns>The rotated value.</returns>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static uint RotateLeft(uint value, int offset)
+ => (value << offset) | (value >> (32 - offset));
+
+ /// <summary>
+ /// Rotates the specified value left by the specified number of bits.
+ /// Similar in behavior to the x86 instruction ROL.
+ /// </summary>
+ /// <param name="value">The value to rotate.</param>
+ /// <param name="offset">The number of bits to rotate by.
+ /// Any value outside the range [0..63] is treated as congruent mod 64.</param>
+ /// <returns>The rotated value.</returns>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static ulong RotateLeft(ulong value, int offset)
+ => (value << offset) | (value >> (64 - offset));
+
+ /// <summary>
+ /// Rotates the specified value right by the specified number of bits.
+ /// Similar in behavior to the x86 instruction ROR.
+ /// </summary>
+ /// <param name="value">The value to rotate.</param>
+ /// <param name="offset">The number of bits to rotate by.
+ /// Any value outside the range [0..31] is treated as congruent mod 32.</param>
+ /// <returns>The rotated value.</returns>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static uint RotateRight(uint value, int offset)
+ => (value >> offset) | (value << (32 - offset));
+
+ /// <summary>
+ /// Rotates the specified value right by the specified number of bits.
+ /// Similar in behavior to the x86 instruction ROR.
+ /// </summary>
+ /// <param name="value">The value to rotate.</param>
+ /// <param name="offset">The number of bits to rotate by.
+ /// Any value outside the range [0..63] is treated as congruent mod 64.</param>
+ /// <returns>The rotated value.</returns>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static ulong RotateRight(ulong value, int offset)
+ => (value >> offset) | (value << (64 - offset));
+
+ /// <summary>
+ /// Returns the population count (number of bits set) of a mask.
+ /// Similar in behavior to the x86 instruction POPCNT.
+ /// </summary>
+ /// <param name="value">The value.</param>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static int PopCount(uint value)
+ {
+ if (Popcnt.IsSupported)
+ {
+ return (int)Popcnt.PopCount(value);
+ }
+
+ return SoftwareFallback(value);
+
+ int SoftwareFallback(uint v)
+ {
+ const uint c1 = 0x_55555555u;
+ const uint c2 = 0x_33333333u;
+ const uint c3 = 0x_0F0F0F0Fu;
+ const uint c4 = 0x_01010101u;
+
+ v = v - ((v >> 1) & c1);
+ v = (v & c2) + ((v >> 2) & c2);
+ v = (((v + (v >> 4)) & c3) * c4) >> 24;
+
+ return (int)v;
+ }
+ }
+
+ /// <summary>
+ /// Returns the population count (number of bits set) of a mask.
+ /// Similar in behavior to the x86 instruction POPCNT.
+ /// </summary>
+ /// <param name="value">The value.</param>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static int PopCount(ulong value)
+ {
+ if (Popcnt.X64.IsSupported)
+ {
+ return (int)Popcnt.X64.PopCount(value);
+ }
+
+#if BIT32
+ return PopCount((uint)value) // lo
+ + PopCount((uint)(value >> 32)); // hi
+#else
+ return SoftwareFallback(value);
+
+ int SoftwareFallback(ulong v)
+ {
+ const ulong c1 = 0x_55555555_55555555ul;
+ const ulong c2 = 0x_33333333_33333333ul;
+ const ulong c3 = 0x_0F0F0F0F_0F0F0F0Ful;
+ const ulong c4 = 0x_01010101_01010101ul;
+
+ v = v - ((v >> 1) & c1);
+ v = (v & c2) + ((v >> 2) & c2);
+ v = (((v + (v >> 4)) & c3) * c4) >> 56;
+
+ return (int)v;
+ }
+#endif
+ }
}
}
diff --git a/src/System.Private.CoreLib/shared/System/Buffers/Binary/Reader.cs b/src/System.Private.CoreLib/shared/System/Buffers/Binary/Reader.cs
index d76fbd8dae..b72d765a87 100644
--- a/src/System.Private.CoreLib/shared/System/Buffers/Binary/Reader.cs
+++ b/src/System.Private.CoreLib/shared/System/Buffers/Binary/Reader.cs
@@ -105,10 +105,8 @@ namespace System.Buffers.Binary
// Testing shows that throughput increases if the AND
// is performed before the ROL / ROR.
- uint mask_xx_zz = (value & 0x00FF00FFU);
- uint mask_ww_yy = (value & 0xFF00FF00U);
- return ((mask_xx_zz >> 8) | (mask_xx_zz << 24))
- + ((mask_ww_yy << 8) | (mask_ww_yy >> 24));
+ return BitOps.RotateRight(value & 0x00FF00FFu, 8) // xx zz
+ + BitOps.RotateLeft(value & 0xFF00FF00u, 8); // ww yy
}
/// <summary>
diff --git a/src/System.Private.CoreLib/shared/System/Buffers/Text/FormattingHelpers.CountDigits.cs b/src/System.Private.CoreLib/shared/System/Buffers/Text/FormattingHelpers.CountDigits.cs
index 387e08f4f6..16e737a0c9 100644
--- a/src/System.Private.CoreLib/shared/System/Buffers/Text/FormattingHelpers.CountDigits.cs
+++ b/src/System.Private.CoreLib/shared/System/Buffers/Text/FormattingHelpers.CountDigits.cs
@@ -103,11 +103,9 @@ namespace System.Buffers.Text
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static int CountHexDigits(ulong value)
{
- int right = 64 - BitOps.LeadingZeroCount(value | 1);
- return (right + 3) >> 2;
+ return (64 - BitOps.LeadingZeroCount(value | 1) + 3) >> 2;
}
-
// Counts the number of trailing '0' digits in a decimal number.
// e.g., value = 0 => retVal = 0, valueWithoutTrailingZeros = 0
// value = 1234 => retVal = 0, valueWithoutTrailingZeros = 1234
diff --git a/src/System.Private.CoreLib/shared/System/Diagnostics/Tracing/EventSource.cs b/src/System.Private.CoreLib/shared/System/Diagnostics/Tracing/EventSource.cs
index e4247fef5b..389ae10edd 100644
--- a/src/System.Private.CoreLib/shared/System/Diagnostics/Tracing/EventSource.cs
+++ b/src/System.Private.CoreLib/shared/System/Diagnostics/Tracing/EventSource.cs
@@ -1579,7 +1579,7 @@ namespace System.Diagnostics.Tracing
{
for (int i = 16; i != 80; i++)
{
- this.w[i] = Rol1((this.w[i - 3] ^ this.w[i - 8] ^ this.w[i - 14] ^ this.w[i - 16]));
+ this.w[i] = BitOps.RotateLeft((this.w[i - 3] ^ this.w[i - 8] ^ this.w[i - 14] ^ this.w[i - 16]), 1);
}
unchecked
@@ -1594,28 +1594,28 @@ namespace System.Diagnostics.Tracing
{
const uint k = 0x5A827999;
uint f = (b & c) | ((~b) & d);
- uint temp = Rol5(a) + f + e + k + this.w[i]; e = d; d = c; c = Rol30(b); b = a; a = temp;
+ uint temp = BitOps.RotateLeft(a, 5) + f + e + k + this.w[i]; e = d; d = c; c = BitOps.RotateLeft(b, 30); b = a; a = temp;
}
for (int i = 20; i != 40; i++)
{
uint f = b ^ c ^ d;
const uint k = 0x6ED9EBA1;
- uint temp = Rol5(a) + f + e + k + this.w[i]; e = d; d = c; c = Rol30(b); b = a; a = temp;
+ uint temp = BitOps.RotateLeft(a, 5) + f + e + k + this.w[i]; e = d; d = c; c = BitOps.RotateLeft(b, 30); b = a; a = temp;
}
for (int i = 40; i != 60; i++)
{
uint f = (b & c) | (b & d) | (c & d);
const uint k = 0x8F1BBCDC;
- uint temp = Rol5(a) + f + e + k + this.w[i]; e = d; d = c; c = Rol30(b); b = a; a = temp;
+ uint temp = BitOps.RotateLeft(a, 5) + f + e + k + this.w[i]; e = d; d = c; c = BitOps.RotateLeft(b, 30); b = a; a = temp;
}
for (int i = 60; i != 80; i++)
{
uint f = b ^ c ^ d;
const uint k = 0xCA62C1D6;
- uint temp = Rol5(a) + f + e + k + this.w[i]; e = d; d = c; c = Rol30(b); b = a; a = temp;
+ uint temp = BitOps.RotateLeft(a, 5) + f + e + k + this.w[i]; e = d; d = c; c = BitOps.RotateLeft(b, 30); b = a; a = temp;
}
this.w[80] += a;
@@ -1628,21 +1628,6 @@ namespace System.Diagnostics.Tracing
this.length += 512; // 64 bytes == 512 bits
this.pos = 0;
}
-
- private static uint Rol1(uint input)
- {
- return (input << 1) | (input >> 31);
- }
-
- private static uint Rol5(uint input)
- {
- return (input << 5) | (input >> 27);
- }
-
- private static uint Rol30(uint input)
- {
- return (input << 30) | (input >> 2);
- }
}
private static Guid GenerateGuidFromName(string name)
diff --git a/src/System.Private.CoreLib/shared/System/HashCode.cs b/src/System.Private.CoreLib/shared/System/HashCode.cs
index 4be57be02a..a534232330 100644
--- a/src/System.Private.CoreLib/shared/System/HashCode.cs
+++ b/src/System.Private.CoreLib/shared/System/HashCode.cs
@@ -253,10 +253,6 @@ namespace System
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
- private static uint Rol(uint value, int count)
- => (value << count) | (value >> (32 - count));
-
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
private static void Initialize(out uint v1, out uint v2, out uint v3, out uint v4)
{
v1 = s_seed + Prime1 + Prime2;
@@ -268,23 +264,19 @@ namespace System
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static uint Round(uint hash, uint input)
{
- hash += input * Prime2;
- hash = Rol(hash, 13);
- hash *= Prime1;
- return hash;
+ return BitOps.RotateLeft(hash + input * Prime2, 13) * Prime1;
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static uint QueueRound(uint hash, uint queuedValue)
{
- hash += queuedValue * Prime3;
- return Rol(hash, 17) * Prime4;
+ return BitOps.RotateLeft(hash + queuedValue * Prime3, 17) * Prime4;
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static uint MixState(uint v1, uint v2, uint v3, uint v4)
{
- return Rol(v1, 1) + Rol(v2, 7) + Rol(v3, 12) + Rol(v4, 18);
+ return BitOps.RotateLeft(v1, 1) + BitOps.RotateLeft(v2, 7) + BitOps.RotateLeft(v3, 12) + BitOps.RotateLeft(v4, 18);
}
private static uint MixEmptyState()
diff --git a/src/System.Private.CoreLib/shared/System/Marvin.cs b/src/System.Private.CoreLib/shared/System/Marvin.cs
index dd8ac6c279..b9f6792c9a 100644
--- a/src/System.Private.CoreLib/shared/System/Marvin.cs
+++ b/src/System.Private.CoreLib/shared/System/Marvin.cs
@@ -102,28 +102,21 @@ namespace System
uint p1 = rp1;
p1 ^= p0;
- p0 = _rotl(p0, 20);
+ p0 = BitOps.RotateLeft(p0, 20);
p0 += p1;
- p1 = _rotl(p1, 9);
+ p1 = BitOps.RotateLeft(p1, 9);
p1 ^= p0;
- p0 = _rotl(p0, 27);
+ p0 = BitOps.RotateLeft(p0, 27);
p0 += p1;
- p1 = _rotl(p1, 19);
+ p1 = BitOps.RotateLeft(p1, 19);
rp0 = p0;
rp1 = p1;
}
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- private static uint _rotl(uint value, int shift)
- {
- // This is expected to be optimized into a single rol (or ror with negated shift value) instruction
- return (value << shift) | (value >> (32 - shift));
- }
-
public static ulong DefaultSeed { get; } = GenerateSeed();
private static unsafe ulong GenerateSeed()
diff --git a/src/System.Private.CoreLib/shared/System/Numerics/Hashing/HashHelpers.cs b/src/System.Private.CoreLib/shared/System/Numerics/Hashing/HashHelpers.cs
index fd9d708d36..29f8a561f2 100644
--- a/src/System.Private.CoreLib/shared/System/Numerics/Hashing/HashHelpers.cs
+++ b/src/System.Private.CoreLib/shared/System/Numerics/Hashing/HashHelpers.cs
@@ -10,10 +10,7 @@ namespace System.Numerics.Hashing
public static int Combine(int h1, int h2)
{
- // RyuJIT optimizes this to use the ROL instruction
- // Related GitHub pull request: dotnet/coreclr#1830
- uint rol5 = ((uint)h1 << 5) | ((uint)h1 >> 27);
- return ((int)rol5 + h1) ^ h2;
+ return ((int)BitOps.RotateLeft((uint)h1, 5) + h1) ^ h2;
}
}
}
diff --git a/src/System.Private.CoreLib/shared/System/String.Comparison.cs b/src/System.Private.CoreLib/shared/System/String.Comparison.cs
index 4b8a160d76..733319eec9 100644
--- a/src/System.Private.CoreLib/shared/System/String.Comparison.cs
+++ b/src/System.Private.CoreLib/shared/System/String.Comparison.cs
@@ -820,15 +820,15 @@ namespace System
{
length -= 4;
// Where length is 4n-1 (e.g. 3,7,11,15,19) this additionally consumes the null terminator
- hash1 = (((hash1 << 5) | (hash1 >> 27)) + hash1) ^ ptr[0];
- hash2 = (((hash2 << 5) | (hash2 >> 27)) + hash2) ^ ptr[1];
+ hash1 = (BitOps.RotateLeft(hash1, 5) + hash1) ^ ptr[0];
+ hash2 = (BitOps.RotateLeft(hash2, 5) + hash2) ^ ptr[1];
ptr += 2;
}
if (length > 0)
{
// Where length is 4n-3 (e.g. 1,5,9,13,17) this additionally consumes the null terminator
- hash2 = (((hash2 << 5) | (hash2 >> 27)) + hash2) ^ ptr[0];
+ hash2 = (BitOps.RotateLeft(hash2, 5) + hash2) ^ ptr[0];
}
return (int)(hash1 + (hash2 * 1566083941));