summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJack Pappas <jack-pappas@users.noreply.github.com>2018-11-01 01:50:23 -0400
committerJan Kotas <jkotas@microsoft.com>2018-10-31 22:50:23 -0700
commit6fe7effad7fddf8d5dc0b3ac3d5be5ec80e158ff (patch)
treeb8dafadb9aa2c9dcdf77f9dad84d3c79a42dedef
parentd378770b6c21c5b74692cc251a3f8c58b6f377db (diff)
downloadcoreclr-6fe7effad7fddf8d5dc0b3ac3d5be5ec80e158ff.tar.gz
coreclr-6fe7effad7fddf8d5dc0b3ac3d5be5ec80e158ff.tar.bz2
coreclr-6fe7effad7fddf8d5dc0b3ac3d5be5ec80e158ff.zip
Make BitScanForward/BitScanForward64 PAL wrappers branchless. (#20412)
The BitScanForward/BitScanForward64 wrapper functions from the PAL and gcenv have been modified so they're faster (and branchless), while also adhering more closely to the behavior of the MSVC intrinsics. Use _BitScanForward64 when targeting 64-bit Windows. The _WIN32 macro is always defined by MSVC, even when targeting 64-bit versions of Windows. Use the _WIN64 macro instead to check whether the build is targeting 64-bit Windows, and if so, use the _BitScanForward64 intrinsic for the BitScanForward64 wrapper instead of the 32-bit-based fallback.
-rw-r--r--src/gc/env/gcenv.base.h44
-rw-r--r--src/pal/inc/pal.h38
2 files changed, 37 insertions, 45 deletions
diff --git a/src/gc/env/gcenv.base.h b/src/gc/env/gcenv.base.h
index 15a81d77ae..da66e9b8a3 100644
--- a/src/gc/env/gcenv.base.h
+++ b/src/gc/env/gcenv.base.h
@@ -220,34 +220,40 @@ typedef DWORD (WINAPI *PTHREAD_START_ROUTINE)(void* lpThreadParameter);
#ifdef _MSC_VER
#pragma intrinsic(_BitScanForward)
-#if WIN64
+#if _WIN64
#pragma intrinsic(_BitScanForward64)
#endif
#endif // _MSC_VER
// Cross-platform wrapper for the _BitScanForward compiler intrinsic.
+// A value is unconditionally stored through the bitIndex argument,
+// but callers should only rely on it when the function returns TRUE;
+// otherwise, the stored value is undefined and varies by implementation
+// and hardware platform.
inline uint8_t BitScanForward(uint32_t *bitIndex, uint32_t mask)
{
#ifdef _MSC_VER
return _BitScanForward((unsigned long*)bitIndex, mask);
#else // _MSC_VER
- unsigned char ret = FALSE;
- int iIndex = __builtin_ffsl(mask);
- if (iIndex != 0)
- {
- *bitIndex = (uint32_t)(iIndex - 1);
- ret = TRUE;
- }
-
- return ret;
+ int iIndex = __builtin_ffs(mask);
+ *bitIndex = static_cast<uint32_t>(iIndex - 1);
+ // Both GCC and Clang generate better, smaller code if we check whether the
+ // mask was/is zero rather than the equivalent check that iIndex is zero.
+ return mask != 0 ? TRUE : FALSE;
#endif // _MSC_VER
}
// Cross-platform wrapper for the _BitScanForward64 compiler intrinsic.
+// A value is unconditionally stored through the bitIndex argument,
+// but callers should only rely on it when the function returns TRUE;
+// otherwise, the stored value is undefined and varies by implementation
+// and hardware platform.
inline uint8_t BitScanForward64(uint32_t *bitIndex, uint64_t mask)
{
#ifdef _MSC_VER
- #if _WIN32
+ #if _WIN64
+ return _BitScanForward64((unsigned long*)bitIndex, mask);
+ #else
// MSVC targeting a 32-bit target does not support this intrinsic.
// We can fake it using two successive invocations of _BitScanForward.
uint32_t hi = (mask >> 32) & 0xFFFFFFFF;
@@ -265,19 +271,13 @@ inline uint8_t BitScanForward64(uint32_t *bitIndex, uint64_t mask)
}
return result;
- #else
- return _BitScanForward64((unsigned long*)bitIndex, mask);
- #endif // _WIN32
+ #endif // _WIN64
#else
- unsigned char ret = FALSE;
int iIndex = __builtin_ffsll(mask);
- if (iIndex != 0)
- {
- *bitIndex = (uint32_t)(iIndex - 1);
- ret = TRUE;
- }
-
- return ret;
+ *bitIndex = static_cast<uint32_t>(iIndex - 1);
+ // Both GCC and Clang generate better, smaller code if we check whether the
+ // mask was/is zero rather than the equivalent check that iIndex is zero.
+ return mask != 0 ? TRUE : FALSE;
#endif // _MSC_VER
}
diff --git a/src/pal/inc/pal.h b/src/pal/inc/pal.h
index f117a6eb9c..690804013e 100644
--- a/src/pal/inc/pal.h
+++ b/src/pal/inc/pal.h
@@ -3254,10 +3254,10 @@ typedef EXCEPTION_DISPOSITION (PALAPI *PVECTORED_EXCEPTION_HANDLER)(
// Define BitScanForward64 and BitScanForward
// Per MSDN, BitScanForward64 will search the mask data from LSB to MSB for a set bit.
-// If one is found, its bit position is returned in the out PDWORD argument and 1 is returned.
-// Otherwise, 0 is returned.
+// If one is found, its bit position is stored in the out PDWORD argument and 1 is returned;
+// otherwise, an undefined value is stored in the out PDWORD argument and 0 is returned.
//
-// On GCC, the equivalent function is __builtin_ffsl. It returns 1+index of the least
+// On GCC, the equivalent function is __builtin_ffsll. It returns 1+index of the least
// significant set bit, or 0 if if mask is zero.
//
// The same is true for BitScanForward, except that the GCC function is __builtin_ffs.
@@ -3270,16 +3270,12 @@ BitScanForward(
IN OUT PDWORD Index,
IN UINT qwMask)
{
- unsigned char bRet = FALSE;
- int iIndex = __builtin_ffsl(qwMask);
- if (iIndex != 0)
- {
- // Set the Index after deducting unity
- *Index = (DWORD)(iIndex - 1);
- bRet = TRUE;
- }
-
- return bRet;
+ int iIndex = __builtin_ffs(qwMask);
+ // Set the Index after deducting unity
+ *Index = (DWORD)(iIndex - 1);
+ // Both GCC and Clang generate better, smaller code if we check whether the
+ // mask was/is zero rather than the equivalent check that iIndex is zero.
+ return qwMask != 0 ? TRUE : FALSE;
}
EXTERN_C
@@ -3291,16 +3287,12 @@ BitScanForward64(
IN OUT PDWORD Index,
IN UINT64 qwMask)
{
- unsigned char bRet = FALSE;
- int iIndex = __builtin_ffsl(qwMask);
- if (iIndex != 0)
- {
- // Set the Index after deducting unity
- *Index = (DWORD)(iIndex - 1);
- bRet = TRUE;
- }
-
- return bRet;
+ int iIndex = __builtin_ffsll(qwMask);
+ // Set the Index after deducting unity
+ *Index = (DWORD)(iIndex - 1);
+ // Both GCC and Clang generate better, smaller code if we check whether the
+ // mask was/is zero rather than the equivalent check that iIndex is zero.
+ return qwMask != 0 ? TRUE : FALSE;
}
FORCEINLINE void PAL_ArmInterlockedOperationBarrier()