diff options
author | Jack Pappas <jack-pappas@users.noreply.github.com> | 2018-11-01 01:50:23 -0400 |
---|---|---|
committer | Jan Kotas <jkotas@microsoft.com> | 2018-10-31 22:50:23 -0700 |
commit | 6fe7effad7fddf8d5dc0b3ac3d5be5ec80e158ff (patch) | |
tree | b8dafadb9aa2c9dcdf77f9dad84d3c79a42dedef /src/pal/inc/pal.h | |
parent | d378770b6c21c5b74692cc251a3f8c58b6f377db (diff) | |
download | coreclr-6fe7effad7fddf8d5dc0b3ac3d5be5ec80e158ff.tar.gz coreclr-6fe7effad7fddf8d5dc0b3ac3d5be5ec80e158ff.tar.bz2 coreclr-6fe7effad7fddf8d5dc0b3ac3d5be5ec80e158ff.zip |
Make BitScanForward/BitScanForward64 PAL wrappers branchless. (#20412)
The BitScanForward/BitScanForward64 wrapper functions from the PAL and
gcenv have been modified so they're faster (and branchless), while also
adhering more closely to the behavior of the MSVC intrinsics.
Use _BitScanForward64 when targeting 64-bit Windows.
The _WIN32 macro is always defined by MSVC, even when targeting 64-bit
versions of Windows. Use the _WIN64 macro instead to check whether the
build is targeting 64-bit Windows, and if so, use the _BitScanForward64
intrinsic for the BitScanForward64 wrapper instead of the 32-bit-based
fallback.
Diffstat (limited to 'src/pal/inc/pal.h')
-rw-r--r-- | src/pal/inc/pal.h | 38 |
1 files changed, 15 insertions, 23 deletions
diff --git a/src/pal/inc/pal.h b/src/pal/inc/pal.h index f117a6eb9c..690804013e 100644 --- a/src/pal/inc/pal.h +++ b/src/pal/inc/pal.h @@ -3254,10 +3254,10 @@ typedef EXCEPTION_DISPOSITION (PALAPI *PVECTORED_EXCEPTION_HANDLER)( // Define BitScanForward64 and BitScanForward // Per MSDN, BitScanForward64 will search the mask data from LSB to MSB for a set bit. -// If one is found, its bit position is returned in the out PDWORD argument and 1 is returned. -// Otherwise, 0 is returned. +// If one is found, its bit position is stored in the out PDWORD argument and 1 is returned; +// otherwise, an undefined value is stored in the out PDWORD argument and 0 is returned. // -// On GCC, the equivalent function is __builtin_ffsl. It returns 1+index of the least +// On GCC, the equivalent function is __builtin_ffsll. It returns 1+index of the least // significant set bit, or 0 if if mask is zero. // // The same is true for BitScanForward, except that the GCC function is __builtin_ffs. @@ -3270,16 +3270,12 @@ BitScanForward( IN OUT PDWORD Index, IN UINT qwMask) { - unsigned char bRet = FALSE; - int iIndex = __builtin_ffsl(qwMask); - if (iIndex != 0) - { - // Set the Index after deducting unity - *Index = (DWORD)(iIndex - 1); - bRet = TRUE; - } - - return bRet; + int iIndex = __builtin_ffs(qwMask); + // Set the Index after deducting unity + *Index = (DWORD)(iIndex - 1); + // Both GCC and Clang generate better, smaller code if we check whether the + // mask was/is zero rather than the equivalent check that iIndex is zero. + return qwMask != 0 ? TRUE : FALSE; } EXTERN_C @@ -3291,16 +3287,12 @@ BitScanForward64( IN OUT PDWORD Index, IN UINT64 qwMask) { - unsigned char bRet = FALSE; - int iIndex = __builtin_ffsl(qwMask); - if (iIndex != 0) - { - // Set the Index after deducting unity - *Index = (DWORD)(iIndex - 1); - bRet = TRUE; - } - - return bRet; + int iIndex = __builtin_ffsll(qwMask); + // Set the Index after deducting unity + *Index = (DWORD)(iIndex - 1); + // Both GCC and Clang generate better, smaller code if we check whether the + // mask was/is zero rather than the equivalent check that iIndex is zero. + return qwMask != 0 ? TRUE : FALSE; } FORCEINLINE void PAL_ArmInterlockedOperationBarrier() |