summaryrefslogtreecommitdiff
path: root/src/jit/bitsetasuint64.h
diff options
context:
space:
mode:
authorBruce Forstall <brucefo@microsoft.com>2017-05-19 13:58:35 -0700
committerGitHub <noreply@github.com>2017-05-19 13:58:35 -0700
commit698a89e6e2d6866666dd942bdf7506f96fb8310e (patch)
tree60e7a4d7b8942793d7772281a6dfbf891a49be42 /src/jit/bitsetasuint64.h
parent6ce65bd1bc1570e4e37b6f6c7c5acb640e6dcac5 (diff)
downloadcoreclr-698a89e6e2d6866666dd942bdf7506f96fb8310e.tar.gz
coreclr-698a89e6e2d6866666dd942bdf7506f96fb8310e.tar.bz2
coreclr-698a89e6e2d6866666dd942bdf7506f96fb8310e.zip
Improve BitVecOps<>::Iter::NextElem (#11696)
* Improve BitVecOps<>::Iter::NextElem: Tweak the implementation to reduce the number of instructions executed in the hot path. Also, don't pass "env" to NextElem; it can be stored by Init() if required. For non-inlined calls, this saves setting up one argument. Use a `m_bsEnd` end condition. This eliminates the need to handle short/long differently, and reduces conditions when updating the current bits to iterate over in the long case. Overall, pin shows this reduces the instruction count of superpmi over a minopts test run by 2.6% (NextElem is very hot). Also, fix BitSetAsUInt64 NextElem() iterator: it should store and update its own bit count, and not depend on the value passed in to be the correct latest bit count.
Diffstat (limited to 'src/jit/bitsetasuint64.h')
-rw-r--r--src/jit/bitsetasuint64.h15
1 files changed, 10 insertions, 5 deletions
diff --git a/src/jit/bitsetasuint64.h b/src/jit/bitsetasuint64.h
index aec4d05c35..f88f6d63af 100644
--- a/src/jit/bitsetasuint64.h
+++ b/src/jit/bitsetasuint64.h
@@ -208,23 +208,28 @@ public:
{
UINT64 m_bits;
+ // The number of bits that have already been iterated over (set or clear).
+ unsigned m_bitNum;
+
public:
- Iter(Env env, const UINT64& bits) : m_bits(bits)
+ Iter(Env env, const UINT64& bits) : m_bits(bits), m_bitNum(0)
{
}
- bool NextElem(Env env, unsigned* pElem)
+ bool NextElem(unsigned* pElem)
{
+ // TODO-Throughtput: use BitScanForward64() intrinsic (see short/long implementation).
if (m_bits)
{
- unsigned bitNum = *pElem;
+ unsigned bitNum = m_bitNum;
while ((m_bits & 0x1) == 0)
{
bitNum++;
m_bits >>= 1;
}
- *pElem = bitNum;
- m_bits &= ~0x1;
+ *pElem = bitNum;
+ m_bitNum = bitNum + 1;
+ m_bits >>= 1;
return true;
}
else