summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMike Danes <onemihaid@hotmail.com>2017-04-15 21:43:14 +0300
committerMike Danes <onemihaid@hotmail.com>2017-06-07 19:29:37 +0300
commit64a4d4969363dd8a96afece3ff48e1b20cccba68 (patch)
tree7c05f325dad5a1bc612dc85f27e8359bf7a6c491
parent5b9680fcc9b6fe9429f4a649bf5215de273fcbec (diff)
downloadcoreclr-64a4d4969363dd8a96afece3ff48e1b20cccba68.tar.gz
coreclr-64a4d4969363dd8a96afece3ff48e1b20cccba68.tar.bz2
coreclr-64a4d4969363dd8a96afece3ff48e1b20cccba68.zip
Do unsigned magic division
-rw-r--r--src/jit/codegenxarch.cpp4
-rw-r--r--src/jit/jitstd/type_traits.h27
-rw-r--r--src/jit/lower.cpp207
-rw-r--r--src/jit/lower.h2
-rw-r--r--src/jit/utils.cpp177
-rw-r--r--src/jit/utils.h8
6 files changed, 393 insertions, 32 deletions
diff --git a/src/jit/codegenxarch.cpp b/src/jit/codegenxarch.cpp
index 6bb1242f7d..e2f31874c4 100644
--- a/src/jit/codegenxarch.cpp
+++ b/src/jit/codegenxarch.cpp
@@ -535,10 +535,6 @@ void CodeGen::genCodeForNegNot(GenTree* tree)
// Generate code to get the high N bits of a N*N=2N bit multiplication result
void CodeGen::genCodeForMulHi(GenTreeOp* treeNode)
{
- if (treeNode->OperGet() == GT_MULHI)
- {
- assert(!(treeNode->gtFlags & GTF_UNSIGNED));
- }
assert(!treeNode->gtOverflowEx());
regNumber targetReg = treeNode->gtRegNum;
diff --git a/src/jit/jitstd/type_traits.h b/src/jit/jitstd/type_traits.h
index f0f8518c40..06c8ad170f 100644
--- a/src/jit/jitstd/type_traits.h
+++ b/src/jit/jitstd/type_traits.h
@@ -194,4 +194,31 @@ struct make_unsigned<__int64>
typedef unsigned __int64 type;
};
+template<typename Type1>
+struct make_signed
+{
+};
+
+template<>
+struct make_signed<unsigned int>
+{
+ typedef signed int type;
+};
+
+#ifndef _HOST_UNIX_
+
+template<>
+struct make_signed<unsigned long>
+{
+ typedef signed long type;
+};
+
+#endif // !_HOST_UNIX_
+
+template<>
+struct make_signed<unsigned __int64>
+{
+ typedef signed __int64 type;
+};
+
} // namespace jit_std
diff --git a/src/jit/lower.cpp b/src/jit/lower.cpp
index 5c46d3d235..70b46f054e 100644
--- a/src/jit/lower.cpp
+++ b/src/jit/lower.cpp
@@ -144,7 +144,7 @@ GenTree* Lowering::LowerNode(GenTree* node)
case GT_UDIV:
case GT_UMOD:
- LowerUnsignedDivOrMod(node);
+ return LowerUnsignedDivOrMod(node->AsOp());
break;
case GT_DIV:
@@ -3979,46 +3979,199 @@ GenTree* Lowering::LowerAdd(GenTree* node)
}
//------------------------------------------------------------------------
-// LowerUnsignedDivOrMod: transform GT_UDIV/GT_UMOD nodes with a const power of 2
-// divisor into GT_RSZ/GT_AND nodes.
+// LowerUnsignedDivOrMod: Lowers a GT_UDIV/GT_UMOD node.
//
// Arguments:
-// node - pointer to the GT_UDIV/GT_UMOD node to be lowered
+// divMod - pointer to the GT_UDIV/GT_UMOD node to be lowered
//
-void Lowering::LowerUnsignedDivOrMod(GenTree* node)
+// Notes:
+// - Transform UDIV/UMOD by power of 2 into RSZ/AND
+// - Transform UDIV by constant >= 2^(N-1) into GE
+// - Transform UDIV/UMOD by constant >= 3 into "magic division"
+
+GenTree* Lowering::LowerUnsignedDivOrMod(GenTreeOp* divMod)
{
- assert((node->OperGet() == GT_UDIV) || (node->OperGet() == GT_UMOD));
+ assert(divMod->OperIs(GT_UDIV, GT_UMOD));
- GenTree* divisor = node->gtGetOp2();
- GenTree* dividend = node->gtGetOp1();
+ GenTree* next = divMod->gtNext;
+ GenTree* dividend = divMod->gtGetOp1();
+ GenTree* divisor = divMod->gtGetOp2();
- if (divisor->IsCnsIntOrI()
-#ifdef _TARGET_X86_
- && (dividend->OperGet() != GT_LONG)
+#if !defined(_TARGET_64BIT_)
+ if (dividend->OperIs(GT_LONG))
+ {
+ return next;
+ }
#endif
- )
+
+ if (!divisor->IsCnsIntOrI())
{
- size_t divisorValue = static_cast<size_t>(divisor->gtIntCon.IconValue());
+ return next;
+ }
- if (isPow2(divisorValue))
+ if (dividend->IsCnsIntOrI())
+ {
+ // We shouldn't see a divmod with constant operands here but if we do then it's likely
+ // because optimizations are disabled or it's a case that's supposed to throw an exception.
+ // Don't optimize this.
+ return next;
+ }
+
+ const var_types type = divMod->TypeGet();
+ assert((type == TYP_INT) || (type == TYP_I_IMPL));
+
+ size_t divisorValue = static_cast<size_t>(divisor->AsIntCon()->IconValue());
+
+ if (type == TYP_INT)
+ {
+ // Clear up the upper 32 bits of the value, they may be set to 1 because constants
+ // are treated as signed and stored in ssize_t which is 64 bit in size on 64 bit targets.
+ divisorValue &= UINT32_MAX;
+ }
+
+ if (divisorValue == 0)
+ {
+ return next;
+ }
+
+ const bool isDiv = divMod->OperIs(GT_UDIV);
+
+ if (isPow2(divisorValue))
+ {
+ genTreeOps newOper;
+
+ if (isDiv)
+ {
+ newOper = GT_RSZ;
+ divisorValue = genLog2(divisorValue);
+ }
+ else
{
- genTreeOps newOper;
+ newOper = GT_AND;
+ divisorValue -= 1;
+ }
- if (node->OperGet() == GT_UDIV)
- {
- newOper = GT_RSZ;
- divisorValue = genLog2(divisorValue);
- }
- else
- {
- newOper = GT_AND;
- divisorValue -= 1;
- }
+ divMod->SetOper(newOper);
+ divisor->AsIntCon()->SetIconValue(divisorValue);
+
+ return next;
+ }
+
+ if (isDiv)
+ {
+ // If the divisor is greater or equal than 2^(N - 1) then the result is 1
+ // iff the dividend is greater or equal than the divisor.
+ if (((type == TYP_INT) && (divisorValue > (UINT32_MAX / 2))) ||
+ ((type == TYP_LONG) && (divisorValue > (UINT64_MAX / 2))))
+ {
+ divMod->SetOper(GT_GE);
+ divMod->gtFlags |= GTF_UNSIGNED;
+ return next;
+ }
+ }
+
+// TODO-ARM-CQ: Currently there's no GT_MULHI for ARM32/64
+#ifdef _TARGET_XARCH_
+ if (!comp->opts.MinOpts() && (divisorValue >= 3))
+ {
+ size_t magic;
+ bool add;
+ int shift;
- node->SetOper(newOper);
- divisor->gtIntCon.SetIconValue(divisorValue);
+ if (type == TYP_INT)
+ {
+ magic = MagicDivide::GetUnsigned32Magic(static_cast<uint32_t>(divisorValue), &add, &shift);
}
+ else
+ {
+#ifdef _TARGET_64BIT_
+ magic = MagicDivide::GetUnsigned64Magic(static_cast<uint64_t>(divisorValue), &add, &shift);
+#else
+ unreached();
+#endif
+ }
+
+ // Depending on the "add" flag returned by GetUnsignedMagicNumberForDivide we need to generate:
+ // add == false (when divisor == 3 for example):
+ // div = (dividend MULHI magic) RSZ shift
+ // add == true (when divisor == 7 for example):
+ // mulhi = dividend MULHI magic
+ // div = (((dividend SUB mulhi) RSZ 1) ADD mulhi)) RSZ (shift - 1)
+ const bool requiresAdjustment = add;
+ const bool requiresDividendMultiuse = requiresAdjustment || !isDiv;
+ const unsigned curBBWeight = m_block->getBBWeight(comp);
+ unsigned dividendLclNum = BAD_VAR_NUM;
+
+ if (requiresDividendMultiuse)
+ {
+ LIR::Use dividendUse(BlockRange(), &divMod->gtOp1, divMod);
+ dividendLclNum = dividendUse.ReplaceWithLclVar(comp, curBBWeight);
+ dividend = divMod->gtGetOp1();
+ }
+
+ // Insert a new GT_MULHI node before the existing GT_UDIV/GT_UMOD node.
+ // The existing node will later be transformed into a GT_RSZ/GT_SUB that
+ // computes the final result. This way don't need to find and change the use
+ // of the existing node.
+ GenTree* mulhi = comp->gtNewOperNode(GT_MULHI, type, dividend, divisor);
+ mulhi->gtFlags |= GTF_UNSIGNED;
+ divisor->AsIntCon()->SetIconValue(magic);
+ BlockRange().InsertBefore(divMod, mulhi);
+
+ if (requiresAdjustment)
+ {
+ GenTree* dividend = comp->gtNewLclvNode(dividendLclNum, type);
+ GenTree* sub = comp->gtNewOperNode(GT_SUB, type, dividend, mulhi);
+ BlockRange().InsertBefore(divMod, dividend, sub);
+ comp->lvaTable[dividendLclNum].incRefCnts(curBBWeight, comp);
+
+ GenTree* one = comp->gtNewIconNode(1, TYP_INT);
+ GenTree* rsz = comp->gtNewOperNode(GT_RSZ, type, sub, one);
+ BlockRange().InsertBefore(divMod, one, rsz);
+
+ LIR::Use mulhiUse(BlockRange(), &sub->gtOp.gtOp2, sub);
+ unsigned mulhiLclNum = mulhiUse.ReplaceWithLclVar(comp, curBBWeight);
+
+ GenTree* mulhiCopy = comp->gtNewLclvNode(mulhiLclNum, type);
+ GenTree* add = comp->gtNewOperNode(GT_ADD, type, rsz, mulhiCopy);
+ BlockRange().InsertBefore(divMod, mulhiCopy, add);
+ comp->lvaTable[mulhiLclNum].incRefCnts(curBBWeight, comp);
+
+ mulhi = add;
+ shift -= 1;
+ }
+
+ GenTree* shiftBy = comp->gtNewIconNode(shift, TYP_INT);
+ BlockRange().InsertBefore(divMod, shiftBy);
+
+ if (isDiv)
+ {
+ divMod->SetOper(GT_RSZ);
+ divMod->gtOp1 = mulhi;
+ divMod->gtOp2 = shiftBy;
+ }
+ else
+ {
+ GenTree* div = comp->gtNewOperNode(GT_RSZ, type, mulhi, shiftBy);
+
+ // divisor UMOD dividend = dividend SUB (div MUL divisor)
+ GenTree* divisor = comp->gtNewIconNode(divisorValue, type);
+ GenTree* mul = comp->gtNewOperNode(GT_MUL, type, div, divisor);
+ GenTree* dividend = comp->gtNewLclvNode(dividendLclNum, type);
+
+ divMod->SetOper(GT_SUB);
+ divMod->gtOp1 = dividend;
+ divMod->gtOp2 = mul;
+
+ BlockRange().InsertBefore(divMod, div, divisor, mul, dividend);
+ comp->lvaTable[dividendLclNum].incRefCnts(curBBWeight, comp);
+ }
+
+ return mulhi;
}
+#endif
+
+ return next;
}
//------------------------------------------------------------------------
diff --git a/src/jit/lower.h b/src/jit/lower.h
index d9bb357089..73d2be68df 100644
--- a/src/jit/lower.h
+++ b/src/jit/lower.h
@@ -237,7 +237,7 @@ private:
// Per tree node member functions
void LowerStoreInd(GenTree* node);
GenTree* LowerAdd(GenTree* node);
- void LowerUnsignedDivOrMod(GenTree* node);
+ GenTree* LowerUnsignedDivOrMod(GenTreeOp* divMod);
GenTree* LowerSignedDivOrMod(GenTree* node);
void LowerBlockStore(GenTreeBlk* blkNode);
diff --git a/src/jit/utils.cpp b/src/jit/utils.cpp
index 9fbe394a21..c8f770f1a3 100644
--- a/src/jit/utils.cpp
+++ b/src/jit/utils.cpp
@@ -1808,3 +1808,180 @@ float FloatingPointUtils::round(float x)
return _copysignf(flrTempVal, x);
}
+
+namespace MagicDivide
+{
+template <int TableBase = 0, int TableSize, typename Magic>
+static const Magic* TryGetMagic(const Magic (&table)[TableSize], typename Magic::DivisorType index)
+{
+ if ((index < TableBase) || (TableBase + TableSize <= index))
+ {
+ return nullptr;
+ }
+
+ const Magic* p = &table[index - TableBase];
+
+ if (p->magic == 0)
+ {
+ return nullptr;
+ }
+
+ return p;
+};
+
+template <typename T>
+struct UnsignedMagic
+{
+ typedef T DivisorType;
+
+ T magic;
+ bool add;
+ int shift;
+};
+
+template <typename T>
+const UnsignedMagic<T>* TryGetUnsignedMagic(T divisor)
+{
+ return nullptr;
+}
+
+template <>
+const UnsignedMagic<uint32_t>* TryGetUnsignedMagic(uint32_t divisor)
+{
+ static const UnsignedMagic<uint32_t> table[]{
+ {0xaaaaaaab, false, 1}, // 3
+ {},
+ {0xcccccccd, false, 2}, // 5
+ {0xaaaaaaab, false, 2}, // 6
+ {0x24924925, true, 3}, // 7
+ {},
+ {0x38e38e39, false, 1}, // 9
+ {0xcccccccd, false, 3}, // 10
+ {0xba2e8ba3, false, 3}, // 11
+ {0xaaaaaaab, false, 3}, // 12
+ };
+
+ return TryGetMagic<3>(table, divisor);
+}
+
+template <>
+const UnsignedMagic<uint64_t>* TryGetUnsignedMagic(uint64_t divisor)
+{
+ static const UnsignedMagic<uint64_t> table[]{
+ {0xaaaaaaaaaaaaaaab, false, 1}, // 3
+ {},
+ {0xcccccccccccccccd, false, 2}, // 5
+ {0xaaaaaaaaaaaaaaab, false, 2}, // 6
+ {0x2492492492492493, true, 3}, // 7
+ {},
+ {0xe38e38e38e38e38f, false, 3}, // 9
+ {0xcccccccccccccccd, false, 3}, // 10
+ {0x2e8ba2e8ba2e8ba3, false, 1}, // 11
+ {0xaaaaaaaaaaaaaaab, false, 3}, // 12
+ };
+
+ return TryGetMagic<3>(table, divisor);
+}
+
+//------------------------------------------------------------------------
+// GetUnsignedMagic: Generates a magic number and shift amount for the magic
+// number unsigned division optimization.
+//
+// Arguments:
+// d - The divisor
+// add - Pointer to a flag indicating the kind of code to generate
+// shift - Pointer to the shift value to be returned
+//
+// Returns:
+// The magic number.
+//
+// Notes:
+// This code is adapted from _The_PowerPC_Compiler_Writer's_Guide_, pages 57-58.
+// The paper is based on "Division by invariant integers using multiplication"
+// by Torbjorn Granlund and Peter L. Montgomery in PLDI 94
+
+template <typename T>
+T GetUnsignedMagic(T d, bool* add /*out*/, int* shift /*out*/)
+{
+ assert((d >= 3) && !isPow2(d));
+
+ const UnsignedMagic<T>* magic = TryGetUnsignedMagic(d);
+
+ if (magic != nullptr)
+ {
+ *shift = magic->shift;
+ *add = magic->add;
+ return magic->magic;
+ }
+
+ typedef typename jitstd::make_signed<T>::type ST;
+
+ const unsigned bits = sizeof(T) * 8;
+ const unsigned bitsMinus1 = bits - 1;
+ const T twoNMinus1 = T(1) << bitsMinus1;
+
+ *add = false;
+ const T nc = -ST(1) - -ST(d) % ST(d);
+ unsigned p = bitsMinus1;
+ T q1 = twoNMinus1 / nc;
+ T r1 = twoNMinus1 - (q1 * nc);
+ T q2 = (twoNMinus1 - 1) / d;
+ T r2 = (twoNMinus1 - 1) - (q2 * d);
+ T delta;
+
+ do
+ {
+ p++;
+
+ if (r1 >= (nc - r1))
+ {
+ q1 = 2 * q1 + 1;
+ r1 = 2 * r1 - nc;
+ }
+ else
+ {
+ q1 = 2 * q1;
+ r1 = 2 * r1;
+ }
+
+ if ((r2 + 1) >= (d - r2))
+ {
+ if (q2 >= (twoNMinus1 - 1))
+ {
+ *add = true;
+ }
+
+ q2 = 2 * q2 + 1;
+ r2 = 2 * r2 + 1 - d;
+ }
+ else
+ {
+ if (q2 >= twoNMinus1)
+ {
+ *add = true;
+ }
+
+ q2 = 2 * q2;
+ r2 = 2 * r2 + 1;
+ }
+
+ delta = d - 1 - r2;
+
+ } while ((p < (bits * 2)) && ((q1 < delta) || ((q1 == delta) && (r1 == 0))));
+
+ *shift = p - bits; // resulting shift
+ return q2 + 1; // resulting magic number
+}
+
+uint32_t GetUnsigned32Magic(uint32_t d, bool* add /*out*/, int* shift /*out*/)
+{
+ return GetUnsignedMagic<uint32_t>(d, add, shift);
+}
+
+#ifdef _TARGET_64BIT_
+uint64_t GetUnsigned64Magic(uint64_t d, bool* add /*out*/, int* shift /*out*/)
+{
+ return GetUnsignedMagic<uint64_t>(d, add, shift);
+}
+#endif
+}
diff --git a/src/jit/utils.h b/src/jit/utils.h
index b41cf84a1e..fe785aa623 100644
--- a/src/jit/utils.h
+++ b/src/jit/utils.h
@@ -718,4 +718,12 @@ private:
CritSecHolder& operator=(const CritSecHolder&) = delete;
};
+namespace MagicDivide
+{
+uint32_t GetUnsigned32Magic(uint32_t d, bool* add /*out*/, int* shift /*out*/);
+#ifdef _TARGET_64BIT_
+uint64_t GetUnsigned64Magic(uint64_t d, bool* add /*out*/, int* shift /*out*/);
+#endif
+}
+
#endif // _UTILS_H_