summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMichelle McDaniel <adiaaida@gmail.com>2016-10-19 10:43:15 -0700
committerMichelle McDaniel <adiaaida@gmail.com>2016-10-20 12:40:32 -0700
commitd24a3008cd1bbed78dd4e82414228da6e21361d2 (patch)
tree75a9b8c1f50ad9086646c2588eadd1d7313dc26c
parent2eefb2d2e64caec9e4c4bbbe805495109e35a1c3 (diff)
downloadcoreclr-d24a3008cd1bbed78dd4e82414228da6e21361d2.tar.gz
coreclr-d24a3008cd1bbed78dd4e82414228da6e21361d2.tar.bz2
coreclr-d24a3008cd1bbed78dd4e82414228da6e21361d2.zip
Implement DecomposeRotate
fgRecognizeAndMorphBitwiseRotation can potentially morph a tree with a TYP_LONG return type on x86 if the rotate amount is a GT_CNS_INT. This change implements DecomposeRotate for those rotate instructions. There are 5 cases: 1) Rotate by 0: do nothing. 2) Rotate by 32: swap hi and lo. 3) Rotate by < 32: produce a shld/shld (for GT_ROL) or a shrd/shrd (for GT_ROR). 4) Rotate by >= 32: swap hi and lo, subtract from the rotate amount 32, and do option 3. 5) Rotate by >= 64: do rotateAmount % 64 to get the rotate amount between 0 and 63, then do one of options 1-4. This change also updates CodegenBringUpTests\Rotate.cs to exercise these paths.
-rw-r--r--src/jit/decomposelongs.cpp134
-rw-r--r--src/jit/decomposelongs.h1
-rw-r--r--tests/src/JIT/CodeGenBringUpTests/Rotate.cs70
3 files changed, 203 insertions, 2 deletions
diff --git a/src/jit/decomposelongs.cpp b/src/jit/decomposelongs.cpp
index 1e0f4df012..a9fc6b841d 100644
--- a/src/jit/decomposelongs.cpp
+++ b/src/jit/decomposelongs.cpp
@@ -244,6 +244,11 @@ GenTree* DecomposeLongs::DecomposeNode(GenTree* tree)
nextNode = DecomposeShift(use);
break;
+ case GT_ROL:
+ case GT_ROR:
+ nextNode = DecomposeRotate(use);
+ break;
+
case GT_LOCKADD:
case GT_XADD:
case GT_XCHG:
@@ -1294,6 +1299,135 @@ GenTree* DecomposeLongs::DecomposeShift(LIR::Use& use)
}
//------------------------------------------------------------------------
+// DecomposeRotate: Decompose GT_ROL and GT_ROR with constant shift amounts. We can
+// inspect the rotate amount and decompose to the appropriate node types, generating
+// a shld/shld pattern for GT_ROL, a shrd/shrd pattern for GT_ROR, for most rotate
+// amounts.
+//
+// Arguments:
+// use - the LIR::Use object for the def that needs to be decomposed.
+//
+// Return Value:
+// The next node to process.
+//
+GenTree* DecomposeLongs::DecomposeRotate(LIR::Use& use)
+{
+ GenTree* tree = use.Def();
+ GenTree* gtLong = tree->gtGetOp1();
+ GenTree* rotateByOp = tree->gtGetOp2();
+
+ genTreeOps oper = tree->OperGet();
+
+ assert((oper == GT_ROL) || (oper == GT_ROR));
+ assert(rotateByOp->IsCnsIntOrI());
+
+ // For longs, we need to change rols into two GT_LSH_HIs and rors into two GT_RSH_LOs
+ // so we will get:
+ //
+ // shld lo, hi, rotateAmount
+ // shld hi, loCopy, rotateAmount
+ //
+ // or:
+ //
+ // shrd lo, hi, rotateAmount
+ // shrd hi, loCopy, rotateAmount
+
+ if (oper == GT_ROL)
+ {
+ oper = GT_LSH_HI;
+ }
+ else
+ {
+ oper = GT_RSH_LO;
+ }
+
+ unsigned int count = rotateByOp->gtIntCon.gtIconVal;
+ Range().Remove(rotateByOp);
+
+ // Make sure the rotate amount is between 0 and 63.
+ assert((count < 64) && (count != 0));
+
+ GenTree* loOp1;
+ GenTree* hiOp1;
+
+ if (count > 32)
+ {
+ // If count > 32, we swap hi and lo, and subtract 32 from count
+ hiOp1 = gtLong->gtGetOp1();
+ loOp1 = gtLong->gtGetOp2();
+ count -= 32;
+ }
+ else
+ {
+ loOp1 = gtLong->gtGetOp1();
+ hiOp1 = gtLong->gtGetOp2();
+ }
+
+ GenTree* loResult;
+ GenTree* hiResult;
+
+ if (count == 32)
+ {
+ // If the rotate amount is 32, then swap hi and lo
+ gtLong->gtOp.gtOp1 = hiOp1;
+ gtLong->gtOp.gtOp2 = loOp1;
+
+ GenTree* next = tree->gtNext;
+ // Remove tree and don't do anything else.
+ Range().Remove(tree);
+ use.ReplaceWith(m_compiler, gtLong);
+ return next;
+ }
+ else
+ {
+ Range().Remove(gtLong);
+ loOp1 = RepresentOpAsLocalVar(loOp1, gtLong, &gtLong->gtOp.gtOp1);
+ hiOp1 = RepresentOpAsLocalVar(hiOp1, gtLong, &gtLong->gtOp.gtOp2);
+
+ unsigned loOp1LclNum = loOp1->AsLclVarCommon()->gtLclNum;
+ unsigned hiOp1LclNum = hiOp1->AsLclVarCommon()->gtLclNum;
+
+ if (count > 32)
+ {
+ Range().Remove(hiOp1);
+ Range().Remove(loOp1);
+ }
+ else
+ {
+ Range().Remove(loOp1);
+ Range().Remove(hiOp1);
+ }
+
+ GenTree* rotateByHi = m_compiler->gtNewIconNode(count, TYP_INT);
+ GenTree* rotateByLo = m_compiler->gtNewIconNode(count, TYP_INT);
+
+ // Create a GT_LONG that contains loOp1 and hiCopy. This will be used in codegen to
+ // generate the shld instruction
+ GenTree* hiCopy = m_compiler->gtNewLclvNode(hiOp1LclNum, TYP_INT);
+ GenTree* loOp = new (m_compiler, GT_LONG) GenTreeOp(GT_LONG, TYP_LONG, hiCopy, loOp1);
+ loResult = m_compiler->gtNewOperNode(oper, TYP_INT, loOp, rotateByLo);
+
+ // Create a GT_LONG that contains loCopy and hiOp1. This will be used in codegen to
+ // generate the shld instruction
+ GenTree* loCopy = m_compiler->gtNewLclvNode(loOp1LclNum, TYP_INT);
+ GenTree* hiOp = new (m_compiler, GT_LONG) GenTreeOp(GT_LONG, TYP_LONG, loCopy, hiOp1);
+ hiResult = m_compiler->gtNewOperNode(oper, TYP_INT, hiOp, rotateByHi);
+
+ m_compiler->lvaIncRefCnts(loCopy);
+ m_compiler->lvaIncRefCnts(hiCopy);
+
+ Range().InsertBefore(tree, hiCopy, loOp1, loOp);
+ Range().InsertBefore(tree, rotateByLo, loResult);
+ Range().InsertBefore(tree, loCopy, hiOp1, hiOp);
+ Range().InsertBefore(tree, rotateByHi, hiResult);
+ }
+
+ Range().Remove(tree);
+
+ return FinalizeDecomposition(use, loResult, hiResult, hiResult);
+}
+
+//------------------------------------------------------------------------
// DecomposeMul: Decompose GT_MUL. The only GT_MULs that make it to decompose are
// those with the GTF_MUL_64RSLT flag set. These muls result in a mul instruction that
// returns its result in two registers like GT_CALLs do. Additionally, these muls are
diff --git a/src/jit/decomposelongs.h b/src/jit/decomposelongs.h
index 285d3490c2..8965a0b330 100644
--- a/src/jit/decomposelongs.h
+++ b/src/jit/decomposelongs.h
@@ -52,6 +52,7 @@ private:
GenTree* DecomposeNeg(LIR::Use& use);
GenTree* DecomposeArith(LIR::Use& use);
GenTree* DecomposeShift(LIR::Use& use);
+ GenTree* DecomposeRotate(LIR::Use& use);
GenTree* DecomposeMul(LIR::Use& use);
GenTree* DecomposeUMod(LIR::Use& use);
diff --git a/tests/src/JIT/CodeGenBringUpTests/Rotate.cs b/tests/src/JIT/CodeGenBringUpTests/Rotate.cs
index 74a936ef13..98ea563f85 100644
--- a/tests/src/JIT/CodeGenBringUpTests/Rotate.cs
+++ b/tests/src/JIT/CodeGenBringUpTests/Rotate.cs
@@ -102,12 +102,30 @@ public class Test
[MethodImpl(MethodImplOptions.NoInlining)]
static ulong rol64const()
{
- ulong value = flag() ? (ulong)0x123456789abcdef : (ulong)0x123456789abcdef;
+ ulong value = flag() ? (ulong)0x123456789abcdef : (ulong)0xabcdef123456789;
int amount = 16;
return (value >> (64 - amount)) | (value << amount);
}
[MethodImpl(MethodImplOptions.NoInlining)]
+ static ulong rol64_16(ulong value)
+ {
+ return (value >> (64 - 16)) | (value << 16);
+ }
+
+ [MethodImpl(MethodImplOptions.NoInlining)]
+ static ulong rol64_32(ulong value)
+ {
+ return (value >> (64 - 32)) | (value << 32);
+ }
+
+ [MethodImpl(MethodImplOptions.NoInlining)]
+ static ulong rol64_33(ulong value)
+ {
+ return (value >> (64 - 33)) | (value << 33);
+ }
+
+ [MethodImpl(MethodImplOptions.NoInlining)]
ulong rol64field(int amount)
{
return (field << amount) | (field >> (64 - amount));
@@ -128,12 +146,30 @@ public class Test
[MethodImpl(MethodImplOptions.NoInlining)]
static ulong ror64const()
{
- ulong value = flag() ? (ulong)0x123456789abcdef : (ulong)0x123456789abcdef;
+ ulong value = flag() ? (ulong)0x123456789abcdef : (ulong)0xabcdef123456789;
int amount = flag() ? 5 : 5;
return (value << (64 - amount)) | (value >> amount);
}
[MethodImpl(MethodImplOptions.NoInlining)]
+ static ulong ror64_5(ulong value)
+ {
+ return (value << (64 - 5)) | (value >> 5);
+ }
+
+ [MethodImpl(MethodImplOptions.NoInlining)]
+ static ulong ror64_32(ulong value)
+ {
+ return (value << (64 - 32)) | (value >> 32);
+ }
+
+ [MethodImpl(MethodImplOptions.NoInlining)]
+ static ulong ror64_33(ulong value)
+ {
+ return (value << (64 - 33)) | (value >> 33);
+ }
+
+ [MethodImpl(MethodImplOptions.NoInlining)]
static ulong ror64sfield(int amount)
{
return (s_field << (64 - amount)) | (s_field >> amount);
@@ -244,6 +280,21 @@ public class Test
return Fail;
}
+ if (rol64_16(0x123456789abcdef) != 0x456789abcdef0123)
+ {
+ return Fail;
+ }
+
+ if (rol64_32(0x123456789abcdef) != rol64(0x123456789abcdef, 32))
+ {
+ return Fail;
+ }
+
+ if (rol64_33(0x123456789abcdef) != rol64(0x123456789abcdef, 33))
+ {
+ return Fail;
+ }
+
if (ror64(0x123456789abcdef, 0) != 0x123456789abcdef)
{
return Fail;
@@ -259,6 +310,21 @@ public class Test
return Fail;
}
+ if (ror64_5(0x123456789abcdef) != 0x78091a2b3c4d5e6f)
+ {
+ return Fail;
+ }
+
+ if (ror64_32(0x123456789abcdef) != ror64(0x123456789abcdef, 32))
+ {
+ return Fail;
+ }
+
+ if (ror64_33(0x123456789abcdef) != ror64(0x123456789abcdef, 33))
+ {
+ return Fail;
+ }
+
if (rol32_call(0x12345678, 16) != 0x56781234)
{
return Fail;