summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMichelle McDaniel <adiaaida@gmail.com>2016-09-09 10:44:03 -0700
committerMichelle McDaniel <adiaaida@gmail.com>2016-09-16 13:08:29 -0700
commit5c5e53363fd7ae7228764a006de1b1f158e64a16 (patch)
tree8320293693383101ff3517f746dec39c4130f615
parent832a7498bf7aff89a70800c5f02517866f93dc1f (diff)
downloadcoreclr-5c5e53363fd7ae7228764a006de1b1f158e64a16.tar.gz
coreclr-5c5e53363fd7ae7228764a006de1b1f158e64a16.tar.bz2
coreclr-5c5e53363fd7ae7228764a006de1b1f158e64a16.zip
Add optimization for shift by CNS_INT
This change adds support for shifting by a GT_CNS_INT without going through a helper. If the shiftOp is a GT_CNS_INT we do several transformations based on the shift amount: If the shift amount is 0, the shift is a nop, so we just put together the hi and lo ops as a GT_LONG. If the shift amount is < 32, we generate a shl/shld pattern, a shr/shrd pattern or a sar/shrd pattern, depending on the oper. The first operand of the shrd/shld is a GT_LONG, which we crack in codegen, using it essentially as two int operands, rather than creating a three-operand GenTree node (essentially so that we can have 3 operands, instead of the normal two). If the shift amount is 32, it differs between shifting left and shifting right. For GT_LSH, we move the loOp into the hiResult and set the loResult to 0. For GT_RSZ, we move the hiOp into the loResult, and set the hiResult to 0. For GT_RSH, we move the hiOp into the loResult, and set the hiResult to a signed (arithmetic) right shift of the hiOp by 31 to sign extend. If the shift amount is less than 64, but larger than 32: for GT_LSH, the hiResult is a shift of the loOp by shift amount - 32 (the move from lo into hi is the 32 bit shift). We set the loResult to 0. For GT_RSH and GT_RSZ, the loResult is a right shift (signed for GT_RSH) of the hiOp by shift amount - 32. The hiResult is 0 for GT_RSZ, and a signed (arithmetic) right shift of the hiOp by 31 for GT_RSH. If the shift amount is >= 64, we set both hiResult and loResult to 0 for GT_LSH and GT_RSZ, and do a sign extend shift to set hiResult and loResult to the sign of the original hiOp for GT_RSH.
-rw-r--r--src/jit/codegenlinear.h4
-rw-r--r--src/jit/codegenxarch.cpp79
-rw-r--r--src/jit/decomposelongs.cpp379
-rw-r--r--src/jit/decomposelongs.h2
-rw-r--r--src/jit/gtlist.h10
-rw-r--r--src/jit/lowerxarch.cpp29
6 files changed, 452 insertions, 51 deletions
diff --git a/src/jit/codegenlinear.h b/src/jit/codegenlinear.h
index 6cc437bd8e..793d6d2656 100644
--- a/src/jit/codegenlinear.h
+++ b/src/jit/codegenlinear.h
@@ -145,6 +145,10 @@ void genSetRegToIcon(regNumber reg, ssize_t val, var_types type = TYP_INT, insFl
void genCodeForShift(GenTreePtr tree);
+#if defined(_TARGET_X86_)
+void genCodeForShiftLong(GenTreePtr tree);
+#endif
+
#ifdef _TARGET_XARCH_
void genCodeForShiftRMW(GenTreeStoreInd* storeInd);
#endif // _TARGET_XARCH_
diff --git a/src/jit/codegenxarch.cpp b/src/jit/codegenxarch.cpp
index 402d80097d..98fb24936b 100644
--- a/src/jit/codegenxarch.cpp
+++ b/src/jit/codegenxarch.cpp
@@ -2069,6 +2069,17 @@ void CodeGen::genCodeForTreeNode(GenTreePtr treeNode)
// genCodeForShift() calls genProduceReg()
break;
+#if !defined(_TARGET_64BIT_)
+ case GT_LSH_HI:
+ case GT_RSH_LO:
+ // TODO-X86-CQ: This only handles the case where the operand being shifted is in a register. We don't
+ // need sourceHi to be always in reg in case of GT_LSH_HI (because it could be moved from memory to
+ // targetReg if sourceHi is a contained mem-op). Similarly for GT_RSH_LO, sourceLo could be marked as
+ // contained memory-op. Even if not a memory-op, we could mark it as reg-optional.
+ genCodeForShiftLong(treeNode);
+ break;
+#endif
+
case GT_CAST:
if (varTypeIsFloating(targetType) && varTypeIsFloating(treeNode->gtOp.gtOp1))
{
@@ -4694,6 +4705,12 @@ instruction CodeGen::genGetInsForOper(genTreeOps oper, var_types type)
case GT_SUB_HI:
ins = INS_sbb;
break;
+ case GT_LSH_HI:
+ ins = INS_shld;
+ break;
+ case GT_RSH_LO:
+ ins = INS_shrd;
+ break;
#endif // !defined(_TARGET_64BIT_)
default:
unreached();
@@ -4731,6 +4748,7 @@ void CodeGen::genCodeForShift(GenTreePtr tree)
regNumber operandReg = operand->gtRegNum;
GenTreePtr shiftBy = tree->gtGetOp2();
+
if (shiftBy->isContainedIntOrIImmed())
{
// First, move the operand to the destination register and
@@ -4769,6 +4787,67 @@ void CodeGen::genCodeForShift(GenTreePtr tree)
genProduceReg(tree);
}
+#ifdef _TARGET_X86_
+//------------------------------------------------------------------------
+// genCodeForShiftLong: Generates the code sequence for a GenTree node that
+// represents a three operand bit shift operation (<<Hi, >>Lo), i.e. GT_LSH_HI or GT_RSH_LO.
+//
+// Arguments:
+// tree - the bit shift node (GT_LSH_HI or GT_RSH_LO); op1 is a GT_LONG (lo, hi) pair, op2 is the shift count.
+//
+// Assumptions:
+// a) All GenTrees are register allocated.
+// b) The shift-by-amount in tree->gtOp.gtOp2 is a contained constant, and neither operand of the GT_LONG is contained.
+//
+void CodeGen::genCodeForShiftLong(GenTreePtr tree)
+{
+ // Only the non-RMW case here.
+ genTreeOps oper = tree->OperGet();
+ assert(oper == GT_LSH_HI || oper == GT_RSH_LO);
+
+ GenTree* operand = tree->gtOp.gtOp1; // the GT_LONG that carries both 32-bit halves
+ assert(operand->OperGet() == GT_LONG);
+ assert(!operand->gtOp.gtOp1->isContained());
+ assert(!operand->gtOp.gtOp2->isContained());
+
+ GenTree* operandLo = operand->gtGetOp1(); // GT_LONG op1 is the lo half
+ GenTree* operandHi = operand->gtGetOp2(); // GT_LONG op2 is the hi half
+
+ regNumber regLo = operandLo->gtRegNum;
+ regNumber regHi = operandHi->gtRegNum;
+
+ genConsumeOperands(tree->AsOp());
+
+ var_types targetType = tree->TypeGet();
+ instruction ins = genGetInsForOper(oper, targetType); // INS_shld for GT_LSH_HI, INS_shrd for GT_RSH_LO
+
+ GenTreePtr shiftBy = tree->gtGetOp2();
+
+ assert(shiftBy->isContainedIntOrIImmed());
+
+ unsigned int count = shiftBy->AsIntConCommon()->IconValue(); // constant shift amount passed to inst_RV_RV_IV below
+
+ regNumber regResult = (oper == GT_LSH_HI) ? regHi : regLo; // the half that this node produces
+
+ if (regResult != tree->gtRegNum) // copy the half being shifted into the node's register if it is not already there
+ {
+ inst_RV_RV(INS_mov, tree->gtRegNum, regResult, targetType);
+ }
+
+ if (oper == GT_LSH_HI)
+ {
+ inst_RV_RV_IV(ins, emitTypeSize(targetType), tree->gtRegNum, regLo, count); // second register operand is the lo half
+ }
+ else
+ {
+ assert(oper == GT_RSH_LO);
+ inst_RV_RV_IV(ins, emitTypeSize(targetType), tree->gtRegNum, regHi, count); // second register operand is the hi half
+ }
+
+ genProduceReg(tree);
+}
+#endif
+
//------------------------------------------------------------------------
// genCodeForShiftRMW: Generates the code sequence for a GT_STOREIND GenTree node that
// represents a RMW bit shift or rotate operation (<<, >>, >>>, rol, ror), for example:
diff --git a/src/jit/decomposelongs.cpp b/src/jit/decomposelongs.cpp
index 00745c066e..c3be74c74a 100644
--- a/src/jit/decomposelongs.cpp
+++ b/src/jit/decomposelongs.cpp
@@ -280,23 +280,22 @@ GenTree* DecomposeLongs::DecomposeNode(GenTree* tree)
// Arguments:
// use - the LIR::Use object for the def that needs to be decomposed.
// loResult - the decomposed low part
-// hiResult - the decomposed high part. This must follow loResult in the linear order,
-// as the new GT_LONG node will be inserted immediately after it.
+// hiResult - the decomposed high part
+// insertResultAfter - the node that the GT_LONG should be inserted after
//
// Return Value:
// The next node to process.
//
-GenTree* DecomposeLongs::FinalizeDecomposition(LIR::Use& use, GenTree* loResult, GenTree* hiResult)
+GenTree* DecomposeLongs::FinalizeDecomposition(LIR::Use& use, GenTree* loResult, GenTree* hiResult, GenTree* insertResultAfter)
{
assert(use.IsInitialized());
assert(loResult != nullptr);
assert(hiResult != nullptr);
assert(Range().Contains(loResult));
assert(Range().Contains(hiResult));
- assert(loResult->Precedes(hiResult));
GenTree* gtLong = new (m_compiler, GT_LONG) GenTreeOp(GT_LONG, TYP_LONG, loResult, hiResult);
- Range().InsertAfter(hiResult, gtLong);
+ Range().InsertAfter(insertResultAfter, gtLong);
use.ReplaceWith(m_compiler, gtLong);
@@ -352,7 +351,7 @@ GenTree* DecomposeLongs::DecomposeLclVar(LIR::Use& use)
m_compiler->lvaIncRefCnts(loResult);
m_compiler->lvaIncRefCnts(hiResult);
- return FinalizeDecomposition(use, loResult, hiResult);
+ return FinalizeDecomposition(use, loResult, hiResult, hiResult);
}
//------------------------------------------------------------------------
@@ -376,7 +375,7 @@ GenTree* DecomposeLongs::DecomposeLclFld(LIR::Use& use)
GenTree* hiResult = m_compiler->gtNewLclFldNode(loResult->gtLclNum, TYP_INT, loResult->gtLclOffs + 4);
Range().InsertAfter(loResult, hiResult);
- return FinalizeDecomposition(use, loResult, hiResult);
+ return FinalizeDecomposition(use, loResult, hiResult, hiResult);
}
//------------------------------------------------------------------------
@@ -564,7 +563,7 @@ GenTree* DecomposeLongs::DecomposeCast(LIR::Use& use)
NYI("Unimplemented cast decomposition");
}
- return FinalizeDecomposition(use, loResult, hiResult);
+ return FinalizeDecomposition(use, loResult, hiResult, hiResult);
}
//------------------------------------------------------------------------
@@ -591,7 +590,7 @@ GenTree* DecomposeLongs::DecomposeCnsLng(LIR::Use& use)
GenTree* hiResult = new (m_compiler, GT_CNS_INT) GenTreeIntCon(TYP_INT, hiVal);
Range().InsertAfter(loResult, hiResult);
- return FinalizeDecomposition(use, loResult, hiResult);
+ return FinalizeDecomposition(use, loResult, hiResult, hiResult);
}
//------------------------------------------------------------------------
@@ -740,7 +739,7 @@ GenTree* DecomposeLongs::DecomposeInd(LIR::Use& use)
Range().InsertAfter(indLow, addrBaseHigh, addrHigh, indHigh);
- return FinalizeDecomposition(use, indLow, indHigh);
+ return FinalizeDecomposition(use, indLow, indHigh, indHigh);
}
//------------------------------------------------------------------------
@@ -772,7 +771,7 @@ GenTree* DecomposeLongs::DecomposeNot(LIR::Use& use)
GenTree* hiResult = new (m_compiler, GT_NOT) GenTreeOp(GT_NOT, TYP_INT, hiOp1, nullptr);
Range().InsertAfter(loResult, hiResult);
- return FinalizeDecomposition(use, loResult, hiResult);
+ return FinalizeDecomposition(use, loResult, hiResult, hiResult);
}
//------------------------------------------------------------------------
@@ -817,7 +816,7 @@ GenTree* DecomposeLongs::DecomposeNeg(LIR::Use& use)
Range().InsertAfter(loResult, zero, hiAdjust, hiResult);
- return FinalizeDecomposition(use, loResult, hiResult);
+ return FinalizeDecomposition(use, loResult, hiResult, hiResult);
}
//------------------------------------------------------------------------
@@ -878,14 +877,19 @@ GenTree* DecomposeLongs::DecomposeArith(LIR::Use& use)
}
}
- return FinalizeDecomposition(use, loResult, hiResult);
+ return FinalizeDecomposition(use, loResult, hiResult, hiResult);
}
//------------------------------------------------------------------------
-// DecomposeShift: Decompose GT_LSH, GT_RSH, GT_RSZ. For shift nodes, we need to use
-// the shift helper functions, so we here convert the shift into a helper call by
-// pulling its arguments out of linear order and making them the args to a call, then
-// replacing the original node with the new call.
+// DecomposeShift: Decompose GT_LSH, GT_RSH, GT_RSZ. For shift nodes being shifted
+// by a constant int, we can inspect the shift amount and decompose to the appropriate
+// node types, generating a shl/shld pattern for GT_LSH, a shrd/shr pattern for GT_RSZ,
+// and a shrd/sar pattern for GT_RSH for most shift amounts. Shifts by 0, by 32 or more,
+// and by 64 or more are special cased to produce better code patterns.
+//
+// For all other shift nodes, we need to use the shift helper functions, so we here convert
+// the shift into a helper call by pulling its arguments out of linear order and making
+// them the args to a call, then replacing the original node with the new call.
//
// Arguments:
// use - the LIR::Use object for the def that needs to be decomposed.
@@ -899,64 +903,339 @@ GenTree* DecomposeLongs::DecomposeShift(LIR::Use& use)
GenTree* tree = use.Def();
GenTree* gtLong = tree->gtGetOp1();
+ GenTree* oldShiftByOp = tree->gtGetOp2();
+
genTreeOps oper = tree->OperGet();
+ genTreeOps shiftByOper = oldShiftByOp->OperGet();
assert((oper == GT_LSH) || (oper == GT_RSH) || (oper == GT_RSZ));
- LIR::Use loOp1Use(Range(), &gtLong->gtOp.gtOp1, gtLong);
- loOp1Use.ReplaceWithLclVar(m_compiler, m_blockWeight);
+ unsigned loOp1LclNum;
+ unsigned hiOp1LclNum;
- LIR::Use hiOp1Use(Range(), &gtLong->gtOp.gtOp2, gtLong);
- hiOp1Use.ReplaceWithLclVar(m_compiler, m_blockWeight);
+ if (gtLong->gtOp.gtOp1->OperGet() != GT_LCL_VAR)
+ {
+ LIR::Use loOp1Use(Range(), &gtLong->gtOp.gtOp1, gtLong);
+ loOp1LclNum = loOp1Use.ReplaceWithLclVar(m_compiler, m_blockWeight);
+ }
+ else
+ {
+ loOp1LclNum = gtLong->gtOp.gtOp1->AsLclVarCommon()->gtLclNum;
+ }
- LIR::Use shiftWidthUse(Range(), &tree->gtOp.gtOp2, tree);
- shiftWidthUse.ReplaceWithLclVar(m_compiler, m_blockWeight);
+ if (gtLong->gtOp.gtOp2->OperGet() != GT_LCL_VAR)
+ {
+ LIR::Use hiOp1Use(Range(), &gtLong->gtOp.gtOp2, gtLong);
+ hiOp1LclNum = hiOp1Use.ReplaceWithLclVar(m_compiler, m_blockWeight);
+ }
+ else
+ {
+ hiOp1LclNum = gtLong->gtOp.gtOp2->AsLclVarCommon()->gtLclNum;
+ }
GenTree* loOp1 = gtLong->gtGetOp1();
GenTree* hiOp1 = gtLong->gtGetOp2();
- GenTree* shiftWidthOp = tree->gtGetOp2();
-
Range().Remove(gtLong);
Range().Remove(loOp1);
Range().Remove(hiOp1);
- Range().Remove(shiftWidthOp);
+ // If we are shifting by a constant int, we do not want to use a helper, instead, we decompose.
+ if (shiftByOper == GT_CNS_INT)
+ {
+ unsigned int count = oldShiftByOp->gtIntCon.gtIconVal;
+ Range().Remove(oldShiftByOp);
- // TODO-X86-CQ: If the shift operand is a GT_CNS_INT, we should pipe the instructions through to codegen
- // and generate the shift instructions ourselves there, rather than replacing it with a helper call.
+ GenTree* loResult;
+ GenTree* hiResult;
- unsigned helper;
+ GenTree* insertAfter;
- switch (oper)
- {
- case GT_LSH:
- helper = CORINFO_HELP_LLSH;
+ switch (oper)
+ {
+ case GT_LSH:
+ {
+ if (count == 0)
+ {
+ // Do nothing.
+ loResult = loOp1;
+ hiResult = hiOp1;
+
+ Range().InsertBefore(tree, loResult, hiResult);
+
+ insertAfter = hiResult;
+ }
+ else if (count < 32)
+ {
+ // Hi is a GT_LSH_HI, lo is a GT_LSH. Will produce:
+ // reg1 = lo
+ // shl lo, shift
+ // shld hi, reg1, shift
+
+ GenTree* shiftByHi = m_compiler->gtNewIconNode(count, TYP_INT);
+ GenTree* shiftByLo = m_compiler->gtNewIconNode(count, TYP_INT);
+
+ loResult = m_compiler->gtNewOperNode(GT_LSH, TYP_INT, loOp1, shiftByLo);
+
+ // Create a GT_LONG that contains loCopy and hiOp1. This will be used in codegen to
+ // generate the shld instruction
+ GenTree* loCopy = m_compiler->gtNewLclvNode(loOp1LclNum, TYP_INT);
+ GenTree* hiOp = new (m_compiler, GT_LONG) GenTreeOp(GT_LONG, TYP_LONG, loCopy, hiOp1);
+ hiResult = m_compiler->gtNewOperNode(GT_LSH_HI, TYP_INT, hiOp, shiftByHi);
+
+ m_compiler->lvaIncRefCnts(loCopy);
+
+ Range().InsertBefore(tree, loCopy, hiOp1, hiOp);
+ Range().InsertBefore(tree, shiftByHi, hiResult);
+ Range().InsertBefore(tree, loOp1, shiftByLo, loResult);
+
+ insertAfter = loResult;
+ }
+ else
+ {
+ assert(count >= 32);
+
+ // Zero out loResult (shift of >= 32 bits shifts all lo bits to hiResult)
+ loResult = m_compiler->gtNewZeroConNode(TYP_INT);
+ Range().InsertBefore(tree, loResult);
+
+ if (count < 64)
+ {
+ if (count == 32)
+ {
+ // Move loOp1 into hiResult (shift of 32 bits is just a mov of lo to hi)
+ hiResult = loOp1;
+ Range().InsertBefore(tree, hiResult);
+ }
+ else
+ {
+ assert(count > 32 && count < 64);
+
+ // Move loOp1 into hiResult, do a GT_LSH with count - 32.
+ GenTree* shiftBy = m_compiler->gtNewIconNode(count - 32, TYP_INT);
+ hiResult = m_compiler->gtNewOperNode(oper, TYP_INT, loOp1, shiftBy);
+ Range().InsertBefore(tree, loOp1, shiftBy, hiResult);
+ }
+ }
+ else
+ {
+ assert(count >= 64);
+
+ // Zero out hi (shift of >= 64 bits moves all the bits out of the two registers)
+ hiResult = m_compiler->gtNewZeroConNode(TYP_INT);
+ Range().InsertBefore(tree, hiResult);
+ }
+
+ insertAfter = hiResult;
+ }
+ }
break;
- case GT_RSH:
- helper = CORINFO_HELP_LRSH;
+ case GT_RSZ:
+ {
+ if (count == 0)
+ {
+ // Do nothing.
+ loResult = loOp1;
+ hiResult = hiOp1;
+ Range().InsertBefore(tree, loResult, hiResult);
+ }
+ else if (count < 32)
+ {
+ // Hi is a GT_RSZ, lo is a GT_RSH_LO. Will produce:
+ // reg1 = hi
+ // shrd lo, reg1, shift
+ // shr hi, shift
+
+ GenTree* shiftByHi = m_compiler->gtNewIconNode(count, TYP_INT);
+ GenTree* shiftByLo = m_compiler->gtNewIconNode(count, TYP_INT);
+ GenTree* hiCopy = m_compiler->gtNewLclvNode(hiOp1LclNum, TYP_INT);
+ m_compiler->lvaIncRefCnts(hiCopy);
+
+ hiResult = m_compiler->gtNewOperNode(GT_RSZ, TYP_INT, hiOp1, shiftByHi);
+
+ // Create a GT_LONG that contains loOp1 and hiCopy. This will be used in codegen to
+ // generate the shrd instruction
+ GenTree* loOp = new (m_compiler, GT_LONG) GenTreeOp(GT_LONG, TYP_LONG, loOp1, hiCopy);
+ loResult = m_compiler->gtNewOperNode(GT_RSH_LO, TYP_INT, loOp, shiftByLo);
+
+ Range().InsertBefore(tree, loOp1, hiCopy, loOp);
+ Range().InsertBefore(tree, shiftByLo, loResult);
+ Range().InsertBefore(tree, hiOp1, shiftByHi, hiResult);
+ }
+ else
+ {
+ assert(count >= 32);
+ if (count < 64)
+ {
+ if (count == 32)
+ {
+ // Move hiOp1 into loResult.
+ loResult = hiOp1;
+ Range().InsertBefore(tree, loResult);
+ }
+ else
+ {
+ assert(count > 32 && count < 64);
+
+ // Move hiOp1 into loResult, do a GT_RSZ with count - 32.
+ GenTree* shiftBy = m_compiler->gtNewIconNode(count - 32, TYP_INT);
+ loResult = m_compiler->gtNewOperNode(oper, TYP_INT, hiOp1, shiftBy);
+ Range().InsertBefore(tree, hiOp1, shiftBy, loResult);
+ }
+ }
+ else
+ {
+ assert(count >= 64);
+
+ // Zero out lo
+ loResult = m_compiler->gtNewZeroConNode(TYP_INT);
+ Range().InsertBefore(tree, loResult);
+ }
+
+ // Zero out hi
+ hiResult = m_compiler->gtNewZeroConNode(TYP_INT);
+ Range().InsertBefore(tree, hiResult);
+ }
+
+ insertAfter = hiResult;
+ }
break;
- case GT_RSZ:
- helper = CORINFO_HELP_LRSZ;
+ case GT_RSH:
+ {
+ if (count == 0)
+ {
+ // Do nothing.
+ loResult = loOp1;
+ hiResult = hiOp1;
+ Range().InsertBefore(tree, loResult, hiResult);
+ }
+ else if (count < 32)
+ {
+ // Hi is a GT_RSH, lo is a GT_RSH_LO. Will produce:
+ // reg1 = hi
+ // shrd lo, reg1, shift
+ // sar hi, shift
+
+ GenTree* shiftByHi = m_compiler->gtNewIconNode(count, TYP_INT);
+ GenTree* shiftByLo = m_compiler->gtNewIconNode(count, TYP_INT);
+ GenTree* hiCopy = m_compiler->gtNewLclvNode(hiOp1LclNum, TYP_INT);
+ m_compiler->lvaIncRefCnts(hiCopy);
+
+ hiResult = m_compiler->gtNewOperNode(GT_RSH, TYP_INT, hiOp1, shiftByHi);
+
+ // Create a GT_LONG that contains loOp1 and hiCopy. This will be used in codegen to
+ // generate the shrd instruction
+ GenTree* loOp = new (m_compiler, GT_LONG) GenTreeOp(GT_LONG, TYP_LONG, loOp1, hiCopy);
+ loResult = m_compiler->gtNewOperNode(GT_RSH_LO, TYP_INT, loOp, shiftByLo);
+
+ Range().InsertBefore(tree, loOp1, hiCopy, loOp);
+ Range().InsertBefore(tree, shiftByLo, loResult);
+ Range().InsertBefore(tree, shiftByHi, hiOp1, hiResult);
+ }
+ else
+ {
+ assert(count >= 32);
+ if (count < 64)
+ {
+ if (count == 32)
+ {
+ // Move hiOp1 into loResult.
+ loResult = hiOp1;
+ Range().InsertBefore(tree, loResult);
+ }
+ else
+ {
+ assert(count > 32 && count < 64);
+
+ // Move hiOp1 into loResult, do a GT_RSH with count - 32.
+ GenTree* shiftBy = m_compiler->gtNewIconNode(count - 32, TYP_INT);
+ loResult = m_compiler->gtNewOperNode(oper, TYP_INT, hiOp1, shiftBy);
+ Range().InsertBefore(tree, hiOp1, shiftBy, loResult);
+ }
+
+ // Propagate sign bit in hiResult
+ GenTree* shiftBy = m_compiler->gtNewIconNode(31, TYP_INT);
+ GenTree* hiCopy = m_compiler->gtNewLclvNode(hiOp1LclNum, TYP_INT);
+ hiResult = m_compiler->gtNewOperNode(GT_RSH, TYP_INT, hiCopy, shiftBy);
+ Range().InsertBefore(tree, shiftBy, hiCopy, hiResult);
+
+ m_compiler->lvaIncRefCnts(hiCopy);
+ }
+ else
+ {
+ assert(count >= 64);
+
+ // Propagate sign bit in loResult
+ GenTree* hiCopy = m_compiler->gtNewLclvNode(hiOp1LclNum, TYP_INT);
+ GenTree* loShiftBy = m_compiler->gtNewIconNode(31, TYP_INT);
+ loResult = m_compiler->gtNewOperNode(GT_RSH, TYP_INT, hiCopy, loShiftBy);
+ Range().InsertBefore(tree, hiCopy, loShiftBy, loResult);
+
+ // Propagate sign bit in hiResult
+ GenTree* shiftBy = m_compiler->gtNewIconNode(31, TYP_INT);
+ hiResult = m_compiler->gtNewOperNode(GT_RSH, TYP_INT, hiOp1, shiftBy);
+ Range().InsertBefore(tree, shiftBy, hiOp1, hiResult);
+
+ m_compiler->lvaIncRefCnts(hiCopy);
+ }
+ }
+
+ insertAfter = hiResult;
+ }
break;
- default:
- unreached();
+ default:
+ unreached();
+ }
+
+ // Remove tree from Range
+ Range().Remove(tree);
+
+ return FinalizeDecomposition(use, loResult, hiResult, insertAfter);
}
+ else
+ {
+ GenTree* shiftByOp = oldShiftByOp;
+ if (shiftByOp->OperGet() != GT_LCL_VAR)
+ {
+ LIR::Use shiftByUse(Range(), &tree->gtOp.gtOp2, tree);
+ shiftByUse.ReplaceWithLclVar(m_compiler, m_blockWeight);
+ shiftByOp = tree->gtGetOp2();
+ }
+
+ Range().Remove(shiftByOp);
- GenTreeArgList* argList = m_compiler->gtNewArgList(loOp1, hiOp1, shiftWidthOp);
+ unsigned helper;
- GenTree* call = m_compiler->gtNewHelperCallNode(helper, TYP_LONG, 0, argList);
+ switch (oper)
+ {
+ case GT_LSH:
+ helper = CORINFO_HELP_LLSH;
+ break;
+ case GT_RSH:
+ helper = CORINFO_HELP_LRSH;
+ break;
+ case GT_RSZ:
+ helper = CORINFO_HELP_LRSZ;
+ break;
+ default:
+ unreached();
+ }
+
+ GenTreeArgList* argList = m_compiler->gtNewArgList(loOp1, hiOp1, shiftByOp);
- GenTreeCall* callNode = call->AsCall();
- ReturnTypeDesc* retTypeDesc = callNode->GetReturnTypeDesc();
- retTypeDesc->InitializeLongReturnType(m_compiler);
+ GenTree* call = m_compiler->gtNewHelperCallNode(helper, TYP_LONG, 0, argList);
- call = m_compiler->fgMorphArgs(callNode);
- Range().InsertAfter(tree, LIR::SeqTree(m_compiler, call));
+ GenTreeCall* callNode = call->AsCall();
+ ReturnTypeDesc* retTypeDesc = callNode->GetReturnTypeDesc();
+ retTypeDesc->InitializeLongReturnType(m_compiler);
- Range().Remove(tree);
- use.ReplaceWith(m_compiler, call);
- return call;
+ call = m_compiler->fgMorphArgs(callNode);
+ Range().InsertAfter(tree, LIR::SeqTree(m_compiler, call));
+
+ Range().Remove(tree);
+ use.ReplaceWith(m_compiler, call);
+ return call;
+ }
}
//------------------------------------------------------------------------
@@ -1069,7 +1348,7 @@ GenTree* DecomposeLongs::DecomposeUMod(LIR::Use& use)
Range().InsertAfter(loResult, hiResult);
- return FinalizeDecomposition(use, loResult, hiResult);
+ return FinalizeDecomposition(use, loResult, hiResult, hiResult);
}
//------------------------------------------------------------------------
diff --git a/src/jit/decomposelongs.h b/src/jit/decomposelongs.h
index 9cb183fc0c..f087c3ec6d 100644
--- a/src/jit/decomposelongs.h
+++ b/src/jit/decomposelongs.h
@@ -55,7 +55,7 @@ private:
GenTree* DecomposeUMod(LIR::Use& use);
// Helper functions
- GenTree* FinalizeDecomposition(LIR::Use& use, GenTree* loResult, GenTree* hiResult);
+ GenTree* FinalizeDecomposition(LIR::Use& use, GenTree* loResult, GenTree* hiResult, GenTree* insertResultAfter);
GenTree* StoreNodeToVar(LIR::Use& use);
static genTreeOps GetHiOper(genTreeOps oper);
diff --git a/src/jit/gtlist.h b/src/jit/gtlist.h
index bc2f8f2457..3bf360355b 100644
--- a/src/jit/gtlist.h
+++ b/src/jit/gtlist.h
@@ -179,6 +179,16 @@ GTNODE(MUL_LONG , "*long" ,GenTreeOp ,1,GTK_BINOP) // A
// helper calls. It is similar to GT_MULHI, the difference being that
// GT_MULHI drops the lo part of the result, whereas GT_MUL_LONG keeps
// both parts of the result.
+
+// The following are nodes that specify shifts that take a GT_LONG op1. The GT_LONG
+// contains the hi and lo parts of a three operand shift form, where one op will be
+// shifted into the other op as part of the operation: LSH_HI will shift
+// the high bits of the lo operand into the high operand as it shifts left, and RSH_LO
+// will shift the lo bits of the high operand into the lo operand. LSH_HI
+// represents the high operation of a 64-bit left shift by a constant int, and
+// RSH_LO represents the lo operation of a 64-bit right shift by a constant int.
+GTNODE(LSH_HI , "<<Hi" ,GenTreeOp ,0,GTK_BINOP)
+GTNODE(RSH_LO , ">>Lo" ,GenTreeOp ,0,GTK_BINOP)
#endif // !defined(LEGACY_BACKEND) && !defined(_TARGET_64BIT_)
#ifdef FEATURE_SIMD
diff --git a/src/jit/lowerxarch.cpp b/src/jit/lowerxarch.cpp
index f818809ffb..4ffb592c11 100644
--- a/src/jit/lowerxarch.cpp
+++ b/src/jit/lowerxarch.cpp
@@ -497,6 +497,10 @@ void Lowering::TreeNodeInfoInit(GenTree* tree)
case GT_RSZ:
case GT_ROL:
case GT_ROR:
+#ifdef _TARGET_X86_
+ case GT_LSH_HI:
+ case GT_RSH_LO:
+#endif
TreeNodeInfoInitShiftRotate(tree);
break;
@@ -1035,6 +1039,31 @@ void Lowering::TreeNodeInfoInitShiftRotate(GenTree* tree)
GenTreePtr shiftBy = tree->gtOp.gtOp2;
GenTreePtr source = tree->gtOp.gtOp1;
+#ifdef _TARGET_X86_
+ // The first operand of a GT_LSH_HI and GT_RSH_LO oper is a GT_LONG so that
+ // we can have a three operand form. Increment the srcCount.
+ if (tree->OperGet() == GT_LSH_HI || tree->OperGet() == GT_RSH_LO)
+ {
+ assert(source->OperGet() == GT_LONG);
+
+ info->srcCount++;
+
+ if (tree->OperGet() == GT_LSH_HI)
+ {
+ GenTreePtr sourceLo = source->gtOp.gtOp1;
+ sourceLo->gtLsraInfo.isDelayFree = true;
+ }
+ else
+ {
+ GenTreePtr sourceHi = source->gtOp.gtOp2;
+ sourceHi->gtLsraInfo.isDelayFree = true;
+ }
+
+ source->gtLsraInfo.hasDelayFreeSrc = true;
+ info->hasDelayFreeSrc = true;
+ }
+#endif
+
// x64 can encode 8 bits of shift and it will use 5 or 6. (the others are masked off)
// We will allow whatever can be encoded - hope you know what you are doing.
if (!IsContainableImmed(tree, shiftBy) || (shiftBy->gtIntConCommon.IconValue() > 255) ||