From 5c5e53363fd7ae7228764a006de1b1f158e64a16 Mon Sep 17 00:00:00 2001 From: Michelle McDaniel Date: Fri, 9 Sep 2016 10:44:03 -0700 Subject: Add optimization for shift by CNS_INT This change adds support for shifting by a GT_CNS_INT without going through a helper. If the shiftOp is a GT_CNS_INT we do several transformations based on the shift amount: If the shift amount is 0, the shift is a nop, so we just put together the hi and lo ops as a GT_LONG. If the shift amount is < 32, we generate a shl/shld pattern, a shr/shrd pattern or a sar/shrd pattern, depending on the oper. The first operand of the shrd/shld is a GT_LONG, which we crack in codegen, using it essentially as two int operands, rather than creating a tri op GenTree node (essentially so that we can have 3 operands, instead of the normal two). If the shift amount is 32, it differs between shifting left and shifting right. For GT_LSH, we move the loOp into the hiResult and set the loResult to 0. For GT_RSZ, we move the hiOp into the loResult, and set the hiResult to 0. For GT_RSH, we move the hiOp into the loResult, and set the hiResult to a 31 bit signed shift of the hiOp to sign extend. If the shift amount is less than 64, but larger than 32: for GT_LSH, the hiResult is a shift of the loOp by shift amount - 32 (the move from lo into hi is the 32 bit shift). We set the loResult to 0. For GT_RSH and GT_RSZ, the loResult is a right shift (signed for GT_RSH) of the hiOp by shift amount - 32. The hiResult is 0 for GT_RSZ, and a 31 bit signed shift of hiOp1 for GT_RSH. If the shift amount is >= 64, we set both hiResult and loResult to 0 for GT_LSH and GT_RSZ, and do a sign extend shift to set hiResult and loResult to the sign of the original hiOp for GT_RSH. 
--- src/jit/codegenlinear.h | 4 + src/jit/codegenxarch.cpp | 79 ++++++++++ src/jit/decomposelongs.cpp | 379 +++++++++++++++++++++++++++++++++++++++------ src/jit/decomposelongs.h | 2 +- src/jit/gtlist.h | 10 ++ src/jit/lowerxarch.cpp | 29 ++++ 6 files changed, 452 insertions(+), 51 deletions(-) diff --git a/src/jit/codegenlinear.h b/src/jit/codegenlinear.h index 6cc437bd8e..793d6d2656 100644 --- a/src/jit/codegenlinear.h +++ b/src/jit/codegenlinear.h @@ -145,6 +145,10 @@ void genSetRegToIcon(regNumber reg, ssize_t val, var_types type = TYP_INT, insFl void genCodeForShift(GenTreePtr tree); +#if defined(_TARGET_X86_) +void genCodeForShiftLong(GenTreePtr tree); +#endif + #ifdef _TARGET_XARCH_ void genCodeForShiftRMW(GenTreeStoreInd* storeInd); #endif // _TARGET_XARCH_ diff --git a/src/jit/codegenxarch.cpp b/src/jit/codegenxarch.cpp index 402d80097d..98fb24936b 100644 --- a/src/jit/codegenxarch.cpp +++ b/src/jit/codegenxarch.cpp @@ -2069,6 +2069,17 @@ void CodeGen::genCodeForTreeNode(GenTreePtr treeNode) // genCodeForShift() calls genProduceReg() break; +#if !defined(_TARGET_64BIT_) + case GT_LSH_HI: + case GT_RSH_LO: + // TODO-X86-CQ: This only handles the case where the operand being shifted is in a register. We don't + // need sourceHi to be always in reg in case of GT_LSH_HI (because it could be moved from memory to + // targetReg if sourceHi is a contained mem-op). Similarly for GT_RSH_LO, sourceLo could be marked as + // contained memory-op. Even if not a memory-op, we could mark it as reg-optional. 
+ genCodeForShiftLong(treeNode); + break; +#endif + case GT_CAST: if (varTypeIsFloating(targetType) && varTypeIsFloating(treeNode->gtOp.gtOp1)) { @@ -4694,6 +4705,12 @@ instruction CodeGen::genGetInsForOper(genTreeOps oper, var_types type) case GT_SUB_HI: ins = INS_sbb; break; + case GT_LSH_HI: + ins = INS_shld; + break; + case GT_RSH_LO: + ins = INS_shrd; + break; #endif // !defined(_TARGET_64BIT_) default: unreached(); @@ -4731,6 +4748,7 @@ void CodeGen::genCodeForShift(GenTreePtr tree) regNumber operandReg = operand->gtRegNum; GenTreePtr shiftBy = tree->gtGetOp2(); + if (shiftBy->isContainedIntOrIImmed()) { // First, move the operand to the destination register and @@ -4769,6 +4787,67 @@ void CodeGen::genCodeForShift(GenTreePtr tree) genProduceReg(tree); } +#ifdef _TARGET_X86_ +//------------------------------------------------------------------------ +// genCodeForShiftLong: Generates the code sequence for a GenTree node that +// represents a three operand bit shift or rotate operation (<<Hi, >>Lo). +// +// Arguments: +// tree - the bit shift node (that specifies the type of bit shift to perform). +// +// Assumptions: +// a) All GenTrees are register allocated. +// b) The shift-by-amount in tree->gtOp.gtOp2 is a contained constant +// +void CodeGen::genCodeForShiftLong(GenTreePtr tree) +{ + // Only the non-RMW case here. 
+ genTreeOps oper = tree->OperGet(); + assert(oper == GT_LSH_HI || oper == GT_RSH_LO); + + GenTree* operand = tree->gtOp.gtOp1; + assert(operand->OperGet() == GT_LONG); + assert(!operand->gtOp.gtOp1->isContained()); + assert(!operand->gtOp.gtOp2->isContained()); + + GenTree* operandLo = operand->gtGetOp1(); + GenTree* operandHi = operand->gtGetOp2(); + + regNumber regLo = operandLo->gtRegNum; + regNumber regHi = operandHi->gtRegNum; + + genConsumeOperands(tree->AsOp()); + + var_types targetType = tree->TypeGet(); + instruction ins = genGetInsForOper(oper, targetType); + + GenTreePtr shiftBy = tree->gtGetOp2(); + + assert(shiftBy->isContainedIntOrIImmed()); + + unsigned int count = shiftBy->AsIntConCommon()->IconValue(); + + regNumber regResult = (oper == GT_LSH_HI) ? regHi : regLo; + + if (regResult != tree->gtRegNum) + { + inst_RV_RV(INS_mov, tree->gtRegNum, regResult, targetType); + } + + if (oper == GT_LSH_HI) + { + inst_RV_RV_IV(ins, emitTypeSize(targetType), tree->gtRegNum, regLo, count); + } + else + { + assert(oper == GT_RSH_LO); + inst_RV_RV_IV(ins, emitTypeSize(targetType), tree->gtRegNum, regHi, count); + } + + genProduceReg(tree); +} +#endif + //------------------------------------------------------------------------ // genCodeForShiftRMW: Generates the code sequence for a GT_STOREIND GenTree node that // represents a RMW bit shift or rotate operation (<<, >>, >>>, rol, ror), for example: diff --git a/src/jit/decomposelongs.cpp b/src/jit/decomposelongs.cpp index 00745c066e..c3be74c74a 100644 --- a/src/jit/decomposelongs.cpp +++ b/src/jit/decomposelongs.cpp @@ -280,23 +280,22 @@ GenTree* DecomposeLongs::DecomposeNode(GenTree* tree) // Arguments: // use - the LIR::Use object for the def that needs to be decomposed. // loResult - the decomposed low part -// hiResult - the decomposed high part. This must follow loResult in the linear order, -// as the new GT_LONG node will be inserted immediately after it. 
+// hiResult - the decomposed high part +// insertResultAfter - the node that the GT_LONG should be inserted after // // Return Value: // The next node to process. // -GenTree* DecomposeLongs::FinalizeDecomposition(LIR::Use& use, GenTree* loResult, GenTree* hiResult) +GenTree* DecomposeLongs::FinalizeDecomposition(LIR::Use& use, GenTree* loResult, GenTree* hiResult, GenTree* insertResultAfter) { assert(use.IsInitialized()); assert(loResult != nullptr); assert(hiResult != nullptr); assert(Range().Contains(loResult)); assert(Range().Contains(hiResult)); - assert(loResult->Precedes(hiResult)); GenTree* gtLong = new (m_compiler, GT_LONG) GenTreeOp(GT_LONG, TYP_LONG, loResult, hiResult); - Range().InsertAfter(hiResult, gtLong); + Range().InsertAfter(insertResultAfter, gtLong); use.ReplaceWith(m_compiler, gtLong); @@ -352,7 +351,7 @@ GenTree* DecomposeLongs::DecomposeLclVar(LIR::Use& use) m_compiler->lvaIncRefCnts(loResult); m_compiler->lvaIncRefCnts(hiResult); - return FinalizeDecomposition(use, loResult, hiResult); + return FinalizeDecomposition(use, loResult, hiResult, hiResult); } //------------------------------------------------------------------------ @@ -376,7 +375,7 @@ GenTree* DecomposeLongs::DecomposeLclFld(LIR::Use& use) GenTree* hiResult = m_compiler->gtNewLclFldNode(loResult->gtLclNum, TYP_INT, loResult->gtLclOffs + 4); Range().InsertAfter(loResult, hiResult); - return FinalizeDecomposition(use, loResult, hiResult); + return FinalizeDecomposition(use, loResult, hiResult, hiResult); } //------------------------------------------------------------------------ @@ -564,7 +563,7 @@ GenTree* DecomposeLongs::DecomposeCast(LIR::Use& use) NYI("Unimplemented cast decomposition"); } - return FinalizeDecomposition(use, loResult, hiResult); + return FinalizeDecomposition(use, loResult, hiResult, hiResult); } //------------------------------------------------------------------------ @@ -591,7 +590,7 @@ GenTree* DecomposeLongs::DecomposeCnsLng(LIR::Use& use) GenTree* 
hiResult = new (m_compiler, GT_CNS_INT) GenTreeIntCon(TYP_INT, hiVal); Range().InsertAfter(loResult, hiResult); - return FinalizeDecomposition(use, loResult, hiResult); + return FinalizeDecomposition(use, loResult, hiResult, hiResult); } //------------------------------------------------------------------------ @@ -740,7 +739,7 @@ GenTree* DecomposeLongs::DecomposeInd(LIR::Use& use) Range().InsertAfter(indLow, addrBaseHigh, addrHigh, indHigh); - return FinalizeDecomposition(use, indLow, indHigh); + return FinalizeDecomposition(use, indLow, indHigh, indHigh); } //------------------------------------------------------------------------ @@ -772,7 +771,7 @@ GenTree* DecomposeLongs::DecomposeNot(LIR::Use& use) GenTree* hiResult = new (m_compiler, GT_NOT) GenTreeOp(GT_NOT, TYP_INT, hiOp1, nullptr); Range().InsertAfter(loResult, hiResult); - return FinalizeDecomposition(use, loResult, hiResult); + return FinalizeDecomposition(use, loResult, hiResult, hiResult); } //------------------------------------------------------------------------ @@ -817,7 +816,7 @@ GenTree* DecomposeLongs::DecomposeNeg(LIR::Use& use) Range().InsertAfter(loResult, zero, hiAdjust, hiResult); - return FinalizeDecomposition(use, loResult, hiResult); + return FinalizeDecomposition(use, loResult, hiResult, hiResult); } //------------------------------------------------------------------------ @@ -878,14 +877,19 @@ GenTree* DecomposeLongs::DecomposeArith(LIR::Use& use) } } - return FinalizeDecomposition(use, loResult, hiResult); + return FinalizeDecomposition(use, loResult, hiResult, hiResult); } //------------------------------------------------------------------------ -// DecomposeShift: Decompose GT_LSH, GT_RSH, GT_RSZ. For shift nodes, we need to use -// the shift helper functions, so we here convert the shift into a helper call by -// pulling its arguments out of linear order and making them the args to a call, then -// replacing the original node with the new call. 
+// DecomposeShift: Decompose GT_LSH, GT_RSH, GT_RSZ. For shift nodes being shifted +// by a constant int, we can inspect the shift amount and decompose to the appropriate +// node types, generating a shl/shld pattern for GT_LSH, a shrd/shr pattern for GT_RSZ, +// and a shrd/sar pattern for GT_RSH for most shift amounts. Shifting by 0, >= 32 and +// >= 64 are special cased to produce better code patterns. +// +// For all other shift nodes, we need to use the shift helper functions, so we here convert +// the shift into a helper call by pulling its arguments out of linear order and making +// them the args to a call, then replacing the original node with the new call. // // Arguments: // use - the LIR::Use object for the def that needs to be decomposed. @@ -899,64 +903,339 @@ GenTree* DecomposeLongs::DecomposeShift(LIR::Use& use) GenTree* tree = use.Def(); GenTree* gtLong = tree->gtGetOp1(); + GenTree* oldShiftByOp = tree->gtGetOp2(); + genTreeOps oper = tree->OperGet(); + genTreeOps shiftByOper = oldShiftByOp->OperGet(); assert((oper == GT_LSH) || (oper == GT_RSH) || (oper == GT_RSZ)); - LIR::Use loOp1Use(Range(), &gtLong->gtOp.gtOp1, gtLong); - loOp1Use.ReplaceWithLclVar(m_compiler, m_blockWeight); + unsigned loOp1LclNum; + unsigned hiOp1LclNum; - LIR::Use hiOp1Use(Range(), &gtLong->gtOp.gtOp2, gtLong); - hiOp1Use.ReplaceWithLclVar(m_compiler, m_blockWeight); + if (gtLong->gtOp.gtOp1->OperGet() != GT_LCL_VAR) + { + LIR::Use loOp1Use(Range(), &gtLong->gtOp.gtOp1, gtLong); + loOp1LclNum = loOp1Use.ReplaceWithLclVar(m_compiler, m_blockWeight); + } + else + { + loOp1LclNum = gtLong->gtOp.gtOp1->AsLclVarCommon()->gtLclNum; + } - LIR::Use shiftWidthUse(Range(), &tree->gtOp.gtOp2, tree); - shiftWidthUse.ReplaceWithLclVar(m_compiler, m_blockWeight); + if (gtLong->gtOp.gtOp2->OperGet() != GT_LCL_VAR) + { + LIR::Use hiOp1Use(Range(), &gtLong->gtOp.gtOp2, gtLong); + hiOp1LclNum = hiOp1Use.ReplaceWithLclVar(m_compiler, m_blockWeight); + } + else + { + hiOp1LclNum = 
gtLong->gtOp.gtOp2->AsLclVarCommon()->gtLclNum; + } GenTree* loOp1 = gtLong->gtGetOp1(); GenTree* hiOp1 = gtLong->gtGetOp2(); - GenTree* shiftWidthOp = tree->gtGetOp2(); - Range().Remove(gtLong); Range().Remove(loOp1); Range().Remove(hiOp1); - Range().Remove(shiftWidthOp); + // If we are shifting by a constant int, we do not want to use a helper, instead, we decompose. + if (shiftByOper == GT_CNS_INT) + { + unsigned int count = oldShiftByOp->gtIntCon.gtIconVal; + Range().Remove(oldShiftByOp); - // TODO-X86-CQ: If the shift operand is a GT_CNS_INT, we should pipe the instructions through to codegen - // and generate the shift instructions ourselves there, rather than replacing it with a helper call. + GenTree* loResult; + GenTree* hiResult; - unsigned helper; + GenTree* insertAfter; - switch (oper) - { - case GT_LSH: - helper = CORINFO_HELP_LLSH; + switch (oper) + { + case GT_LSH: + { + if (count == 0) + { + // Do nothing. + loResult = loOp1; + hiResult = hiOp1; + + Range().InsertBefore(tree, loResult, hiResult); + + insertAfter = hiResult; + } + else if (count < 32) + { + // Hi is a GT_LSH_HI, lo is a GT_LSH. Will produce: + // reg1 = lo + // shl lo, shift + // shld hi, reg1, shift + + GenTree* shiftByHi = m_compiler->gtNewIconNode(count, TYP_INT); + GenTree* shiftByLo = m_compiler->gtNewIconNode(count, TYP_INT); + + loResult = m_compiler->gtNewOperNode(GT_LSH, TYP_INT, loOp1, shiftByLo); + + // Create a GT_LONG that contains loCopy and hiOp1. 
This will be used in codegen to + // generate the shld instruction + GenTree* loCopy = m_compiler->gtNewLclvNode(loOp1LclNum, TYP_INT); + GenTree* hiOp = new (m_compiler, GT_LONG) GenTreeOp(GT_LONG, TYP_LONG, loCopy, hiOp1); + hiResult = m_compiler->gtNewOperNode(GT_LSH_HI, TYP_INT, hiOp, shiftByHi); + + m_compiler->lvaIncRefCnts(loCopy); + + Range().InsertBefore(tree, loCopy, hiOp1, hiOp); + Range().InsertBefore(tree, shiftByHi, hiResult); + Range().InsertBefore(tree, loOp1, shiftByLo, loResult); + + insertAfter = loResult; + } + else + { + assert(count >= 32); + + // Zero out loResult (shift of >= 32 bits shifts all lo bits to hiResult) + loResult = m_compiler->gtNewZeroConNode(TYP_INT); + Range().InsertBefore(tree, loResult); + + if (count < 64) + { + if (count == 32) + { + // Move loOp1 into hiResult (shift of 32 bits is just a mov of lo to hi) + hiResult = loOp1; + Range().InsertBefore(tree, hiResult); + } + else + { + assert(count > 32 && count < 64); + + // Move loOp1 into hiResult, do a GT_LSH with count - 32. + GenTree* shiftBy = m_compiler->gtNewIconNode(count - 32, TYP_INT); + hiResult = m_compiler->gtNewOperNode(oper, TYP_INT, loOp1, shiftBy); + Range().InsertBefore(tree, loOp1, shiftBy, hiResult); + } + } + else + { + assert(count >= 64); + + // Zero out hi (shift of >= 64 bits moves all the bits out of the two registers) + hiResult = m_compiler->gtNewZeroConNode(TYP_INT); + Range().InsertBefore(tree, hiResult); + } + + insertAfter = hiResult; + } + } break; - case GT_RSH: - helper = CORINFO_HELP_LRSH; + case GT_RSZ: + { + if (count == 0) + { + // Do nothing. + loResult = loOp1; + hiResult = hiOp1; + Range().InsertBefore(tree, loResult, hiResult); + } + else if (count < 32) + { + // Hi is a GT_RSZ, lo is a GT_RSH_LO. 
Will produce: + // reg1 = hi + // shrd lo, reg1, shift + // shr hi, shift + + GenTree* shiftByHi = m_compiler->gtNewIconNode(count, TYP_INT); + GenTree* shiftByLo = m_compiler->gtNewIconNode(count, TYP_INT); + GenTree* hiCopy = m_compiler->gtNewLclvNode(hiOp1LclNum, TYP_INT); + m_compiler->lvaIncRefCnts(hiCopy); + + hiResult = m_compiler->gtNewOperNode(GT_RSZ, TYP_INT, hiOp1, shiftByHi); + + // Create a GT_LONG that contains loOp1 and hiCopy. This will be used in codegen to + // generate the shrd instruction + GenTree* loOp = new (m_compiler, GT_LONG) GenTreeOp(GT_LONG, TYP_LONG, loOp1, hiCopy); + loResult = m_compiler->gtNewOperNode(GT_RSH_LO, TYP_INT, loOp, shiftByLo); + + Range().InsertBefore(tree, loOp1, hiCopy, loOp); + Range().InsertBefore(tree, shiftByLo, loResult); + Range().InsertBefore(tree, hiOp1, shiftByHi, hiResult); + } + else + { + assert(count >= 32); + if (count < 64) + { + if (count == 32) + { + // Move hiOp1 into loResult. + loResult = hiOp1; + Range().InsertBefore(tree, loResult); + } + else + { + assert(count > 32 && count < 64); + + // Move hiOp1 into loResult, do a GT_RSZ with count - 32. + GenTree* shiftBy = m_compiler->gtNewIconNode(count - 32, TYP_INT); + loResult = m_compiler->gtNewOperNode(oper, TYP_INT, hiOp1, shiftBy); + Range().InsertBefore(tree, hiOp1, shiftBy, loResult); + } + } + else + { + assert(count >= 64); + + // Zero out lo + loResult = m_compiler->gtNewZeroConNode(TYP_INT); + Range().InsertBefore(tree, loResult); + } + + // Zero out hi + hiResult = m_compiler->gtNewZeroConNode(TYP_INT); + Range().InsertBefore(tree, hiResult); + } + + insertAfter = hiResult; + } break; - case GT_RSZ: - helper = CORINFO_HELP_LRSZ; + case GT_RSH: + { + if (count == 0) + { + // Do nothing. + loResult = loOp1; + hiResult = hiOp1; + Range().InsertBefore(tree, loResult, hiResult); + } + else if (count < 32) + { + // Hi is a GT_RSH, lo is a GT_RSH_LO. 
Will produce: + // reg1 = hi + // shrd lo, reg1, shift + // sar hi, shift + + GenTree* shiftByHi = m_compiler->gtNewIconNode(count, TYP_INT); + GenTree* shiftByLo = m_compiler->gtNewIconNode(count, TYP_INT); + GenTree* hiCopy = m_compiler->gtNewLclvNode(hiOp1LclNum, TYP_INT); + m_compiler->lvaIncRefCnts(hiCopy); + + hiResult = m_compiler->gtNewOperNode(GT_RSH, TYP_INT, hiOp1, shiftByHi); + + // Create a GT_LONG that contains loOp1 and hiCopy. This will be used in codegen to + // generate the shrd instruction + GenTree* loOp = new (m_compiler, GT_LONG) GenTreeOp(GT_LONG, TYP_LONG, loOp1, hiCopy); + loResult = m_compiler->gtNewOperNode(GT_RSH_LO, TYP_INT, loOp, shiftByLo); + + Range().InsertBefore(tree, loOp1, hiCopy, loOp); + Range().InsertBefore(tree, shiftByLo, loResult); + Range().InsertBefore(tree, shiftByHi, hiOp1, hiResult); + } + else + { + assert(count >= 32); + if (count < 64) + { + if (count == 32) + { + // Move hiOp1 into loResult. + loResult = hiOp1; + Range().InsertBefore(tree, loResult); + } + else + { + assert(count > 32 && count < 64); + + // Move hiOp1 into loResult, do a GT_RSH with count - 32. 
+ GenTree* shiftBy = m_compiler->gtNewIconNode(count - 32, TYP_INT); + loResult = m_compiler->gtNewOperNode(oper, TYP_INT, hiOp1, shiftBy); + Range().InsertBefore(tree, hiOp1, shiftBy, loResult); + } + + // Propagate sign bit in hiResult + GenTree* shiftBy = m_compiler->gtNewIconNode(31, TYP_INT); + GenTree* hiCopy = m_compiler->gtNewLclvNode(hiOp1LclNum, TYP_INT); + hiResult = m_compiler->gtNewOperNode(GT_RSH, TYP_INT, hiCopy, shiftBy); + Range().InsertBefore(tree, shiftBy, hiCopy, hiResult); + + m_compiler->lvaIncRefCnts(hiCopy); + } + else + { + assert(count >= 64); + + // Propagate sign bit in loResult + GenTree* hiCopy = m_compiler->gtNewLclvNode(hiOp1LclNum, TYP_INT); + GenTree* loShiftBy = m_compiler->gtNewIconNode(31, TYP_INT); + loResult = m_compiler->gtNewOperNode(GT_RSH, TYP_INT, hiCopy, loShiftBy); + Range().InsertBefore(tree, hiCopy, loShiftBy, loResult); + + // Propagate sign bit in hiResult + GenTree* shiftBy = m_compiler->gtNewIconNode(31, TYP_INT); + hiResult = m_compiler->gtNewOperNode(GT_RSH, TYP_INT, hiOp1, shiftBy); + Range().InsertBefore(tree, shiftBy, hiOp1, hiResult); + + m_compiler->lvaIncRefCnts(hiCopy); + } + } + + insertAfter = hiResult; + } break; - default: - unreached(); + default: + unreached(); + } + + // Remove tree from Range + Range().Remove(tree); + + return FinalizeDecomposition(use, loResult, hiResult, insertAfter); } + else + { + GenTree* shiftByOp = oldShiftByOp; + if (shiftByOp->OperGet() != GT_LCL_VAR) + { + LIR::Use shiftByUse(Range(), &tree->gtOp.gtOp2, tree); + shiftByUse.ReplaceWithLclVar(m_compiler, m_blockWeight); + shiftByOp = tree->gtGetOp2(); + } + + Range().Remove(shiftByOp); - GenTreeArgList* argList = m_compiler->gtNewArgList(loOp1, hiOp1, shiftWidthOp); + unsigned helper; - GenTree* call = m_compiler->gtNewHelperCallNode(helper, TYP_LONG, 0, argList); + switch (oper) + { + case GT_LSH: + helper = CORINFO_HELP_LLSH; + break; + case GT_RSH: + helper = CORINFO_HELP_LRSH; + break; + case GT_RSZ: + helper = 
CORINFO_HELP_LRSZ; + break; + default: + unreached(); + } + + GenTreeArgList* argList = m_compiler->gtNewArgList(loOp1, hiOp1, shiftByOp); - GenTreeCall* callNode = call->AsCall(); - ReturnTypeDesc* retTypeDesc = callNode->GetReturnTypeDesc(); - retTypeDesc->InitializeLongReturnType(m_compiler); + GenTree* call = m_compiler->gtNewHelperCallNode(helper, TYP_LONG, 0, argList); - call = m_compiler->fgMorphArgs(callNode); - Range().InsertAfter(tree, LIR::SeqTree(m_compiler, call)); + GenTreeCall* callNode = call->AsCall(); + ReturnTypeDesc* retTypeDesc = callNode->GetReturnTypeDesc(); + retTypeDesc->InitializeLongReturnType(m_compiler); - Range().Remove(tree); - use.ReplaceWith(m_compiler, call); - return call; + call = m_compiler->fgMorphArgs(callNode); + Range().InsertAfter(tree, LIR::SeqTree(m_compiler, call)); + + Range().Remove(tree); + use.ReplaceWith(m_compiler, call); + return call; + } } //------------------------------------------------------------------------ @@ -1069,7 +1348,7 @@ GenTree* DecomposeLongs::DecomposeUMod(LIR::Use& use) Range().InsertAfter(loResult, hiResult); - return FinalizeDecomposition(use, loResult, hiResult); + return FinalizeDecomposition(use, loResult, hiResult, hiResult); } //------------------------------------------------------------------------ diff --git a/src/jit/decomposelongs.h b/src/jit/decomposelongs.h index 9cb183fc0c..f087c3ec6d 100644 --- a/src/jit/decomposelongs.h +++ b/src/jit/decomposelongs.h @@ -55,7 +55,7 @@ private: GenTree* DecomposeUMod(LIR::Use& use); // Helper functions - GenTree* FinalizeDecomposition(LIR::Use& use, GenTree* loResult, GenTree* hiResult); + GenTree* FinalizeDecomposition(LIR::Use& use, GenTree* loResult, GenTree* hiResult, GenTree* insertResultAfter); GenTree* StoreNodeToVar(LIR::Use& use); static genTreeOps GetHiOper(genTreeOps oper); diff --git a/src/jit/gtlist.h b/src/jit/gtlist.h index bc2f8f2457..3bf360355b 100644 --- a/src/jit/gtlist.h +++ b/src/jit/gtlist.h @@ -179,6 +179,16 @@ 
GTNODE(MUL_LONG , "*long" ,GenTreeOp ,1,GTK_BINOP) // A // helper calls. It is similar to GT_MULHI, the difference being that // GT_MULHI drops the lo part of the result, whereas GT_MUL_LONG keeps // both parts of the result. + +// The following are nodes that specify shifts that take a GT_LONG op1. The GT_LONG +// contains the hi and lo parts of three operand shift form where one op will be +// shifted into the other op as part of the operation (LSH_HI will shift +// the high bits of the lo operand into the high operand as it shifts left. RSH_LO +// will shift the lo bits of the high operand into the lo operand). LSH_HI +// represents the high operation of a 64-bit left shift by a constant int, and +// RSH_LO represents the lo operation of a 64-bit right shift by a constant int. +GTNODE(LSH_HI , "<<Hi" ,GenTreeOp ,0,GTK_BINOP) +GTNODE(RSH_LO , ">>Lo" ,GenTreeOp ,0,GTK_BINOP) #endif // !defined(LEGACY_BACKEND) && !defined(_TARGET_64BIT_) #ifdef FEATURE_SIMD diff --git a/src/jit/lowerxarch.cpp b/src/jit/lowerxarch.cpp index f818809ffb..4ffb592c11 100644 --- a/src/jit/lowerxarch.cpp +++ b/src/jit/lowerxarch.cpp @@ -497,6 +497,10 @@ void Lowering::TreeNodeInfoInit(GenTree* tree) case GT_RSZ: case GT_ROL: case GT_ROR: +#ifdef _TARGET_X86_ + case GT_LSH_HI: + case GT_RSH_LO: +#endif TreeNodeInfoInitShiftRotate(tree); break; @@ -1035,6 +1039,31 @@ void Lowering::TreeNodeInfoInitShiftRotate(GenTree* tree) GenTreePtr shiftBy = tree->gtOp.gtOp2; GenTreePtr source = tree->gtOp.gtOp1; +#ifdef _TARGET_X86_ + // The first operand of a GT_LSH_HI and GT_RSH_LO oper is a GT_LONG so that + // we can have a three operand form. Increment the srcCount. 
+ if (tree->OperGet() == GT_LSH_HI || tree->OperGet() == GT_RSH_LO) + { + assert(source->OperGet() == GT_LONG); + + info->srcCount++; + + if (tree->OperGet() == GT_LSH_HI) + { + GenTreePtr sourceLo = source->gtOp.gtOp1; + sourceLo->gtLsraInfo.isDelayFree = true; + } + else + { + GenTreePtr sourceHi = source->gtOp.gtOp2; + sourceHi->gtLsraInfo.isDelayFree = true; + } + + source->gtLsraInfo.hasDelayFreeSrc = true; + info->hasDelayFreeSrc = true; + } +#endif + // x64 can encode 8 bits of shift and it will use 5 or 6. (the others are masked off) // We will allow whatever can be encoded - hope you know what you are doing. if (!IsContainableImmed(tree, shiftBy) || (shiftBy->gtIntConCommon.IconValue() > 255) || -- cgit v1.2.3