diff options
Diffstat (limited to 'src/jit')
-rw-r--r-- | src/jit/codegenxarch.cpp | 53 | ||||
-rw-r--r-- | src/jit/compiler.cpp | 3 | ||||
-rw-r--r-- | src/jit/decomposelongs.cpp | 130 | ||||
-rw-r--r-- | src/jit/decomposelongs.h | 2 | ||||
-rw-r--r-- | src/jit/gentree.cpp | 3 | ||||
-rw-r--r-- | src/jit/gentree.h | 1 | ||||
-rw-r--r-- | src/jit/gtlist.h | 18 | ||||
-rw-r--r-- | src/jit/lowerxarch.cpp | 21 | ||||
-rw-r--r-- | src/jit/lsra.cpp | 3 |
9 files changed, 178 insertions, 56 deletions
diff --git a/src/jit/codegenxarch.cpp b/src/jit/codegenxarch.cpp index cce0205f46..dea5ef86fc 100644 --- a/src/jit/codegenxarch.cpp +++ b/src/jit/codegenxarch.cpp @@ -1227,7 +1227,10 @@ void CodeGen::genSetRegToConst(regNumber targetReg, var_types targetType, GenTre // Generate code to get the high N bits of a N*N=2N bit multiplication result void CodeGen::genCodeForMulHi(GenTreeOp* treeNode) { - assert(!(treeNode->gtFlags & GTF_UNSIGNED)); + if (treeNode->OperGet() == GT_MULHI) + { + assert(!(treeNode->gtFlags & GTF_UNSIGNED)); + } assert(!treeNode->gtOverflowEx()); regNumber targetReg = treeNode->gtRegNum; @@ -1247,7 +1250,6 @@ void CodeGen::genCodeForMulHi(GenTreeOp* treeNode) GenTree* rmOp = op2; // Set rmOp to the contained memory operand (if any) - // if (op1->isContained() || (!op2->isContained() && (op2->gtRegNum == targetReg))) { regOp = op2; @@ -1261,10 +1263,19 @@ void CodeGen::genCodeForMulHi(GenTreeOp* treeNode) inst_RV_RV(ins_Copy(targetType), targetReg, regOp->gtRegNum, targetType); } - emit->emitInsBinary(INS_imulEAX, size, treeNode, rmOp); + instruction ins; + if ((treeNode->gtFlags & GTF_UNSIGNED) == 0) + { + ins = INS_imulEAX; + } + else + { + ins = INS_mulEAX; + } + emit->emitInsBinary(ins, size, treeNode, rmOp); // Move the result to the desired register, if necessary - if (targetReg != REG_RDX) + if (treeNode->OperGet() == GT_MULHI && targetReg != REG_RDX) { inst_RV_RV(INS_mov, targetReg, REG_RDX, targetType); } @@ -1395,7 +1406,7 @@ void CodeGen::genCodeForBinary(GenTree* treeNode) assert(oper == GT_OR || oper == GT_XOR || oper == GT_AND || oper == GT_ADD || oper == GT_SUB); #else // !defined(_TARGET_64BIT_) assert(oper == GT_OR || oper == GT_XOR || oper == GT_AND || oper == GT_ADD_LO || oper == GT_ADD_HI || - oper == GT_SUB_LO || oper == GT_SUB_HI || oper == GT_MUL_HI || oper == GT_DIV_HI || oper == GT_MOD_HI || + oper == GT_SUB_LO || oper == GT_SUB_HI || oper == GT_MUL_LONG || oper == GT_DIV_HI || oper == GT_MOD_HI || oper == GT_ADD || oper 
== GT_SUB); #endif // !defined(_TARGET_64BIT_) @@ -2243,6 +2254,9 @@ void CodeGen::genCodeForTreeNode(GenTreePtr treeNode) break; case GT_MULHI: +#ifdef _TARGET_X86_ + case GT_MUL_LONG: +#endif genCodeForMulHi(treeNode->AsOp()); genProduceReg(treeNode); break; @@ -9203,18 +9217,29 @@ void CodeGen::genStoreLongLclVar(GenTree* treeNode) assert(varDsc->TypeGet() == TYP_LONG); assert(!varDsc->lvPromoted); GenTreePtr op1 = treeNode->gtOp.gtOp1; - noway_assert(op1->OperGet() == GT_LONG); + noway_assert(op1->OperGet() == GT_LONG || op1->OperGet() == GT_MUL_LONG); genConsumeRegs(op1); - // Definitions of register candidates will have been lowered to 2 int lclVars. - assert(!treeNode->InReg()); + if (op1->OperGet() == GT_LONG) + { + // Definitions of register candidates will have been lowered to 2 int lclVars. + assert(!treeNode->InReg()); + + GenTreePtr loVal = op1->gtGetOp1(); + GenTreePtr hiVal = op1->gtGetOp2(); + // NYI: Contained immediates. + NYI_IF((loVal->gtRegNum == REG_NA) || (hiVal->gtRegNum == REG_NA), "Store of long lclVar with contained immediate"); + emit->emitIns_R_S(ins_Store(TYP_INT), EA_4BYTE, loVal->gtRegNum, lclNum, 0); + emit->emitIns_R_S(ins_Store(TYP_INT), EA_4BYTE, hiVal->gtRegNum, lclNum, genTypeSize(TYP_INT)); + } + else if (op1->OperGet() == GT_MUL_LONG) + { + assert((op1->gtFlags & GTF_MUL_64RSLT) != 0); - GenTreePtr loVal = op1->gtGetOp1(); - GenTreePtr hiVal = op1->gtGetOp2(); - // NYI: Contained immediates. 
- NYI_IF((loVal->gtRegNum == REG_NA) || (hiVal->gtRegNum == REG_NA), "Store of long lclVar with contained immediate"); - emit->emitIns_R_S(ins_Store(TYP_INT), EA_4BYTE, loVal->gtRegNum, lclNum, 0); - emit->emitIns_R_S(ins_Store(TYP_INT), EA_4BYTE, hiVal->gtRegNum, lclNum, genTypeSize(TYP_INT)); + // Stack store + getEmitter()->emitIns_S_R(ins_Store(TYP_INT), emitTypeSize(TYP_INT), REG_LNGRET_LO, lclNum, 0); + getEmitter()->emitIns_S_R(ins_Store(TYP_INT), emitTypeSize(TYP_INT), REG_LNGRET_HI, lclNum, genTypeSize(TYP_INT)); + } } #endif // !defined(_TARGET_64BIT_) diff --git a/src/jit/compiler.cpp b/src/jit/compiler.cpp index afbecdfc60..cfb7592b47 100644 --- a/src/jit/compiler.cpp +++ b/src/jit/compiler.cpp @@ -8520,6 +8520,9 @@ int cTreeFlagsIR(Compiler* comp, GenTree* tree) break; case GT_MUL: +#if defined(_TARGET_X86_) && !defined(LEGACY_BACKEND) + case GT_MUL_LONG: +#endif if (tree->gtFlags & GTF_MUL_64RSLT) { diff --git a/src/jit/decomposelongs.cpp b/src/jit/decomposelongs.cpp index cf66487367..58eedc7e60 100644 --- a/src/jit/decomposelongs.cpp +++ b/src/jit/decomposelongs.cpp @@ -239,7 +239,7 @@ GenTree* DecomposeLongs::DecomposeNode(LIR::Use& use) break; case GT_MUL: - NYI("Arithmetic binary operators on TYP_LONG - GT_MUL"); + nextNode = DecomposeMul(use); break; case GT_DIV: @@ -423,10 +423,12 @@ GenTree* DecomposeLongs::DecomposeStoreLclVar(LIR::Use& use) GenTree* tree = use.Def(); GenTree* rhs = tree->gtGetOp1(); - if ((rhs->OperGet() == GT_PHI) || (rhs->OperGet() == GT_CALL)) + if ((rhs->OperGet() == GT_PHI) || (rhs->OperGet() == GT_CALL) || + ((rhs->OperGet() == GT_MUL_LONG) && (rhs->gtFlags & GTF_MUL_64RSLT) != 0)) { // GT_CALLs are not decomposed, so will not be converted to GT_LONG // GT_STORE_LCL_VAR = GT_CALL are handled in genMultiRegCallStoreToLocal + // GT_MULs are not decomposed, so will not be converted to GT_LONG return tree->gtNext; } @@ -567,35 +569,7 @@ GenTree* DecomposeLongs::DecomposeCall(LIR::Use& use) assert(use.Def()->OperGet() == 
GT_CALL); // We only need to force var = call() if the call's result is used. - if (use.IsDummyUse()) - return use.Def()->gtNext; - - GenTree* user = use.User(); - if (user->OperGet() == GT_STORE_LCL_VAR) - { - // If parent is already a STORE_LCL_VAR, we can skip it if - // it is already marked as lvIsMultiRegRet. - unsigned varNum = user->AsLclVarCommon()->gtLclNum; - if (m_compiler->lvaTable[varNum].lvIsMultiRegRet) - { - return use.Def()->gtNext; - } - else if (!m_compiler->lvaTable[varNum].lvPromoted) - { - // If var wasn't promoted, we can just set lvIsMultiRegRet. - m_compiler->lvaTable[varNum].lvIsMultiRegRet = true; - return use.Def()->gtNext; - } - } - - GenTree* originalNode = use.Def(); - - // Otherwise, we need to force var = call() - unsigned varNum = use.ReplaceWithLclVar(m_compiler, m_blockWeight); - m_compiler->lvaTable[varNum].lvIsMultiRegRet = true; - - // Decompose the new LclVar use - return DecomposeLclVar(use); + return StoreNodeToVar(use); } //------------------------------------------------------------------------ +// DecomposeMul: Decompose GT_MUL. The only GT_MULs that make it to decompose are +// those with the GTF_MUL_64RSLT flag set. These muls result in a mul instruction that +// returns its result in two registers like GT_CALLs do. Additionally, these muls are +// guaranteed to be in the form long = (long)int * (long)int. Therefore, to decompose +// these nodes, we convert them into GT_MUL_LONGs, undo the cast from int to long by +// keeping only the lo halves of the operands (the hi halves are removed), and force them into the form var = mul, as we do for +// GT_CALLs. In codegen, we then produce a mul instruction that produces the result +// in edx:eax, and store those registers on the stack in genStoreLongLclVar.
+// +// All other GT_MULs have been converted to helper calls in morph.cpp +// +// Arguments: +// use - the LIR::Use object for the def that needs to be decomposed. +// +// Return Value: +// The next node to process. +// +GenTree* DecomposeLongs::DecomposeMul(LIR::Use& use) +{ + assert(use.IsInitialized()); + + GenTree* tree = use.Def(); + genTreeOps oper = tree->OperGet(); + + assert(oper == GT_MUL); + assert((tree->gtFlags & GTF_MUL_64RSLT) != 0); + + GenTree* op1 = tree->gtGetOp1(); + GenTree* op2 = tree->gtGetOp2(); + + GenTree* loOp1 = op1->gtGetOp1(); + GenTree* hiOp1 = op1->gtGetOp2(); + GenTree* loOp2 = op2->gtGetOp1(); + GenTree* hiOp2 = op2->gtGetOp2(); + + Range().Remove(hiOp1); + Range().Remove(hiOp2); + Range().Remove(op1); + Range().Remove(op2); + + // Get rid of the hi ops. We don't need them. + tree->gtOp.gtOp1 = loOp1; + tree->gtOp.gtOp2 = loOp2; + tree->gtOper = GT_MUL_LONG; + + return StoreNodeToVar(use); +} + +//------------------------------------------------------------------------ +// StoreNodeToVar: Check if the user is a STORE_LCL_VAR, and if it isn't, +// store the node to a var. Then decompose the new LclVar. +// +// Arguments: +// use - the LIR::Use object for the def that needs to be decomposed. +// +// Return Value: +// The next node to process. +// +GenTree* DecomposeLongs::StoreNodeToVar(LIR::Use& use) +{ + if (use.IsDummyUse()) + return use.Def()->gtNext; + + GenTree* tree = use.Def(); + GenTree* user = use.User(); + + if (user->OperGet() == GT_STORE_LCL_VAR) + { + // If parent is already a STORE_LCL_VAR, we can skip it if + // it is already marked as lvIsMultiRegRet. + unsigned varNum = user->AsLclVarCommon()->gtLclNum; + if (m_compiler->lvaTable[varNum].lvIsMultiRegRet) + { + return tree->gtNext; + } + else if (!m_compiler->lvaTable[varNum].lvPromoted) + { + // If var wasn't promoted, we can just set lvIsMultiRegRet. 
+ m_compiler->lvaTable[varNum].lvIsMultiRegRet = true; + return tree->gtNext; + } + } + + // Otherwise, we need to force var = call() + unsigned varNum = use.ReplaceWithLclVar(m_compiler, m_blockWeight); + m_compiler->lvaTable[varNum].lvIsMultiRegRet = true; + + // Decompose the new LclVar use + return DecomposeLclVar(use); +} +//------------------------------------------------------------------------ // GetHiOper: Convert arithmetic operator to "high half" operator of decomposed node. // // Arguments: @@ -965,9 +1030,6 @@ genTreeOps DecomposeLongs::GetHiOper(genTreeOps oper) case GT_SUB: return GT_SUB_HI; break; - case GT_MUL: - return GT_MUL_HI; - break; case GT_DIV: return GT_DIV_HI; break; diff --git a/src/jit/decomposelongs.h b/src/jit/decomposelongs.h index af9b342fb2..72a7ac44c9 100644 --- a/src/jit/decomposelongs.h +++ b/src/jit/decomposelongs.h @@ -51,10 +51,12 @@ private: GenTree* DecomposeNeg(LIR::Use& use); GenTree* DecomposeArith(LIR::Use& use); GenTree* DecomposeShift(LIR::Use& use); + GenTree* DecomposeMul(LIR::Use& use); // Helper functions GenTree* FinalizeDecomposition(LIR::Use& use, GenTree* loResult, GenTree* hiResult); + GenTree* StoreNodeToVar(LIR::Use& use); static genTreeOps GetHiOper(genTreeOps oper); static genTreeOps GetLoOper(genTreeOps oper); diff --git a/src/jit/gentree.cpp b/src/jit/gentree.cpp index bf6b3a179c..8ce57e0b71 100644 --- a/src/jit/gentree.cpp +++ b/src/jit/gentree.cpp @@ -9751,6 +9751,9 @@ void Compiler::gtDispNode(GenTreePtr tree, IndentStack* indentStack, __in __in_z goto DASH; case GT_MUL: +#if defined(_TARGET_X86_) && !defined(LEGACY_BACKEND) + case GT_MUL_LONG: +#endif if (tree->gtFlags & GTF_MUL_64RSLT) { printf("L"); diff --git a/src/jit/gentree.h b/src/jit/gentree.h index 619c964f71..18108b8000 100644 --- a/src/jit/gentree.h +++ b/src/jit/gentree.h @@ -1270,7 +1270,6 @@ public: { case GT_ADD_HI: case GT_SUB_HI: - case GT_MUL_HI: case GT_DIV_HI: case GT_MOD_HI: return true; diff --git a/src/jit/gtlist.h 
b/src/jit/gtlist.h index a03bcfe4b0..4c69022ecf 100644 --- a/src/jit/gtlist.h +++ b/src/jit/gtlist.h @@ -116,6 +116,9 @@ GTNODE(RSZ , ">>>" ,0,GTK_BINOP) GTNODE(ROL , "rol" ,0,GTK_BINOP) GTNODE(ROR , "ror" ,0,GTK_BINOP) GTNODE(MULHI , "mulhi" ,1,GTK_BINOP) // returns high bits (top N bits of the 2N bit result of an NxN multiply) + // GT_MULHI is used in division by a constant (fgMorphDivByConst). We turn + // the div into a MULHI + some adjustments. In codegen, we only use the + // results of the high register, and we drop the low results. GTNODE(ASG , "=" ,0,GTK_BINOP|GTK_ASGOP|GTK_NOTLIR) GTNODE(ASG_ADD , "+=" ,0,GTK_BINOP|GTK_ASGOP|GTK_NOTLIR) @@ -159,16 +162,23 @@ GTNODE(LEA , "lea" ,0,GTK_BINOP|GTK_EXOP) // nodes such as calls, returns and stores of long lclVars. GTNODE(LONG , "gt_long" ,0,GTK_BINOP) -// The following are nodes representing the upper half of a 64-bit operation -// that requires a carry/borrow. However, they are all named GT_XXX_HI for -// consistency. +// The following are nodes representing x86 specific long operators, including +// high operators of a 64-bit operations that requires a carry/borrow, which are +// named GT_XXX_HI for consistency, low operators of 64-bit operations that need +// to not be modified in phases post-decompose, and operators that return 64-bit +// results in one instruction. GTNODE(ADD_LO , "+Lo" ,1,GTK_BINOP) GTNODE(ADD_HI , "+Hi" ,1,GTK_BINOP) GTNODE(SUB_LO , "-Lo" ,0,GTK_BINOP) GTNODE(SUB_HI , "-Hi" ,0,GTK_BINOP) -GTNODE(MUL_HI , "*Hi" ,1,GTK_BINOP) GTNODE(DIV_HI , "/Hi" ,0,GTK_BINOP) GTNODE(MOD_HI , "%Hi" ,0,GTK_BINOP) +GTNODE(MUL_LONG , "*long" ,1,GTK_BINOP) // A mul that returns the 2N bit result of an NxN multiply. This op + // is used for x86 multiplies that take two ints and return a long + // result. All other multiplies with long results are morphed into + // helper calls. 
It is similar to GT_MULHI, the difference being that + // GT_MULHI drops the lo part of the result, whereas GT_MUL_LONG keeps + // both parts of the result. #endif // !defined(LEGACY_BACKEND) && !defined(_TARGET_64BIT_) #ifdef FEATURE_SIMD diff --git a/src/jit/lowerxarch.cpp b/src/jit/lowerxarch.cpp index 3bbc75baf5..abf418f984 100644 --- a/src/jit/lowerxarch.cpp +++ b/src/jit/lowerxarch.cpp @@ -435,6 +435,9 @@ void Lowering::TreeNodeInfoInit(GenTree* tree) case GT_MUL: case GT_MULHI: +#if defined(_TARGET_X86_) && !defined(LEGACY_BACKEND) + case GT_MUL_LONG: +#endif SetMulOpCounts(tree); break; @@ -3739,8 +3742,11 @@ bool Lowering::SetStoreIndOpCountsIfRMWMemOp(GenTreePtr storeInd) */ void Lowering::SetMulOpCounts(GenTreePtr tree) { +#if defined(_TARGET_X86_) + assert(tree->OperGet() == GT_MUL || tree->OperGet() == GT_MULHI || tree->OperGet() == GT_MUL_LONG); +#else assert(tree->OperGet() == GT_MUL || tree->OperGet() == GT_MULHI); - +#endif TreeNodeInfo* info = &(tree->gtLsraInfo); info->srcCount = 2; @@ -3787,7 +3793,12 @@ void Lowering::SetMulOpCounts(GenTreePtr tree) // three-op form: reg = r/m * imm // This special widening 32x32->64 MUL is not used on x64 - assert((tree->gtFlags & GTF_MUL_64RSLT) == 0); +#if defined(_TARGET_X86_) + if(tree->OperGet() != GT_MUL_LONG) +#endif + { + assert((tree->gtFlags & GTF_MUL_64RSLT) == 0); + } // Multiply should never be using small types assert(!varTypeIsSmall(tree->TypeGet())); @@ -3805,7 +3816,11 @@ void Lowering::SetMulOpCounts(GenTreePtr tree) info->setDstCandidates(m_lsra, RBM_RAX); hasImpliedFirstOperand = true; } - else if (tree->gtOper == GT_MULHI) + else if (tree->gtOper == GT_MULHI +#if defined(_TARGET_X86_) + || tree->OperGet() == GT_MUL_LONG +#endif + ) { // have to use the encoding:RDX:RAX = RAX * rm info->setDstCandidates(m_lsra, RBM_RAX); diff --git a/src/jit/lsra.cpp b/src/jit/lsra.cpp index 317b976e42..1aac24fb35 100644 --- a/src/jit/lsra.cpp +++ b/src/jit/lsra.cpp @@ -2510,6 +2510,9 @@ regMaskTP 
LinearScan::getKillSetForNode(GenTree* tree) break; case GT_MULHI: +#if defined(_TARGET_X86_) && !defined(LEGACY_BACKEND) + case GT_MUL_LONG: +#endif killMask = RBM_RAX | RBM_RDX; break; |