author    Carol Eidt <carol.eidt@microsoft.com>   2017-07-31 17:41:13 -0700
committer Carol Eidt <carol.eidt@microsoft.com>   2017-08-16 11:37:16 -0700
commit    0ad49b141f1d06c5e83ec58fa7f21c7d2f3714d3 (patch)
tree      717327f4a7f0fabc9d1a7c83681504818d6d01f5 /src
parent    89d8be12677a5d395d1ec7e62874a4476ea5d4d6 (diff)
Move Containment Analysis to Lowering
- Ensure that the only change made to child nodes is the specification of srcCandidates
  (i.e., the register requirements of the parent/consumer).
- Since this now occurs before redoing dataflow, we don't have complete information on
  which lclVars are containable. To mitigate this:
  - In `TreeNodeInfoInit()`, if a lclVar is marked `isRegOptional` but is now untracked,
    change it to be contained (sketched below).
  - Mark decomposed/promoted fields of long params as `lvDoNotEnregister`.
- Ensure that any new code added after `Lowering` is analyzed as necessary.
- When a value becomes unused, it is no longer contained. (This is needed now because liveness runs after containment analysis.)
- Add dumping of nodes deleted as unused.
- Set byteable register requirements as needed on contained RMW memory ops.
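
The `isRegOptional` mitigation above can be illustrated with a short sketch. This is not code from the change itself; the helper below is hypothetical, but it uses names that appear in the diff (`lvaTable`, `lvTracked`, `IsRegOptional`, `ClearRegOptional`, `SetContained`) to show the idea: a lclVar marked reg-optional during Lowering (before liveness re-runs) may turn out to be untracked once liveness has run, and an untracked local always lives in memory, so it can simply be made contained instead.

    // Hypothetical helper, for illustration only -- not part of this change.
    // Runs at TreeNodeInfoInit time, i.e. after liveness has been recomputed.
    void CheckRegOptionalLclVar(Compiler* comp, GenTree* node)
    {
        if ((node->OperGet() == GT_LCL_VAR) && node->IsRegOptional())
        {
            LclVarDsc* varDsc = &comp->lvaTable[node->AsLclVarCommon()->gtLclNum];
            if (!varDsc->lvTracked)
            {
                // An untracked local is always stack-resident, so containment is
                // strictly better than the reg-optional hint set during Lowering.
                node->ClearRegOptional();
                node->SetContained();
            }
        }
    }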
Diffstat (limited to 'src')
 src/jit/codegenarmarch.cpp |    3
 src/jit/codegenxarch.cpp   |    3
 src/jit/compiler.h         |    5
 src/jit/flowgraph.cpp      |   11
 src/jit/gentree.cpp        |  123
 src/jit/gentree.h          |   32
 src/jit/lclvars.cpp        |   12
 src/jit/lir.h              |    7
 src/jit/liveness.cpp       |    9
 src/jit/lower.cpp          |  587
 src/jit/lower.h            |  108
 src/jit/lowerarmarch.cpp   |  122
 src/jit/lowerxarch.cpp     | 1031
 src/jit/lsra.cpp           |    6
 src/jit/lsra.h             |   23
 src/jit/lsraarm.cpp        |  120
 src/jit/lsraarm64.cpp      |  111
 src/jit/lsraarmarch.cpp    |  283
 src/jit/lsraxarch.cpp      |  916
 src/jit/nodeinfo.h         |    4
 src/jit/rationalize.cpp    |    1
 21 files changed, 1974 insertions(+), 1543 deletions(-)
diff --git a/src/jit/codegenarmarch.cpp b/src/jit/codegenarmarch.cpp index 47dc419dba..61381d4c78 100644 --- a/src/jit/codegenarmarch.cpp +++ b/src/jit/codegenarmarch.cpp @@ -265,7 +265,8 @@ void CodeGen::genCodeForTreeNode(GenTreePtr treeNode) case GT_LIST: case GT_FIELD_LIST: case GT_ARGPLACE: - // Nothing to do + // Should always be marked contained. + assert(!"LIST, FIELD_LIST and ARGPLACE nodes should always be marked contained."); break; case GT_PUTARG_STK: diff --git a/src/jit/codegenxarch.cpp b/src/jit/codegenxarch.cpp index 116b0f30cf..5d1160ac96 100644 --- a/src/jit/codegenxarch.cpp +++ b/src/jit/codegenxarch.cpp @@ -1851,7 +1851,8 @@ void CodeGen::genCodeForTreeNode(GenTreePtr treeNode) case GT_LIST: case GT_FIELD_LIST: case GT_ARGPLACE: - // Nothing to do + // Should always be marked contained. + assert(!"LIST, FIELD_LIST and ARGPLACE nodes should always be marked contained."); break; case GT_SWAP: diff --git a/src/jit/compiler.h b/src/jit/compiler.h index 65573188a0..2c6f9b203e 100644 --- a/src/jit/compiler.h +++ b/src/jit/compiler.h @@ -2028,7 +2028,7 @@ public: GenTree* gtNewBlkOpNode(GenTreePtr dst, GenTreePtr srcOrFillVal, unsigned size, bool isVolatile, bool isCopyBlock); - GenTree* gtNewPutArgReg(var_types type, GenTreePtr arg); + GenTree* gtNewPutArgReg(var_types type, GenTreePtr arg, regNumber argReg); protected: void gtBlockOpInit(GenTreePtr result, GenTreePtr dst, GenTreePtr srcOrFillVal, bool isVolatile); @@ -2444,6 +2444,9 @@ public: DNER_DepField, // It is a field of a dependently promoted struct DNER_NoRegVars, // opts.compFlags & CLFLG_REGVAR is not set DNER_MinOptsGC, // It is a GC Ref and we are compiling MinOpts +#if !defined(LEGACY_BACKEND) && !defined(_TARGET_64BIT_) + DNER_LongParamField, // It is a decomposed field of a long parameter. +#endif #ifdef JIT32_GCENCODER DNER_PinningRef, #endif diff --git a/src/jit/flowgraph.cpp b/src/jit/flowgraph.cpp index 0cec40446a..91a478ca15 100644 --- a/src/jit/flowgraph.cpp +++ b/src/jit/flowgraph.cpp @@ -17,6 +17,9 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX #endif #include "allocacheck.h" // for alloca +#ifndef LEGACY_BACKEND +#include "lower.h" // for LowerRange() +#endif /*****************************************************************************/ @@ -13477,6 +13480,10 @@ bool Compiler::fgOptimizeEmptyBlock(BasicBlock* block) if (block->IsLIR()) { LIR::AsRange(block).InsertAtEnd(nop); +#ifndef LEGACY_BACKEND + LIR::ReadOnlyRange range(nop, nop); + m_pLowering->LowerRange(block, range); +#endif } else { @@ -13796,6 +13803,10 @@ bool Compiler::fgOptimizeSwitchBranches(BasicBlock* block) if (block->IsLIR()) { blockRange->InsertAfter(switchVal, zeroConstNode, condNode); +#ifndef LEGACY_BACKEND + LIR::ReadOnlyRange range(zeroConstNode, switchTree); + m_pLowering->LowerRange(block, range); +#endif // !LEGACY_BACKEND } else { diff --git a/src/jit/gentree.cpp b/src/jit/gentree.cpp index 08ba61e012..2f023118a8 100644 --- a/src/jit/gentree.cpp +++ b/src/jit/gentree.cpp @@ -610,42 +610,6 @@ void Compiler::fgWalkAllTreesPre(fgWalkPreFn* visitor, void* pCallBackData) } } -// ------------------------------------------------------------------------------------------ -// gtClearReg: Sets the register to the "no register assignment" value, depending upon -// the type of the node, and whether it fits any of the special cases for register pairs -// or multi-reg call nodes. 
-// -// Arguments: -// compiler - compiler instance -// -// Return Value: -// None -void GenTree::gtClearReg(Compiler* compiler) -{ -#if CPU_LONG_USES_REGPAIR - if (isRegPairType(TypeGet()) || - // (IsLocal() && isRegPairType(compiler->lvaTable[gtLclVarCommon.gtLclNum].TypeGet())) || - (OperGet() == GT_MUL && (gtFlags & GTF_MUL_64RSLT))) - { - gtRegPair = REG_PAIR_NONE; - } - else -#endif // CPU_LONG_USES_REGPAIR - { - gtRegNum = REG_NA; - } - - // Also clear multi-reg state if this is a call node - if (IsCall()) - { - this->AsCall()->ClearOtherRegs(); - } - else if (IsCopyOrReload()) - { - this->AsCopyOrReload()->ClearOtherRegs(); - } -} - //----------------------------------------------------------- // CopyReg: Copy the _gtRegNum/_gtRegPair/gtRegTag fields. // @@ -704,56 +668,52 @@ bool GenTree::gtHasReg() const { assert(_gtRegNum != REG_NA); INDEBUG(assert(gtRegTag == GT_REGTAG_REGPAIR)); - hasReg = (gtRegPair != REG_PAIR_NONE); + return (gtRegPair != REG_PAIR_NONE); } - else + assert(_gtRegNum != REG_PAIR_NONE); + INDEBUG(assert(gtRegTag == GT_REGTAG_REG)); #endif + if (IsMultiRegCall()) { - assert(_gtRegNum != REG_PAIR_NONE); - INDEBUG(assert(gtRegTag == GT_REGTAG_REG)); + // Have to cast away const-ness because GetReturnTypeDesc() is a non-const method + GenTree* tree = const_cast<GenTree*>(this); + GenTreeCall* call = tree->AsCall(); + unsigned regCount = call->GetReturnTypeDesc()->GetReturnRegCount(); + hasReg = false; - if (IsMultiRegCall()) + // A Multi-reg call node is said to have regs, if it has + // reg assigned to each of its result registers. + for (unsigned i = 0; i < regCount; ++i) { - // Has to cast away const-ness because GetReturnTypeDesc() is a non-const method - GenTree* tree = const_cast<GenTree*>(this); - GenTreeCall* call = tree->AsCall(); - unsigned regCount = call->GetReturnTypeDesc()->GetReturnRegCount(); - hasReg = false; - - // A Multi-reg call node is said to have regs, if it has - // reg assigned to each of its result registers. - for (unsigned i = 0; i < regCount; ++i) + hasReg = (call->GetRegNumByIdx(i) != REG_NA); + if (!hasReg) { - hasReg = (call->GetRegNumByIdx(i) != REG_NA); - if (!hasReg) - { - break; - } + break; } } - else if (IsCopyOrReloadOfMultiRegCall()) - { - GenTree* tree = const_cast<GenTree*>(this); - GenTreeCopyOrReload* copyOrReload = tree->AsCopyOrReload(); - GenTreeCall* call = copyOrReload->gtGetOp1()->AsCall(); - unsigned regCount = call->GetReturnTypeDesc()->GetReturnRegCount(); - hasReg = false; + } + else if (IsCopyOrReloadOfMultiRegCall()) + { + GenTree* tree = const_cast<GenTree*>(this); + GenTreeCopyOrReload* copyOrReload = tree->AsCopyOrReload(); + GenTreeCall* call = copyOrReload->gtGetOp1()->AsCall(); + unsigned regCount = call->GetReturnTypeDesc()->GetReturnRegCount(); + hasReg = false; - // A Multi-reg copy or reload node is said to have regs, - // if it has valid regs in any of the positions. - for (unsigned i = 0; i < regCount; ++i) + // A Multi-reg copy or reload node is said to have regs, + // if it has valid regs in any of the positions. + for (unsigned i = 0; i < regCount; ++i) + { + hasReg = (copyOrReload->GetRegNumByIdx(i) != REG_NA); + if (hasReg) { - hasReg = (copyOrReload->GetRegNumByIdx(i) != REG_NA); - if (hasReg) - { - break; - } + break; } } - else - { - hasReg = (gtRegNum != REG_NA); - } + } + else + { + hasReg = (gtRegNum != REG_NA); } return hasReg; @@ -7006,8 +6966,9 @@ GenTree* Compiler::gtNewBlkOpNode( // gtNewPutArgReg: Creates a new PutArgReg node. 
// // Arguments: -// type - The actual type of the argument -// arg - The argument node +// type - The actual type of the argument +// arg - The argument node +// argReg - The register that the argument will be passed in // // Return Value: // Returns the newly created PutArgReg node. @@ -7015,7 +6976,7 @@ GenTree* Compiler::gtNewBlkOpNode( // Notes: // The node is generated as GenTreeMultiRegOp on armel, as GenTreeOp on all the other archs // -GenTreePtr Compiler::gtNewPutArgReg(var_types type, GenTreePtr arg) +GenTreePtr Compiler::gtNewPutArgReg(var_types type, GenTreePtr arg, regNumber argReg) { assert(arg != nullptr); @@ -7031,6 +6992,7 @@ GenTreePtr Compiler::gtNewPutArgReg(var_types type, GenTreePtr arg) { node = gtNewOperNode(GT_PUTARG_REG, type, arg); } + node->gtRegNum = argReg; return node; } @@ -15233,10 +15195,13 @@ bool GenTree::isContained() const } // these actually produce a register (the flags reg, we just don't model it) - // and are a separate instruction from the branch that consumes the result + // and are a separate instruction from the branch that consumes the result. + // They can only produce a result if the child is a SIMD equality comparison. else if (OperKind() & GTK_RELOP) { - assert(!isMarkedContained); + // We have to cast away const-ness since AsOp() method is non-const. + GenTree* childNode = const_cast<GenTree*>(this)->AsOp()->gtOp1; + assert((isMarkedContained == false) || childNode->IsSIMDEqualityOrInequality()); } // these either produce a result in register or set flags reg. diff --git a/src/jit/gentree.h b/src/jit/gentree.h index afb835e775..6a87dfbee6 100644 --- a/src/jit/gentree.h +++ b/src/jit/gentree.h @@ -727,9 +727,6 @@ public: // Copy the _gtRegNum/_gtRegPair/gtRegTag fields void CopyReg(GenTreePtr from); - - void gtClearReg(Compiler* compiler); - bool gtHasReg() const; regMaskTP gtGetRegMask() const; @@ -1437,6 +1434,20 @@ public: || OperIsShiftOrRotate(op); } +#ifdef _TARGET_XARCH_ + static bool OperIsRMWMemOp(genTreeOps gtOper) + { + // Return if binary op is one of the supported operations for RMW of memory. + return (gtOper == GT_ADD || gtOper == GT_SUB || gtOper == GT_AND || gtOper == GT_OR || gtOper == GT_XOR || + gtOper == GT_NOT || gtOper == GT_NEG || OperIsShiftOrRotate(gtOper)); + } + bool OperIsRMWMemOp() const + { + // Return if binary op is one of the supported operations for RMW of memory. + return OperIsRMWMemOp(gtOper); + } +#endif // _TARGET_XARCH_ + #if !defined(LEGACY_BACKEND) && !defined(_TARGET_64BIT_) static bool OperIsHigh(genTreeOps gtOper) { @@ -1998,13 +2009,15 @@ public: void SetContained() { + assert(IsValue()); gtFlags |= GTF_CONTAINED; } void ClearContained() { + assert(IsValue()); gtFlags &= ~GTF_CONTAINED; - gtLsraInfo.regOptional = false; + ClearRegOptional(); } #endif // !LEGACY_BACKEND @@ -2114,6 +2127,12 @@ public: // that codegen can still generate code even if it wasn't allocated a // register. bool IsRegOptional() const; +#ifndef LEGACY_BACKEND + void ClearRegOptional() + { + gtLsraInfo.regOptional = false; + } +#endif // Returns "true" iff "this" is a phi-related node (i.e. a GT_PHI_ARG, GT_PHI, or a PhiDefn). bool IsPhiNode(); @@ -3112,6 +3131,11 @@ struct GenTreeFieldList : public GenTreeArgList { prevList->gtOp2 = this; } +#ifndef LEGACY_BACKEND + // A GT_FIELD_LIST is always contained. Note that this should only matter for the head node, but + // the list may be reordered. 
+ gtFlags |= GTF_CONTAINED; +#endif } }; diff --git a/src/jit/lclvars.cpp b/src/jit/lclvars.cpp index 065753af1a..fe8aaac35c 100644 --- a/src/jit/lclvars.cpp +++ b/src/jit/lclvars.cpp @@ -2035,7 +2035,12 @@ void Compiler::lvaPromoteLongVars() fieldVarDsc->lvFldOffset = (unsigned char)(index * genTypeSize(TYP_INT)); fieldVarDsc->lvFldOrdinal = (unsigned char)index; fieldVarDsc->lvParentLcl = lclNum; - fieldVarDsc->lvIsParam = isParam; + // Currently we do not support enregistering incoming promoted aggregates with more than one field. + if (isParam) + { + fieldVarDsc->lvIsParam = true; + lvaSetVarDoNotEnregister(varNum DEBUGARG(DNER_LongParamField)); + } } } @@ -2170,6 +2175,11 @@ void Compiler::lvaSetVarDoNotEnregister(unsigned varNum DEBUGARG(DoNotEnregister assert(varDsc->lvPinned); break; #endif +#if !defined(LEGACY_BACKEND) && !defined(_TARGET_64BIT_) + case DNER_LongParamField: + JITDUMP("it is a decomposed field of a long parameter\n"); + break; +#endif default: unreached(); break; diff --git a/src/jit/lir.h b/src/jit/lir.h index 762c79c3c3..4a71947be7 100644 --- a/src/jit/lir.h +++ b/src/jit/lir.h @@ -112,12 +112,12 @@ public: GenTree* m_firstNode; GenTree* m_lastNode; - ReadOnlyRange(GenTree* firstNode, GenTree* lastNode); - ReadOnlyRange(const ReadOnlyRange& other) = delete; ReadOnlyRange& operator=(const ReadOnlyRange& other) = delete; public: + ReadOnlyRange(GenTree* firstNode, GenTree* lastNode); + class Iterator { friend class ReadOnlyRange; @@ -312,6 +312,9 @@ public: inline void GenTree::SetUnusedValue() { gtLIRFlags |= LIR::Flags::UnusedValue; +#ifndef LEGACY_BACKEND + ClearContained(); +#endif } inline void GenTree::ClearUnusedValue() diff --git a/src/jit/liveness.cpp b/src/jit/liveness.cpp index 4b8d602aac..089666c83b 100644 --- a/src/jit/liveness.cpp +++ b/src/jit/liveness.cpp @@ -1031,7 +1031,7 @@ void Compiler::fgExtendDbgLifetimes() #if !defined(_TARGET_64BIT_) DecomposeLongs::DecomposeRange(this, blockWeight, initRange); #endif // !defined(_TARGET_64BIT_) - m_pLowering->LowerRange(std::move(initRange)); + m_pLowering->LowerRange(block, initRange); #endif // !LEGACY_BACKEND // Naively inserting the initializer at the end of the block may add code after the block's @@ -2348,6 +2348,8 @@ bool Compiler::fgTryRemoveDeadLIRStore(LIR::Range& blockRange, GenTree* node, Ge store = addrUse.User(); value = store->gtGetOp2(); } + JITDUMP("Liveness is removing a dead store:\n"); + DISPNODE(store); bool isClosed = false; unsigned sideEffects = 0; @@ -2357,6 +2359,8 @@ bool Compiler::fgTryRemoveDeadLIRStore(LIR::Range& blockRange, GenTree* node, Ge { // If the range of the operands contains unrelated code or if it contains any side effects, // do not remove it. Instead, just remove the store. + JITDUMP(" Marking operands as unused:\n"); + DISPRANGE(operandsRange); store->VisitOperands([](GenTree* operand) -> GenTree::VisitResult { operand->SetUnusedValue(); @@ -2372,6 +2376,8 @@ bool Compiler::fgTryRemoveDeadLIRStore(LIR::Range& blockRange, GenTree* node, Ge // Compute the next node to process. Note that we must be careful not to set the next node to // process to a node that we are about to remove. + JITDUMP(" Deleting operands:\n"); + DISPRANGE(operandsRange); if (node->OperIsLocalStore()) { assert(node == store); @@ -2385,6 +2391,7 @@ bool Compiler::fgTryRemoveDeadLIRStore(LIR::Range& blockRange, GenTree* node, Ge blockRange.Delete(this, compCurBB, std::move(operandsRange)); } + JITDUMP("\n"); // If the store is marked as a late argument, it is referenced by a call. 
Instead of removing it, // bash it to a NOP. diff --git a/src/jit/lower.cpp b/src/jit/lower.cpp index 1d23349d29..f40c4aa5a9 100644 --- a/src/jit/lower.cpp +++ b/src/jit/lower.cpp @@ -44,7 +44,6 @@ void Lowering::MakeSrcContained(GenTreePtr parentNode, GenTreePtr childNode) assert(!parentNode->OperIsLeaf()); assert(childNode->canBeContained()); childNode->SetContained(); - m_lsra->clearOperandCounts(childNode); } //------------------------------------------------------------------------ @@ -103,7 +102,6 @@ bool Lowering::IsSafeToContainMem(GenTree* parentNode, GenTree* childNode) // // Arguments: // node - the node of interest. -// useTracked - true if this is being called after liveness so lvTracked is correct // // Return value: // True if this will definitely be a memory reference that could be contained. @@ -113,11 +111,11 @@ bool Lowering::IsSafeToContainMem(GenTree* parentNode, GenTree* childNode) // the case of doNotEnregister local. This won't include locals that // for some other reason do not become register candidates, nor those that get // spilled. -// Also, if we call this before we redo liveness analysis, any new lclVars +// Also, because we usually call this before we redo dataflow, any new lclVars // introduced after the last dataflow analysis will not yet be marked lvTracked, // so we don't use that. // -bool Lowering::IsContainableMemoryOp(GenTree* node, bool useTracked) +bool Lowering::IsContainableMemoryOp(GenTree* node) { #ifdef _TARGET_XARCH_ if (node->isMemoryOp()) @@ -131,7 +129,7 @@ bool Lowering::IsContainableMemoryOp(GenTree* node, bool useTracked) return true; } LclVarDsc* varDsc = &comp->lvaTable[node->AsLclVar()->gtLclNum]; - return (varDsc->lvDoNotEnregister || (useTracked && !varDsc->lvTracked)); + return varDsc->lvDoNotEnregister; } #endif // _TARGET_XARCH_ return false; @@ -147,18 +145,64 @@ GenTree* Lowering::LowerNode(GenTree* node) { case GT_IND: TryCreateAddrMode(LIR::Use(BlockRange(), &node->gtOp.gtOp1, node), true); + ContainCheckIndir(node->AsIndir()); break; case GT_STOREIND: - LowerStoreInd(node); + TryCreateAddrMode(LIR::Use(BlockRange(), &node->gtOp.gtOp1, node), true); + if (!comp->codeGen->gcInfo.gcIsWriteBarrierAsgNode(node)) + { + LowerStoreIndir(node->AsIndir()); + } break; case GT_ADD: - return LowerAdd(node); + { + GenTree* afterTransform = LowerAdd(node); + if (afterTransform != nullptr) + { + return afterTransform; + } + __fallthrough; + } + +#if !defined(_TARGET_64BIT_) + case GT_ADD_LO: + case GT_ADD_HI: + case GT_SUB_LO: + case GT_SUB_HI: +#endif + case GT_SUB: + case GT_AND: + case GT_OR: + case GT_XOR: + ContainCheckBinary(node->AsOp()); + break; + +#ifdef _TARGET_XARCH_ + case GT_NEG: + // Codegen of this tree node sets ZF and SF flags. 
+ if (!varTypeIsFloating(node)) + { + node->gtFlags |= GTF_ZSF_SET; + } + break; +#endif // _TARGET_XARCH_ + + case GT_MUL: + case GT_MULHI: +#if defined(_TARGET_X86_) && !defined(LEGACY_BACKEND) + case GT_MUL_LONG: +#endif + ContainCheckMul(node->AsOp()); + break; case GT_UDIV: case GT_UMOD: - return LowerUnsignedDivOrMod(node->AsOp()); + if (!LowerUnsignedDivOrMod(node->AsOp())) + { + ContainCheckDivOrMod(node->AsOp()); + } break; case GT_DIV: @@ -178,9 +222,16 @@ GenTree* Lowering::LowerNode(GenTree* node) case GT_GE: case GT_EQ: case GT_NE: + case GT_TEST_EQ: + case GT_TEST_NE: + case GT_CMP: LowerCompare(node); break; + case GT_JTRUE: + ContainCheckJTrue(node->AsOp()); + break; + case GT_JMP: LowerJmpMethod(node); break; @@ -189,68 +240,76 @@ GenTree* Lowering::LowerNode(GenTree* node) LowerRet(node); break; + case GT_RETURNTRAP: + ContainCheckReturnTrap(node->AsOp()); + break; + case GT_CAST: LowerCast(node); break; +#ifdef _TARGET_XARCH_ + case GT_ARR_BOUNDS_CHECK: +#ifdef FEATURE_SIMD + case GT_SIMD_CHK: +#endif // FEATURE_SIMD + ContainCheckBoundsChk(node->AsBoundsChk()); + break; +#endif // _TARGET_XARCH_ case GT_ARR_ELEM: return LowerArrElem(node); + case GT_ARR_OFFSET: + ContainCheckArrOffset(node->AsArrOffs()); + break; + case GT_ROL: case GT_ROR: LowerRotate(node); break; -#ifdef _TARGET_XARCH_ +#ifndef _TARGET_64BIT_ + case GT_LSH_HI: + case GT_RSH_LO: + ContainCheckShiftRotate(node->AsOp()); + break; +#endif // !_TARGET_64BIT_ + case GT_LSH: case GT_RSH: case GT_RSZ: +#ifdef _TARGET_XARCH_ LowerShift(node->AsOp()); - break; +#else + ContainCheckShiftRotate(node->AsOp()); #endif + break; case GT_STORE_BLK: case GT_STORE_OBJ: case GT_STORE_DYN_BLK: { - // TODO-Cleanup: Consider moving this code to LowerBlockStore, which is currently - // called from TreeNodeInfoInitBlockStore, and calling that method here. GenTreeBlk* blkNode = node->AsBlk(); TryCreateAddrMode(LIR::Use(BlockRange(), &blkNode->Addr(), blkNode), false); + LowerBlockStore(blkNode); } break; -#ifdef FEATURE_SIMD - case GT_SIMD: - if (node->TypeGet() == TYP_SIMD12) - { - // GT_SIMD node requiring to produce TYP_SIMD12 in fact - // produces a TYP_SIMD16 result - node->gtType = TYP_SIMD16; - } + case GT_LCLHEAP: + ContainCheckLclHeap(node->AsOp()); + break; #ifdef _TARGET_XARCH_ - if ((node->AsSIMD()->gtSIMDIntrinsicID == SIMDIntrinsicGetItem) && (node->gtGetOp1()->OperGet() == GT_IND)) - { - // If SIMD vector is already in memory, we force its - // addr to be evaluated into a reg. This would allow - // us to generate [regBase] or [regBase+offset] or - // [regBase+sizeOf(SIMD vector baseType)*regIndex] - // to access the required SIMD vector element directly - // from memory. - // - // TODO-CQ-XARCH: If addr of GT_IND is GT_LEA, we - // might be able update GT_LEA to fold the regIndex - // or offset in some cases. Instead with this - // approach we always evaluate GT_LEA into a reg. - // Ideally, we should be able to lower GetItem intrinsic - // into GT_IND(newAddr) where newAddr combines - // the addr of SIMD vector with the given index. 
- node->gtOp.gtOp1->gtFlags |= GTF_IND_REQ_ADDR_IN_REG; - } -#endif + case GT_INTRINSIC: + ContainCheckIntrinsic(node->AsOp()); break; -#endif // FEATURE_SIMD +#endif // _TARGET_XARCH_ + +#ifdef FEATURE_SIMD + case GT_SIMD: + LowerSIMD(node->AsSIMD()); + break; +#endif // case GT_LCL_VAR: WidenSIMD12IfNecessary(node->AsLclVarCommon()); @@ -266,7 +325,6 @@ GenTree* Lowering::LowerNode(GenTree* node) new (comp, GT_BITCAST) GenTreeOp(GT_BITCAST, store->TypeGet(), store->gtOp1, nullptr); store->gtOp1 = bitcast; BlockRange().InsertBefore(store, bitcast); - break; } } #endif // _TARGET_AMD64_ @@ -289,6 +347,10 @@ GenTree* Lowering::LowerNode(GenTree* node) LowerStoreLoc(node->AsLclVarCommon()); break; + case GT_LOCKADD: + CheckImmedAndMakeContained(node, node->gtOp.gtOp2); + break; + default: break; } @@ -445,7 +507,7 @@ GenTree* Lowering::LowerSwitch(GenTree* node) unsigned blockWeight = originalSwitchBB->getBBWeight(comp); LIR::Use use(switchBBRange, &(node->gtOp.gtOp1), node); - use.ReplaceWithLclVar(comp, blockWeight); + ReplaceWithLclVar(use); // GT_SWITCH(indexExpression) is now two statements: // 1. a statement containing 'asg' (for temp = indexExpression) @@ -907,7 +969,7 @@ GenTreePtr Lowering::NewPutArg(GenTreeCall* call, GenTreePtr arg, fgArgTabEntryP // // clang-format on - putArg = comp->gtNewPutArgReg(type, arg); + putArg = comp->gtNewPutArgReg(type, arg, info->regNum); } else if (info->structDesc.eightByteCount == 2) { @@ -953,10 +1015,13 @@ GenTreePtr Lowering::NewPutArg(GenTreeCall* call, GenTreePtr arg, fgArgTabEntryP GenTreePtr newOper = comp->gtNewPutArgReg( comp->GetTypeFromClassificationAndSizes(info->structDesc.eightByteClassifications[ctr], info->structDesc.eightByteSizes[ctr]), - fieldListPtr->gtOp.gtOp1); + fieldListPtr->gtOp.gtOp1, (ctr == 0) ? info->regNum : info->otherRegNum); // Splice in the new GT_PUTARG_REG node in the GT_FIELD_LIST ReplaceArgWithPutArgOrCopy(&fieldListPtr->gtOp.gtOp1, newOper); + + // Initialize all the gtRegNum's since the list won't be traversed in an LIR traversal. + fieldListPtr->gtRegNum = REG_NA; } // Just return arg. The GT_FIELD_LIST is not replaced. @@ -979,16 +1044,31 @@ GenTreePtr Lowering::NewPutArg(GenTreeCall* call, GenTreePtr arg, fgArgTabEntryP GenTreeFieldList* fieldListPtr = arg->AsFieldList(); assert(fieldListPtr->IsFieldListHead()); + // There could be up to 2-4 PUTARG_REGs in the list (3 or 4 can only occur for HFAs) + regNumber argReg = info->regNum; for (unsigned ctr = 0; fieldListPtr != nullptr; fieldListPtr = fieldListPtr->Rest(), ctr++) { GenTreePtr curOp = fieldListPtr->gtOp.gtOp1; var_types curTyp = curOp->TypeGet(); // Create a new GT_PUTARG_REG node with op1 - GenTreePtr newOper = comp->gtNewPutArgReg(curTyp, curOp); + GenTreePtr newOper = comp->gtNewPutArgReg(curTyp, curOp, argReg); // Splice in the new GT_PUTARG_REG node in the GT_FIELD_LIST ReplaceArgWithPutArgOrCopy(&fieldListPtr->gtOp.gtOp1, newOper); + + // Update argReg for the next putarg_reg (if any) + argReg = genRegArgNext(argReg); + +#if defined(_TARGET_ARM_) + // A double register is modelled as an even-numbered single one + if (fieldListPtr->Current()->TypeGet() == TYP_DOUBLE) + { + argReg = genRegArgNext(argReg); + } +#endif // _TARGET_ARM_ + // Initialize all the gtRegNum's since the list won't be traversed in an LIR traversal. + fieldListPtr->gtRegNum = REG_NA; } // Just return arg. The GT_FIELD_LIST is not replaced. 
@@ -999,7 +1079,7 @@ GenTreePtr Lowering::NewPutArg(GenTreeCall* call, GenTreePtr arg, fgArgTabEntryP #endif // FEATURE_MULTIREG_ARGS #endif // not defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) { - putArg = comp->gtNewPutArgReg(type, arg); + putArg = comp->gtNewPutArgReg(type, arg, info->regNum); } } else @@ -1195,7 +1275,8 @@ void Lowering::LowerArg(GenTreeCall* call, GenTreePtr* ppArg) GenTreeFieldList* fieldList = new (comp, GT_FIELD_LIST) GenTreeFieldList(argLo, 0, TYP_INT, nullptr); (void)new (comp, GT_FIELD_LIST) GenTreeFieldList(argHi, 4, TYP_INT, fieldList); - putArg = NewPutArg(call, fieldList, info, TYP_VOID); + putArg = NewPutArg(call, fieldList, info, TYP_VOID); + putArg->gtRegNum = info->regNum; BlockRange().InsertBefore(arg, putArg); BlockRange().Remove(arg); @@ -1214,7 +1295,8 @@ void Lowering::LowerArg(GenTreeCall* call, GenTreePtr* ppArg) GenTreeFieldList* fieldList = new (comp, GT_FIELD_LIST) GenTreeFieldList(argLo, 0, TYP_INT, nullptr); // Only the first fieldList node (GTF_FIELD_LIST_HEAD) is in the instruction sequence. (void)new (comp, GT_FIELD_LIST) GenTreeFieldList(argHi, 4, TYP_INT, fieldList); - putArg = NewPutArg(call, fieldList, info, TYP_VOID); + putArg = NewPutArg(call, fieldList, info, TYP_VOID); + putArg->gtRegNum = info->regNum; // We can't call ReplaceArgWithPutArgOrCopy here because it presumes that we are keeping the original arg. BlockRange().InsertBefore(arg, fieldList, putArg); @@ -1318,6 +1400,7 @@ void Lowering::LowerCall(GenTree* node) DISPTREERANGE(BlockRange(), call); JITDUMP("\n"); + call->ClearOtherRegs(); LowerArgsForCall(call); // note that everything generated from this point on runs AFTER the outgoing args are placed @@ -1420,6 +1503,7 @@ void Lowering::LowerCall(GenTree* node) } } + ContainCheckRange(resultRange); BlockRange().InsertBefore(insertionPoint, std::move(resultRange)); call->gtControlExpr = result; @@ -1430,6 +1514,7 @@ void Lowering::LowerCall(GenTree* node) CheckVSQuirkStackPaddingNeeded(call); } + ContainCheckCallOperands(call); JITDUMP("lowering call (after):\n"); DISPTREERANGE(BlockRange(), call); JITDUMP("\n"); @@ -1817,6 +1902,7 @@ void Lowering::LowerFastTailCall(GenTreeCall* call) GenTreeLclVar* local = new (comp, GT_LCL_VAR) GenTreeLclVar(GT_LCL_VAR, tmpType, callerArgLclNum, BAD_IL_OFFSET); GenTree* assignExpr = comp->gtNewTempAssign(tmpLclNum, local); + ContainCheckRange(local, assignExpr); BlockRange().InsertBefore(firstPutArgStk, LIR::SeqTree(comp, assignExpr)); } } @@ -1959,6 +2045,7 @@ GenTree* Lowering::LowerTailCallViaHelper(GenTreeCall* call, GenTree* callTarget assert(argEntry->node->gtOper == GT_PUTARG_REG); GenTree* secondArg = argEntry->node->gtOp.gtOp1; + ContainCheckRange(callTargetRange); BlockRange().InsertAfter(secondArg, std::move(callTargetRange)); bool isClosed; @@ -1987,6 +2074,7 @@ GenTree* Lowering::LowerTailCallViaHelper(GenTreeCall* call, GenTree* callTarget assert(argEntry->node->gtOper == GT_PUTARG_STK); GenTree* arg0 = argEntry->node->gtOp.gtOp1; + ContainCheckRange(callTargetRange); BlockRange().InsertAfter(arg0, std::move(callTargetRange)); bool isClosed; @@ -2116,6 +2204,7 @@ void Lowering::LowerCompare(GenTree* cmp) { loCmp = comp->gtNewOperNode(GT_XOR, TYP_INT, loSrc1, loSrc2); BlockRange().InsertBefore(cmp, loCmp); + ContainCheckBinary(loCmp->AsOp()); } if (hiSrc1->OperIs(GT_CNS_INT)) @@ -2132,10 +2221,12 @@ void Lowering::LowerCompare(GenTree* cmp) { hiCmp = comp->gtNewOperNode(GT_XOR, TYP_INT, hiSrc1, hiSrc2); BlockRange().InsertBefore(cmp, hiCmp); + ContainCheckBinary(hiCmp->AsOp()); 
} hiCmp = comp->gtNewOperNode(GT_OR, TYP_INT, loCmp, hiCmp); BlockRange().InsertBefore(cmp, hiCmp); + ContainCheckBinary(hiCmp->AsOp()); } else { @@ -2220,12 +2311,15 @@ void Lowering::LowerCompare(GenTree* cmp) hiCmp = comp->gtNewOperNode(GT_CMP, TYP_VOID, hiSrc1, hiSrc2); BlockRange().InsertBefore(cmp, hiCmp); + ContainCheckCompare(hiCmp->AsOp()); } else { loCmp = comp->gtNewOperNode(GT_CMP, TYP_VOID, loSrc1, loSrc2); hiCmp = comp->gtNewOperNode(GT_SUB_HI, TYP_INT, hiSrc1, hiSrc2); BlockRange().InsertBefore(cmp, loCmp, hiCmp); + ContainCheckCompare(loCmp->AsOp()); + ContainCheckBinary(hiCmp->AsOp()); // // Try to move the first SUB_HI operands right in front of it, this allows using @@ -2311,6 +2405,7 @@ void Lowering::LowerCompare(GenTree* cmp) GenTree* cast = comp->gtNewCastNode(TYP_LONG, *smallerOpUse, TYP_LONG); *smallerOpUse = cast; BlockRange().InsertAfter(cast->gtGetOp1(), cast); + ContainCheckCast(cast->AsCast()); } } } @@ -2323,7 +2418,7 @@ void Lowering::LowerCompare(GenTree* cmp) GenTreeIntCon* op2 = cmp->gtGetOp2()->AsIntCon(); ssize_t op2Value = op2->IconValue(); - if (IsContainableMemoryOp(op1, false) && varTypeIsSmall(op1Type) && genTypeCanRepresentValue(op1Type, op2Value)) + if (IsContainableMemoryOp(op1) && varTypeIsSmall(op1Type) && genTypeCanRepresentValue(op1Type, op2Value)) { // // If op1's type is small then try to narrow op2 so it has the same type as op1. @@ -2353,12 +2448,25 @@ void Lowering::LowerCompare(GenTree* cmp) // the result of bool returning calls. // - if (castOp->OperIs(GT_CALL, GT_LCL_VAR) || castOp->OperIsLogical() || - IsContainableMemoryOp(castOp, false)) + if (castOp->OperIs(GT_CALL, GT_LCL_VAR) || castOp->OperIsLogical() || IsContainableMemoryOp(castOp)) { assert(!castOp->gtOverflowEx()); // Must not be an overflow checking operation - castOp->gtType = castToType; + castOp->gtType = castToType; + // If we have any contained memory ops on castOp, they must now not be contained. + if (castOp->OperIsLogical()) + { + GenTree* op1 = castOp->gtGetOp1(); + if ((op1 != nullptr) && !op1->IsCnsIntOrI()) + { + op1->ClearContained(); + } + GenTree* op2 = castOp->gtGetOp2(); + if ((op2 != nullptr) && !op2->IsCnsIntOrI()) + { + op2->ClearContained(); + } + } cmp->gtOp.gtOp1 = castOp; op2->gtType = castToType; @@ -2398,8 +2506,11 @@ void Lowering::LowerCompare(GenTree* cmp) cmp->SetOperRaw(cmp->OperIs(GT_EQ) ? GT_TEST_EQ : GT_TEST_NE); cmp->gtOp.gtOp1 = andOp1; cmp->gtOp.gtOp2 = andOp2; + // We will re-evaluate containment below + andOp1->ClearContained(); + andOp2->ClearContained(); - if (IsContainableMemoryOp(andOp1, false) && andOp2->IsIntegralConst()) + if (IsContainableMemoryOp(andOp1) && andOp2->IsIntegralConst()) { // // For "test" we only care about the bits that are set in the second operand (mask). @@ -2450,6 +2561,7 @@ void Lowering::LowerCompare(GenTree* cmp) } } #endif // _TARGET_XARCH_ + ContainCheckCompare(cmp->AsOp()); } // Lower "jmp <method>" tail call to insert PInvoke method epilog if required. @@ -2493,6 +2605,7 @@ void Lowering::LowerRet(GenTree* ret) { InsertPInvokeMethodEpilog(comp->compCurBB DEBUGARG(ret)); } + ContainCheckRet(ret->AsOp()); } GenTree* Lowering::LowerDirectCall(GenTreeCall* call) @@ -2648,6 +2761,7 @@ GenTree* Lowering::LowerDelegateInvoke(GenTreeCall* call) assert(thisArgNode->gtOper == GT_PUTARG_REG); GenTree* originalThisExpr = thisArgNode->gtOp.gtOp1; + GenTree* thisExpr = originalThisExpr; // We're going to use the 'this' expression multiple times, so make a local to copy it. 
@@ -2670,21 +2784,21 @@ GenTree* Lowering::LowerDelegateInvoke(GenTreeCall* call) unsigned delegateInvokeTmp = comp->lvaGrabTemp(true DEBUGARG("delegate invoke call")); LIR::Use thisExprUse(BlockRange(), &thisArgNode->gtOp.gtOp1, thisArgNode); - thisExprUse.ReplaceWithLclVar(comp, m_block->getBBWeight(comp), delegateInvokeTmp); + ReplaceWithLclVar(thisExprUse, delegateInvokeTmp); - originalThisExpr = thisExprUse.Def(); // it's changed; reload it. - lclNum = delegateInvokeTmp; + thisExpr = thisExprUse.Def(); // it's changed; reload it. + lclNum = delegateInvokeTmp; } // replace original expression feeding into thisPtr with // [originalThis + offsetOfDelegateInstance] GenTree* newThisAddr = new (comp, GT_LEA) - GenTreeAddrMode(TYP_REF, originalThisExpr, nullptr, 0, comp->eeGetEEInfo()->offsetOfDelegateInstance); + GenTreeAddrMode(TYP_REF, thisExpr, nullptr, 0, comp->eeGetEEInfo()->offsetOfDelegateInstance); GenTree* newThis = comp->gtNewOperNode(GT_IND, TYP_REF, newThisAddr); - BlockRange().InsertAfter(originalThisExpr, newThisAddr, newThis); + BlockRange().InsertAfter(thisExpr, newThisAddr, newThis); thisArgNode->gtOp.gtOp1 = newThis; @@ -2779,11 +2893,9 @@ GenTree* Lowering::SetGCState(int state) GenTree* base = new (comp, GT_LCL_VAR) GenTreeLclVar(TYP_I_IMPL, comp->info.compLvFrameListRoot, -1); - GenTree* storeGcState = new (comp, GT_STOREIND) - GenTreeStoreInd(TYP_BYTE, - new (comp, GT_LEA) GenTreeAddrMode(TYP_I_IMPL, base, nullptr, 1, pInfo->offsetOfGCState), - new (comp, GT_CNS_INT) GenTreeIntCon(TYP_BYTE, state)); - + GenTree* stateNode = new (comp, GT_CNS_INT) GenTreeIntCon(TYP_BYTE, state); + GenTree* addr = new (comp, GT_LEA) GenTreeAddrMode(TYP_I_IMPL, base, nullptr, 1, pInfo->offsetOfGCState); + GenTree* storeGcState = new (comp, GT_STOREIND) GenTreeStoreInd(TYP_BYTE, addr, stateNode); return storeGcState; } @@ -2966,6 +3078,7 @@ void Lowering::InsertPInvokeMethodProlog() // The init routine sets InlinedCallFrame's m_pNext, so we just set the thead's top-of-stack GenTree* frameUpd = CreateFrameLinkUpdate(PushFrame); firstBlockRange.InsertBefore(insertionPoint, LIR::SeqTree(comp, frameUpd)); + ContainCheckStoreIndir(frameUpd->AsIndir()); DISPTREERANGE(firstBlockRange, frameUpd); } #endif // _TARGET_64BIT_ @@ -3030,6 +3143,7 @@ void Lowering::InsertPInvokeMethodEpilog(BasicBlock* returnBB DEBUGARG(GenTreePt // That is [tcb + offsetOfGcState] = 1 GenTree* storeGCState = SetGCState(1); returnBlockRange.InsertBefore(insertionPoint, LIR::SeqTree(comp, storeGCState)); + ContainCheckStoreIndir(storeGCState->AsIndir()); // Pop the frame if necessary. This always happens in the epilog on 32-bit targets. For 64-bit targets, we only do // this in the epilog for IL stubs; for non-IL stubs the frame is popped after every PInvoke call. 
@@ -3041,6 +3155,7 @@ void Lowering::InsertPInvokeMethodEpilog(BasicBlock* returnBB DEBUGARG(GenTreePt { GenTree* frameUpd = CreateFrameLinkUpdate(PopFrame); returnBlockRange.InsertBefore(insertionPoint, LIR::SeqTree(comp, frameUpd)); + ContainCheckStoreIndir(frameUpd->AsIndir()); } } @@ -3148,7 +3263,7 @@ void Lowering::InsertPInvokeCallProlog(GenTreeCall* call) callFrameInfo.offsetOfCallTarget); store->gtOp1 = src; - BlockRange().InsertBefore(insertBefore, LIR::SeqTree(comp, store)); + InsertTreeBeforeAndContainCheck(insertBefore, store); } #ifdef _TARGET_X86_ @@ -3161,7 +3276,7 @@ void Lowering::InsertPInvokeCallProlog(GenTreeCall* call) storeCallSiteSP->gtOp1 = PhysReg(REG_SPBASE); - BlockRange().InsertBefore(insertBefore, LIR::SeqTree(comp, storeCallSiteSP)); + InsertTreeBeforeAndContainCheck(insertBefore, storeCallSiteSP); #endif @@ -3178,7 +3293,7 @@ void Lowering::InsertPInvokeCallProlog(GenTreeCall* call) labelRef->gtType = TYP_I_IMPL; storeLab->gtOp1 = labelRef; - BlockRange().InsertBefore(insertBefore, LIR::SeqTree(comp, storeLab)); + InsertTreeBeforeAndContainCheck(insertBefore, storeLab); // Push the PInvoke frame if necessary. On 32-bit targets this only happens in the method prolog if a method // contains PInvokes; on 64-bit targets this is necessary in non-stubs. @@ -3194,6 +3309,7 @@ void Lowering::InsertPInvokeCallProlog(GenTreeCall* call) // Stubs do this once per stub, not once per call. GenTree* frameUpd = CreateFrameLinkUpdate(PushFrame); BlockRange().InsertBefore(insertBefore, LIR::SeqTree(comp, frameUpd)); + ContainCheckStoreIndir(frameUpd->AsIndir()); } #endif // _TARGET_64BIT_ @@ -3204,6 +3320,7 @@ void Lowering::InsertPInvokeCallProlog(GenTreeCall* call) GenTree* storeGCState = SetGCState(0); BlockRange().InsertBefore(insertBefore, LIR::SeqTree(comp, storeGCState)); + ContainCheckStoreIndir(storeGCState->AsIndir()); } //------------------------------------------------------------------------ @@ -3229,11 +3346,12 @@ void Lowering::InsertPInvokeCallEpilog(GenTreeCall* call) frameAddr->SetOperRaw(GT_LCL_VAR_ADDR); // Insert call to CORINFO_HELP_JIT_PINVOKE_END - GenTree* helperCall = + GenTreeCall* helperCall = comp->gtNewHelperCallNode(CORINFO_HELP_JIT_PINVOKE_END, TYP_VOID, 0, comp->gtNewArgList(frameAddr)); comp->fgMorphTree(helperCall); BlockRange().InsertAfter(call, LIR::SeqTree(comp, helperCall)); + ContainCheckCallOperands(helperCall); return; } @@ -3242,9 +3360,11 @@ void Lowering::InsertPInvokeCallEpilog(GenTreeCall* call) GenTree* tree = SetGCState(1); BlockRange().InsertBefore(insertionPoint, LIR::SeqTree(comp, tree)); + ContainCheckStoreIndir(tree->AsIndir()); tree = CreateReturnTrapSeq(); BlockRange().InsertBefore(insertionPoint, LIR::SeqTree(comp, tree)); + ContainCheckReturnTrap(tree->AsOp()); // Pop the frame if necessary. On 32-bit targets this only happens in the method epilog; on 64-bit targets thi // happens after every PInvoke call in non-stubs. 32-bit targets instead mark the frame as inactive. 
@@ -3255,6 +3375,7 @@ void Lowering::InsertPInvokeCallEpilog(GenTreeCall* call) { tree = CreateFrameLinkUpdate(PopFrame); BlockRange().InsertBefore(insertionPoint, LIR::SeqTree(comp, tree)); + ContainCheckStoreIndir(tree->AsIndir()); } #else const CORINFO_EE_INFO::InlinedCallFrameInfo& callFrameInfo = comp->eeGetEEInfo()->inlinedCallFrameInfo; @@ -3271,6 +3392,7 @@ void Lowering::InsertPInvokeCallEpilog(GenTreeCall* call) storeCallSiteTracker->gtOp1 = constantZero; BlockRange().InsertBefore(insertionPoint, constantZero, storeCallSiteTracker); + ContainCheckStoreLoc(storeCallSiteTracker); #endif // _TARGET_64BIT_ } @@ -3438,7 +3560,7 @@ GenTree* Lowering::LowerVirtualVtableCall(GenTreeCall* call) } LIR::Use thisPtrUse(BlockRange(), &(argEntry->node->gtOp.gtOp1), argEntry->node); - thisPtrUse.ReplaceWithLclVar(comp, m_block->getBBWeight(comp), vtableCallTemp); + ReplaceWithLclVar(thisPtrUse, vtableCallTemp); lclNum = vtableCallTemp; } @@ -3582,6 +3704,7 @@ GenTree* Lowering::LowerVirtualStubCall(GenTreeCall* call) ind->gtFlags |= GTF_IND_REQ_ADDR_IN_REG; BlockRange().InsertAfter(call->gtCallAddr, ind); + ContainCheckIndir(ind->AsIndir()); call->gtCallAddr = ind; } else @@ -3845,6 +3968,15 @@ GenTree* Lowering::TryCreateAddrMode(LIR::Use&& use, bool isIndir) GenTreeAddrMode* addrMode = new (comp, GT_LEA) GenTreeAddrMode(addrModeType, base, index, scale, offset); + // Neither the base nor the index should now be contained. + if (base != nullptr) + { + base->ClearContained(); + } + if (index != nullptr) + { + index->ClearContained(); + } addrMode->gtRsvdRegs = addr->gtRsvdRegs; addrMode->gtFlags |= (addr->gtFlags & GTF_IND_FLAGS); addrMode->gtFlags &= ~GTF_ALL_EFFECT; // LEAs are side-effect-free. @@ -3871,44 +4003,34 @@ GenTree* Lowering::TryCreateAddrMode(LIR::Use&& use, bool isIndir) // node - the node we care about // // Returns: -// The next node to lower. +// The next node to lower if we have transformed the ADD; nullptr otherwise. // GenTree* Lowering::LowerAdd(GenTree* node) { GenTree* next = node->gtNext; -#ifdef _TARGET_ARMARCH_ - // For ARM architectures we don't have the LEA instruction - // therefore we won't get much benefit from doing this. - return next; -#else // _TARGET_ARMARCH_ - if (!varTypeIsIntegralOrI(node)) +#ifndef _TARGET_ARMARCH_ + if (varTypeIsIntegralOrI(node)) { - return next; - } - - LIR::Use use; - if (!BlockRange().TryGetUse(node, &use)) - { - return next; - } - - // if this is a child of an indir, let the parent handle it. - GenTree* parent = use.User(); - if (parent->OperIsIndir()) - { - return next; - } - - // if there is a chain of adds, only look at the topmost one - if (parent->gtOper == GT_ADD) - { - return next; + LIR::Use use; + if (BlockRange().TryGetUse(node, &use)) + { + // If this is a child of an indir, let the parent handle it. + // If there is a chain of adds, only look at the topmost one. + GenTree* parent = use.User(); + if (!parent->OperIsIndir() && (parent->gtOper != GT_ADD)) + { + GenTree* addr = TryCreateAddrMode(std::move(use), false); + if (addr != node) + { + return addr->gtNext; + } + } + } } - - GenTree* addr = TryCreateAddrMode(std::move(use), false); - return addr->gtNext; #endif // !_TARGET_ARMARCH_ + + return nullptr; } //------------------------------------------------------------------------ @@ -3917,12 +4039,16 @@ GenTree* Lowering::LowerAdd(GenTree* node) // Arguments: // divMod - pointer to the GT_UDIV/GT_UMOD node to be lowered // +// Return Value: +// Returns a boolean indicating whether the node was transformed. 
+// // Notes: // - Transform UDIV/UMOD by power of 2 into RSZ/AND // - Transform UDIV by constant >= 2^(N-1) into GE // - Transform UDIV/UMOD by constant >= 3 into "magic division" +// -GenTree* Lowering::LowerUnsignedDivOrMod(GenTreeOp* divMod) +bool Lowering::LowerUnsignedDivOrMod(GenTreeOp* divMod) { assert(divMod->OperIs(GT_UDIV, GT_UMOD)); @@ -3933,13 +4059,13 @@ GenTree* Lowering::LowerUnsignedDivOrMod(GenTreeOp* divMod) #if !defined(_TARGET_64BIT_) if (dividend->OperIs(GT_LONG)) { - return next; + return false; } #endif if (!divisor->IsCnsIntOrI()) { - return next; + return false; } if (dividend->IsCnsIntOrI()) @@ -3947,7 +4073,7 @@ GenTree* Lowering::LowerUnsignedDivOrMod(GenTreeOp* divMod) // We shouldn't see a divmod with constant operands here but if we do then it's likely // because optimizations are disabled or it's a case that's supposed to throw an exception. // Don't optimize this. - return next; + return false; } const var_types type = divMod->TypeGet(); @@ -3964,7 +4090,7 @@ GenTree* Lowering::LowerUnsignedDivOrMod(GenTreeOp* divMod) if (divisorValue == 0) { - return next; + return false; } const bool isDiv = divMod->OperIs(GT_UDIV); @@ -3985,11 +4111,10 @@ GenTree* Lowering::LowerUnsignedDivOrMod(GenTreeOp* divMod) } divMod->SetOper(newOper); - divisor->AsIntCon()->SetIconValue(divisorValue); - - return next; + divisor->gtIntCon.SetIconValue(divisorValue); + ContainCheckNode(divMod); + return true; } - if (isDiv) { // If the divisor is greater or equal than 2^(N - 1) then the result is 1 @@ -3999,7 +4124,8 @@ GenTree* Lowering::LowerUnsignedDivOrMod(GenTreeOp* divMod) { divMod->SetOper(GT_GE); divMod->gtFlags |= GTF_UNSIGNED; - return next; + ContainCheckNode(divMod); + return true; } } @@ -4038,7 +4164,7 @@ GenTree* Lowering::LowerUnsignedDivOrMod(GenTreeOp* divMod) if (requiresDividendMultiuse) { LIR::Use dividendUse(BlockRange(), &divMod->gtOp1, divMod); - dividendLclNum = dividendUse.ReplaceWithLclVar(comp, curBBWeight); + dividendLclNum = ReplaceWithLclVar(dividendUse); dividend = divMod->gtGetOp1(); } @@ -4050,6 +4176,7 @@ GenTree* Lowering::LowerUnsignedDivOrMod(GenTreeOp* divMod) mulhi->gtFlags |= GTF_UNSIGNED; divisor->AsIntCon()->SetIconValue(magic); BlockRange().InsertBefore(divMod, mulhi); + GenTree* firstNode = mulhi; if (requiresAdjustment) { @@ -4063,7 +4190,7 @@ GenTree* Lowering::LowerUnsignedDivOrMod(GenTreeOp* divMod) BlockRange().InsertBefore(divMod, one, rsz); LIR::Use mulhiUse(BlockRange(), &sub->gtOp.gtOp2, sub); - unsigned mulhiLclNum = mulhiUse.ReplaceWithLclVar(comp, curBBWeight); + unsigned mulhiLclNum = ReplaceWithLclVar(mulhiUse); GenTree* mulhiCopy = comp->gtNewLclvNode(mulhiLclNum, type); GenTree* add = comp->gtNewOperNode(GT_ADD, type, rsz, mulhiCopy); @@ -4099,31 +4226,30 @@ GenTree* Lowering::LowerUnsignedDivOrMod(GenTreeOp* divMod) BlockRange().InsertBefore(divMod, div, divisor, mul, dividend); comp->lvaTable[dividendLclNum].incRefCnts(curBBWeight, comp); } + ContainCheckRange(firstNode, divMod); - return mulhi; + return true; } #endif - - return next; + return false; } -//------------------------------------------------------------------------ -// LowerSignedDivOrMod: transform integer GT_DIV/GT_MOD nodes with a power of 2 -// const divisor into equivalent but faster sequences. +// LowerConstIntDivOrMod: Transform integer GT_DIV/GT_MOD nodes with a power of 2 +// const divisor into equivalent but faster sequences. 
// // Arguments: -// node - pointer to node we care about +// node - pointer to the DIV or MOD node // // Returns: // The next node to lower. // -GenTree* Lowering::LowerSignedDivOrMod(GenTreePtr node) +GenTree* Lowering::LowerConstIntDivOrMod(GenTree* node) { assert((node->OperGet() == GT_DIV) || (node->OperGet() == GT_MOD)); - - GenTree* next = node->gtNext; - GenTree* divMod = node; - GenTree* divisor = divMod->gtGetOp2(); + GenTree* next = node->gtNext; + GenTree* divMod = node; + GenTree* dividend = divMod->gtGetOp1(); + GenTree* divisor = divMod->gtGetOp2(); if (!divisor->IsCnsIntOrI()) { @@ -4133,8 +4259,6 @@ GenTree* Lowering::LowerSignedDivOrMod(GenTreePtr node) const var_types type = divMod->TypeGet(); assert((type == TYP_INT) || (type == TYP_LONG)); - GenTree* dividend = divMod->gtGetOp1(); - if (dividend->IsCnsIntOrI()) { // We shouldn't see a divmod with constant operands here but if we do then it's likely @@ -4168,6 +4292,7 @@ GenTree* Lowering::LowerSignedDivOrMod(GenTreePtr node) // If the divisor is the minimum representable integer value then we can use a compare, // the result is 1 iff the dividend equals divisor. divMod->SetOper(GT_EQ); + ContainCheckCompare(divMod->AsOp()); return next; } } @@ -4229,7 +4354,7 @@ GenTree* Lowering::LowerSignedDivOrMod(GenTreePtr node) if (requiresDividendMultiuse) { LIR::Use dividendUse(BlockRange(), &mulhi->gtOp.gtOp2, mulhi); - dividendLclNum = dividendUse.ReplaceWithLclVar(comp, curBBWeight); + dividendLclNum = ReplaceWithLclVar(dividendUse); } GenTree* adjusted; @@ -4252,7 +4377,7 @@ GenTree* Lowering::LowerSignedDivOrMod(GenTreePtr node) BlockRange().InsertBefore(divMod, shiftBy, signBit); LIR::Use adjustedUse(BlockRange(), &signBit->gtOp.gtOp1, signBit); - unsigned adjustedLclNum = adjustedUse.ReplaceWithLclVar(comp, curBBWeight); + unsigned adjustedLclNum = ReplaceWithLclVar(adjustedUse); adjusted = comp->gtNewLclvNode(adjustedLclNum, type); comp->lvaTable[adjustedLclNum].incRefCnts(curBBWeight, comp); BlockRange().InsertBefore(divMod, adjusted); @@ -4307,7 +4432,7 @@ GenTree* Lowering::LowerSignedDivOrMod(GenTreePtr node) unsigned curBBWeight = comp->compCurBB->getBBWeight(comp); LIR::Use opDividend(BlockRange(), &divMod->gtOp.gtOp1, divMod); - opDividend.ReplaceWithLclVar(comp, curBBWeight); + ReplaceWithLclVar(opDividend); dividend = divMod->gtGetOp1(); assert(dividend->OperGet() == GT_LCL_VAR); @@ -4340,11 +4465,13 @@ GenTree* Lowering::LowerSignedDivOrMod(GenTreePtr node) divisor->gtIntCon.SetIconValue(genLog2(absDivisorValue)); newDivMod = comp->gtNewOperNode(GT_RSH, type, adjustedDividend, divisor); + ContainCheckShiftRotate(newDivMod->AsOp()); if (divisorValue < 0) { // negate the result if the divisor is negative newDivMod = comp->gtNewOperNode(GT_NEG, type, newDivMod); + ContainCheckNode(newDivMod); } } else @@ -4356,6 +4483,7 @@ GenTree* Lowering::LowerSignedDivOrMod(GenTreePtr node) newDivMod = comp->gtNewOperNode(GT_SUB, type, comp->gtNewLclvNode(dividendLclNum, type), comp->gtNewOperNode(GT_AND, type, adjustedDividend, divisor)); + ContainCheckBinary(newDivMod->AsOp()); comp->lvaTable[dividendLclNum].incRefCnts(curBBWeight, comp); } @@ -4366,7 +4494,7 @@ GenTree* Lowering::LowerSignedDivOrMod(GenTreePtr node) BlockRange().Remove(dividend); // linearize and insert the new tree before the original divMod node - BlockRange().InsertBefore(divMod, LIR::SeqTree(comp, newDivMod)); + InsertTreeBeforeAndContainCheck(divMod, newDivMod); BlockRange().Remove(divMod); // replace the original divmod node with the new divmod tree 
@@ -4374,24 +4502,37 @@ GenTree* Lowering::LowerSignedDivOrMod(GenTreePtr node) return newDivMod->gtNext; } - //------------------------------------------------------------------------ -// LowerStoreInd: attempt to transform an indirect store to use an -// addressing mode +// LowerSignedDivOrMod: transform integer GT_DIV/GT_MOD nodes with a power of 2 +// const divisor into equivalent but faster sequences. // // Arguments: -// node - the node we care about +// node - the DIV or MOD node +// +// Returns: +// The next node to lower. // -void Lowering::LowerStoreInd(GenTree* node) +GenTree* Lowering::LowerSignedDivOrMod(GenTreePtr node) { - assert(node != nullptr); - assert(node->OperGet() == GT_STOREIND); + assert((node->OperGet() == GT_DIV) || (node->OperGet() == GT_MOD)); + GenTree* next = node->gtNext; + GenTree* divMod = node; + GenTree* dividend = divMod->gtGetOp1(); + GenTree* divisor = divMod->gtGetOp2(); + +#ifdef _TARGET_XARCH_ + if (!varTypeIsFloating(node->TypeGet())) +#endif // _TARGET_XARCH_ + { + next = LowerConstIntDivOrMod(node); + } - TryCreateAddrMode(LIR::Use(BlockRange(), &node->gtOp.gtOp1, node), true); + if ((node->OperGet() == GT_DIV) || (node->OperGet() == GT_MOD)) + { + ContainCheckDivOrMod(node->AsOp()); + } - // Mark all GT_STOREIND nodes to indicate that it is not known - // whether it represents a RMW memory op. - node->AsStoreInd()->SetRMWStatusDefault(); + return next; } void Lowering::WidenSIMD12IfNecessary(GenTreeLclVarCommon* node) @@ -4503,7 +4644,7 @@ GenTree* Lowering::LowerArrElem(GenTree* node) if (!arrElem->gtArrObj->IsLocal()) { LIR::Use arrObjUse(BlockRange(), &arrElem->gtArrObj, arrElem); - arrObjUse.ReplaceWithLclVar(comp, blockWeight); + ReplaceWithLclVar(arrObjUse); } GenTree* arrObjNode = arrElem->gtArrObj; @@ -4514,6 +4655,7 @@ GenTree* Lowering::LowerArrElem(GenTree* node) // The first ArrOffs node will have 0 for the offset of the previous dimension. GenTree* prevArrOffs = new (comp, GT_CNS_INT) GenTreeIntCon(TYP_I_IMPL, 0); BlockRange().InsertBefore(insertionPoint, prevArrOffs); + GenTree* nextToLower = prevArrOffs; for (unsigned char dim = 0; dim < rank; dim++) { @@ -4589,7 +4731,7 @@ GenTree* Lowering::LowerArrElem(GenTree* node) DISPTREERANGE(BlockRange(), leaNode); JITDUMP("\n\n"); - return leaNode; + return nextToLower; } void Lowering::DoPhase() @@ -4711,10 +4853,13 @@ void Lowering::DoPhase() #ifdef DEBUG node->gtSeqNum = currentLoc; + // In DEBUG, we want to set the gtRegTag to GT_REGTAG_REG, so that subsequent dumps will so the register + // value. + // Although this looks like a no-op it sets the tag. 
+ node->gtRegNum = node->gtRegNum; #endif node->gtLsraInfo.Initialize(m_lsra, node, currentLoc); - node->gtClearReg(comp); currentLoc += 2; @@ -4771,6 +4916,7 @@ void Lowering::CheckCallArg(GenTree* arg) case GT_FIELD_LIST: { GenTreeFieldList* list = arg->AsFieldList(); + assert(list->isContained()); assert(list->IsFieldListHead()); for (; list != nullptr; list = list->Rest()) @@ -5133,6 +5279,122 @@ void Lowering::getCastDescription(GenTreePtr treeNode, CastInfo* castInfo) } //------------------------------------------------------------------------ +// Containment Analysis +//------------------------------------------------------------------------ +void Lowering::ContainCheckNode(GenTree* node) +{ + switch (node->gtOper) + { + case GT_STORE_LCL_VAR: + case GT_STORE_LCL_FLD: + ContainCheckStoreLoc(node->AsLclVarCommon()); + break; + + case GT_EQ: + case GT_NE: + case GT_LT: + case GT_LE: + case GT_GE: + case GT_GT: + case GT_TEST_EQ: + case GT_TEST_NE: + case GT_CMP: + ContainCheckCompare(node->AsOp()); + break; + + case GT_JTRUE: + ContainCheckJTrue(node->AsOp()); + break; + + case GT_ADD: + case GT_SUB: +#if !defined(_TARGET_64BIT_) + case GT_ADD_LO: + case GT_ADD_HI: + case GT_SUB_LO: + case GT_SUB_HI: +#endif + case GT_AND: + case GT_OR: + case GT_XOR: + ContainCheckBinary(node->AsOp()); + break; + +#ifdef _TARGET_XARCH_ + case GT_NEG: + // Codegen of this tree node sets ZF and SF flags. + if (!varTypeIsFloating(node)) + { + node->gtFlags |= GTF_ZSF_SET; + } + break; +#endif // _TARGET_XARCH_ + +#if defined(_TARGET_X86_) + case GT_MUL_LONG: +#endif + case GT_MUL: + case GT_MULHI: + ContainCheckMul(node->AsOp()); + break; + case GT_DIV: + case GT_MOD: + case GT_UDIV: + case GT_UMOD: + ContainCheckDivOrMod(node->AsOp()); + break; + case GT_LSH: + case GT_RSH: + case GT_RSZ: + case GT_ROL: + case GT_ROR: +#ifndef _TARGET_64BIT_ + case GT_LSH_HI: + case GT_RSH_LO: +#endif + ContainCheckShiftRotate(node->AsOp()); + break; + case GT_ARR_OFFSET: + ContainCheckArrOffset(node->AsArrOffs()); + break; + case GT_LCLHEAP: + ContainCheckLclHeap(node->AsOp()); + break; + case GT_RETURN: + ContainCheckRet(node->AsOp()); + break; + case GT_RETURNTRAP: + ContainCheckReturnTrap(node->AsOp()); + break; + case GT_STOREIND: + ContainCheckStoreIndir(node->AsIndir()); + case GT_IND: + ContainCheckIndir(node->AsIndir()); + break; + case GT_PUTARG_REG: + case GT_PUTARG_STK: +#ifdef _TARGET_ARM_ + case GT_PUTARG_SPLIT: +#endif + // The regNum must have been set by the lowering of the call. + assert(node->gtRegNum != REG_NA); + break; +#ifdef _TARGET_XARCH_ + case GT_INTRINSIC: + ContainCheckIntrinsic(node->AsOp()); + break; +#endif // _TARGET_XARCH_ +#ifdef FEATURE_SIMD + case GT_SIMD: + ContainCheckSIMD(node->AsSIMD()); + break; +#endif // FEATURE_SIMD + default: + break; + } +} + +//------------------------------------------------------------------------ // GetIndirSourceCount: Get the source registers for an indirection that might be contained. // // Arguments: @@ -5189,7 +5451,7 @@ void Lowering::ContainCheckDivOrMod(GenTreeOp* node) // everything is made explicit by adding casts. 
assert(dividend->TypeGet() == divisor->TypeGet()); - if (IsContainableMemoryOp(divisor, true) || divisor->IsCnsNonZeroFltOrDbl()) + if (IsContainableMemoryOp(divisor) || divisor->IsCnsNonZeroFltOrDbl()) { MakeSrcContained(node, divisor); } @@ -5211,7 +5473,7 @@ void Lowering::ContainCheckDivOrMod(GenTreeOp* node) #endif // divisor can be an r/m, but the memory indirection must be of the same size as the divide - if (IsContainableMemoryOp(divisor, true) && (divisor->TypeGet() == node->TypeGet())) + if (IsContainableMemoryOp(divisor) && (divisor->TypeGet() == node->TypeGet())) { MakeSrcContained(node, divisor); } @@ -5232,12 +5494,14 @@ void Lowering::ContainCheckDivOrMod(GenTreeOp* node) // void Lowering::ContainCheckReturnTrap(GenTreeOp* node) { +#ifdef _TARGET_XARCH_ assert(node->OperIs(GT_RETURNTRAP)); // This just turns into a compare of its child with an int + a conditional call if (node->gtOp1->isIndir()) { MakeSrcContained(node, node->gtOp1); } +#endif // _TARGET_XARCH_ } //------------------------------------------------------------------------ @@ -5311,7 +5575,6 @@ void Lowering::ContainCheckRet(GenTreeOp* ret) #endif // FEATURE_MULTIREG_RET } -#ifdef FEATURE_SIMD //------------------------------------------------------------------------ // ContainCheckJTrue: determine whether the source of a JTRUE should be contained. // @@ -5320,6 +5583,11 @@ void Lowering::ContainCheckRet(GenTreeOp* ret) // void Lowering::ContainCheckJTrue(GenTreeOp* node) { + // The compare does not need to be generated into a register. + GenTree* cmp = node->gtGetOp1(); + cmp->gtLsraInfo.isNoRegCompare = true; + +#ifdef FEATURE_SIMD assert(node->OperIs(GT_JTRUE)); // Say we have the following IR @@ -5329,7 +5597,6 @@ void Lowering::ContainCheckJTrue(GenTreeOp* node) // // In this case we don't need to generate code for GT_EQ_/NE, since SIMD (In)Equality // intrinsic will set or clear the Zero flag. - GenTree* cmp = node->gtGetOp1(); genTreeOps cmpOper = cmp->OperGet(); if (cmpOper == GT_EQ || cmpOper == GT_NE) { @@ -5340,12 +5607,36 @@ void Lowering::ContainCheckJTrue(GenTreeOp* node) { // We always generate code for a SIMD equality comparison, though it produces no value. // Neither the GT_JTRUE nor the immediate need to be evaluated. - m_lsra->clearOperandCounts(cmp); MakeSrcContained(cmp, cmpOp2); + cmpOp1->gtLsraInfo.isNoRegCompare = true; + // We have to reverse compare oper in the following cases: + // 1) SIMD Equality: Sets Zero flag on equal otherwise clears it. + // Therefore, if compare oper is == or != against false(0), we will + // be checking opposite of what is required. + // + // 2) SIMD inEquality: Clears Zero flag on true otherwise sets it. + // Therefore, if compare oper is == or != against true(1), we will + // be checking opposite of what is required. 
+ GenTreeSIMD* simdNode = cmpOp1->AsSIMD(); + if (simdNode->gtSIMDIntrinsicID == SIMDIntrinsicOpEquality) + { + if (cmpOp2->IsIntegralConst(0)) + { + cmp->SetOper(GenTree::ReverseRelop(cmpOper)); + } + } + else + { + assert(simdNode->gtSIMDIntrinsicID == SIMDIntrinsicOpInEquality); + if (cmpOp2->IsIntegralConst(1)) + { + cmp->SetOper(GenTree::ReverseRelop(cmpOper)); + } + } } } -} #endif // FEATURE_SIMD +} #ifdef DEBUG void Lowering::DumpNodeInfoMap() diff --git a/src/jit/lower.h b/src/jit/lower.h index f09df7a836..ebee64117b 100644 --- a/src/jit/lower.h +++ b/src/jit/lower.h @@ -47,35 +47,77 @@ public: static void getCastDescription(GenTreePtr treeNode, CastInfo* castInfo); + // This variant of LowerRange is called from outside of the main Lowering pass, + // so it creates its own instance of Lowering to do so. + void LowerRange(BasicBlock* block, LIR::ReadOnlyRange& range) + { + Lowering lowerer(comp, m_lsra); + lowerer.m_block = block; + + lowerer.LowerRange(range); + } + +private: // LowerRange handles new code that is introduced by or after Lowering. - void LowerRange(LIR::Range&& range) + void LowerRange(LIR::ReadOnlyRange& range) { for (GenTree* newNode : range) { LowerNode(newNode); } } + void LowerRange(GenTree* firstNode, GenTree* lastNode) + { + LIR::ReadOnlyRange range(firstNode, lastNode); + LowerRange(range); + } + + // ContainCheckRange handles new code that is introduced by or after Lowering, + // and that is known to be already in Lowered form. + void ContainCheckRange(LIR::ReadOnlyRange& range) + { + for (GenTree* newNode : range) + { + ContainCheckNode(newNode); + } + } + void ContainCheckRange(GenTree* firstNode, GenTree* lastNode) + { + LIR::ReadOnlyRange range(firstNode, lastNode); + ContainCheckRange(range); + } + + void InsertTreeBeforeAndContainCheck(GenTree* insertionPoint, GenTree* tree) + { + LIR::Range range = LIR::SeqTree(comp, tree); + ContainCheckRange(range); + BlockRange().InsertBefore(insertionPoint, std::move(range)); + } + + void ContainCheckNode(GenTree* node); -private: void ContainCheckDivOrMod(GenTreeOp* node); void ContainCheckReturnTrap(GenTreeOp* node); void ContainCheckArrOffset(GenTreeArrOffs* node); void ContainCheckLclHeap(GenTreeOp* node); void ContainCheckRet(GenTreeOp* node); - void ContainCheckBinary(GenTreeOp* node); + void ContainCheckJTrue(GenTreeOp* node); + + void ContainCheckCallOperands(GenTreeCall* call); + void ContainCheckIndir(GenTreeIndir* indirNode); + void ContainCheckStoreIndir(GenTreeIndir* indirNode); void ContainCheckMul(GenTreeOp* node); void ContainCheckShiftRotate(GenTreeOp* node); void ContainCheckStoreLoc(GenTreeLclVarCommon* storeLoc); - void ContainCheckIndir(GenTreeIndir* indirNode); void ContainCheckCast(GenTreeCast* node); void ContainCheckCompare(GenTreeOp* node); + void ContainCheckBinary(GenTreeOp* node); void ContainCheckBoundsChk(GenTreeBoundsChk* node); #ifdef _TARGET_XARCH_ void ContainCheckFloatBinary(GenTreeOp* node); void ContainCheckIntrinsic(GenTreeOp* node); #endif // _TARGET_XARCH_ #ifdef FEATURE_SIMD - void ContainCheckJTrue(GenTreeOp* node); void ContainCheckSIMD(GenTreeSIMD* simdNode); #endif // FEATURE_SIMD @@ -153,6 +195,21 @@ private: return new (comp, GT_LEA) GenTreeAddrMode(resultType, base, index, 0, 0); } + // Replace the definition of the given use with a lclVar, allocating a new temp + // if 'tempNum' is BAD_VAR_NUM. 
+ unsigned ReplaceWithLclVar(LIR::Use& use, unsigned tempNum = BAD_VAR_NUM) + { + GenTree* oldUseNode = use.Def(); + if ((oldUseNode->gtOper != GT_LCL_VAR) || (tempNum != BAD_VAR_NUM)) + { + unsigned newLclNum = use.ReplaceWithLclVar(comp, m_block->getBBWeight(comp), tempNum); + GenTree* newUseNode = use.Def(); + ContainCheckRange(oldUseNode->gtNext, newUseNode); + return newLclNum; + } + return oldUseNode->AsLclVarCommon()->gtLclNum; + } + // returns true if the tree can use the read-modify-write memory instruction form bool isRMWRegOper(GenTreePtr tree); @@ -236,12 +293,12 @@ private: int GetOperandSourceCount(GenTree* node); int GetIndirSourceCount(GenTreeIndir* indirTree); + void HandleFloatVarArgs(GenTreeCall* call, GenTree* argNode, bool* callHasFloatRegArgs); void TreeNodeInfoInitStoreLoc(GenTree* tree); void TreeNodeInfoInitReturn(GenTree* tree); void TreeNodeInfoInitShiftRotate(GenTree* tree); - void TreeNodeInfoInitPutArgReg( - GenTreeUnOp* node, regNumber argReg, TreeNodeInfo& info, bool isVarArgs, bool* callHasFloatRegArgs); + void TreeNodeInfoInitPutArgReg(GenTreeUnOp* node); void TreeNodeInfoInitCall(GenTreeCall* call); void TreeNodeInfoInitCmp(GenTreePtr tree); void TreeNodeInfoInitStructArg(GenTreePtr structArg); @@ -251,36 +308,37 @@ private: void TreeNodeInfoInitStoreLoc(GenTreeLclVarCommon* tree); void TreeNodeInfoInitIndir(GenTreeIndir* indirTree); void TreeNodeInfoInitGCWriteBarrier(GenTree* tree); -#if !CPU_LOAD_STORE_ARCH - bool TreeNodeInfoInitIfRMWMemOp(GenTreePtr storeInd); -#endif + void TreeNodeInfoInitCast(GenTree* tree); + +#if defined(_TARGET_XARCH_) + void TreeNodeInfoInitMul(GenTreePtr tree); + void SetContainsAVXFlags(bool isFloatingPointType = true, unsigned sizeOfSIMDVector = 0); +#endif // defined(_TARGET_XARCH_) + #ifdef FEATURE_SIMD - void TreeNodeInfoInitSIMD(GenTree* tree); + void TreeNodeInfoInitSIMD(GenTreeSIMD* tree); #endif // FEATURE_SIMD - void TreeNodeInfoInitCast(GenTree* tree); + + void TreeNodeInfoInitPutArgStk(GenTreePutArgStk* argNode); #ifdef _TARGET_ARM64_ void LowerPutArgStk(GenTreePutArgStk* argNode, fgArgTabEntryPtr info); - void TreeNodeInfoInitPutArgStk(GenTreePutArgStk* argNode, fgArgTabEntryPtr info); #endif // _TARGET_ARM64_ #ifdef _TARGET_ARM_ void LowerPutArgStk(GenTreePutArgStk* argNode, fgArgTabEntryPtr info); - void TreeNodeInfoInitPutArgStk(GenTreePutArgStk* argNode, fgArgTabEntryPtr info); #endif // _TARGET_ARM64_ -#ifdef FEATURE_PUT_STRUCT_ARG_STK void LowerPutArgStk(GenTreePutArgStk* tree); - void TreeNodeInfoInitPutArgStk(GenTreePutArgStk* tree); #ifdef _TARGET_ARM_ - void TreeNodeInfoInitPutArgSplit(GenTreePutArgSplit* tree, TreeNodeInfo& info, fgArgTabEntryPtr argInfo); + void TreeNodeInfoInitPutArgSplit(GenTreePutArgSplit* tree); #endif -#endif // FEATURE_PUT_STRUCT_ARG_STK void TreeNodeInfoInitLclHeap(GenTree* tree); void DumpNodeInfoMap(); // Per tree node member functions - void LowerStoreInd(GenTree* node); + void LowerStoreIndir(GenTreeIndir* node); GenTree* LowerAdd(GenTree* node); - GenTree* LowerUnsignedDivOrMod(GenTreeOp* divMod); + bool LowerUnsignedDivOrMod(GenTreeOp* divMod); + GenTree* LowerConstIntDivOrMod(GenTree* node); GenTree* LowerSignedDivOrMod(GenTree* node); void LowerBlockStore(GenTreeBlk* blkNode); @@ -290,11 +348,6 @@ private: GenTree* LowerSwitch(GenTree* node); void LowerCast(GenTree* node); -#if defined(_TARGET_XARCH_) - void TreeNodeInfoInitMul(GenTreePtr tree); - void SetContainsAVXFlags(bool isFloatingPointType = true, unsigned sizeOfSIMDVector = 0); -#endif // defined(_TARGET_XARCH_) 
- #if !CPU_LOAD_STORE_ARCH bool IsRMWIndirCandidate(GenTree* operand, GenTree* storeInd); bool IsBinOpInRMWStoreInd(GenTreePtr tree); @@ -307,6 +360,9 @@ private: GenTree* LowerArrElem(GenTree* node); void LowerRotate(GenTree* tree); void LowerShift(GenTreeOp* shift); +#ifdef FEATURE_SIMD + void LowerSIMD(GenTreeSIMD* simdNode); +#endif // FEATURE_SIMD // Utility functions void MorphBlkIntoHelperCall(GenTreePtr pTree, GenTreePtr treeStmt); @@ -325,7 +381,7 @@ private: bool IsContainableImmed(GenTree* parentNode, GenTree* childNode); // Return true if 'node' is a containable memory op. - bool IsContainableMemoryOp(GenTree* node, bool useTracked); + bool IsContainableMemoryOp(GenTree* node); // Makes 'childNode' contained in the 'parentNode' void MakeSrcContained(GenTreePtr parentNode, GenTreePtr childNode); diff --git a/src/jit/lowerarmarch.cpp b/src/jit/lowerarmarch.cpp index 7104577839..f944b42a05 100644 --- a/src/jit/lowerarmarch.cpp +++ b/src/jit/lowerarmarch.cpp @@ -218,6 +218,20 @@ void Lowering::LowerStoreLoc(GenTreeLclVarCommon* storeLoc) } //------------------------------------------------------------------------ +// LowerStoreIndir: Determine addressing mode for an indirection, and whether operands are contained. +// +// Arguments: +// node - The indirect store node (GT_STORE_IND) of interest +// +// Return Value: +// None. +// +void Lowering::LowerStoreIndir(GenTreeIndir* node) +{ + ContainCheckStoreIndir(node); +} + +//------------------------------------------------------------------------ // LowerBlockStore: Set block store type // // Arguments: @@ -255,6 +269,7 @@ void Lowering::LowerBlockStore(GenTreeBlk* blkNode) GenTreePtr initVal = source; if (initVal->OperIsInitVal()) { + initVal->SetContained(); initVal = initVal->gtGetOp1(); } srcAddrOrFill = initVal; @@ -276,7 +291,11 @@ void Lowering::LowerBlockStore(GenTreeBlk* blkNode) // the largest width store of the desired inline expansion. ssize_t fill = initVal->gtIntCon.gtIconVal & 0xFF; - if (size < REGSIZE_BYTES) + if (fill == 0) + { + MakeSrcContained(blkNode, source); + } + else if (size < REGSIZE_BYTES) { initVal->gtIntCon.gtIconVal = 0x01010101 * fill; } @@ -348,6 +367,16 @@ void Lowering::LowerBlockStore(GenTreeBlk* blkNode) blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindHelper; } } + // CopyObj or CopyBlk + if (source->gtOper == GT_IND) + { + MakeSrcContained(blkNode, source); + } + else if (!source->IsMultiRegCall() && !source->OperIsSIMD()) + { + assert(source->IsLocal()); + MakeSrcContained(blkNode, source); + } } } @@ -453,6 +482,7 @@ void Lowering::LowerRotate(GenTreePtr tree) } tree->ChangeOper(GT_ROR); } + ContainCheckShiftRotate(tree->AsOp()); } //------------------------------------------------------------------------ @@ -460,31 +490,97 @@ void Lowering::LowerRotate(GenTreePtr tree) //------------------------------------------------------------------------ //------------------------------------------------------------------------ -// ContainCheckIndir: Determine whether operands of an indir should be contained. +// ContainCheckCallOperands: Determine whether operands of a call should be contained. // // Arguments: -// node - The indirection node of interest -// -// Notes: -// This is called for both store and load indirections. +// call - The call node of interest // // Return Value: // None. 
// -void Lowering::ContainCheckIndir(GenTreeIndir* indirNode) +void Lowering::ContainCheckCallOperands(GenTreeCall* call) { -#ifdef _TARGET_ARM64_ - if (indirNode->OperIs(GT_STOREIND)) + GenTree* ctrlExpr = call->gtControlExpr; + // If there is an explicit this pointer, we don't want that node to produce anything + // as it is redundant + if (call->gtCallObjp != nullptr) { - GenTree* src = indirNode->gtOp.gtOp2; - if (!varTypeIsFloating(src->TypeGet()) && src->IsIntegralConst(0)) + GenTreePtr thisPtrNode = call->gtCallObjp; + + if (thisPtrNode->canBeContained()) + { + MakeSrcContained(call, thisPtrNode); + if (thisPtrNode->gtOper == GT_PUTARG_REG) + { + MakeSrcContained(call, thisPtrNode->gtOp.gtOp1); + } + } + } + GenTreePtr args = call->gtCallArgs; + while (args) + { + GenTreePtr arg = args->gtOp.gtOp1; + if (!(args->gtFlags & GTF_LATE_ARG)) { - // an integer zero for 'src' can be contained. - MakeSrcContained(indirNode, src); + TreeNodeInfo* argInfo = &(arg->gtLsraInfo); + if (arg->gtOper == GT_PUTARG_STK) + { + GenTreePtr putArgChild = arg->gtOp.gtOp1; + if (putArgChild->OperGet() == GT_FIELD_LIST) + { + MakeSrcContained(arg, putArgChild); + } + else if (putArgChild->OperGet() == GT_OBJ) + { + MakeSrcContained(arg, putArgChild); + GenTreePtr objChild = putArgChild->gtOp.gtOp1; + if (objChild->OperGet() == GT_LCL_VAR_ADDR) + { + // We will generate all of the code for the GT_PUTARG_STK, the GT_OBJ and the GT_LCL_VAR_ADDR + // as one contained operation + // + MakeSrcContained(putArgChild, objChild); + } + } + } } + args = args->gtOp.gtOp2; + } +} + +//------------------------------------------------------------------------ +// ContainCheckStoreIndir: determine whether the sources of a STOREIND node should be contained. +// +// Arguments: +// node - pointer to the node +// +void Lowering::ContainCheckStoreIndir(GenTreeIndir* node) +{ +#ifdef _TARGET_ARM64_ + GenTree* src = node->gtOp.gtOp2; + if (!varTypeIsFloating(src->TypeGet()) && src->IsIntegralConst(0)) + { + // an integer zero for 'src' can be contained. + MakeSrcContained(node, src); } #endif // _TARGET_ARM64_ + ContainCheckIndir(node); +} +//------------------------------------------------------------------------ +// ContainCheckIndir: Determine whether operands of an indir should be contained. +// +// Arguments: +// indirNode - The indirection node of interest +// +// Notes: +// This is called for both store and load indirections. +// +// Return Value: +// None. +// +void Lowering::ContainCheckIndir(GenTreeIndir* indirNode) +{ // If this is the rhs of a block copy it will be handled when we handle the store. if (indirNode->TypeGet() == TYP_STRUCT) { diff --git a/src/jit/lowerxarch.cpp b/src/jit/lowerxarch.cpp index 1c679478aa..b8f9b9d72c 100644 --- a/src/jit/lowerxarch.cpp +++ b/src/jit/lowerxarch.cpp @@ -30,8 +30,9 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX #include "lower.h" // xarch supports both ROL and ROR instructions so no lowering is required. 
-void Lowering::LowerRotate(GenTreePtr tree) +void Lowering::LowerRotate(GenTree* tree) { + ContainCheckShiftRotate(tree->AsOp()); } //------------------------------------------------------------------------ @@ -76,6 +77,7 @@ void Lowering::LowerShift(GenTreeOp* shift) BlockRange().Remove(andOp); BlockRange().Remove(maskOp); } + ContainCheckShiftRotate(shift); } //------------------------------------------------------------------------ @@ -86,12 +88,11 @@ void Lowering::LowerShift(GenTreeOp* shift) // // Notes: // This involves: +// - Handling of contained immediates. // - Widening operations of unsigneds. void Lowering::LowerStoreLoc(GenTreeLclVarCommon* storeLoc) { - GenTree* op1 = storeLoc->gtGetOp1(); - // Try to widen the ops if they are going into a local var. if ((storeLoc->gtOper == GT_STORE_LCL_VAR) && (storeLoc->gtOp1->gtOper == GT_CNS_INT)) { @@ -140,6 +141,39 @@ void Lowering::LowerStoreLoc(GenTreeLclVarCommon* storeLoc) } } } + ContainCheckStoreLoc(storeLoc); +} + +//------------------------------------------------------------------------ +// LowerStoreIndir: Determine addressing mode for an indirection, and whether operands are contained. +// +// Arguments: +// node - The indirect store node (GT_STORE_IND) of interest +// +// Return Value: +// None. +// +void Lowering::LowerStoreIndir(GenTreeIndir* node) +{ + // Mark all GT_STOREIND nodes to indicate that it is not known + // whether it represents a RMW memory op. + node->AsStoreInd()->SetRMWStatusDefault(); + + if (!varTypeIsFloating(node)) + { + // Perform recognition of trees with the following structure: + // StoreInd(addr, BinOp(expr, GT_IND(addr))) + // to be able to fold this into an instruction of the form + // BINOP [addr], register + // where register is the actual place where 'expr' is computed. + // + // SSE2 doesn't support RMW form of instructions. + if (LowerRMWMemOp(node)) + { + return; + } + } + ContainCheckStoreIndir(node); } //------------------------------------------------------------------------ @@ -178,6 +212,7 @@ void Lowering::LowerBlockStore(GenTreeBlk* blkNode) GenTree* initVal = source; if (initVal->OperIsInitVal()) { + initVal->SetContained(); initVal = initVal->gtGetOp1(); } srcAddrOrFill = initVal; @@ -218,11 +253,19 @@ void Lowering::LowerBlockStore(GenTreeBlk* blkNode) { initVal->gtIntCon.gtIconVal = 0x0101010101010101LL * fill; initVal->gtType = TYP_LONG; + if ((fill == 0) && ((size & 0xf) == 0)) + { + MakeSrcContained(blkNode, source); + } } #else // !_TARGET_AMD64_ initVal->gtIntCon.gtIconVal = 0x01010101 * fill; #endif // !_TARGET_AMD64_ + if ((fill == 0) && ((size & 0xf) == 0)) + { + MakeSrcContained(blkNode, source); + } blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindUnroll; } else @@ -239,134 +282,165 @@ void Lowering::LowerBlockStore(GenTreeBlk* blkNode) #endif // !_TARGET_AMD64_ } } - else if (blkNode->gtOper == GT_STORE_OBJ) + else { - // CopyObj + if (blkNode->gtOper == GT_STORE_OBJ) + { + // CopyObj - GenTreeObj* cpObjNode = blkNode->AsObj(); + GenTreeObj* cpObjNode = blkNode->AsObj(); - unsigned slots = cpObjNode->gtSlots; + unsigned slots = cpObjNode->gtSlots; #ifdef DEBUG - // CpObj must always have at least one GC-Pointer as a member. 
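(Note: as an aside on the InitBlk fill widening above, multiplying the low fill byte by 0x01010101, or by 0x0101010101010101 on 64-bit targets, replicates it into every byte lane, since each partial product lands in its own byte. A minimal standalone check with illustrative helper names:)

#include <cassert>
#include <cstdint>

// Replicates an 8-bit fill value across a wider store pattern, the same trick the
// unrolled InitBlk lowering uses before widening the initialization constant.
uint32_t ReplicateFill32(uint8_t fill)
{
    return 0x01010101u * fill; // fill appears in each of the four byte positions
}

uint64_t ReplicateFill64(uint8_t fill)
{
    return 0x0101010101010101ull * fill; // and in each of the eight positions here
}

int main()
{
    assert(ReplicateFill32(0xAB) == 0xABABABABu);
    assert(ReplicateFill64(0x7F) == 0x7F7F7F7F7F7F7F7Full);
    return 0;
}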
- assert(cpObjNode->gtGcPtrCount > 0); - - assert(dstAddr->gtType == TYP_BYREF || dstAddr->gtType == TYP_I_IMPL); - - CORINFO_CLASS_HANDLE clsHnd = cpObjNode->gtClass; - size_t classSize = comp->info.compCompHnd->getClassSize(clsHnd); - size_t blkSize = roundUp(classSize, TARGET_POINTER_SIZE); - - // Currently, the EE always round up a class data structure so - // we are not handling the case where we have a non multiple of pointer sized - // struct. This behavior may change in the future so in order to keeps things correct - // let's assert it just to be safe. Going forward we should simply - // handle this case. - assert(classSize == blkSize); - assert((blkSize / TARGET_POINTER_SIZE) == slots); - assert(cpObjNode->HasGCPtr()); + // CpObj must always have at least one GC-Pointer as a member. + assert(cpObjNode->gtGcPtrCount > 0); + + assert(dstAddr->gtType == TYP_BYREF || dstAddr->gtType == TYP_I_IMPL); + + CORINFO_CLASS_HANDLE clsHnd = cpObjNode->gtClass; + size_t classSize = comp->info.compCompHnd->getClassSize(clsHnd); + size_t blkSize = roundUp(classSize, TARGET_POINTER_SIZE); + + // Currently, the EE always round up a class data structure so + // we are not handling the case where we have a non multiple of pointer sized + // struct. This behavior may change in the future so in order to keeps things correct + // let's assert it just to be safe. Going forward we should simply + // handle this case. + assert(classSize == blkSize); + assert((blkSize / TARGET_POINTER_SIZE) == slots); + assert(cpObjNode->HasGCPtr()); #endif - bool IsRepMovsProfitable = false; - - // If the destination is not on the stack, let's find out if we - // can improve code size by using rep movsq instead of generating - // sequences of movsq instructions. - if (!dstAddr->OperIsLocalAddr()) - { - // Let's inspect the struct/class layout and determine if it's profitable - // to use rep movsq for copying non-gc memory instead of using single movsq - // instructions for each memory slot. - unsigned i = 0; - BYTE* gcPtrs = cpObjNode->gtGcPtrs; + bool IsRepMovsProfitable = false; - do + // If the destination is not on the stack, let's find out if we + // can improve code size by using rep movsq instead of generating + // sequences of movsq instructions. + if (!dstAddr->OperIsLocalAddr()) { - unsigned nonGCSlots = 0; - // Measure a contiguous non-gc area inside the struct and note the maximum. - while (i < slots && gcPtrs[i] == TYPE_GC_NONE) - { - nonGCSlots++; - i++; - } + // Let's inspect the struct/class layout and determine if it's profitable + // to use rep movsq for copying non-gc memory instead of using single movsq + // instructions for each memory slot. + unsigned i = 0; + BYTE* gcPtrs = cpObjNode->gtGcPtrs; - while (i < slots && gcPtrs[i] != TYPE_GC_NONE) + do { - i++; - } + unsigned nonGCSlots = 0; + // Measure a contiguous non-gc area inside the struct and note the maximum. + while (i < slots && gcPtrs[i] == TYPE_GC_NONE) + { + nonGCSlots++; + i++; + } - if (nonGCSlots >= CPOBJ_NONGC_SLOTS_LIMIT) - { - IsRepMovsProfitable = true; - break; - } - } while (i < slots); - } - else if (slots >= CPOBJ_NONGC_SLOTS_LIMIT) - { - IsRepMovsProfitable = true; - } + while (i < slots && gcPtrs[i] != TYPE_GC_NONE) + { + i++; + } - // There are two cases in which we need to materialize the - // struct size: - // a) When the destination is on the stack we don't need to use the - // write barrier, we can just simply call rep movsq and get a win in codesize. 
- // b) If we determine we have contiguous non-gc regions in the struct where it's profitable - // to use rep movsq instead of a sequence of single movsq instructions. According to the - // Intel Manual, the sweet spot for small structs is between 4 to 12 slots of size where - // the entire operation takes 20 cycles and encodes in 5 bytes (moving RCX, and calling rep movsq). - if (IsRepMovsProfitable) - { - // We need the size of the contiguous Non-GC-region to be in RCX to call rep movsq. - blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindRepInstr; + if (nonGCSlots >= CPOBJ_NONGC_SLOTS_LIMIT) + { + IsRepMovsProfitable = true; + break; + } + } while (i < slots); + } + else if (slots >= CPOBJ_NONGC_SLOTS_LIMIT) + { + IsRepMovsProfitable = true; + } + + // There are two cases in which we need to materialize the + // struct size: + // a) When the destination is on the stack we don't need to use the + // write barrier, we can just simply call rep movsq and get a win in codesize. + // b) If we determine we have contiguous non-gc regions in the struct where it's profitable + // to use rep movsq instead of a sequence of single movsq instructions. According to the + // Intel Manual, the sweet spot for small structs is between 4 to 12 slots of size where + // the entire operation takes 20 cycles and encodes in 5 bytes (moving RCX, and calling rep movsq). + if (IsRepMovsProfitable) + { + // We need the size of the contiguous Non-GC-region to be in RCX to call rep movsq. + blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindRepInstr; + } + else + { + blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindUnroll; + } } else { - blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindUnroll; - } - } - else - { - assert((blkNode->OperGet() == GT_STORE_BLK) || (blkNode->OperGet() == GT_STORE_DYN_BLK)); - // CopyBlk - // In case of a CpBlk with a constant size and less than CPBLK_MOVS_LIMIT size - // we can use rep movs to generate code instead of the helper call. + assert((blkNode->OperGet() == GT_STORE_BLK) || (blkNode->OperGet() == GT_STORE_DYN_BLK)); + // CopyBlk + // In case of a CpBlk with a constant size and less than CPBLK_MOVS_LIMIT size + // we can use rep movs to generate code instead of the helper call. - // This threshold will decide between using the helper or let the JIT decide to inline - // a code sequence of its choice. - unsigned helperThreshold = max(CPBLK_MOVS_LIMIT, CPBLK_UNROLL_LIMIT); + // This threshold will decide between using the helper or let the JIT decide to inline + // a code sequence of its choice. + unsigned helperThreshold = max(CPBLK_MOVS_LIMIT, CPBLK_UNROLL_LIMIT); - // TODO-X86-CQ: Investigate whether a helper call would be beneficial on x86 - if ((size != 0) && (size <= helperThreshold)) - { - // If we have a buffer between XMM_REGSIZE_BYTES and CPBLK_UNROLL_LIMIT bytes, we'll use SSE2. - // Structs and buffer with sizes <= CPBLK_UNROLL_LIMIT bytes are occurring in more than 95% of - // our framework assemblies, so this is the main code generation scheme we'll use. - if (size <= CPBLK_UNROLL_LIMIT) + // TODO-X86-CQ: Investigate whether a helper call would be beneficial on x86 + if ((size != 0) && (size <= helperThreshold)) { - blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindUnroll; + // If we have a buffer between XMM_REGSIZE_BYTES and CPBLK_UNROLL_LIMIT bytes, we'll use SSE2. + // Structs and buffer with sizes <= CPBLK_UNROLL_LIMIT bytes are occurring in more than 95% of + // our framework assemblies, so this is the main code generation scheme we'll use. 
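(Note: the profitability scan above looks for a sufficiently long run of contiguous non-GC slots before selecting rep movsq; the following simplified standalone model of that loop uses 0 in place of TYPE_GC_NONE and a plain limit parameter in place of CPOBJ_NONGC_SLOTS_LIMIT.)

#include <cstddef>

// Simplified model of the scan above: returns true if the GC-pointer layout contains
// a contiguous run of at least 'limit' non-GC slots (0 == no GC pointer in the slot).
bool HasNonGcRunOfAtLeast(const unsigned char* gcLayout, size_t slots, size_t limit)
{
    size_t i = 0;
    while (i < slots)
    {
        size_t run = 0;
        while (i < slots && gcLayout[i] == 0) // measure a contiguous non-GC area
        {
            run++;
            i++;
        }
        if (run >= limit)
        {
            return true; // a single rep movsq covers this stretch profitably
        }
        while (i < slots && gcLayout[i] != 0) // step over the GC-pointer slots
        {
            i++;
        }
    }
    return false;
}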
+ if (size <= CPBLK_UNROLL_LIMIT) + { + blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindUnroll; + + // If src or dst are on stack, we don't have to generate the address + // into a register because it's just some constant+SP. + if ((srcAddrOrFill != nullptr) && srcAddrOrFill->OperIsLocalAddr()) + { + MakeSrcContained(blkNode, srcAddrOrFill); + } + + if (dstAddr->OperIsLocalAddr()) + { + MakeSrcContained(blkNode, dstAddr); + } + } + else + { + blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindRepInstr; + } } +#ifdef _TARGET_AMD64_ + else + { + blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindHelper; + } +#elif defined(_TARGET_X86_) else { blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindRepInstr; } +#endif // _TARGET_X86_ + assert(blkNode->gtBlkOpKind != GenTreeBlk::BlkOpKindInvalid); } -#ifdef _TARGET_AMD64_ - else + + // CopyObj or CopyBlk + if (source->gtOper == GT_IND) { - blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindHelper; + // The GT_IND is contained, but the address must be in a register unless it is local. + MakeSrcContained(blkNode, source); + GenTree* addr = source->AsIndir()->Addr(); + if (!addr->OperIsLocalAddr()) + { + addr->ClearContained(); + } } -#elif defined(_TARGET_X86_) - else + else if (!source->IsMultiRegCall() && !source->OperIsSIMD()) { - blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindRepInstr; + assert(source->IsLocal()); + MakeSrcContained(blkNode, source); } -#endif // _TARGET_X86_ - assert(blkNode->gtBlkOpKind != GenTreeBlk::BlkOpKindInvalid); } } -#ifdef FEATURE_PUT_STRUCT_ARG_STK //------------------------------------------------------------------------ // LowerPutArgStk: Lower a GT_PUTARG_STK. // @@ -441,7 +515,6 @@ void Lowering::LowerPutArgStk(GenTreePutArgStk* putArgStk) #endif // DEBUG head->gtLsraInfo = fieldList->gtLsraInfo; - head->gtClearReg(comp); BlockRange().InsertAfter(fieldList, head); BlockRange().Remove(fieldList); @@ -473,6 +546,38 @@ void Lowering::LowerPutArgStk(GenTreePutArgStk* putArgStk) putArgStk->gtNumberReferenceSlots++; } + // For x86 we must mark all integral fields as contained or reg-optional, and handle them + // accordingly in code generation, since we may have up to 8 fields, which cannot all be in + // registers to be consumed atomically by the call. + if (varTypeIsIntegralOrI(fieldNode)) + { + if (fieldNode->OperGet() == GT_LCL_VAR) + { + LclVarDsc* varDsc = &(comp->lvaTable[fieldNode->AsLclVarCommon()->gtLclNum]); + if (!varDsc->lvDoNotEnregister) + { + SetRegOptional(fieldNode); + } + else + { + MakeSrcContained(putArgStk, fieldNode); + } + } + else if (fieldNode->IsIntCnsFitsInI32()) + { + MakeSrcContained(putArgStk, fieldNode); + } + else + { + // For the case where we cannot directly push the value, if we run out of registers, + // it would be better to defer computation until we are pushing the arguments rather + // than spilling, but this situation is not all that common, as most cases of promoted + // structs do not have a large number of fields, and of those most are lclVars or + // copy-propagated constants. + SetRegOptional(fieldNode); + } + } + prevOffset = fieldOffset; } @@ -494,15 +599,55 @@ void Lowering::LowerPutArgStk(GenTreePutArgStk* putArgStk) } #endif // _TARGET_X86_ + GenTreePtr src = putArgStk->gtOp1; + +#ifdef FEATURE_PUT_STRUCT_ARG_STK if (putArgStk->TypeGet() != TYP_STRUCT) +#endif // FEATURE_PUT_STRUCT_ARG_STK { + // If the child of GT_PUTARG_STK is a constant, we don't need a register to + // move it to memory (stack location). 
+ // + // On AMD64, we don't want to make 0 contained, because we can generate smaller code + // by zeroing a register and then storing it. E.g.: + // xor rdx, rdx + // mov gword ptr [rsp+28H], rdx + // is 2 bytes smaller than: + // mov gword ptr [rsp+28H], 0 + // + // On x86, we push stack arguments; we don't use 'mov'. So: + // push 0 + // is 1 byte smaller than: + // xor rdx, rdx + // push rdx + + if (IsContainableImmed(putArgStk, src) +#if defined(_TARGET_AMD64_) + && !src->IsIntegralConst(0) +#endif // _TARGET_AMD64_ + ) + { + MakeSrcContained(putArgStk, src); + } return; } +#ifdef FEATURE_PUT_STRUCT_ARG_STK GenTreePtr dst = putArgStk; - GenTreePtr src = putArgStk->gtOp1; GenTreePtr srcAddr = nullptr; + bool haveLocalAddr = false; + if ((src->OperGet() == GT_OBJ) || (src->OperGet() == GT_IND)) + { + srcAddr = src->gtOp.gtOp1; + assert(srcAddr != nullptr); + haveLocalAddr = srcAddr->OperIsLocalAddr(); + } + else + { + assert(varTypeIsSIMD(putArgStk)); + } + // In case of a CpBlk we could use a helper call. In case of putarg_stk we // can't do that since the helper call could kill some already set up outgoing args. // TODO-Amd64-Unix: converge the code for putarg_stk with cpyblk/cpyobj. @@ -545,8 +690,17 @@ void Lowering::LowerPutArgStk(GenTreePutArgStk* putArgStk) { putArgStk->gtPutArgStkKind = GenTreePutArgStk::Kind::RepInstr; } -} + // Always mark the OBJ and ADDR as contained trees by the putarg_stk. The codegen will deal with this tree. + MakeSrcContained(putArgStk, src); + if (haveLocalAddr) + { + // If the source address is the address of a lclVar, make the source address contained to avoid unnecessary + // copies. + // + MakeSrcContained(putArgStk, srcAddr); + } #endif // FEATURE_PUT_STRUCT_ARG_STK +} /* Lower GT_CAST(srcType, DstType) nodes. * @@ -587,10 +741,10 @@ void Lowering::LowerCast(GenTree* tree) { assert(tree->OperGet() == GT_CAST); - GenTreePtr op1 = tree->gtOp.gtOp1; - var_types dstType = tree->CastToType(); - var_types srcType = op1->TypeGet(); - var_types tmpType = TYP_UNDEF; + GenTreePtr castOp = tree->gtCast.CastOp(); + var_types castToType = tree->CastToType(); + var_types srcType = castOp->TypeGet(); + var_types tmpType = TYP_UNDEF; // force the srcType to unsigned if GT_UNSIGNED flag is set if (tree->gtFlags & GTF_UNSIGNED) @@ -600,52 +754,96 @@ void Lowering::LowerCast(GenTree* tree) // We should never see the following casts as they are expected to be lowered // apropriately or converted into helper calls by front-end. 
- // srcType = float/double dstType = * and overflow detecting cast + // srcType = float/double castToType = * and overflow detecting cast // Reason: must be converted to a helper call - // srcType = float/double, dstType = ulong + // srcType = float/double, castToType = ulong // Reason: must be converted to a helper call - // srcType = uint dstType = float/double + // srcType = uint castToType = float/double // Reason: uint -> float/double = uint -> long -> float/double - // srcType = ulong dstType = float + // srcType = ulong castToType = float // Reason: ulong -> float = ulong -> double -> float if (varTypeIsFloating(srcType)) { noway_assert(!tree->gtOverflow()); - noway_assert(dstType != TYP_ULONG); + noway_assert(castToType != TYP_ULONG); } else if (srcType == TYP_UINT) { - noway_assert(!varTypeIsFloating(dstType)); + noway_assert(!varTypeIsFloating(castToType)); } else if (srcType == TYP_ULONG) { - noway_assert(dstType != TYP_FLOAT); + noway_assert(castToType != TYP_FLOAT); } // Case of src is a small type and dst is a floating point type. - if (varTypeIsSmall(srcType) && varTypeIsFloating(dstType)) + if (varTypeIsSmall(srcType) && varTypeIsFloating(castToType)) { // These conversions can never be overflow detecting ones. noway_assert(!tree->gtOverflow()); tmpType = TYP_INT; } // case of src is a floating point type and dst is a small type. - else if (varTypeIsFloating(srcType) && varTypeIsSmall(dstType)) + else if (varTypeIsFloating(srcType) && varTypeIsSmall(castToType)) { tmpType = TYP_INT; } if (tmpType != TYP_UNDEF) { - GenTreePtr tmp = comp->gtNewCastNode(tmpType, op1, tmpType); + GenTreePtr tmp = comp->gtNewCastNode(tmpType, castOp, tmpType); tmp->gtFlags |= (tree->gtFlags & (GTF_UNSIGNED | GTF_OVERFLOW | GTF_EXCEPT)); tree->gtFlags &= ~GTF_UNSIGNED; tree->gtOp.gtOp1 = tmp; - BlockRange().InsertAfter(op1, tmp); + BlockRange().InsertAfter(castOp, tmp); + ContainCheckCast(tmp->AsCast()); } + + // Now determine if we have operands that should be contained. + ContainCheckCast(tree->AsCast()); } +#ifdef FEATURE_SIMD +//---------------------------------------------------------------------------------------------- +// Lowering::LowerSIMD: Perform containment analysis for a SIMD intrinsic node. +// +// Arguments: +// simdNode - The SIMD intrinsic node. +// +void Lowering::LowerSIMD(GenTreeSIMD* simdNode) +{ + if (simdNode->TypeGet() == TYP_SIMD12) + { + // GT_SIMD node requiring to produce TYP_SIMD12 in fact + // produces a TYP_SIMD16 result + simdNode->gtType = TYP_SIMD16; + } + +#ifdef _TARGET_XARCH_ + if ((simdNode->gtSIMDIntrinsicID == SIMDIntrinsicGetItem) && (simdNode->gtGetOp1()->OperGet() == GT_IND)) + { + // If SIMD vector is already in memory, we force its + // addr to be evaluated into a reg. This would allow + // us to generate [regBase] or [regBase+offset] or + // [regBase+sizeOf(SIMD vector baseType)*regIndex] + // to access the required SIMD vector element directly + // from memory. + // + // TODO-CQ-XARCH: If addr of GT_IND is GT_LEA, we + // might be able update GT_LEA to fold the regIndex + // or offset in some cases. Instead with this + // approach we always evaluate GT_LEA into a reg. + // Ideally, we should be able to lower GetItem intrinsic + // into GT_IND(newAddr) where newAddr combines + // the addr of SIMD vector with the given index. 
+ simdNode->gtOp1->gtFlags |= GTF_IND_REQ_ADDR_IN_REG; + } +#endif + ContainCheckSIMD(simdNode); +} +#endif // FEATURE_SIMD + //---------------------------------------------------------------------------------------------- // Lowering::IsRMWIndirCandidate: // Returns true if the given operand is a candidate indirection for a read-modify-write @@ -905,8 +1103,7 @@ bool Lowering::IsRMWMemOpRootedAtStoreInd(GenTreePtr tree, GenTreePtr* outIndirC if (GenTree::OperIsBinary(oper)) { // Return if binary op is not one of the supported operations for RMW of memory. - if (oper != GT_ADD && oper != GT_SUB && oper != GT_AND && oper != GT_OR && oper != GT_XOR && - !GenTree::OperIsShiftOrRotate(oper)) + if (!GenTree::OperIsRMWMemOp(oper)) { storeInd->SetRMWStatus(STOREIND_RMW_UNSUPPORTED_OPER); return false; @@ -1088,15 +1285,19 @@ GenTree* Lowering::PreferredRegOptionalOperand(GenTree* tree) assert(GenTree::OperIsBinary(tree->OperGet())); assert(tree->OperIsCommutative() || tree->OperIsCompare() || tree->OperIs(GT_CMP)); - GenTree* op1 = tree->gtGetOp1(); - GenTree* op2 = tree->gtGetOp2(); - GenTree* preferredOp = nullptr; + GenTree* op1 = tree->gtGetOp1(); + GenTree* op2 = tree->gtGetOp2(); + assert(!op1->IsRegOptional() && !op2->IsRegOptional()); + + // We default to op1, as op2 is likely to have the shorter lifetime. + GenTree* preferredOp = op1; // This routine uses the following heuristics: // // a) If both are register candidates, marking the one with lower weighted // ref count as reg-optional would likely be beneficial as it has - // higher probability of not getting a register. + // higher probability of not getting a register. Note that we use !lvDoNotEnregister + // here because this is being done while we are adding lclVars for Lowering. // // b) op1 = tracked local and op2 = untracked local: LSRA creates two // ref positions for op2: a def and use position. op2's def position @@ -1131,51 +1332,25 @@ GenTree* Lowering::PreferredRegOptionalOperand(GenTree* tree) LclVarDsc* v1 = comp->lvaTable + op1->AsLclVarCommon()->GetLclNum(); LclVarDsc* v2 = comp->lvaTable + op2->AsLclVarCommon()->GetLclNum(); - bool v1IsRegCandidate = !v1->lvDoNotEnregister && v1->lvTracked; - bool v2IsRegCandidate = !v2->lvDoNotEnregister && v2->lvTracked; + bool v1IsRegCandidate = !v1->lvDoNotEnregister; + bool v2IsRegCandidate = !v2->lvDoNotEnregister; if (v1IsRegCandidate && v2IsRegCandidate) { - // Both are tracked enregisterable locals. The one with lower weight is less likely + // Both are enregisterable locals. The one with lower weight is less likely // to get a register and hence beneficial to mark the one with lower // weight as reg optional. - if (v1->lvRefCntWtd < v2->lvRefCntWtd) - { - preferredOp = op1; - } - else + // If either is not tracked, it may be that it was introduced after liveness + // was run, in which case we will always prefer op1 (should we use raw refcnt??). + if (v1->lvTracked && v2->lvTracked && (v1->lvRefCntWtd >= v2->lvRefCntWtd)) { preferredOp = op2; } } - else if (v2IsRegCandidate) - { - // v1 is not a reg candidate and its use position is less likely to get a register. - preferredOp = op1; - } - else if (v1IsRegCandidate) - { - // v2 is not a reg candidate and its def position always - // needs a reg. Hence it is better to mark v1 as - // reg optional. 
- preferredOp = op1; - } - else - { - preferredOp = op1; - } } - else if (op1->OperGet() == GT_LCL_VAR) - { - preferredOp = op1; - } - else if (op2->OperGet() == GT_LCL_VAR) + else if (!(op1->OperGet() == GT_LCL_VAR) && (op2->OperGet() == GT_LCL_VAR)) { preferredOp = op2; } - else - { - preferredOp = op1; - } return preferredOp; } @@ -1185,121 +1360,102 @@ GenTree* Lowering::PreferredRegOptionalOperand(GenTree* tree) //------------------------------------------------------------------------ //------------------------------------------------------------------------ -// LowerRMWMemOp: Determine if this is a valid RMW mem op, and if so lower it accordingly +// ContainCheckCallOperands: Determine whether operands of a call should be contained. // // Arguments: -// node - The indirect store node (GT_STORE_IND) of interest +// call - The call node of interest // // Return Value: -// Returns true if 'node' is a valid RMW mem op; false otherwise. +// None. // -bool Lowering::LowerRMWMemOp(GenTreeIndir* storeInd) +void Lowering::ContainCheckCallOperands(GenTreeCall* call) { - assert(storeInd->OperGet() == GT_STOREIND); - - // SSE2 doesn't support RMW on float values - assert(!varTypeIsFloating(storeInd)); - - // Terminology: - // indirDst = memory write of an addr mode (i.e. storeind destination) - // indirSrc = value being written to memory (i.e. storeind source which could a binary/unary op) - // indirCandidate = memory read i.e. a gtInd of an addr mode - // indirOpSource = source operand used in binary/unary op (i.e. source operand of indirSrc node) - - GenTreePtr indirCandidate = nullptr; - GenTreePtr indirOpSource = nullptr; - - if (!IsRMWMemOpRootedAtStoreInd(storeInd, &indirCandidate, &indirOpSource)) + GenTree* ctrlExpr = call->gtControlExpr; + if (call->gtCallType == CT_INDIRECT) { - JITDUMP("Lower of StoreInd didn't mark the node as self contained for reason: %d\n", - storeInd->AsStoreInd()->GetRMWStatus()); - DISPTREERANGE(BlockRange(), storeInd); - return false; - } + // either gtControlExpr != null or gtCallAddr != null. + // Both cannot be non-null at the same time. + assert(ctrlExpr == nullptr); + assert(call->gtCallAddr != nullptr); + ctrlExpr = call->gtCallAddr; - GenTreePtr indirDst = storeInd->gtGetOp1(); - GenTreePtr indirSrc = storeInd->gtGetOp2(); - genTreeOps oper = indirSrc->OperGet(); - - // At this point we have successfully detected a RMW memory op of one of the following forms - // storeInd(indirDst, indirSrc(indirCandidate, indirOpSource)) OR - // storeInd(indirDst, indirSrc(indirOpSource, indirCandidate) in case of commutative operations OR - // storeInd(indirDst, indirSrc(indirCandidate) in case of unary operations - // - // Here indirSrc = one of the supported binary or unary operation for RMW of memory - // indirCandidate = a GT_IND node - // indirCandidateChild = operand of GT_IND indirCandidate - // - // The logic below does the following - // Make indirOpSource contained. - // Make indirSrc contained. - // Make indirCandidate contained. - // Make indirCandidateChild contained. - // Make indirDst contained except when it is a GT_LCL_VAR or GT_CNS_INT that doesn't fit within addr - // base. - // +#ifdef _TARGET_X86_ + // Fast tail calls aren't currently supported on x86, but if they ever are, the code + // below that handles indirect VSD calls will need to be fixed. + assert(!call->IsFastTailCall() || !call->IsVirtualStub()); +#endif // _TARGET_X86_ + } - if (GenTree::OperIsBinary(oper)) + // set reg requirements on call target represented as control sequence. 
+ if (ctrlExpr != nullptr) { - // On Xarch RMW operations require the source to be an immediate or in a register. - // Therefore, if we have previously marked the indirOpSource as contained while lowering - // the binary node, we need to reset that now. - if (IsContainableMemoryOp(indirOpSource, true)) + // we should never see a gtControlExpr whose type is void. + assert(ctrlExpr->TypeGet() != TYP_VOID); + + // In case of fast tail implemented as jmp, make sure that gtControlExpr is + // computed into a register. + if (!call->IsFastTailCall()) { - indirOpSource->ClearContained(); +#ifdef _TARGET_X86_ + // On x86, we need to generate a very specific pattern for indirect VSD calls: + // + // 3-byte nop + // call dword ptr [eax] + // + // Where EAX is also used as an argument to the stub dispatch helper. Make + // sure that the call target address is computed into EAX in this case. + if (call->IsVirtualStub() && (call->gtCallType == CT_INDIRECT)) + { + assert(ctrlExpr->isIndir()); + MakeSrcContained(call, ctrlExpr); + } + else +#endif // _TARGET_X86_ + if (ctrlExpr->isIndir()) + { + MakeSrcContained(call, ctrlExpr); + // We may have cases where we have set a register target on the ctrlExpr, but if it + // contained we must clear it. + ctrlExpr->gtRegNum = REG_NA; + } } - JITDUMP("Lower succesfully detected an assignment of the form: *addrMode BinOp= source\n"); } - else + // If there is an explicit this pointer, we don't want that node to produce anything + // as it is redundant + if (call->gtCallObjp != nullptr) { - assert(GenTree::OperIsUnary(oper)); - JITDUMP("Lower succesfully detected an assignment of the form: *addrMode = UnaryOp(*addrMode)\n"); - } - DISPTREERANGE(BlockRange(), storeInd); - - indirSrc->SetContained(); - indirCandidate->SetContained(); + GenTreePtr thisPtrNode = call->gtCallObjp; - GenTreePtr indirCandidateChild = indirCandidate->gtGetOp1(); - indirCandidateChild->SetContained(); - - if (indirCandidateChild->OperGet() == GT_LEA) - { - GenTreeAddrMode* addrMode = indirCandidateChild->AsAddrMode(); - - if (addrMode->HasBase()) + if (thisPtrNode->canBeContained()) { - assert(addrMode->Base()->OperIsLeaf()); - addrMode->Base()->SetContained(); + MakeSrcContained(call, thisPtrNode); + if (thisPtrNode->gtOper == GT_PUTARG_REG) + { + MakeSrcContained(call, thisPtrNode->gtOp.gtOp1); + } } + } - if (addrMode->HasIndex()) + GenTree* args = call->gtCallArgs; + while (args) + { + GenTree* arg = args->gtOp.gtOp1; + if (arg->gtOper == GT_PUTARG_STK) { - assert(addrMode->Index()->OperIsLeaf()); - addrMode->Index()->SetContained(); + LowerPutArgStk(arg->AsPutArgStk()); } - - indirDst->SetContained(); + args = args->gtOp.gtOp2; } - else + args = call->gtCallLateArgs; + while (args) { - assert(indirCandidateChild->OperGet() == GT_LCL_VAR || indirCandidateChild->OperGet() == GT_LCL_VAR_ADDR || - indirCandidateChild->OperGet() == GT_CLS_VAR_ADDR || indirCandidateChild->OperGet() == GT_CNS_INT); - - // If it is a GT_LCL_VAR, it still needs the reg to hold the address. - // We would still need a reg for GT_CNS_INT if it doesn't fit within addressing mode base. - // For GT_CLS_VAR_ADDR, we don't need a reg to hold the address, because field address value is known at jit - // time. Also, we don't need a reg for GT_CLS_VAR_ADDR. 
- if (indirCandidateChild->OperGet() == GT_LCL_VAR_ADDR || indirCandidateChild->OperGet() == GT_CLS_VAR_ADDR) - { - indirDst->SetContained(); - } - else if (indirCandidateChild->IsCnsIntOrI() && indirCandidateChild->AsIntConCommon()->FitsInAddrBase(comp)) + GenTree* arg = args->gtOp.gtOp1; + if (arg->gtOper == GT_PUTARG_STK) { - indirDst->SetContained(); + LowerPutArgStk(arg->AsPutArgStk()); } + args = args->gtOp.gtOp2; } - return true; } //------------------------------------------------------------------------ @@ -1380,74 +1536,23 @@ void Lowering::ContainCheckIndir(GenTreeIndir* node) } //------------------------------------------------------------------------ -// ContainCheckBinary: Determine whether a binary op's operands should be contained. +// ContainCheckStoreIndir: determine whether the sources of a STOREIND node should be contained. // // Arguments: -// node - the node we care about +// node - pointer to the node // -void Lowering::ContainCheckBinary(GenTreeOp* node) +void Lowering::ContainCheckStoreIndir(GenTreeIndir* node) { - assert(node->OperIsBinary() && !varTypeIsFloating(node)); - - // We're not marking a constant hanging on the left of an add - // as containable so we assign it to a register having CQ impact. - // TODO-XArch-CQ: Detect this case and support both generating a single instruction - // for GT_ADD(Constant, SomeTree) - - GenTree* op1 = node->gtOp1; - GenTree* op2 = node->gtOp2; - - // We can directly encode the second operand if it is either a containable constant or a memory-op. - // In case of memory-op, we can encode it directly provided its type matches with 'tree' type. - // This is because during codegen, type of 'tree' is used to determine emit Type size. If the types - // do not match, they get normalized (i.e. sign/zero extended) on load into a register. - bool directlyEncodable = false; - bool binOpInRMW = false; - GenTreePtr operand = nullptr; - - if (IsContainableImmed(node, op2)) - { - directlyEncodable = true; - operand = op2; - } - else - { - binOpInRMW = IsBinOpInRMWStoreInd(node); - if (!binOpInRMW) - { - const unsigned operatorSize = genTypeSize(node->TypeGet()); - if (IsContainableMemoryOp(op2, true) && (genTypeSize(op2->TypeGet()) == operatorSize)) - { - directlyEncodable = true; - operand = op2; - } - else if (node->OperIsCommutative()) - { - if (IsContainableImmed(node, op1) || - (IsContainableMemoryOp(op1, true) && (genTypeSize(op1->TypeGet()) == operatorSize) && - IsSafeToContainMem(node, op1))) - { - // If it is safe, we can reverse the order of operands of commutative operations for efficient - // codegen - directlyEncodable = true; - operand = op1; - } - } - } - } - - if (directlyEncodable) + // If the source is a containable immediate, make it contained, unless it is + // an int-size or larger store of zero to memory, because we can generate smaller code + // by zeroing a register and then storing it. + GenTree* src = node->gtOp.gtOp2; + if (IsContainableImmed(node, src) && + (!src->IsIntegralConst(0) || varTypeIsSmall(node) || node->gtGetOp1()->OperGet() == GT_CLS_VAR_ADDR)) { - assert(operand != nullptr); - MakeSrcContained(node, operand); - } - else if (!binOpInRMW) - { - // If this binary op neither has contained operands, nor is a - // Read-Modify-Write (RMW) operation, we can mark its operands - // as reg optional. 
- SetRegOptionalForBinOp(node); + MakeSrcContained(node, src); } + ContainCheckIndir(node); } //------------------------------------------------------------------------ @@ -1471,11 +1576,11 @@ void Lowering::ContainCheckMul(GenTreeOp* node) { assert(node->OperGet() == GT_MUL); - if (IsContainableMemoryOp(op2, true) || op2->IsCnsNonZeroFltOrDbl()) + if (IsContainableMemoryOp(op2) || op2->IsCnsNonZeroFltOrDbl()) { MakeSrcContained(node, op2); } - else if (op1->IsCnsNonZeroFltOrDbl() || (IsContainableMemoryOp(op1, true) && IsSafeToContainMem(node, op1))) + else if (op1->IsCnsNonZeroFltOrDbl() || (IsContainableMemoryOp(op1) && IsSafeToContainMem(node, op1))) { // Since GT_MUL is commutative, we will try to re-order operands if it is safe to // generate more efficient code sequence for the case of GT_MUL(op1=memOp, op2=non-memOp) @@ -1539,7 +1644,7 @@ void Lowering::ContainCheckMul(GenTreeOp* node) } MakeSrcContained(node, imm); // The imm is always contained - if (IsContainableMemoryOp(other, true)) + if (IsContainableMemoryOp(other)) { memOp = other; // memOp may be contained below } @@ -1552,12 +1657,11 @@ void Lowering::ContainCheckMul(GenTreeOp* node) // if (memOp == nullptr) { - if (IsContainableMemoryOp(op2, true) && (op2->TypeGet() == node->TypeGet()) && IsSafeToContainMem(node, op2)) + if (IsContainableMemoryOp(op2) && (op2->TypeGet() == node->TypeGet()) && IsSafeToContainMem(node, op2)) { memOp = op2; } - else if (IsContainableMemoryOp(op1, true) && (op1->TypeGet() == node->TypeGet()) && - IsSafeToContainMem(node, op1)) + else if (IsContainableMemoryOp(op1) && (op1->TypeGet() == node->TypeGet()) && IsSafeToContainMem(node, op1)) { memOp = op1; } @@ -1699,7 +1803,7 @@ void Lowering::ContainCheckCast(GenTreeCast* node) // U8 -> R8 conversion requires that the operand be in a register. if (srcType != TYP_ULONG) { - if (IsContainableMemoryOp(castOp, true) || castOp->IsCnsNonZeroFltOrDbl()) + if (IsContainableMemoryOp(castOp) || castOp->IsCnsNonZeroFltOrDbl()) { MakeSrcContained(node, castOp); } @@ -1774,7 +1878,7 @@ void Lowering::ContainCheckCompare(GenTreeOp* cmp) { MakeSrcContained(cmp, otherOp); } - else if (IsContainableMemoryOp(otherOp, true) && ((otherOp == op2) || IsSafeToContainMem(cmp, otherOp))) + else if (IsContainableMemoryOp(otherOp) && ((otherOp == op2) || IsSafeToContainMem(cmp, otherOp))) { MakeSrcContained(cmp, otherOp); } @@ -1797,7 +1901,7 @@ void Lowering::ContainCheckCompare(GenTreeOp* cmp) // we can treat the MemoryOp as contained. if (op1Type == op2Type) { - if (IsContainableMemoryOp(op1, true)) + if (IsContainableMemoryOp(op1)) { MakeSrcContained(cmp, op1); } @@ -1846,11 +1950,11 @@ void Lowering::ContainCheckCompare(GenTreeOp* cmp) // Note that TEST does not have a r,rm encoding like CMP has but we can still // contain the second operand because the emitter maps both r,rm and rm,r to // the same instruction code. This avoids the need to special case TEST here. - if (IsContainableMemoryOp(op2, true)) + if (IsContainableMemoryOp(op2)) { MakeSrcContained(cmp, op2); } - else if (IsContainableMemoryOp(op1, true) && IsSafeToContainMem(cmp, op1)) + else if (IsContainableMemoryOp(op1) && IsSafeToContainMem(cmp, op1)) { MakeSrcContained(cmp, op1); } @@ -1872,72 +1976,206 @@ void Lowering::ContainCheckCompare(GenTreeOp* cmp) } //------------------------------------------------------------------------ -// ContainCheckFloatBinary: determine whether the sources of a floating point binary node should be contained. 
+// LowerRMWMemOp: Determine if this is a valid RMW mem op, and if so lower it accordingly // // Arguments: -// node - pointer to the node +// node - The indirect store node (GT_STORE_IND) of interest // -void Lowering::ContainCheckFloatBinary(GenTreeOp* node) +// Return Value: +// Returns true if 'node' is a valid RMW mem op; false otherwise. +// +bool Lowering::LowerRMWMemOp(GenTreeIndir* storeInd) { - assert(node->OperIsBinary() && varTypeIsFloating(node)); + assert(storeInd->OperGet() == GT_STOREIND); - // overflow operations aren't supported on float/double types. - assert(!node->gtOverflow()); + // SSE2 doesn't support RMW on float values + assert(!varTypeIsFloating(storeInd)); - GenTree* op1 = node->gtGetOp1(); - GenTree* op2 = node->gtGetOp2(); + // Terminology: + // indirDst = memory write of an addr mode (i.e. storeind destination) + // indirSrc = value being written to memory (i.e. storeind source which could a binary/unary op) + // indirCandidate = memory read i.e. a gtInd of an addr mode + // indirOpSource = source operand used in binary/unary op (i.e. source operand of indirSrc node) - // No implicit conversions at this stage as the expectation is that - // everything is made explicit by adding casts. - assert(op1->TypeGet() == op2->TypeGet()); + GenTreePtr indirCandidate = nullptr; + GenTreePtr indirOpSource = nullptr; - if (IsContainableMemoryOp(op2, true) || op2->IsCnsNonZeroFltOrDbl()) + if (!IsRMWMemOpRootedAtStoreInd(storeInd, &indirCandidate, &indirOpSource)) { - MakeSrcContained(node, op2); + JITDUMP("Lower of StoreInd didn't mark the node as self contained for reason: %d\n", + storeInd->AsStoreInd()->GetRMWStatus()); + DISPTREERANGE(BlockRange(), storeInd); + return false; } - else if (node->OperIsCommutative() && - (op1->IsCnsNonZeroFltOrDbl() || (IsContainableMemoryOp(op1, true) && IsSafeToContainMem(node, op1)))) + + GenTreePtr indirDst = storeInd->gtGetOp1(); + GenTreePtr indirSrc = storeInd->gtGetOp2(); + genTreeOps oper = indirSrc->OperGet(); + + // At this point we have successfully detected a RMW memory op of one of the following forms + // storeInd(indirDst, indirSrc(indirCandidate, indirOpSource)) OR + // storeInd(indirDst, indirSrc(indirOpSource, indirCandidate) in case of commutative operations OR + // storeInd(indirDst, indirSrc(indirCandidate) in case of unary operations + // + // Here indirSrc = one of the supported binary or unary operation for RMW of memory + // indirCandidate = a GT_IND node + // indirCandidateChild = operand of GT_IND indirCandidate + // + // The logic below does the following + // Make indirOpSource contained. + // Make indirSrc contained. + // Make indirCandidate contained. + // Make indirCandidateChild contained. + // Make indirDst contained except when it is a GT_LCL_VAR or GT_CNS_INT that doesn't fit within addr + // base. + // + + // We have already done containment analysis on the indirSrc op. + // If any of its operands are marked regOptional, reset that now. 
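(Note: for orientation, the shape recognized here corresponds to a plain read-modify-write of a single address at the source level; a hypothetical example, not taken from this change:)

// StoreInd(addr, ADD(IND(addr), delta)) in the IR; on xarch the whole statement can
// fold into a single "add dword ptr [reg], reg" rather than a load, an add and a store.
void BumpCounter(int* counter, int delta)
{
    *counter += delta;
}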
+ indirSrc->AsOp()->gtOp1->ClearRegOptional(); + if (GenTree::OperIsBinary(oper)) { - // Though we have GT_ADD(op1=memOp, op2=non-memOp, we try to reorder the operands - // as long as it is safe so that the following efficient code sequence is generated: - // addss/sd targetReg, memOp (if op1Reg == targetReg) OR - // movaps targetReg, op2Reg; addss/sd targetReg, [memOp] - // - // Instead of - // movss op1Reg, [memOp]; addss/sd targetReg, Op2Reg (if op1Reg == targetReg) OR - // movss op1Reg, [memOp]; movaps targetReg, op1Reg, addss/sd targetReg, Op2Reg - MakeSrcContained(node, op1); + // On Xarch RMW operations require the source to be an immediate or in a register. + // Therefore, if we have previously marked the indirOpSource as contained while lowering + // the binary node, we need to reset that now. + if (IsContainableMemoryOp(indirOpSource)) + { + indirOpSource->ClearContained(); + } + indirSrc->AsOp()->gtOp2->ClearRegOptional(); + JITDUMP("Lower succesfully detected an assignment of the form: *addrMode BinOp= source\n"); } else { - // If there are no containable operands, we can make an operand reg optional. - SetRegOptionalForBinOp(node); + assert(GenTree::OperIsUnary(oper)); + JITDUMP("Lower succesfully detected an assignment of the form: *addrMode = UnaryOp(*addrMode)\n"); + } + DISPTREERANGE(BlockRange(), storeInd); + + indirSrc->SetContained(); + indirCandidate->SetContained(); + + GenTreePtr indirCandidateChild = indirCandidate->gtGetOp1(); + indirCandidateChild->SetContained(); + + if (indirCandidateChild->OperGet() == GT_LEA) + { + GenTreeAddrMode* addrMode = indirCandidateChild->AsAddrMode(); + + if (addrMode->HasBase()) + { + assert(addrMode->Base()->OperIsLeaf()); + addrMode->Base()->SetContained(); + } + + if (addrMode->HasIndex()) + { + assert(addrMode->Index()->OperIsLeaf()); + addrMode->Index()->SetContained(); + } + + indirDst->SetContained(); } + else + { + assert(indirCandidateChild->OperGet() == GT_LCL_VAR || indirCandidateChild->OperGet() == GT_LCL_VAR_ADDR || + indirCandidateChild->OperGet() == GT_CLS_VAR_ADDR || indirCandidateChild->OperGet() == GT_CNS_INT); + + // If it is a GT_LCL_VAR, it still needs the reg to hold the address. + // We would still need a reg for GT_CNS_INT if it doesn't fit within addressing mode base. + // For GT_CLS_VAR_ADDR, we don't need a reg to hold the address, because field address value is known at jit + // time. Also, we don't need a reg for GT_CLS_VAR_ADDR. + if (indirCandidateChild->OperGet() == GT_LCL_VAR_ADDR || indirCandidateChild->OperGet() == GT_CLS_VAR_ADDR) + { + indirDst->SetContained(); + } + else if (indirCandidateChild->IsCnsIntOrI() && indirCandidateChild->AsIntConCommon()->FitsInAddrBase(comp)) + { + indirDst->SetContained(); + } + } + return true; } //------------------------------------------------------------------------ -// ContainCheckIntrinsic: determine whether the source of an INTRINSIC node should be contained. +// ContainCheckBinary: Determine whether a binary op's operands should be contained. 
// // Arguments: -// node - pointer to the node +// node - the node we care about // -void Lowering::ContainCheckIntrinsic(GenTreeOp* node) +void Lowering::ContainCheckBinary(GenTreeOp* node) { - assert(node->OperIs(GT_INTRINSIC)); - if (node->gtIntrinsic.gtIntrinsicId == CORINFO_INTRINSIC_Sqrt) + assert(node->OperIsBinary()); + + if (varTypeIsFloating(node)) { - GenTree* op1 = node->gtGetOp1(); - if (IsContainableMemoryOp(op1, true) || op1->IsCnsNonZeroFltOrDbl()) - { - MakeSrcContained(node, op1); - } - else + assert(node->OperIs(GT_ADD, GT_SUB)); + ContainCheckFloatBinary(node); + return; + } + + // Codegen of these tree nodes sets ZF and SF flags. + node->gtFlags |= GTF_ZSF_SET; + + // We're not marking a constant hanging on the left of an add + // as containable so we assign it to a register having CQ impact. + // TODO-XArch-CQ: Detect this case and support both generating a single instruction + // for GT_ADD(Constant, SomeTree) + + GenTree* op1 = node->gtOp1; + GenTree* op2 = node->gtOp2; + + // We can directly encode the second operand if it is either a containable constant or a memory-op. + // In case of memory-op, we can encode it directly provided its type matches with 'tree' type. + // This is because during codegen, type of 'tree' is used to determine emit Type size. If the types + // do not match, they get normalized (i.e. sign/zero extended) on load into a register. + bool directlyEncodable = false; + bool binOpInRMW = false; + GenTreePtr operand = nullptr; + + if (IsContainableImmed(node, op2)) + { + directlyEncodable = true; + operand = op2; + } + else + { + binOpInRMW = IsBinOpInRMWStoreInd(node); + if (!binOpInRMW) { - // Mark the operand as reg optional since codegen can still - // generate code if op1 is on stack. - SetRegOptional(op1); + const unsigned operatorSize = genTypeSize(node->TypeGet()); + if (IsContainableMemoryOp(op2) && (genTypeSize(op2->TypeGet()) == operatorSize)) + { + directlyEncodable = true; + operand = op2; + } + else if (node->OperIsCommutative()) + { + if (IsContainableImmed(node, op1) || + (IsContainableMemoryOp(op1) && (genTypeSize(op1->TypeGet()) == operatorSize) && + IsSafeToContainMem(node, op1))) + { + // If it is safe, we can reverse the order of operands of commutative operations for efficient + // codegen + directlyEncodable = true; + operand = op1; + } + } } } + + if (directlyEncodable) + { + assert(operand != nullptr); + MakeSrcContained(node, operand); + } + else if (!binOpInRMW) + { + // If this binary op neither has contained operands, nor is a + // Read-Modify-Write (RMW) operation, we can mark its operands + // as reg optional. + SetRegOptionalForBinOp(node); + } } //------------------------------------------------------------------------ @@ -1958,7 +2196,7 @@ void Lowering::ContainCheckBoundsChk(GenTreeBoundsChk* node) { other = node->gtIndex; } - else if (IsContainableMemoryOp(node->gtIndex, true)) + else if (IsContainableMemoryOp(node->gtIndex)) { other = node->gtIndex; } @@ -1969,7 +2207,7 @@ void Lowering::ContainCheckBoundsChk(GenTreeBoundsChk* node) if (node->gtIndex->TypeGet() == node->gtArrLen->TypeGet()) { - if (IsContainableMemoryOp(other, true)) + if (IsContainableMemoryOp(other)) { MakeSrcContained(node, other); } @@ -1981,6 +2219,31 @@ void Lowering::ContainCheckBoundsChk(GenTreeBoundsChk* node) } } +//------------------------------------------------------------------------ +// ContainCheckIntrinsic: determine whether the source of an INTRINSIC node should be contained. 
+// +// Arguments: +// node - pointer to the node +// +void Lowering::ContainCheckIntrinsic(GenTreeOp* node) +{ + assert(node->OperIs(GT_INTRINSIC)); + if (node->gtIntrinsic.gtIntrinsicId == CORINFO_INTRINSIC_Sqrt) + { + GenTree* op1 = node->gtGetOp1(); + if (IsContainableMemoryOp(op1) || op1->IsCnsNonZeroFltOrDbl()) + { + MakeSrcContained(node, op1); + } + else + { + // Mark the operand as reg optional since codegen can still + // generate code if op1 is on stack. + SetRegOptional(op1); + } + } +} + #ifdef FEATURE_SIMD //---------------------------------------------------------------------------------------------- // ContainCheckSIMD: Perform containment analysis for a SIMD intrinsic node. @@ -2066,7 +2329,7 @@ void Lowering::ContainCheckSIMD(GenTreeSIMD* simdNode) // If the index is a constant, mark it as contained. CheckImmedAndMakeContained(simdNode, op2); - if (IsContainableMemoryOp(op1, true)) + if (IsContainableMemoryOp(op1)) { MakeSrcContained(simdNode, op1); if (op1->OperGet() == GT_IND) @@ -2089,6 +2352,50 @@ void Lowering::ContainCheckSIMD(GenTreeSIMD* simdNode) } #endif // FEATURE_SIMD +//------------------------------------------------------------------------ +// ContainCheckFloatBinary: determine whether the sources of a floating point binary node should be contained. +// +// Arguments: +// node - pointer to the node +// +void Lowering::ContainCheckFloatBinary(GenTreeOp* node) +{ + assert(node->OperIsBinary() && varTypeIsFloating(node)); + + // overflow operations aren't supported on float/double types. + assert(!node->gtOverflow()); + + GenTree* op1 = node->gtGetOp1(); + GenTree* op2 = node->gtGetOp2(); + + // No implicit conversions at this stage as the expectation is that + // everything is made explicit by adding casts. + assert(op1->TypeGet() == op2->TypeGet()); + + if (IsContainableMemoryOp(op2) || op2->IsCnsNonZeroFltOrDbl()) + { + MakeSrcContained(node, op2); + } + else if (node->OperIsCommutative() && + (op1->IsCnsNonZeroFltOrDbl() || (IsContainableMemoryOp(op1) && IsSafeToContainMem(node, op1)))) + { + // Though we have GT_ADD(op1=memOp, op2=non-memOp, we try to reorder the operands + // as long as it is safe so that the following efficient code sequence is generated: + // addss/sd targetReg, memOp (if op1Reg == targetReg) OR + // movaps targetReg, op2Reg; addss/sd targetReg, [memOp] + // + // Instead of + // movss op1Reg, [memOp]; addss/sd targetReg, Op2Reg (if op1Reg == targetReg) OR + // movss op1Reg, [memOp]; movaps targetReg, op1Reg, addss/sd targetReg, Op2Reg + MakeSrcContained(node, op1); + } + else + { + // If there are no containable operands, we can make an operand reg optional. + SetRegOptionalForBinOp(node); + } +} + #endif // _TARGET_XARCH_ #endif // !LEGACY_BACKEND diff --git a/src/jit/lsra.cpp b/src/jit/lsra.cpp index 3d798cec34..c97fe8b8bf 100644 --- a/src/jit/lsra.cpp +++ b/src/jit/lsra.cpp @@ -10600,7 +10600,11 @@ void TreeNodeInfo::Initialize(LinearScan* lsra, GenTree* node, LsraLocation loca // if there is a reg indicated on the tree node, use that for dstCandidates // the exception is the NOP, which sometimes show up around late args. // TODO-Cleanup: get rid of those NOPs. 
- if (node->gtRegNum == REG_NA || node->gtOper == GT_NOP) + if (node->gtRegNum == REG_STK) + { + dstCandidates = RBM_NONE; + } + else if (node->gtRegNum == REG_NA || node->gtOper == GT_NOP) { #ifdef ARM_SOFTFP if (node->OperGet() == GT_PUTARG_REG) diff --git a/src/jit/lsra.h b/src/jit/lsra.h index 30f666e97c..9503fae3e8 100644 --- a/src/jit/lsra.h +++ b/src/jit/lsra.h @@ -749,29 +749,6 @@ private: // Update reg state for an incoming register argument void updateRegStateForArg(LclVarDsc* argDsc); - inline void setTreeNodeInfo(GenTree* tree, TreeNodeInfo info) - { - tree->gtLsraInfo = info; - tree->gtClearReg(compiler); - - DBEXEC(VERBOSE, info.dump(this)); - } - - inline void clearDstCount(GenTree* tree) - { - tree->gtLsraInfo.dstCount = 0; - } - - inline void clearOperandCounts(GenTree* tree) - { - TreeNodeInfo& info = tree->gtLsraInfo; - info.srcCount = 0; - info.dstCount = 0; - - info.internalIntCount = 0; - info.internalFloatCount = 0; - } - inline bool isLocalDefUse(GenTree* tree) { return tree->gtLsraInfo.isLocalDefUse; diff --git a/src/jit/lsraarm.cpp b/src/jit/lsraarm.cpp index 053b593e20..53da45b1cf 100644 --- a/src/jit/lsraarm.cpp +++ b/src/jit/lsraarm.cpp @@ -47,6 +47,7 @@ void Lowering::TreeNodeInfoInitReturn(GenTree* tree) Compiler* compiler = comp; GenTree* op1 = tree->gtGetOp1(); + assert(info->dstCount == 0); if (tree->TypeGet() == TYP_LONG) { assert((op1->OperGet() == GT_LONG) && op1->isContained()); @@ -55,14 +56,12 @@ void Lowering::TreeNodeInfoInitReturn(GenTree* tree) info->srcCount = 2; loVal->gtLsraInfo.setSrcCandidates(l, RBM_LNGRET_LO); hiVal->gtLsraInfo.setSrcCandidates(l, RBM_LNGRET_HI); - info->dstCount = 0; } else { regMaskTP useCandidates = RBM_NONE; info->srcCount = ((tree->TypeGet() == TYP_VOID) || op1->isContained()) ? 0 : 1; - info->dstCount = 0; if (varTypeIsStruct(tree)) { @@ -114,7 +113,7 @@ void Lowering::TreeNodeInfoInitLclHeap(GenTree* tree) LinearScan* l = m_lsra; Compiler* compiler = comp; - info->dstCount = 1; + assert(info->dstCount == 1); // Need a variable number of temp regs (see genLclHeap() in codegenarm.cpp): // Here '-' means don't care. @@ -218,8 +217,15 @@ void Lowering::TreeNodeInfoInit(GenTree* tree) TreeNodeInfo* info = &(tree->gtLsraInfo); RegisterType registerType = TypeGet(tree); - JITDUMP("TreeNodeInfoInit for: "); - DISPNODE(tree); + if (tree->isContained()) + { + info->dstCount = 0; + assert(info->srcCount == 0); + return; + } + + // Set the default dstCount. This may be modified below. + info->dstCount = tree->IsValue() ? 1 : 0; switch (tree->OperGet()) { @@ -238,11 +244,11 @@ void Lowering::TreeNodeInfoInit(GenTree* tree) info->srcCount = 0; if (tree->TypeGet() != TYP_VOID && tree->gtOp.gtOp1 == nullptr) { - info->dstCount = 1; + assert(info->dstCount == 1); } else { - info->dstCount = 0; + assert(info->dstCount == 0); } break; @@ -259,7 +265,7 @@ void Lowering::TreeNodeInfoInit(GenTree* tree) case CORINFO_INTRINSIC_Abs: case CORINFO_INTRINSIC_Sqrt: info->srcCount = 1; - info->dstCount = 1; + assert(info->dstCount == 1); break; default: NYI_ARM("Lowering::TreeNodeInfoInit for GT_INTRINSIC"); @@ -272,7 +278,7 @@ void Lowering::TreeNodeInfoInit(GenTree* tree) { ContainCheckCast(tree->AsCast()); info->srcCount = 1; - info->dstCount = 1; + assert(info->dstCount == 1); // Non-overflow casts to/from float/double are done using SSE2 instructions // and that allow the source operand to be either a reg or memop. 
Given the @@ -355,31 +361,29 @@ void Lowering::TreeNodeInfoInit(GenTree* tree) case GT_JTRUE: info->srcCount = 0; - info->dstCount = 0; - l->clearDstCount(tree->gtOp.gtOp1); + assert(info->dstCount == 0); break; case GT_JMP: info->srcCount = 0; - info->dstCount = 0; + assert(info->dstCount == 0); break; case GT_SWITCH: // This should never occur since switch nodes must not be visible at this // point in the JIT. info->srcCount = 0; - info->dstCount = 0; // To avoid getting uninit errors. noway_assert(!"Switch must be lowered at this point"); break; case GT_JMPTABLE: info->srcCount = 0; - info->dstCount = 1; + assert(info->dstCount == 1); break; case GT_SWITCH_TABLE: info->srcCount = 2; - info->dstCount = 0; + assert(info->dstCount == 0); break; case GT_ASG: @@ -387,7 +391,6 @@ void Lowering::TreeNodeInfoInit(GenTree* tree) case GT_ASG_SUB: noway_assert(!"We should never hit any assignment operator in lowering"); info->srcCount = 0; - info->dstCount = 0; break; case GT_ADD_LO: @@ -406,7 +409,7 @@ void Lowering::TreeNodeInfoInit(GenTree* tree) assert(tree->gtOp.gtOp1->TypeGet() == tree->gtOp.gtOp2->TypeGet()); info->srcCount = 2; - info->dstCount = 1; + assert(info->dstCount == 1); break; } @@ -418,14 +421,14 @@ void Lowering::TreeNodeInfoInit(GenTree* tree) case GT_XOR: ContainCheckBinary(tree->AsOp()); info->srcCount = tree->gtOp.gtOp2->isContained() ? 1 : 2; - info->dstCount = 1; + assert(info->dstCount == 1); break; case GT_RETURNTRAP: // this just turns into a compare of its child with an int // + a conditional call info->srcCount = 1; - info->dstCount = 0; + assert(info->dstCount == 0); break; case GT_MUL: @@ -442,7 +445,7 @@ void Lowering::TreeNodeInfoInit(GenTree* tree) case GT_UDIV: { info->srcCount = 2; - info->dstCount = 1; + assert(info->dstCount == 1); } break; @@ -458,7 +461,7 @@ void Lowering::TreeNodeInfoInit(GenTree* tree) case GT_START_NONGC: case GT_PROF_HOOK: info->srcCount = 0; - info->dstCount = 0; + assert(info->dstCount == 0); break; case GT_LONG: @@ -473,12 +476,12 @@ void Lowering::TreeNodeInfoInit(GenTree* tree) info->srcCount = 0; } - info->dstCount = 0; + assert(info->dstCount == 0); break; case GT_CNS_DBL: info->srcCount = 0; - info->dstCount = 1; + assert(info->dstCount == 1); if (tree->TypeGet() == TYP_FLOAT) { // An int register for float constant @@ -499,18 +502,16 @@ void Lowering::TreeNodeInfoInit(GenTree* tree) break; case GT_RETFILT: + assert(info->dstCount == 0); if (tree->TypeGet() == TYP_VOID) { info->srcCount = 0; - info->dstCount = 0; } else { assert(tree->TypeGet() == TYP_INT); info->srcCount = 1; - info->dstCount = 0; - info->setSrcCandidates(l, RBM_INTRET); tree->gtOp.gtOp1->gtLsraInfo.setSrcCandidates(l, RBM_INTRET); } @@ -523,7 +524,7 @@ void Lowering::TreeNodeInfoInit(GenTree* tree) { // Consumes arrLen & index - has no result info->srcCount = 2; - info->dstCount = 0; + assert(info->dstCount == 0); } break; @@ -531,12 +532,12 @@ void Lowering::TreeNodeInfoInit(GenTree* tree) // These must have been lowered to GT_ARR_INDEX noway_assert(!"We should never see a GT_ARR_ELEM in lowering"); info->srcCount = 0; - info->dstCount = 0; + assert(info->dstCount == 0); break; case GT_ARR_INDEX: - info->srcCount = 2; - info->dstCount = 1; + info->srcCount = 2; + assert(info->dstCount == 1); info->internalIntCount = 1; info->isInternalRegDelayFree = true; @@ -550,7 +551,7 @@ void Lowering::TreeNodeInfoInit(GenTree* tree) ContainCheckArrOffset(tree->AsArrOffs()); // This consumes the offset, if any, the arrObj and the effective index, // and produces the 
flattened offset for this dimension. - info->dstCount = 1; + assert(info->dstCount == 1); if (tree->gtArrOffs.gtOffset->isContained()) { @@ -580,7 +581,7 @@ void Lowering::TreeNodeInfoInit(GenTree* tree) { info->srcCount++; } - info->dstCount = 1; + assert(info->dstCount == 1); // An internal register may be needed too; the logic here should be in sync with the // genLeaInstruction()'s requirements for a such register. @@ -605,12 +606,12 @@ void Lowering::TreeNodeInfoInit(GenTree* tree) case GT_NEG: info->srcCount = 1; - info->dstCount = 1; + assert(info->dstCount == 1); break; case GT_NOT: info->srcCount = 1; - info->dstCount = 1; + assert(info->dstCount == 1); break; case GT_LSH: @@ -633,8 +634,8 @@ void Lowering::TreeNodeInfoInit(GenTree* tree) break; case GT_CKFINITE: - info->srcCount = 1; - info->dstCount = 1; + info->srcCount = 1; + assert(info->dstCount == 1); info->internalIntCount = 1; break; @@ -651,8 +652,7 @@ void Lowering::TreeNodeInfoInit(GenTree* tree) case GT_INIT_VAL: // Always a passthrough of its child's value. - info->srcCount = 0; - info->dstCount = 0; + assert(!"INIT_VAL should always be contained"); break; case GT_LCLHEAP: @@ -661,8 +661,8 @@ void Lowering::TreeNodeInfoInit(GenTree* tree) case GT_STOREIND: { - info->dstCount = 0; - GenTree* src = tree->gtOp.gtOp2; + assert(info->dstCount == 0); + GenTree* src = tree->gtOp.gtOp2; if (compiler->codeGen->gcInfo.gcIsWriteBarrierAsgNode(tree)) { @@ -678,7 +678,7 @@ void Lowering::TreeNodeInfoInit(GenTree* tree) break; case GT_NULLCHECK: - info->dstCount = 0; + assert(info->dstCount == 0); info->srcCount = 1; info->isLocalDefUse = true; // null check is an indirection on an addr @@ -686,14 +686,14 @@ void Lowering::TreeNodeInfoInit(GenTree* tree) break; case GT_IND: - info->dstCount = 1; + assert(info->dstCount == 1); info->srcCount = 1; TreeNodeInfoInitIndir(tree->AsIndir()); break; case GT_CATCH_ARG: info->srcCount = 0; - info->dstCount = 1; + assert(info->dstCount == 1); info->setDstCandidates(l, RBM_EXCEPTION_OBJECT); break; @@ -704,14 +704,13 @@ void Lowering::TreeNodeInfoInit(GenTree* tree) // It will produce a result of the type of the // node, and use an internal register for the address. - info->dstCount = 1; + assert(info->dstCount == 1); assert((tree->gtFlags & (GTF_VAR_DEF | GTF_VAR_USEASG)) == 0); info->internalIntCount = 1; break; case GT_COPY: info->srcCount = 1; - info->dstCount = 1; #ifdef ARM_SOFTFP // This case currently only occurs for double types that are passed as TYP_LONG; // actual long types would have been decomposed by now. @@ -719,23 +718,24 @@ void Lowering::TreeNodeInfoInit(GenTree* tree) { info->dstCount = 2; } + else +#else + { + assert(info->dstCount == 1); + } #endif + break; + + case GT_PUTARG_SPLIT: + TreeNodeInfoInitPutArgSplit(tree->AsPutArgSplit()); + break; + + case GT_PUTARG_STK: + TreeNodeInfoInitPutArgStk(tree->AsPutArgStk()); break; case GT_PUTARG_REG: -#ifdef ARM_SOFTFP - // This case currently only occurs for double types that are passed as TYP_LONG; - // actual long types would have been decomposed by now. 
- if (tree->TypeGet() == TYP_LONG) - { - info->srcCount = 2; - } - else -#endif - { - info->srcCount = 1; - } - info->dstCount = info->srcCount; + TreeNodeInfoInitPutArgReg(tree->AsUnOp()); break; default: @@ -755,15 +755,13 @@ void Lowering::TreeNodeInfoInit(GenTree* tree) case GT_CLS_VAR_ADDR: case GT_IL_OFFSET: case GT_CNS_INT: - case GT_PUTARG_STK: case GT_LABEL: case GT_PINVOKE_PROLOG: case GT_JCC: case GT_SETCC: case GT_MEMORYBARRIER: case GT_OBJ: - case GT_PUTARG_SPLIT: - info->dstCount = tree->IsValue() ? 1 : 0; + assert(info->dstCount == (tree->IsValue() ? 1 : 0)); if (kind & (GTK_CONST | GTK_LEAF)) { info->srcCount = 0; diff --git a/src/jit/lsraarm64.cpp b/src/jit/lsraarm64.cpp index 37391675b0..0e0c2c60c3 100644 --- a/src/jit/lsraarm64.cpp +++ b/src/jit/lsraarm64.cpp @@ -53,9 +53,15 @@ void Lowering::TreeNodeInfoInit(GenTree* tree) TreeNodeInfo* info = &(tree->gtLsraInfo); RegisterType registerType = TypeGet(tree); - JITDUMP("TreeNodeInfoInit for: "); - DISPNODE(tree); - JITDUMP("\n"); + if (tree->isContained()) + { + info->dstCount = 0; + assert(info->srcCount == 0); + return; + } + + // Set the default dstCount. This may be modified below. + info->dstCount = tree->IsValue() ? 1 : 0; switch (tree->OperGet()) { @@ -63,7 +69,6 @@ void Lowering::TreeNodeInfoInit(GenTree* tree) GenTree* op2; default: - info->dstCount = tree->IsValue() ? 1 : 0; if (kind & (GTK_CONST | GTK_LEAF)) { info->srcCount = 0; @@ -88,7 +93,7 @@ void Lowering::TreeNodeInfoInit(GenTree* tree) case GT_STORE_LCL_FLD: case GT_STORE_LCL_VAR: info->srcCount = 1; - info->dstCount = 0; + assert(info->dstCount == 0); TreeNodeInfoInitStoreLoc(tree->AsLclVarCommon()); break; @@ -99,12 +104,12 @@ void Lowering::TreeNodeInfoInit(GenTree* tree) case GT_START_NONGC: case GT_PROF_HOOK: info->srcCount = 0; - info->dstCount = 0; + assert(info->dstCount == 0); break; case GT_CNS_DBL: info->srcCount = 0; - info->dstCount = 1; + assert(info->dstCount == 1); { GenTreeDblCon* dblConst = tree->AsDblCon(); double constValue = dblConst->gtDblCon.gtDconVal; @@ -126,7 +131,7 @@ void Lowering::TreeNodeInfoInit(GenTree* tree) case GT_QMARK: case GT_COLON: info->srcCount = 0; - info->dstCount = 0; + assert(info->dstCount == 0); unreached(); break; @@ -138,14 +143,14 @@ void Lowering::TreeNodeInfoInit(GenTree* tree) if (tree->TypeGet() == TYP_VOID) { info->srcCount = 0; - info->dstCount = 0; + assert(info->dstCount == 0); } else { assert(tree->TypeGet() == TYP_INT); info->srcCount = 1; - info->dstCount = 0; + assert(info->dstCount == 0); info->setSrcCandidates(l, RBM_INTRET); tree->gtOp.gtOp1->gtLsraInfo.setSrcCandidates(l, RBM_INTRET); @@ -159,42 +164,40 @@ void Lowering::TreeNodeInfoInit(GenTree* tree) info->srcCount = 0; if (tree->TypeGet() != TYP_VOID && tree->gtOp.gtOp1 == nullptr) { - info->dstCount = 1; + assert(info->dstCount == 1); } else { - info->dstCount = 0; + assert(info->dstCount == 0); } break; case GT_JTRUE: info->srcCount = 0; - info->dstCount = 0; - l->clearDstCount(tree->gtOp.gtOp1); + assert(info->dstCount == 0); break; case GT_JMP: info->srcCount = 0; - info->dstCount = 0; + assert(info->dstCount == 0); break; case GT_SWITCH: // This should never occur since switch nodes must not be visible at this // point in the JIT. info->srcCount = 0; - info->dstCount = 0; // To avoid getting uninit errors. 
noway_assert(!"Switch must be lowered at this point"); break; case GT_JMPTABLE: info->srcCount = 0; - info->dstCount = 1; + assert(info->dstCount == 1); break; case GT_SWITCH_TABLE: info->srcCount = 2; info->internalIntCount = 1; - info->dstCount = 0; + assert(info->dstCount == 0); break; case GT_ASG: @@ -202,7 +205,6 @@ void Lowering::TreeNodeInfoInit(GenTree* tree) case GT_ASG_SUB: noway_assert(!"We should never hit any assignment operator in lowering"); info->srcCount = 0; - info->dstCount = 0; break; case GT_ADD: @@ -217,9 +219,6 @@ void Lowering::TreeNodeInfoInit(GenTree* tree) assert(tree->gtOp.gtOp1->TypeGet() == tree->gtOp.gtOp2->TypeGet()); info->srcCount = 2; - info->dstCount = 1; - - break; } __fallthrough; @@ -227,16 +226,15 @@ void Lowering::TreeNodeInfoInit(GenTree* tree) case GT_AND: case GT_OR: case GT_XOR: - ContainCheckBinary(tree->AsOp()); info->srcCount = tree->gtOp.gtOp2->isContained() ? 1 : 2; - info->dstCount = 1; + assert(info->dstCount == 1); break; case GT_RETURNTRAP: // this just turns into a compare of its child with an int // + a conditional call info->srcCount = 1; - info->dstCount = 0; + assert(info->dstCount == 0); break; case GT_MOD: @@ -259,7 +257,7 @@ void Lowering::TreeNodeInfoInit(GenTree* tree) case GT_UDIV: { info->srcCount = 2; - info->dstCount = 1; + assert(info->dstCount == 1); } break; @@ -277,7 +275,7 @@ void Lowering::TreeNodeInfoInit(GenTree* tree) assert(op1->TypeGet() == tree->TypeGet()); info->srcCount = 1; - info->dstCount = 1; + assert(info->dstCount == 1); } break; @@ -294,7 +292,7 @@ void Lowering::TreeNodeInfoInit(GenTree* tree) // see CodeGen::genIntToIntCast() info->srcCount = 1; - info->dstCount = 1; + assert(info->dstCount == 1); // Non-overflow casts to/from float/double are done using SSE2 instructions // and that allow the source operand to be either a reg or memop. Given the @@ -347,12 +345,12 @@ void Lowering::TreeNodeInfoInit(GenTree* tree) case GT_NEG: info->srcCount = 1; - info->dstCount = 1; + assert(info->dstCount == 1); break; case GT_NOT: info->srcCount = 1; - info->dstCount = 1; + assert(info->dstCount == 1); break; case GT_LSH: @@ -372,14 +370,14 @@ void Lowering::TreeNodeInfoInit(GenTree* tree) break; case GT_CKFINITE: - info->srcCount = 1; - info->dstCount = 1; + info->srcCount = 1; + assert(info->dstCount == 1); info->internalIntCount = 1; break; case GT_CMPXCHG: info->srcCount = 3; - info->dstCount = 1; + assert(info->dstCount == 1); // TODO-ARM64-NYI NYI("CMPXCHG"); @@ -388,7 +386,15 @@ void Lowering::TreeNodeInfoInit(GenTree* tree) case GT_LOCKADD: ContainCheckBinary(tree->AsOp()); info->srcCount = tree->gtOp.gtOp2->isContained() ? 1 : 2; - info->dstCount = 1; + assert(info->dstCount == 1); + break; + + case GT_PUTARG_STK: + TreeNodeInfoInitPutArgStk(tree->AsPutArgStk()); + break; + + case GT_PUTARG_REG: + TreeNodeInfoInitPutArgReg(tree->AsUnOp()); break; case GT_CALL: @@ -402,7 +408,7 @@ void Lowering::TreeNodeInfoInit(GenTree* tree) assert(!l->isCandidateLocalRef(child)); MakeSrcContained(tree, child); info->srcCount = 0; - info->dstCount = 1; + assert(info->dstCount == 1); } break; @@ -411,7 +417,6 @@ void Lowering::TreeNodeInfoInit(GenTree* tree) // These should all be eliminated prior to Lowering. assert(!"Non-store block node in Lowering"); info->srcCount = 0; - info->dstCount = 0; break; case GT_STORE_BLK: @@ -423,14 +428,13 @@ void Lowering::TreeNodeInfoInit(GenTree* tree) case GT_INIT_VAL: // Always a passthrough of its child's value. 
- info->srcCount = 0; - info->dstCount = 0; + assert(!"INIT_VAL should always be contained"); break; case GT_LCLHEAP: { ContainCheckLclHeap(tree->AsOp()); - info->dstCount = 1; + assert(info->dstCount == 1); // Need a variable number of temp regs (see genLclHeap() in codegenamd64.cpp): // Here '-' means don't care. @@ -536,7 +540,7 @@ void Lowering::TreeNodeInfoInit(GenTree* tree) GenTreeBoundsChk* node = tree->AsBoundsChk(); // Consumes arrLen & index - has no result info->srcCount = 2; - info->dstCount = 0; + assert(info->dstCount == 0); GenTree* intCns = nullptr; GenTree* other = nullptr; @@ -555,12 +559,12 @@ void Lowering::TreeNodeInfoInit(GenTree* tree) // These must have been lowered to GT_ARR_INDEX noway_assert(!"We should never see a GT_ARR_ELEM in lowering"); info->srcCount = 0; - info->dstCount = 0; + assert(info->dstCount == 0); break; case GT_ARR_INDEX: - info->srcCount = 2; - info->dstCount = 1; + info->srcCount = 2; + assert(info->dstCount == 1); info->internalIntCount = 1; info->isInternalRegDelayFree = true; @@ -574,8 +578,8 @@ void Lowering::TreeNodeInfoInit(GenTree* tree) ContainCheckArrOffset(tree->AsArrOffs()); // This consumes the offset, if any, the arrObj and the effective index, // and produces the flattened offset for this dimension. - info->srcCount = tree->gtArrOffs.gtOffset->isContained() ? 2 : 3; - info->dstCount = 1; + info->srcCount = tree->gtArrOffs.gtOffset->isContained() ? 2 : 3; + assert(info->dstCount == 1); info->internalIntCount = 1; break; @@ -587,8 +591,7 @@ void Lowering::TreeNodeInfoInit(GenTree* tree) GenTree* index = lea->Index(); unsigned cns = lea->gtOffset; - // This LEA is instantiating an address, - // so we set up the srcCount and dstCount here. + // This LEA is instantiating an address, so we set up the srcCount here. info->srcCount = 0; if (base != nullptr) { @@ -598,7 +601,7 @@ void Lowering::TreeNodeInfoInit(GenTree* tree) { info->srcCount++; } - info->dstCount = 1; + assert(info->dstCount == 1); // On ARM64 we may need a single internal register // (when both conditions are true then we still only need a single internal register) @@ -617,7 +620,7 @@ void Lowering::TreeNodeInfoInit(GenTree* tree) case GT_STOREIND: { - info->dstCount = 0; + assert(info->dstCount == 0); if (compiler->codeGen->gcInfo.gcIsWriteBarrierAsgNode(tree)) { @@ -635,7 +638,7 @@ void Lowering::TreeNodeInfoInit(GenTree* tree) break; case GT_NULLCHECK: - info->dstCount = 0; + assert(info->dstCount == 0); info->srcCount = 1; info->isLocalDefUse = true; // null check is an indirection on an addr @@ -643,14 +646,14 @@ void Lowering::TreeNodeInfoInit(GenTree* tree) break; case GT_IND: - info->dstCount = 1; + assert(info->dstCount == 1); info->srcCount = 1; TreeNodeInfoInitIndir(tree->AsIndir()); break; case GT_CATCH_ARG: info->srcCount = 0; - info->dstCount = 1; + assert(info->dstCount == 1); info->setDstCandidates(l, RBM_EXCEPTION_OBJECT); break; @@ -661,7 +664,7 @@ void Lowering::TreeNodeInfoInit(GenTree* tree) // It will produce a result of the type of the // node, and use an internal register for the address. - info->dstCount = 1; + assert(info->dstCount == 1); assert((tree->gtFlags & (GTF_VAR_DEF | GTF_VAR_USEASG)) == 0); info->internalIntCount = 1; break; @@ -692,7 +695,7 @@ void Lowering::TreeNodeInfoInitReturn(GenTree* tree) regMaskTP useCandidates = RBM_NONE; info->srcCount = ((tree->TypeGet() == TYP_VOID) || op1->isContained()) ? 
0 : 1; - info->dstCount = 0; + assert(info->dstCount == 0); if (varTypeIsStruct(tree)) { diff --git a/src/jit/lsraarmarch.cpp b/src/jit/lsraarmarch.cpp index 2aca40f982..08fb4ba409 100644 --- a/src/jit/lsraarmarch.cpp +++ b/src/jit/lsraarmarch.cpp @@ -41,11 +41,10 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX // void Lowering::TreeNodeInfoInitStoreLoc(GenTreeLclVarCommon* storeLoc) { - ContainCheckStoreLoc(storeLoc); TreeNodeInfo* info = &(storeLoc->gtLsraInfo); GenTree* op1 = storeLoc->gtGetOp1(); - info->dstCount = 0; + assert(info->dstCount == 0); #ifdef _TARGET_ARM_ if (varTypeIsLong(op1)) { @@ -91,12 +90,17 @@ void Lowering::TreeNodeInfoInitStoreLoc(GenTreeLclVarCommon* storeLoc) // void Lowering::TreeNodeInfoInitCmp(GenTreePtr tree) { - ContainCheckCompare(tree->AsOp()); - TreeNodeInfo* info = &(tree->gtLsraInfo); info->srcCount = tree->gtOp.gtOp2->isContained() ? 1 : 2; - info->dstCount = tree->OperIs(GT_CMP) ? 0 : 1; + if (info->isNoRegCompare) + { + info->dstCount = 0; + } + else + { + assert((info->dstCount == 1) || tree->OperIs(GT_CMP)); + } } void Lowering::TreeNodeInfoInitGCWriteBarrier(GenTree* tree) @@ -159,8 +163,6 @@ void Lowering::TreeNodeInfoInitGCWriteBarrier(GenTree* tree) // void Lowering::TreeNodeInfoInitIndir(GenTreeIndir* indirTree) { - ContainCheckIndir(indirTree); - // If this is the rhs of a block copy (i.e. non-enregisterable struct), // it has no register requirements. if (indirTree->TypeGet() == TYP_STRUCT) @@ -284,16 +286,14 @@ void Lowering::TreeNodeInfoInitShiftRotate(GenTree* tree) // Return Value: // None. // -void Lowering::TreeNodeInfoInitPutArgReg( - GenTreeUnOp* node, regNumber argReg, TreeNodeInfo& info, bool isVarArgs, bool* callHasFloatRegArgs) +void Lowering::TreeNodeInfoInitPutArgReg(GenTreeUnOp* node) { assert(node != nullptr); assert(node->OperIsPutArgReg()); + node->gtLsraInfo.srcCount = 1; + regNumber argReg = node->gtRegNum; assert(argReg != REG_NA); - // Each register argument corresponds to one source. - info.srcCount++; - // Set the register requirements for the node. regMaskTP argMask = genRegMask(argReg); #ifdef ARM_SOFTFP @@ -301,7 +301,7 @@ void Lowering::TreeNodeInfoInitPutArgReg( // The actual `long` types must have been transformed as a field list with two fields. if (node->TypeGet() == TYP_LONG) { - info.srcCount++; + node->gtLsraInfo.srcCount++; assert(genRegArgNext(argReg) == REG_NEXT(argReg)); argMask |= genRegMask(REG_NEXT(argReg)); } @@ -312,8 +312,37 @@ void Lowering::TreeNodeInfoInitPutArgReg( // To avoid redundant moves, have the argument operand computed in the // register in which the argument is passed to the call. node->gtOp.gtOp1->gtLsraInfo.setSrcCandidates(m_lsra, m_lsra->getUseCandidates(node)); +} + +//------------------------------------------------------------------------ +// HandleFloatVarArgs: Handle additional register requirements for a varargs call +// +// Arguments: +// call - The call node of interest +// argNode - The current argument +// +// Return Value: +// None. +// +// Notes: +// In the case of a varargs call, the ABI dictates that if we have floating point args, +// we must pass the enregistered arguments in both the integer and floating point registers. +// Since the integer register is not associated with the arg node, we will reserve it as +// an internal register on the call so that it is not used during the evaluation of the call node +// (e.g. for the target). 
+void Lowering::HandleFloatVarArgs(GenTreeCall* call, GenTree* argNode, bool* callHasFloatRegArgs) +{ +#if FEATURE_VARARG + if (call->IsVarargs() && varTypeIsFloating(argNode)) + { + *callHasFloatRegArgs = true; - *callHasFloatRegArgs |= varTypeIsFloating(node->TypeGet()); + regNumber argReg = argNode->gtRegNum; + regNumber targetReg = comp->getCallArgIntRegister(argReg); + call->gtLsraInfo.setInternalIntCount(call->gtLsraInfo.internalIntCount + 1); + call->gtLsraInfo.addInternalCandidates(m_lsra, genRegMask(targetReg)); + } +#endif // FEATURE_VARARG } //------------------------------------------------------------------------ @@ -422,25 +451,8 @@ void Lowering::TreeNodeInfoInitCall(GenTreeCall* call) info->setDstCandidates(l, RBM_INTRET); } - // If there is an explicit this pointer, we don't want that node to produce anything - // as it is redundant - if (call->gtCallObjp != nullptr) - { - GenTreePtr thisPtrNode = call->gtCallObjp; - - if (thisPtrNode->gtOper == GT_PUTARG_REG) - { - l->clearOperandCounts(thisPtrNode); - thisPtrNode->SetContained(); - l->clearDstCount(thisPtrNode->gtOp.gtOp1); - } - else - { - l->clearDstCount(thisPtrNode); - } - } - // First, count reg args + // Each register argument corresponds to one source. bool callHasFloatRegArgs = false; for (GenTreePtr list = call->gtCallLateArgs; list; list = list->MoveNext()) @@ -449,29 +461,62 @@ void Lowering::TreeNodeInfoInitCall(GenTreeCall* call) GenTreePtr argNode = list->Current(); +#ifdef DEBUG + // During TreeNodeInfoInit, we only use the ArgTabEntry for validation, + // as getting it is rather expensive. fgArgTabEntryPtr curArgTabEntry = compiler->gtArgEntryByNode(call, argNode); + regNumber argReg = curArgTabEntry->regNum; assert(curArgTabEntry); +#endif - if (curArgTabEntry->regNum == REG_STK) + if (argNode->gtOper == GT_PUTARG_STK) { // late arg that is not passed in a register - assert(argNode->gtOper == GT_PUTARG_STK); + assert(curArgTabEntry->regNum == REG_STK); + GenTree* putArgChild = argNode->gtGetOp1(); + if (!varTypeIsStruct(putArgChild) && !putArgChild->OperIs(GT_FIELD_LIST)) + { +#ifdef ARM_SOFTFP + // The `double` types have been transformed to `long` on armel, while the actual longs + // have been decomposed. + const bool isDouble = putArgChild->TypeGet() == TYP_LONG; + if (isDouble) + { + argNode->gtLsraInfo.srcCount = 2; + } +#endif // ARM_SOFT_FP + +#ifdef DEBUG +// Validate the slot count for this arg. 
+#ifdef _TARGET_ARM_ +#ifndef ARM_SOFTFP + const bool isDouble = (curArgTabEntry->numSlots == 2) && (putArgChild->TypeGet() == TYP_DOUBLE); +#endif // !ARM_SOFTFP + + // We must not have a multi-reg struct; double uses 2 slots and isn't a multi-reg struct + assert((curArgTabEntry->numSlots == 1) || isDouble); - TreeNodeInfoInitPutArgStk(argNode->AsPutArgStk(), curArgTabEntry); +#else // !_TARGET_ARM_ + // We must not have a multi-reg struct + assert(curArgTabEntry->numSlots == 1); +#endif // !_TARGET_ARM_ +#endif + } continue; } // A GT_FIELD_LIST has a TYP_VOID, but is used to represent a multireg struct if (argNode->OperGet() == GT_FIELD_LIST) { - argNode->SetContained(); + assert(argNode->isContained()); // There could be up to 2-4 PUTARG_REGs in the list (3 or 4 can only occur for HFAs) - regNumber argReg = curArgTabEntry->regNum; for (GenTreeFieldList* entry = argNode->AsFieldList(); entry != nullptr; entry = entry->Rest()) { - TreeNodeInfoInitPutArgReg(entry->Current()->AsUnOp(), argReg, *info, false, &callHasFloatRegArgs); - + info->srcCount++; +#ifdef DEBUG + assert(entry->Current()->OperIs(GT_PUTARG_REG)); + assert(entry->Current()->gtRegNum == argReg); // Update argReg for the next putarg_reg (if any) argReg = genRegArgNext(argReg); @@ -482,18 +527,21 @@ void Lowering::TreeNodeInfoInitCall(GenTreeCall* call) argReg = genRegArgNext(argReg); } #endif // _TARGET_ARM_ +#endif } } #ifdef _TARGET_ARM_ else if (argNode->OperGet() == GT_PUTARG_SPLIT) { fgArgTabEntryPtr curArgTabEntry = compiler->gtArgEntryByNode(call, argNode); - TreeNodeInfoInitPutArgSplit(argNode->AsPutArgSplit(), *info, curArgTabEntry); } #endif else { - TreeNodeInfoInitPutArgReg(argNode->AsUnOp(), curArgTabEntry->regNum, *info, false, &callHasFloatRegArgs); + assert(argNode->OperIs(GT_PUTARG_REG)); + assert(argNode->gtRegNum == argReg); + HandleFloatVarArgs(call, argNode, &callHasFloatRegArgs); + info->srcCount++; } } @@ -518,25 +566,21 @@ void Lowering::TreeNodeInfoInitCall(GenTreeCall* call) assert(curArgTabEntry); assert(curArgTabEntry->regNum == REG_STK); - - TreeNodeInfoInitPutArgStk(arg->AsPutArgStk(), curArgTabEntry); } #ifdef _TARGET_ARM_ else if (arg->OperGet() == GT_PUTARG_SPLIT) { +#ifdef DEBUG fgArgTabEntryPtr curArgTabEntry = compiler->gtArgEntryByNode(call, arg); - TreeNodeInfoInitPutArgSplit(arg->AsPutArgSplit(), *info, curArgTabEntry); + assert(arg->AsPutArgSplit()->gtNumRegs == curArgTabEntry->numRegs); +#endif + info->srcCount += arg->gtLsraInfo.dstCount; } #endif else { TreeNodeInfo* argInfo = &(arg->gtLsraInfo); - if (argInfo->dstCount != 0) - { - argInfo->isLocalDefUse = true; - } - - argInfo->dstCount = 0; + assert((argInfo->dstCount == 0) || (argInfo->isLocalDefUse)); } } args = args->gtOp.gtOp2; @@ -576,16 +620,13 @@ void Lowering::TreeNodeInfoInitCall(GenTreeCall* call) // Notes: // Set the child node(s) to be contained when we have a multireg arg // -void Lowering::TreeNodeInfoInitPutArgStk(GenTreePutArgStk* argNode, fgArgTabEntryPtr info) +void Lowering::TreeNodeInfoInitPutArgStk(GenTreePutArgStk* argNode) { assert(argNode->gtOper == GT_PUTARG_STK); GenTreePtr putArgChild = argNode->gtOp.gtOp1; - // Initialize 'argNode' as not contained, as this is both the default case - // and how MakeSrcContained expects to find things setup. 
- // - argNode->gtLsraInfo.srcCount = 1; + argNode->gtLsraInfo.srcCount = 0; argNode->gtLsraInfo.dstCount = 0; // Do we have a TYP_STRUCT argument (or a GT_FIELD_LIST), if so it must be a multireg pass-by-value struct @@ -595,9 +636,12 @@ void Lowering::TreeNodeInfoInitPutArgStk(GenTreePutArgStk* argNode, fgArgTabEntr if (putArgChild->OperGet() == GT_FIELD_LIST) { + assert(putArgChild->isContained()); // We consume all of the items in the GT_FIELD_LIST - argNode->gtLsraInfo.srcCount = info->numSlots; - putArgChild->SetContained(); + for (GenTreeFieldList* current = putArgChild->AsFieldList(); current != nullptr; current = current->Rest()) + { + argNode->gtLsraInfo.srcCount++; + } } else { @@ -617,8 +661,7 @@ void Lowering::TreeNodeInfoInitPutArgStk(GenTreePutArgStk* argNode, fgArgTabEntr // We will generate all of the code for the GT_PUTARG_STK, the GT_OBJ and the GT_LCL_VAR_ADDR // as one contained operation // - MakeSrcContained(putArgChild, objChild); - putArgChild->gtLsraInfo.srcCount--; + assert(objChild->isContained()); } } @@ -626,31 +669,24 @@ void Lowering::TreeNodeInfoInitPutArgStk(GenTreePutArgStk* argNode, fgArgTabEntr // as one contained operation // argNode->gtLsraInfo.srcCount = putArgChild->gtLsraInfo.srcCount; - MakeSrcContained(argNode, putArgChild); + assert(putArgChild->isContained()); } } else { -#ifdef _TARGET_ARM_ - -#ifdef ARM_SOFTFP - // The `double` types have been transformed to `long` on armel. - const bool isDouble = (info->numSlots == 2) && (putArgChild->TypeGet() == TYP_LONG); +#if defined(_TARGET_ARM_) && defined(ARM_SOFTFP) + // The `double` types have been transformed to `long` on armel, + // while the actual long types have been decomposed. + const bool isDouble = (putArgChild->TypeGet() == TYP_LONG); if (isDouble) { argNode->gtLsraInfo.srcCount = 2; } -#else // !ARM_SOFTFP - const bool isDouble = (info->numSlots == 2) && (putArgChild->TypeGet() == TYP_DOUBLE); -#endif // !ARM_SOFTFP - - // We must not have a multi-reg struct; double uses 2 slots and isn't a multi-reg struct - assert((info->numSlots == 1) || isDouble); - -#else // !_TARGET_ARM_ - // We must not have a multi-reg struct - assert(info->numSlots == 1); -#endif // !_TARGET_ARM_ + else +#endif // defined(_TARGET_ARM_) && defined(ARM_SOFTFP) + { + argNode->gtLsraInfo.srcCount = 1; + } } } @@ -667,19 +703,18 @@ void Lowering::TreeNodeInfoInitPutArgStk(GenTreePutArgStk* argNode, fgArgTabEntr // Notes: // Set the child node(s) to be contained // -void Lowering::TreeNodeInfoInitPutArgSplit(GenTreePutArgSplit* argNode, TreeNodeInfo& info, fgArgTabEntryPtr argInfo) +void Lowering::TreeNodeInfoInitPutArgSplit(GenTreePutArgSplit* argNode) { assert(argNode->gtOper == GT_PUTARG_SPLIT); GenTreePtr putArgChild = argNode->gtOp.gtOp1; // Registers for split argument corresponds to source - argNode->gtLsraInfo.dstCount = argInfo->numRegs; - info.srcCount += argInfo->numRegs; + argNode->gtLsraInfo.dstCount = argNode->gtNumRegs; - regNumber argReg = argInfo->regNum; + regNumber argReg = argNode->gtRegNum; regMaskTP argMask = RBM_NONE; - for (unsigned i = 0; i < argInfo->numRegs; i++) + for (unsigned i = 0; i < argNode->gtNumRegs; i++) { argMask |= genRegMask((regNumber)((unsigned)argReg + i)); } @@ -692,29 +727,32 @@ void Lowering::TreeNodeInfoInitPutArgSplit(GenTreePutArgSplit* argNode, TreeNode // 1. Consume all of the items in the GT_FIELD_LIST (source) // 2. 
Store to target slot and move to target registers (destination) from source // - argNode->gtLsraInfo.srcCount = argInfo->numRegs + argInfo->numSlots; + unsigned slotCount = 0; // To avoid redundant moves, have the argument operand computed in the // register in which the argument is passed to the call. GenTreeFieldList* fieldListPtr = putArgChild->AsFieldList(); for (unsigned idx = 0; fieldListPtr != nullptr; fieldListPtr = fieldListPtr->Rest(), idx++) { - if (idx < argInfo->numRegs) + if (idx < argNode->gtNumRegs) { GenTreePtr node = fieldListPtr->gtGetOp1(); node->gtLsraInfo.setSrcCandidates(m_lsra, genRegMask((regNumber)((unsigned)argReg + idx))); } + else + { + slotCount++; + } } - - putArgChild->SetContained(); + argNode->gtLsraInfo.srcCount = argNode->gtNumRegs + slotCount; + assert(putArgChild->isContained()); } else { assert(putArgChild->TypeGet() == TYP_STRUCT); assert(putArgChild->OperGet() == GT_OBJ); - // We could use a ldr/str sequence so we need a internal register - argNode->gtLsraInfo.srcCount = 1; + // We can use a ldr/str sequence so we need an internal register argNode->gtLsraInfo.internalIntCount = 1; regMaskTP internalMask = RBM_ALLINT & ~argMask; argNode->gtLsraInfo.setInternalCandidates(m_lsra, internalMask); @@ -725,11 +763,13 @@ void Lowering::TreeNodeInfoInitPutArgSplit(GenTreePutArgSplit* argNode, TreeNode // We will generate all of the code for the GT_PUTARG_SPLIT, the GT_OBJ and the GT_LCL_VAR_ADDR // as one contained operation // - MakeSrcContained(putArgChild, objChild); - putArgChild->gtLsraInfo.srcCount--; + assert(objChild->isContained()); + } + else + { + argNode->gtLsraInfo.srcCount = GetIndirSourceCount(putArgChild->AsIndir()); } - argNode->gtLsraInfo.srcCount = putArgChild->gtLsraInfo.srcCount; - MakeSrcContained(argNode, putArgChild); + assert(putArgChild->isContained()); } } #endif // _TARGET_ARM_ @@ -753,43 +793,17 @@ void Lowering::TreeNodeInfoInitBlockStore(GenTreeBlk* blkNode) // Sources are dest address and initVal or source. // We may require an additional source or temp register for the size. - blkNode->gtLsraInfo.srcCount = 2; - blkNode->gtLsraInfo.dstCount = 0; - GenTreePtr srcAddrOrFill = nullptr; - bool isInitBlk = blkNode->OperIsInitBlkOp(); - - if (!isInitBlk) - { - // CopyObj or CopyBlk - if (source->gtOper == GT_IND) - { - srcAddrOrFill = blkNode->Data()->gtGetOp1(); - // We're effectively setting source as contained, but can't call MakeSrcContained, because the - // "inheritance" of the srcCount is to a child not a parent - it would "just work" but could be misleading. - // If srcAddr is already non-contained, we don't need to change it. 
- if (srcAddrOrFill->gtLsraInfo.getDstCount() == 0) - { - srcAddrOrFill->gtLsraInfo.setDstCount(1); - srcAddrOrFill->gtLsraInfo.setSrcCount(source->gtLsraInfo.srcCount); - } - m_lsra->clearOperandCounts(source); - source->SetContained(); - source->AsIndir()->Addr()->ClearContained(); - } - else if (!source->IsMultiRegCall() && !source->OperIsSIMD()) - { - assert(source->IsLocal()); - MakeSrcContained(blkNode, source); - blkNode->gtLsraInfo.srcCount--; - } - } + blkNode->gtLsraInfo.srcCount = GetOperandSourceCount(dstAddr); + assert(blkNode->gtLsraInfo.dstCount == 0); + GenTreePtr srcAddrOrFill = nullptr; + bool isInitBlk = blkNode->OperIsInitBlkOp(); if (isInitBlk) { GenTreePtr initVal = source; if (initVal->OperIsInitVal()) { - initVal->SetContained(); + assert(initVal->isContained()); initVal = initVal->gtGetOp1(); } srcAddrOrFill = initVal; @@ -801,22 +815,18 @@ void Lowering::TreeNodeInfoInitBlockStore(GenTreeBlk* blkNode) // code sequences to improve CQ. // For reference see the code in lsraxarch.cpp. NYI_ARM("initblk loop unrolling is currently not implemented."); - -#ifdef _TARGET_ARM64_ - // No additional temporaries required - ssize_t fill = initVal->gtIntCon.gtIconVal & 0xFF; - if (fill == 0) + if (!initVal->isContained()) { - MakeSrcContained(blkNode, source); - blkNode->gtLsraInfo.srcCount--; + blkNode->gtLsraInfo.srcCount++; } -#endif // _TARGET_ARM64_ } else { assert(blkNode->gtBlkOpKind == GenTreeBlk::BlkOpKindHelper); // The helper follows the regular ABI. dstAddr->gtLsraInfo.setSrcCandidates(l, RBM_ARG_0); + assert(!initVal->isContained()); + blkNode->gtLsraInfo.srcCount++; initVal->gtLsraInfo.setSrcCandidates(l, RBM_ARG_1); if (size != 0) { @@ -838,6 +848,10 @@ void Lowering::TreeNodeInfoInitBlockStore(GenTreeBlk* blkNode) { // CopyObj or CopyBlk // Sources are src and dest and size if not constant. 
+ if (source->gtOper == GT_IND) + { + srcAddrOrFill = blkNode->Data()->gtGetOp1(); + } if (blkNode->OperGet() == GT_STORE_OBJ) { // CopyObj @@ -913,7 +927,7 @@ void Lowering::TreeNodeInfoInitBlockStore(GenTreeBlk* blkNode) else { // The block size argument is a third argument to GT_STORE_DYN_BLK - noway_assert(blkNode->gtOper == GT_STORE_DYN_BLK); + assert(blkNode->gtOper == GT_STORE_DYN_BLK); blkNode->gtLsraInfo.setSrcCount(3); GenTree* blockSize = blkNode->AsDynBlk()->gtDynamicSize; blockSize->gtLsraInfo.setSrcCandidates(l, RBM_ARG_2); @@ -925,6 +939,7 @@ void Lowering::TreeNodeInfoInitBlockStore(GenTreeBlk* blkNode) blkNode->gtLsraInfo.setInternalCandidates(l, internalIntCandidates); } } + blkNode->gtLsraInfo.srcCount += GetOperandSourceCount(source); } } @@ -951,6 +966,12 @@ int Lowering::GetOperandSourceCount(GenTree* node) } #endif // !defined(_TARGET_64BIT_) + if (node->OperIsIndir()) + { + const unsigned srcCount = GetIndirSourceCount(node->AsIndir()); + return srcCount; + } + return 0; } diff --git a/src/jit/lsraxarch.cpp b/src/jit/lsraxarch.cpp index e90be05573..28d345ddfb 100644 --- a/src/jit/lsraxarch.cpp +++ b/src/jit/lsraxarch.cpp @@ -41,11 +41,9 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX // void Lowering::TreeNodeInfoInitStoreLoc(GenTreeLclVarCommon* storeLoc) { - ContainCheckStoreLoc(storeLoc); - TreeNodeInfo* info = &(storeLoc->gtLsraInfo); - info->dstCount = 0; - GenTree* op1 = storeLoc->gtGetOp1(); + assert(info->dstCount == 0); + GenTree* op1 = storeLoc->gtGetOp1(); #ifdef _TARGET_X86_ if (op1->OperGet() == GT_LONG) @@ -117,6 +115,17 @@ void Lowering::TreeNodeInfoInit(GenTree* tree) TreeNodeInfo* info = &(tree->gtLsraInfo); + if (tree->isContained()) + { + info->dstCount = 0; + assert(info->srcCount == 0); + TreeNodeInfoInitCheckByteable(tree); + return; + } + + // Set the default dstCount. This may be modified below. + info->dstCount = tree->IsValue() ? 1 : 0; + // floating type generates AVX instruction (vmovss etc.), set the flag SetContainsAVXFlags(varTypeIsFloating(tree->TypeGet())); switch (tree->OperGet()) @@ -128,10 +137,28 @@ void Lowering::TreeNodeInfoInit(GenTree* tree) TreeNodeInfoInitSimple(tree); break; - case GT_LCL_FLD: case GT_LCL_VAR: + // Because we do containment analysis before we redo dataflow and identify register + // candidates, the containment analysis only !lvDoNotEnregister to estimate register + // candidates. + // If there is a lclVar that is estimated to be register candidate but + // is not, if they were marked regOptional they should now be marked contained instead. + // TODO-XArch-CQ: When this is being called while RefPositions are being created, + // use lvLRACandidate here instead. + if (info->regOptional) + { + if (!compiler->lvaTable[tree->AsLclVarCommon()->gtLclNum].lvTracked || + compiler->lvaTable[tree->AsLclVarCommon()->gtLclNum].lvDoNotEnregister) + { + info->regOptional = false; + tree->SetContained(); + info->dstCount = 0; + } + } + __fallthrough; + + case GT_LCL_FLD: info->srcCount = 0; - info->dstCount = 1; #ifdef FEATURE_SIMD // Need an additional register to read upper 4 bytes of Vector3. 
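For context on the srcCount accounting in these hunks: once containment is decided during Lowering, a contained indirection no longer produces a value of its own, so the helpers used here (GetOperandSourceCount / GetIndirSourceCount) count only the registers its address expression actually needs. A rough sketch of that idea, assuming the JIT's GenTree types and accessors (illustrative only; the helper name below is made up and this is not the committed implementation):

// Sketch: a contained indirection consumes one source per non-contained
// address component instead of one source for itself.
int IndirSourceCountSketch(GenTreeIndir* indir)
{
    GenTree* addr = indir->Addr();
    if (!addr->isContained())
    {
        return 1; // the address is evaluated into a register
    }
    int count = 0;
    if (addr->OperGet() == GT_LEA)
    {
        GenTreeAddrMode* addrMode = addr->AsAddrMode();
        if (addrMode->HasBase())
        {
            count++; // base register
        }
        if (addrMode->HasIndex())
        {
            count++; // index register
        }
    }
    return count; // e.g. a contained GT_CLS_VAR_ADDR address needs no register
}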
@@ -158,12 +185,12 @@ void Lowering::TreeNodeInfoInit(GenTree* tree) case GT_START_NONGC: case GT_PROF_HOOK: info->srcCount = 0; - info->dstCount = 0; + assert(info->dstCount == 0); break; case GT_CNS_DBL: info->srcCount = 0; - info->dstCount = 1; + assert(info->dstCount == 1); break; #if !defined(_TARGET_64BIT_) @@ -173,14 +200,14 @@ void Lowering::TreeNodeInfoInit(GenTree* tree) { // An unused GT_LONG node needs to consume its sources. info->srcCount = 2; + info->dstCount = 0; } else { - // Passthrough + // Passthrough. Should have been marked contained. info->srcCount = 0; + assert(info->dstCount == 0); } - - info->dstCount = 0; break; #endif // !defined(_TARGET_64BIT_) @@ -190,7 +217,7 @@ void Lowering::TreeNodeInfoInit(GenTree* tree) case GT_QMARK: case GT_COLON: info->srcCount = 0; - info->dstCount = 0; + assert(info->dstCount == 0); unreached(); break; @@ -199,17 +226,16 @@ void Lowering::TreeNodeInfoInit(GenTree* tree) break; case GT_RETFILT: + assert(info->dstCount == 0); if (tree->TypeGet() == TYP_VOID) { info->srcCount = 0; - info->dstCount = 0; } else { assert(tree->TypeGet() == TYP_INT); info->srcCount = 1; - info->dstCount = 0; info->setSrcCandidates(l, RBM_INTRET); tree->gtOp.gtOp1->gtLsraInfo.setSrcCandidates(l, RBM_INTRET); @@ -223,24 +249,23 @@ void Lowering::TreeNodeInfoInit(GenTree* tree) info->srcCount = 0; if (tree->TypeGet() != TYP_VOID && tree->gtOp.gtOp1 == nullptr) { - info->dstCount = 1; + assert(info->dstCount == 1); } else { - info->dstCount = 0; + assert(info->dstCount == 0); } break; case GT_JTRUE: { info->srcCount = 0; - info->dstCount = 0; + assert(info->dstCount == 0); GenTree* cmp = tree->gtGetOp1(); - l->clearDstCount(cmp); + assert(cmp->gtLsraInfo.dstCount == 0); #ifdef FEATURE_SIMD - ContainCheckJTrue(tree->AsOp()); GenTree* cmpOp1 = cmp->gtGetOp1(); GenTree* cmpOp2 = cmp->gtGetOp2(); if (cmpOp1->IsSIMDEqualityOrInequality() && cmpOp2->isContained()) @@ -249,46 +274,8 @@ void Lowering::TreeNodeInfoInit(GenTree* tree) // We always generate code for a SIMD equality comparison, but the compare itself produces no value. // Neither the SIMD node nor the immediate need to be evaluated into a register. - l->clearOperandCounts(cmp); - l->clearDstCount(cmpOp1); - l->clearOperandCounts(cmpOp2); - - // Codegen of SIMD (in)Equality uses target integer reg only for setting flags. - // A target reg is not needed on AVX when comparing against Vector Zero. - // In all other cases we need to reserve an int type internal register, since we - // have cleared dstCount. - if (!compiler->canUseAVX() || !cmpOp1->gtGetOp2()->IsIntegralConstVector(0)) - { - ++(cmpOp1->gtLsraInfo.internalIntCount); - regMaskTP internalCandidates = cmpOp1->gtLsraInfo.getInternalCandidates(l); - internalCandidates |= l->allRegs(TYP_INT); - cmpOp1->gtLsraInfo.setInternalCandidates(l, internalCandidates); - } - - // We have to reverse compare oper in the following cases: - // 1) SIMD Equality: Sets Zero flag on equal otherwise clears it. - // Therefore, if compare oper is == or != against false(0), we will - // be checking opposite of what is required. - // - // 2) SIMD inEquality: Clears Zero flag on true otherwise sets it. - // Therefore, if compare oper is == or != against true(1), we will - // be checking opposite of what is required. 
- GenTreeSIMD* simdNode = cmpOp1->AsSIMD(); - if (simdNode->gtSIMDIntrinsicID == SIMDIntrinsicOpEquality) - { - if (cmpOp2->IsIntegralConst(0)) - { - cmp->SetOper(GenTree::ReverseRelop(cmpOper)); - } - } - else - { - assert(simdNode->gtSIMDIntrinsicID == SIMDIntrinsicOpInEquality); - if (cmpOp2->IsIntegralConst(1)) - { - cmp->SetOper(GenTree::ReverseRelop(cmpOper)); - } - } + assert(cmpOp1->gtLsraInfo.dstCount == 0); + assert(cmpOp2->gtLsraInfo.dstCount == 0); } #endif // FEATURE_SIMD } @@ -296,12 +283,12 @@ void Lowering::TreeNodeInfoInit(GenTree* tree) case GT_JCC: info->srcCount = 0; - info->dstCount = 0; + assert(info->dstCount == 0); break; case GT_SETCC: info->srcCount = 0; - info->dstCount = 1; + assert(info->dstCount == 1); #ifdef _TARGET_X86_ info->setDstCandidates(m_lsra, RBM_BYTE_REGS); #endif // _TARGET_X86_ @@ -309,26 +296,25 @@ void Lowering::TreeNodeInfoInit(GenTree* tree) case GT_JMP: info->srcCount = 0; - info->dstCount = 0; + assert(info->dstCount == 0); break; case GT_SWITCH: // This should never occur since switch nodes must not be visible at this // point in the JIT. info->srcCount = 0; - info->dstCount = 0; // To avoid getting uninit errors. noway_assert(!"Switch must be lowered at this point"); break; case GT_JMPTABLE: info->srcCount = 0; - info->dstCount = 1; + assert(info->dstCount == 1); break; case GT_SWITCH_TABLE: info->srcCount = 2; info->internalIntCount = 1; - info->dstCount = 0; + assert(info->dstCount == 0); break; case GT_ASG: @@ -336,7 +322,6 @@ void Lowering::TreeNodeInfoInit(GenTree* tree) case GT_ASG_SUB: noway_assert(!"We should never hit any assignment operator in lowering"); info->srcCount = 0; - info->dstCount = 0; break; #if !defined(_TARGET_64BIT_) @@ -351,10 +336,8 @@ void Lowering::TreeNodeInfoInit(GenTree* tree) // Rather they only support "op xmm, mem/xmm" form. if (varTypeIsFloating(tree->TypeGet())) { - ContainCheckFloatBinary(tree->AsOp()); - info->srcCount += GetOperandSourceCount(tree->gtOp.gtOp1); + info->srcCount = GetOperandSourceCount(tree->gtOp.gtOp1); info->srcCount += GetOperandSourceCount(tree->gtOp.gtOp2); - info->dstCount = 1; break; } @@ -363,19 +346,14 @@ void Lowering::TreeNodeInfoInit(GenTree* tree) case GT_AND: case GT_OR: case GT_XOR: - ContainCheckBinary(tree->AsOp()); - info->srcCount += GetOperandSourceCount(tree->gtOp.gtOp1); + info->srcCount = GetOperandSourceCount(tree->gtOp.gtOp1); info->srcCount += GetOperandSourceCount(tree->gtOp.gtOp2); - info->dstCount = 1; - // Codegen of this tree node sets ZF and SF flags. - tree->gtFlags |= GTF_ZSF_SET; break; case GT_RETURNTRAP: // This just turns into a compare of its child with an int + a conditional call - ContainCheckReturnTrap(tree->AsOp()); - info->srcCount = tree->gtOp.gtOp1->isContained() ? 0 : 1; - info->dstCount = 0; + info->srcCount = tree->gtOp.gtOp1->isContained() ? 0 : 1; + assert(info->dstCount == 0); info->internalIntCount = 1; info->setInternalCandidates(l, l->allRegs(TYP_INT)); break; @@ -406,7 +384,6 @@ void Lowering::TreeNodeInfoInit(GenTree* tree) #endif // FEATURE_SIMD case GT_CAST: - ContainCheckCast(tree->AsCast()); TreeNodeInfoInitCast(tree); break; @@ -417,8 +394,7 @@ void Lowering::TreeNodeInfoInit(GenTree* tree) break; case GT_NEG: - info->srcCount = 1; - info->dstCount = 1; + info->srcCount = GetOperandSourceCount(tree->gtOp.gtOp1); // TODO-XArch-CQ: // SSE instruction set doesn't have an instruction to negate a number. 
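Background for the GT_NEG change just above and the internal float register kept in the next hunk: SSE/SSE2 has no negate instruction, so the TODO-XArch-CQ comment refers to negating a float or double by XOR-ing its sign bit with a mask constant, with a temporary xmm register reserved to hold that mask. A minimal sketch of the equivalent operation with compiler intrinsics (illustrative only; negate_ss is not a JIT function):

#include <xmmintrin.h>

// Sketch: SSE scalar-float negation is an XOR against the sign-bit mask.
__m128 negate_ss(__m128 x)
{
    const __m128 signMask = _mm_set_ss(-0.0f); // low lane holds 0x80000000
    return _mm_xor_ps(x, signMask);            // flips the sign bit of the low lane
}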
@@ -441,16 +417,10 @@ void Lowering::TreeNodeInfoInit(GenTree* tree) info->internalFloatCount = 1; info->setInternalCandidates(l, l->internalFloatRegCandidates()); } - else - { - // Codegen of this tree node sets ZF and SF flags. - tree->gtFlags |= GTF_ZSF_SET; - } break; case GT_NOT: - info->srcCount = 1; - info->dstCount = 1; + info->srcCount = GetOperandSourceCount(tree->gtOp.gtOp1); break; case GT_LSH: @@ -478,14 +448,14 @@ void Lowering::TreeNodeInfoInit(GenTree* tree) break; case GT_CKFINITE: - info->srcCount = 1; - info->dstCount = 1; + info->srcCount = 1; + assert(info->dstCount == 1); info->internalIntCount = 1; break; case GT_CMPXCHG: info->srcCount = 3; - info->dstCount = 1; + assert(info->dstCount == 1); // comparand is preferenced to RAX. // Remaining two operands can be in any reg other than RAX. @@ -496,16 +466,13 @@ void Lowering::TreeNodeInfoInit(GenTree* tree) break; case GT_LOCKADD: - info->dstCount = (tree->TypeGet() == TYP_VOID) ? 0 : 1; + op2 = tree->gtOp.gtOp2; + info->srcCount = op2->isContained() ? 1 : 2; + assert(info->dstCount == (tree->TypeGet() == TYP_VOID) ? 0 : 1); + break; - if (CheckImmedAndMakeContained(tree, tree->gtOp.gtOp2)) - { - info->srcCount = 1; - } - else - { - info->srcCount = 2; - } + case GT_PUTARG_REG: + TreeNodeInfoInitPutArgReg(tree->AsUnOp()); break; case GT_CALL: @@ -517,9 +484,9 @@ void Lowering::TreeNodeInfoInit(GenTree* tree) // For a GT_ADDR, the child node should not be evaluated into a register GenTreePtr child = tree->gtOp.gtOp1; assert(!l->isCandidateLocalRef(child)); - MakeSrcContained(tree, child); + assert(child->isContained()); + assert(info->dstCount == 1); info->srcCount = 0; - info->dstCount = 1; } break; @@ -531,12 +498,10 @@ void Lowering::TreeNodeInfoInit(GenTree* tree) // These should all be eliminated prior to Lowering. assert(!"Non-store block node in Lowering"); info->srcCount = 0; - info->dstCount = 0; break; #ifdef FEATURE_PUT_STRUCT_ARG_STK case GT_PUTARG_STK: - LowerPutArgStk(tree->AsPutArgStk()); TreeNodeInfoInitPutArgStk(tree->AsPutArgStk()); break; #endif // FEATURE_PUT_STRUCT_ARG_STK @@ -544,14 +509,12 @@ void Lowering::TreeNodeInfoInit(GenTree* tree) case GT_STORE_BLK: case GT_STORE_OBJ: case GT_STORE_DYN_BLK: - LowerBlockStore(tree->AsBlk()); TreeNodeInfoInitBlockStore(tree->AsBlk()); break; case GT_INIT_VAL: // Always a passthrough of its child's value. - info->srcCount = 0; - info->dstCount = 0; + assert(!"INIT_VAL should always be contained"); break; case GT_LCLHEAP: @@ -562,23 +525,21 @@ void Lowering::TreeNodeInfoInit(GenTree* tree) #ifdef FEATURE_SIMD case GT_SIMD_CHK: #endif // FEATURE_SIMD - ContainCheckBoundsChk(tree->AsBoundsChk()); // Consumes arrLen & index - has no result info->srcCount = GetOperandSourceCount(tree->AsBoundsChk()->gtIndex); info->srcCount += GetOperandSourceCount(tree->AsBoundsChk()->gtArrLen); - info->dstCount = 0; + assert(info->dstCount == 0); break; case GT_ARR_ELEM: // These must have been lowered to GT_ARR_INDEX - noway_assert(!"We should never see a GT_ARR_ELEM in lowering"); + noway_assert(!"We should never see a GT_ARR_ELEM after Lowering."); info->srcCount = 0; - info->dstCount = 0; break; case GT_ARR_INDEX: info->srcCount = 2; - info->dstCount = 1; + assert(info->dstCount == 1); // For GT_ARR_INDEX, the lifetime of the arrObj must be extended because it is actually used multiple // times while the result is being computed. 
tree->AsArrIndex()->ArrObj()->gtLsraInfo.isDelayFree = true; @@ -588,27 +549,26 @@ void Lowering::TreeNodeInfoInit(GenTree* tree) case GT_ARR_OFFSET: // This consumes the offset, if any, the arrObj and the effective index, // and produces the flattened offset for this dimension. - info->srcCount = 2; - info->dstCount = 1; - - if (tree->gtArrOffs.gtOffset->IsIntegralConst(0)) + assert(info->dstCount == 1); + if (tree->gtArrOffs.gtOffset->isContained()) { - MakeSrcContained(tree, tree->gtArrOffs.gtOffset); + info->srcCount = 2; } else { info->srcCount++; // Here we simply need an internal register, which must be different // from any of the operand's registers, but may be the same as targetReg. + info->srcCount = 3; info->internalIntCount = 1; } break; case GT_LEA: - // The LEA usually passes its operands through to the GT_IND, in which case we'll - // clear the info->srcCount and info->dstCount later, but we may be instantiating an address, - // so we set them here. + // The LEA usually passes its operands through to the GT_IND, in which case it will + // be contained, but we may be instantiating an address, in which case we set them here. info->srcCount = 0; + assert(info->dstCount == 1); if (tree->AsAddrMode()->HasBase()) { info->srcCount++; @@ -617,70 +577,38 @@ void Lowering::TreeNodeInfoInit(GenTree* tree) { info->srcCount++; } - info->dstCount = 1; break; case GT_STOREIND: - { - info->srcCount = 2; - info->dstCount = 0; - GenTree* src = tree->gtOp.gtOp2; - if (compiler->codeGen->gcInfo.gcIsWriteBarrierAsgNode(tree)) { TreeNodeInfoInitGCWriteBarrier(tree); break; } - - // If the source is a containable immediate, make it contained, unless it is - // an int-size or larger store of zero to memory, because we can generate smaller code - // by zeroing a register and then storing it. - if (IsContainableImmed(tree, src) && - (!src->IsIntegralConst(0) || varTypeIsSmall(tree) || tree->gtGetOp1()->OperGet() == GT_CLS_VAR_ADDR)) - { - MakeSrcContained(tree, src); - } - else if (!varTypeIsFloating(tree)) - { - // Perform recognition of trees with the following structure: - // StoreInd(addr, BinOp(expr, GT_IND(addr))) - // to be able to fold this into an instruction of the form - // BINOP [addr], register - // where register is the actual place where 'expr' is computed. - // - // SSE2 doesn't support RMW form of instructions. - if (TreeNodeInfoInitIfRMWMemOp(tree)) - { - break; - } - } - TreeNodeInfoInitIndir(tree->AsIndir()); - } - break; + break; case GT_NULLCHECK: - info->dstCount = 0; + assert(info->dstCount == 0); info->srcCount = 1; info->isLocalDefUse = true; break; case GT_IND: - info->dstCount = 1; - info->srcCount = 1; TreeNodeInfoInitIndir(tree->AsIndir()); + assert(info->dstCount == 1); break; case GT_CATCH_ARG: info->srcCount = 0; - info->dstCount = 1; + assert(info->dstCount == 1); info->setDstCandidates(l, RBM_EXCEPTION_OBJECT); break; #if !FEATURE_EH_FUNCLETS case GT_END_LFIN: info->srcCount = 0; - info->dstCount = 0; + assert(info->dstCount == 0); break; #endif @@ -715,8 +643,6 @@ void Lowering::TreeNodeInfoInit(GenTree* tree) op1->gtLsraInfo.isTgtPref = true; // Is this a non-commutative operator, or is op2 a contained memory op? - // (Note that we can't call IsContained() at this point because it uses exactly the - // same information we're currently computing.) // In either case, we need to make op2 remain live until the op is complete, by marking // the source(s) associated with op2 as "delayFree". 
// Note that if op2 of a binary RMW operator is a memory op, even if the operator @@ -760,8 +686,8 @@ void Lowering::TreeNodeInfoInit(GenTree* tree) delayUseSrc = op1; } - else if ((op2 != nullptr) && (!tree->OperIsCommutative() || - (IsContainableMemoryOp(op2, true) && (op2->gtLsraInfo.srcCount == 0)))) + else if ((op2 != nullptr) && + (!tree->OperIsCommutative() || (IsContainableMemoryOp(op2) && (op2->gtLsraInfo.srcCount == 0)))) { delayUseSrc = op2; } @@ -836,24 +762,24 @@ void Lowering::TreeNodeInfoInitCheckByteable(GenTree* tree) info->setDstCandidates(l, regMask & ~RBM_NON_BYTE_REGS); } - if (tree->OperIsSimple() && (info->srcCount > 0)) + if (tree->OperIsSimple()) { - // No need to set src candidates on a contained child operand. GenTree* op = tree->gtOp.gtOp1; - assert(op != nullptr); - bool containedNode = (op->gtLsraInfo.srcCount == 0) && (op->gtLsraInfo.dstCount == 0); - if (!containedNode) + if (op != nullptr) { - regMask = op->gtLsraInfo.getSrcCandidates(l); - assert(regMask != RBM_NONE); - op->gtLsraInfo.setSrcCandidates(l, regMask & ~RBM_NON_BYTE_REGS); + // No need to set src candidates on a contained child operand. + if (!op->isContained()) + { + regMask = op->gtLsraInfo.getSrcCandidates(l); + assert(regMask != RBM_NONE); + op->gtLsraInfo.setSrcCandidates(l, regMask & ~RBM_NON_BYTE_REGS); + } } if (tree->OperIsBinary() && (tree->gtOp.gtOp2 != nullptr)) { - op = tree->gtOp.gtOp2; - containedNode = (op->gtLsraInfo.srcCount == 0) && (op->gtLsraInfo.dstCount == 0); - if (!containedNode) + op = tree->gtOp.gtOp2; + if (!op->isContained()) { regMask = op->gtLsraInfo.getSrcCandidates(l); assert(regMask != RBM_NONE); @@ -866,7 +792,7 @@ void Lowering::TreeNodeInfoInitCheckByteable(GenTree* tree) } //------------------------------------------------------------------------ -// TreeNodeInfoInitSimple: Sets the srcCount and dstCount for all the trees +// TreeNodeInfoInitSimple: Sets the srcCount for all the trees // without special handling based on the tree node type. // // Arguments: @@ -878,8 +804,12 @@ void Lowering::TreeNodeInfoInitCheckByteable(GenTree* tree) void Lowering::TreeNodeInfoInitSimple(GenTree* tree) { TreeNodeInfo* info = &(tree->gtLsraInfo); - unsigned kind = tree->OperKind(); - info->dstCount = tree->IsValue() ? 1 : 0; + if (tree->isContained()) + { + info->srcCount = 0; + return; + } + unsigned kind = tree->OperKind(); if (kind & (GTK_CONST | GTK_LEAF)) { info->srcCount = 0; @@ -888,12 +818,9 @@ void Lowering::TreeNodeInfoInitSimple(GenTree* tree) { if (tree->gtGetOp2IfPresent() != nullptr) { - info->srcCount = 2; - } - else - { - info->srcCount = 1; + info->srcCount += GetOperandSourceCount(tree->gtOp.gtOp2); } + info->srcCount += GetOperandSourceCount(tree->gtOp.gtOp1); } else { @@ -912,8 +839,6 @@ void Lowering::TreeNodeInfoInitSimple(GenTree* tree) // void Lowering::TreeNodeInfoInitReturn(GenTree* tree) { - ContainCheckRet(tree->AsOp()); - TreeNodeInfo* info = &(tree->gtLsraInfo); LinearScan* l = m_lsra; Compiler* compiler = comp; @@ -928,7 +853,7 @@ void Lowering::TreeNodeInfoInitReturn(GenTree* tree) info->srcCount = 2; loVal->gtLsraInfo.setSrcCandidates(l, RBM_LNGRET_LO); hiVal->gtLsraInfo.setSrcCandidates(l, RBM_LNGRET_HI); - info->dstCount = 0; + assert(info->dstCount == 0); } else #endif // !defined(_TARGET_64BIT_) @@ -936,7 +861,7 @@ void Lowering::TreeNodeInfoInitReturn(GenTree* tree) regMaskTP useCandidates = RBM_NONE; info->srcCount = ((tree->TypeGet() == TYP_VOID) || op1->isContained()) ? 
0 : 1; - info->dstCount = 0; + assert(info->dstCount == 0); #ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING if (varTypeIsStruct(tree)) @@ -996,10 +921,6 @@ void Lowering::TreeNodeInfoInitShiftRotate(GenTree* tree) { TreeNodeInfo* info = &(tree->gtLsraInfo); LinearScan* l = m_lsra; - ContainCheckShiftRotate(tree->AsOp()); - - info->srcCount = 2; - info->dstCount = 1; // For shift operations, we need that the number // of bits moved gets stored in CL in case @@ -1076,16 +997,14 @@ void Lowering::TreeNodeInfoInitShiftRotate(GenTree* tree) // Return Value: // None. // -void Lowering::TreeNodeInfoInitPutArgReg( - GenTreeUnOp* node, regNumber argReg, TreeNodeInfo& info, bool isVarArgs, bool* callHasFloatRegArgs) +void Lowering::TreeNodeInfoInitPutArgReg(GenTreeUnOp* node) { assert(node != nullptr); assert(node->OperIsPutArgReg()); + node->gtLsraInfo.srcCount = 1; + regNumber argReg = node->gtRegNum; assert(argReg != REG_NA); - // Each register argument corresponds to one source. - info.srcCount++; - // Set the register requirements for the node. const regMaskTP argMask = genRegMask(argReg); node->gtLsraInfo.setDstCandidates(m_lsra, argMask); @@ -1094,20 +1013,35 @@ void Lowering::TreeNodeInfoInitPutArgReg( // To avoid redundant moves, have the argument operand computed in the // register in which the argument is passed to the call. node->gtOp.gtOp1->gtLsraInfo.setSrcCandidates(m_lsra, m_lsra->getUseCandidates(node)); +} +//------------------------------------------------------------------------ +// HandleFloatVarArgs: Handle additional register requirements for a varargs call +// +// Arguments: +// call - The call node of interest +// argNode - The current argument +// +// Return Value: +// None. +// +// Notes: +// In the case of a varargs call, the ABI dictates that if we have floating point args, +// we must pass the enregistered arguments in both the integer and floating point registers. +// Since the integer register is not associated with the arg node, we will reserve it as +// an internal register on the call so that it is not used during the evaluation of the call node +// (e.g. for the target). +void Lowering::HandleFloatVarArgs(GenTreeCall* call, GenTree* argNode, bool* callHasFloatRegArgs) +{ #if FEATURE_VARARG - *callHasFloatRegArgs |= varTypeIsFloating(node->TypeGet()); - - // In the case of a varargs call, the ABI dictates that if we have floating point args, - // we must pass the enregistered arguments in both the integer and floating point registers. - // Since the integer register is not associated with this arg node, we will reserve it as - // an internal register so that it is not used during the evaluation of the call node - // (e.g. for the target). 
- if (isVarArgs && varTypeIsFloating(node)) + if (call->IsVarargs() && varTypeIsFloating(argNode)) { + *callHasFloatRegArgs = true; + + regNumber argReg = argNode->gtRegNum; regNumber targetReg = comp->getCallArgIntRegister(argReg); - info.setInternalIntCount(info.internalIntCount + 1); - info.addInternalCandidates(m_lsra, genRegMask(targetReg)); + call->gtLsraInfo.setInternalIntCount(call->gtLsraInfo.internalIntCount + 1); + call->gtLsraInfo.addInternalCandidates(m_lsra, genRegMask(targetReg)); } #endif // FEATURE_VARARG } @@ -1129,6 +1063,7 @@ void Lowering::TreeNodeInfoInitCall(GenTreeCall* call) bool hasMultiRegRetVal = false; ReturnTypeDesc* retTypeDesc = nullptr; + assert(!call->isContained()); info->srcCount = 0; if (call->TypeGet() != TYP_VOID) { @@ -1141,43 +1076,36 @@ void Lowering::TreeNodeInfoInitCall(GenTreeCall* call) } else { - info->dstCount = 1; + assert(info->dstCount == 1); } } else { - info->dstCount = 0; + assert(info->dstCount == 0); } GenTree* ctrlExpr = call->gtControlExpr; if (call->gtCallType == CT_INDIRECT) { - // either gtControlExpr != null or gtCallAddr != null. - // Both cannot be non-null at the same time. - assert(ctrlExpr == nullptr); - assert(call->gtCallAddr != nullptr); ctrlExpr = call->gtCallAddr; - -#ifdef _TARGET_X86_ - // Fast tail calls aren't currently supported on x86, but if they ever are, the code - // below that handles indirect VSD calls will need to be fixed. - assert(!call->IsFastTailCall() || !call->IsVirtualStub()); -#endif // _TARGET_X86_ } // set reg requirements on call target represented as control sequence. if (ctrlExpr != nullptr) { - // we should never see a gtControlExpr whose type is void. - assert(ctrlExpr->TypeGet() != TYP_VOID); - - // call can take a Rm op on x64 - // In case of fast tail implemented as jmp, make sure that gtControlExpr is // computed into a register. - if (!call->IsFastTailCall()) + if (call->IsFastTailCall()) { + { + // Fast tail call - make sure that call target is always computed in RAX + // so that epilog sequence can generate "jmp rax" to achieve fast tail call. + ctrlExpr->gtLsraInfo.setSrcCandidates(l, RBM_RAX); + } + } #ifdef _TARGET_X86_ + else + { // On x86, we need to generate a very specific pattern for indirect VSD calls: // // 3-byte nop @@ -1187,24 +1115,11 @@ void Lowering::TreeNodeInfoInitCall(GenTreeCall* call) // sure that the call target address is computed into EAX in this case. if (call->IsVirtualStub() && (call->gtCallType == CT_INDIRECT)) { - assert(ctrlExpr->isIndir()); - + assert(ctrlExpr->isIndir() && ctrlExpr->isContained()); ctrlExpr->gtGetOp1()->gtLsraInfo.setSrcCandidates(l, RBM_VIRTUAL_STUB_TARGET); - MakeSrcContained(call, ctrlExpr); } - else -#endif // _TARGET_X86_ - if (ctrlExpr->isIndir()) - { - MakeSrcContained(call, ctrlExpr); - } - } - else - { - // Fast tail call - make sure that call target is always computed in RAX - // so that epilog sequence can generate "jmp rax" to achieve fast tail call. 
- ctrlExpr->gtLsraInfo.setSrcCandidates(l, RBM_RAX); } +#endif // _TARGET_X86_ info->srcCount += GetOperandSourceCount(ctrlExpr); } @@ -1259,24 +1174,6 @@ void Lowering::TreeNodeInfoInitCall(GenTreeCall* call) // callRegArgs + (callargs - placeholders, setup, etc) // there is an explicit thisPtr but it is redundant - // If there is an explicit this pointer, we don't want that node to produce anything - // as it is redundant - if (call->gtCallObjp != nullptr) - { - GenTreePtr thisPtrNode = call->gtCallObjp; - - if (thisPtrNode->gtOper == GT_PUTARG_REG) - { - l->clearOperandCounts(thisPtrNode); - thisPtrNode->SetContained(); - l->clearDstCount(thisPtrNode->gtOp.gtOp1); - } - else - { - l->clearDstCount(thisPtrNode); - } - } - bool callHasFloatRegArgs = false; bool isVarArgs = call->IsVarargs(); @@ -1296,16 +1193,34 @@ void Lowering::TreeNodeInfoInitCall(GenTreeCall* call) // Note that this property is statically checked by Lowering::CheckBlock. GenTreePtr argNode = list->Current(); + // Each register argument corresponds to one source. + if (argNode->OperIsPutArgReg()) + { + info->srcCount++; + HandleFloatVarArgs(call, argNode, &callHasFloatRegArgs); + } +#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING + else if (argNode->OperGet() == GT_FIELD_LIST) + { + for (GenTreeFieldList* entry = argNode->AsFieldList(); entry != nullptr; entry = entry->Rest()) + { + assert(entry->Current()->OperIsPutArgReg()); + info->srcCount++; + HandleFloatVarArgs(call, argNode, &callHasFloatRegArgs); + } + } +#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING + +#ifdef DEBUG + // In DEBUG only, check validity with respect to the arg table entry. + fgArgTabEntryPtr curArgTabEntry = compiler->gtArgEntryByNode(call, argNode); assert(curArgTabEntry); if (curArgTabEntry->regNum == REG_STK) { // late arg that is not passed in a register - DISPNODE(argNode); assert(argNode->gtOper == GT_PUTARG_STK); - argNode->gtLsraInfo.srcCount = 1; - argNode->gtLsraInfo.dstCount = 0; #ifdef FEATURE_PUT_STRUCT_ARG_STK // If the node is TYP_STRUCT and it is put on stack with @@ -1316,35 +1231,33 @@ void Lowering::TreeNodeInfoInitCall(GenTreeCall* call) if (argNode->TypeGet() == TYP_STRUCT) { assert(argNode->gtOp.gtOp1 != nullptr && argNode->gtOp.gtOp1->OperGet() == GT_OBJ); - argNode->gtOp.gtOp1->gtLsraInfo.dstCount = 0; - argNode->gtLsraInfo.srcCount = 0; + assert(argNode->gtLsraInfo.srcCount == 0); } #endif // FEATURE_PUT_STRUCT_ARG_STK - continue; } - #ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING if (argNode->OperGet() == GT_FIELD_LIST) { - argNode->SetContained(); + assert(argNode->isContained()); assert(varTypeIsStruct(argNode) || curArgTabEntry->isStruct); - unsigned eightbyte = 0; + int i = 0; for (GenTreeFieldList* entry = argNode->AsFieldList(); entry != nullptr; entry = entry->Rest()) { - const regNumber argReg = eightbyte == 0 ? curArgTabEntry->regNum : curArgTabEntry->otherRegNum; - TreeNodeInfoInitPutArgReg(entry->Current()->AsUnOp(), argReg, *info, isVarArgs, &callHasFloatRegArgs); - - eightbyte++; + const regNumber argReg = (i == 0) ? 
curArgTabEntry->regNum : curArgTabEntry->otherRegNum; + assert(entry->Current()->gtRegNum == argReg); + assert(i < 2); + i++; } } else #endif // FEATURE_UNIX_AMD64_STRUCT_PASSING { - TreeNodeInfoInitPutArgReg(argNode->AsUnOp(), curArgTabEntry->regNum, *info, isVarArgs, - &callHasFloatRegArgs); + const regNumber argReg = curArgTabEntry->regNum; + assert(argNode->gtRegNum == argReg); } +#endif // DEBUG } // Now, count stack args @@ -1361,41 +1274,11 @@ void Lowering::TreeNodeInfoInitCall(GenTreeCall* call) if (!(args->gtFlags & GTF_LATE_ARG)) { TreeNodeInfo* argInfo = &(arg->gtLsraInfo); - if (argInfo->dstCount != 0) + if ((argInfo->dstCount != 0) && !arg->IsArgPlaceHolderNode() && !arg->isContained()) { argInfo->isLocalDefUse = true; } - - // If the child of GT_PUTARG_STK is a constant, we don't need a register to - // move it to memory (stack location). - // - // On AMD64, we don't want to make 0 contained, because we can generate smaller code - // by zeroing a register and then storing it. E.g.: - // xor rdx, rdx - // mov gword ptr [rsp+28H], rdx - // is 2 bytes smaller than: - // mov gword ptr [rsp+28H], 0 - // - // On x86, we push stack arguments; we don't use 'mov'. So: - // push 0 - // is 1 byte smaller than: - // xor rdx, rdx - // push rdx - - argInfo->dstCount = 0; - if (arg->gtOper == GT_PUTARG_STK) - { - GenTree* op1 = arg->gtOp.gtOp1; - if (IsContainableImmed(arg, op1) -#if defined(_TARGET_AMD64_) - && !op1->IsIntegralConst(0) -#endif // _TARGET_AMD64_ - ) - { - MakeSrcContained(arg, op1); - arg->gtLsraInfo.srcCount--; - } - } + assert(argInfo->dstCount == 0); } args = args->gtOp.gtOp2; } @@ -1432,8 +1315,8 @@ void Lowering::TreeNodeInfoInitBlockStore(GenTreeBlk* blkNode) // Sources are dest address, initVal or source. // We may require an additional source or temp register for the size. - blkNode->gtLsraInfo.srcCount = 0; - blkNode->gtLsraInfo.dstCount = 0; + blkNode->gtLsraInfo.srcCount = GetOperandSourceCount(dstAddr); + assert(blkNode->gtLsraInfo.dstCount == 0); blkNode->gtLsraInfo.setInternalCandidates(l, RBM_NONE); GenTreePtr srcAddrOrFill = nullptr; bool isInitBlk = blkNode->OperIsInitBlkOp(); @@ -1447,10 +1330,14 @@ void Lowering::TreeNodeInfoInitBlockStore(GenTreeBlk* blkNode) GenTree* initVal = source; if (initVal->OperIsInitVal()) { - initVal->SetContained(); + assert(initVal->isContained()); initVal = initVal->gtGetOp1(); } srcAddrOrFill = initVal; + if (!initVal->isContained()) + { + blkNode->gtLsraInfo.srcCount++; + } switch (blkNode->gtBlkOpKind) { @@ -1458,22 +1345,12 @@ void Lowering::TreeNodeInfoInitBlockStore(GenTreeBlk* blkNode) assert(initVal->IsCnsIntOrI()); if (size >= XMM_REGSIZE_BYTES) { - // Reserve an XMM register to fill it with - // a pack of 16 init value constants. - ssize_t fill = initVal->gtIntCon.gtIconVal & 0xFF; + // Reserve an XMM register to fill it with a pack of 16 init value constants. blkNode->gtLsraInfo.internalFloatCount = 1; blkNode->gtLsraInfo.setInternalCandidates(l, l->internalFloatRegCandidates()); - if ((fill == 0) && ((size & 0xf) == 0)) - { - MakeSrcContained(blkNode, initVal); - } - // Use an XMM register to fill with constants; it's an AVX instruction, so set the flags. + // use XMM register to fill with constants, it's AVX instruction and set the flag SetContainsAVXFlags(); } - if (!initVal->isContained()) - { - blkNode->gtLsraInfo.srcCount++; - } #ifdef _TARGET_X86_ if ((size & 1) != 0) { @@ -1491,7 +1368,6 @@ void Lowering::TreeNodeInfoInitBlockStore(GenTreeBlk* blkNode) // a) The memory address to be in RDI. 
// b) The fill value has to be in RAX. // c) The buffer size will go in RCX. - blkNode->gtLsraInfo.srcCount++; dstAddrRegMask = RBM_RDI; srcAddrOrFill = initVal; sourceRegMask = RBM_RAX; @@ -1501,7 +1377,6 @@ void Lowering::TreeNodeInfoInitBlockStore(GenTreeBlk* blkNode) case GenTreeBlk::BlkOpKindHelper: #ifdef _TARGET_AMD64_ // The helper follows the regular AMD64 ABI. - blkNode->gtLsraInfo.srcCount++; dstAddrRegMask = RBM_ARG_0; sourceRegMask = RBM_ARG_1; blkSizeRegMask = RBM_ARG_2; @@ -1521,23 +1396,7 @@ void Lowering::TreeNodeInfoInitBlockStore(GenTreeBlk* blkNode) // CopyObj or CopyBlk if (source->gtOper == GT_IND) { - srcAddrOrFill = blkNode->Data()->gtGetOp1(); - // We're effectively setting source as contained, but can't call MakeSrcContained, because the - // "inheritance" of the srcCount is to a child not a parent - it would "just work" but could be misleading. - // If srcAddr is already non-contained, we don't need to change it. - if (srcAddrOrFill->gtLsraInfo.getDstCount() == 0) - { - srcAddrOrFill->gtLsraInfo.setDstCount(1); - srcAddrOrFill->gtLsraInfo.setSrcCount(source->gtLsraInfo.srcCount); - } - m_lsra->clearOperandCounts(source); - source->SetContained(); - source->AsIndir()->Addr()->ClearContained(); - } - else if (!source->IsMultiRegCall() && !source->OperIsSIMD()) - { - assert(source->IsLocal()); - MakeSrcContained(blkNode, source); + srcAddrOrFill = source->gtGetOp1(); } if (blkNode->OperGet() == GT_STORE_OBJ) { @@ -1586,18 +1445,6 @@ void Lowering::TreeNodeInfoInitBlockStore(GenTreeBlk* blkNode) // are used for codegen, set ContainsAVX flag SetContainsAVXFlags(); } - // If src or dst are on stack, we don't have to generate the address - // into a register because it's just some constant+SP. - if ((srcAddrOrFill != nullptr) && srcAddrOrFill->OperIsLocalAddr()) - { - MakeSrcContained(blkNode, srcAddrOrFill); - } - - if (dstAddr->OperIsLocalAddr()) - { - MakeSrcContained(blkNode, dstAddr); - } - break; case GenTreeBlk::BlkOpKindRepInstr: @@ -1630,7 +1477,6 @@ void Lowering::TreeNodeInfoInitBlockStore(GenTreeBlk* blkNode) blkNode->gtLsraInfo.srcCount += GetOperandSourceCount(source); } - blkNode->gtLsraInfo.srcCount += GetOperandSourceCount(dstAddr); if (dstAddrRegMask != RBM_NONE) { dstAddr->gtLsraInfo.setSrcCandidates(l, dstAddrRegMask); @@ -1659,7 +1505,7 @@ void Lowering::TreeNodeInfoInitBlockStore(GenTreeBlk* blkNode) else { // The block size argument is a third argument to GT_STORE_DYN_BLK - noway_assert(blkNode->gtOper == GT_STORE_DYN_BLK); + assert(blkNode->gtOper == GT_STORE_DYN_BLK); blkNode->gtLsraInfo.setSrcCount(3); GenTree* blockSize = blkNode->AsDynBlk()->gtDynamicSize; blockSize->gtLsraInfo.setSrcCandidates(l, blkSizeRegMask); @@ -1682,6 +1528,7 @@ void Lowering::TreeNodeInfoInitPutArgStk(GenTreePutArgStk* putArgStk) TreeNodeInfo* info = &(putArgStk->gtLsraInfo); LinearScan* l = m_lsra; info->srcCount = 0; + assert(info->dstCount == 0); if (putArgStk->gtOp1->gtOper == GT_FIELD_LIST) { @@ -1699,50 +1546,15 @@ void Lowering::TreeNodeInfoInitPutArgStk(GenTreePutArgStk* putArgStk) const unsigned fieldOffset = current->gtFieldOffset; assert(fieldType != TYP_LONG); - // For x86 we must mark all integral fields as contained or reg-optional, and handle them - // accordingly in code generation, since we may have up to 8 fields, which cannot all be in - // registers to be consumed atomically by the call. 
- if (varTypeIsIntegralOrI(fieldNode)) - { - if (fieldNode->OperGet() == GT_LCL_VAR) - { - LclVarDsc* varDsc = &(comp->lvaTable[fieldNode->AsLclVarCommon()->gtLclNum]); - if (varDsc->lvTracked && !varDsc->lvDoNotEnregister) - { - SetRegOptional(fieldNode); - } - else - { - MakeSrcContained(putArgStk, fieldNode); - } - } - else if (fieldNode->IsIntCnsFitsInI32()) - { - MakeSrcContained(putArgStk, fieldNode); - } - else - { - // For the case where we cannot directly push the value, if we run out of registers, - // it would be better to defer computation until we are pushing the arguments rather - // than spilling, but this situation is not all that common, as most cases of promoted - // structs do not have a large number of fields, and of those most are lclVars or - // copy-propagated constants. - SetRegOptional(fieldNode); - } - } #if defined(FEATURE_SIMD) - // Note that we need to check the GT_FIELD_LIST type, not the fieldType. This is because the + // Note that we need to check the GT_FIELD_LIST type, not 'fieldType'. This is because the // GT_FIELD_LIST will be TYP_SIMD12 whereas the fieldType might be TYP_SIMD16 for lclVar, where // we "round up" to 16. - else if (current->gtFieldType == TYP_SIMD12) + if (current->gtFieldType == TYP_SIMD12) { needsSimdTemp = true; } #endif // defined(FEATURE_SIMD) - else - { - assert(varTypeIsFloating(fieldNode) || varTypeIsSIMD(fieldNode)); - } // We can treat as a slot any field that is stored at a slot boundary, where the previous // field is not in the same slot. (Note that we store the fields in reverse order.) @@ -1771,8 +1583,6 @@ void Lowering::TreeNodeInfoInitPutArgStk(GenTreePutArgStk* putArgStk) } } - info->dstCount = 0; - if (putArgStk->gtPutArgStkKind == GenTreePutArgStk::Kind::Push) { // If any of the fields cannot be stored with an actual push, we may need a temporary @@ -1790,6 +1600,8 @@ void Lowering::TreeNodeInfoInitPutArgStk(GenTreePutArgStk* putArgStk) // For PutArgStk of a TYP_SIMD12, we need a SIMD temp register. if (needsSimdTemp) { + info->srcCount = putArgStk->gtOp1->gtLsraInfo.dstCount; + assert(info->dstCount == 0); info->internalFloatCount += 1; info->addInternalCandidates(l, l->allSIMDRegs()); } @@ -1804,7 +1616,6 @@ void Lowering::TreeNodeInfoInitPutArgStk(GenTreePutArgStk* putArgStk) if (putArgStk->TypeGet() == TYP_SIMD12) { info->srcCount = putArgStk->gtOp1->gtLsraInfo.dstCount; - info->dstCount = 0; info->internalFloatCount = 1; info->setInternalCandidates(l, l->allSIMDRegs()); return; @@ -1821,19 +1632,7 @@ void Lowering::TreeNodeInfoInitPutArgStk(GenTreePutArgStk* putArgStk) GenTreePtr src = putArgStk->gtOp1; GenTreePtr srcAddr = nullptr; - bool haveLocalAddr = false; - if ((src->OperGet() == GT_OBJ) || (src->OperGet() == GT_IND)) - { - srcAddr = src->gtOp.gtOp1; - assert(srcAddr != nullptr); - haveLocalAddr = srcAddr->OperIsLocalAddr(); - } - else - { - assert(varTypeIsSIMD(putArgStk)); - } - - info->dstCount = 0; + info->srcCount = GetOperandSourceCount(src); // If we have a buffer between XMM_REGSIZE_BYTES and CPBLK_UNROLL_LIMIT bytes, we'll use SSE2. // Structs and buffer with sizes <= CPBLK_UNROLL_LIMIT bytes are occurring in more than 95% of @@ -1886,17 +1685,6 @@ void Lowering::TreeNodeInfoInitPutArgStk(GenTreePutArgStk* putArgStk) default: unreached(); } - - // Always mark the OBJ and ADDR as contained trees by the putarg_stk. The codegen will deal with this tree. 
- MakeSrcContained(putArgStk, src); - - if (haveLocalAddr) - { - // If the source address is the address of a lclVar, make the source address contained to avoid unnecessary - // copies. - MakeSrcContained(putArgStk, srcAddr); - } - info->srcCount = GetOperandSourceCount(src); } #endif // FEATURE_PUT_STRUCT_ARG_STK @@ -1911,13 +1699,12 @@ void Lowering::TreeNodeInfoInitPutArgStk(GenTreePutArgStk* putArgStk) // void Lowering::TreeNodeInfoInitLclHeap(GenTree* tree) { - ContainCheckLclHeap(tree->AsOp()); TreeNodeInfo* info = &(tree->gtLsraInfo); LinearScan* l = m_lsra; Compiler* compiler = comp; info->srcCount = 1; - info->dstCount = 1; + assert(info->dstCount == 1); // Need a variable number of temp regs (see genLclHeap() in codegenamd64.cpp): // Here '-' means don't care. @@ -2009,7 +1796,6 @@ void Lowering::TreeNodeInfoInitLclHeap(GenTree* tree) // void Lowering::TreeNodeInfoInitModDiv(GenTree* tree) { - ContainCheckDivOrMod(tree->AsOp()); TreeNodeInfo* info = &(tree->gtLsraInfo); LinearScan* l = m_lsra; @@ -2018,7 +1804,7 @@ void Lowering::TreeNodeInfoInitModDiv(GenTree* tree) info->srcCount = GetOperandSourceCount(op1); info->srcCount += GetOperandSourceCount(op2); - info->dstCount = 1; + assert(info->dstCount == 1); if (varTypeIsFloating(tree->TypeGet())) { @@ -2085,8 +1871,6 @@ void Lowering::TreeNodeInfoInitModDiv(GenTree* tree) // void Lowering::TreeNodeInfoInitIntrinsic(GenTree* tree) { - ContainCheckIntrinsic(tree->AsOp()); - TreeNodeInfo* info = &(tree->gtLsraInfo); LinearScan* l = m_lsra; @@ -2096,7 +1880,7 @@ void Lowering::TreeNodeInfoInitIntrinsic(GenTree* tree) assert(op1->TypeGet() == tree->TypeGet()); info->srcCount = GetOperandSourceCount(op1); - info->dstCount = 1; + assert(info->dstCount == 1); switch (tree->gtIntrinsic.gtIntrinsicId) { @@ -2149,14 +1933,21 @@ void Lowering::TreeNodeInfoInitIntrinsic(GenTree* tree) // Return Value: // None. -void Lowering::TreeNodeInfoInitSIMD(GenTree* tree) +void Lowering::TreeNodeInfoInitSIMD(GenTreeSIMD* simdTree) { - GenTreeSIMD* simdTree = tree->AsSIMD(); - ContainCheckSIMD(simdTree); - - TreeNodeInfo* info = &(tree->gtLsraInfo); + TreeNodeInfo* info = &(simdTree->gtLsraInfo); LinearScan* lsra = m_lsra; - info->dstCount = 1; + // Only SIMDIntrinsicInit can be contained. Other than that, + // only SIMDIntrinsicOpEquality and SIMDIntrinsicOpInEquality can have 0 dstCount. + if (simdTree->isContained()) + { + assert(simdTree->gtSIMDIntrinsicID == SIMDIntrinsicInit); + } + else if (info->dstCount != 1) + { + assert((simdTree->gtSIMDIntrinsicID == SIMDIntrinsicOpEquality) || + (simdTree->gtSIMDIntrinsicID == SIMDIntrinsicOpInEquality)); + } SetContainsAVXFlags(true, simdTree->gtSIMDSize); switch (simdTree->gtSIMDIntrinsicID) { @@ -2165,7 +1956,7 @@ void Lowering::TreeNodeInfoInitSIMD(GenTree* tree) case SIMDIntrinsicInit: { - op1 = tree->gtOp.gtOp1; + op1 = simdTree->gtOp.gtOp1; #if !defined(_TARGET_64BIT_) if (op1->OperGet() == GT_LONG) @@ -2297,13 +2088,11 @@ void Lowering::TreeNodeInfoInitSIMD(GenTree* tree) case SIMDIntrinsicOpEquality: case SIMDIntrinsicOpInEquality: - info->srcCount = 2; // On SSE4/AVX, we can generate optimal code for (in)equality // against zero using ptest. 
We can safely do this optimization // for integral vectors but not for floating-point for the reason // that we have +0.0 and -0.0 and +0.0 == -0.0 - op2 = tree->gtGetOp2(); if (simdTree->gtGetOp2()->isContained()) { info->srcCount = 1; @@ -2321,6 +2110,19 @@ void Lowering::TreeNodeInfoInitSIMD(GenTree* tree) info->internalFloatCount = 1; info->setInternalCandidates(lsra, lsra->allSIMDRegs()); } + if (info->isNoRegCompare) + { + info->dstCount = 0; + // Codegen of SIMD (in)Equality uses target integer reg only for setting flags. + // A target reg is not needed on AVX when comparing against Vector Zero. + // In all other cases we need to reserve an int type internal register if we + // don't have a target register on the compare. + if (!comp->canUseAVX() || !simdTree->gtGetOp2()->IsIntegralConstVector(0)) + { + info->internalIntCount = 1; + info->addInternalCandidates(lsra, lsra->allRegs(TYP_INT)); + } + } break; case SIMDIntrinsicDotProduct: @@ -2367,9 +2169,10 @@ void Lowering::TreeNodeInfoInitSIMD(GenTree* tree) // - the source SIMD struct // - index (which element to get) // The result is baseType of SIMD struct. + // op1 may be a contained memory op, but if so we will consume its address. info->srcCount = 0; - op1 = tree->gtOp.gtOp1; - op2 = tree->gtOp.gtOp2; + op1 = simdTree->gtOp.gtOp1; + op2 = simdTree->gtOp.gtOp2; // op2 may be a contained constant. if (!op2->isContained()) @@ -2579,7 +2382,7 @@ void Lowering::TreeNodeInfoInitCast(GenTree* tree) var_types castOpType = castOp->TypeGet(); info->srcCount = GetOperandSourceCount(castOp); - info->dstCount = 1; + assert(info->dstCount == 1); if (tree->gtFlags & GTF_UNSIGNED) { castOpType = genUnsignedType(castOpType); @@ -2615,25 +2418,11 @@ void Lowering::TreeNodeInfoInitGCWriteBarrier(GenTree* tree) GenTreePtr addr = dst->Addr(); GenTreePtr src = dst->Data(); - if (addr->OperGet() == GT_LEA) - { - // In the case where we are doing a helper assignment, if the dst - // is an indir through an lea, we need to actually instantiate the - // lea in a register - GenTreeAddrMode* lea = addr->AsAddrMode(); - - int leaSrcCount = 0; - if (lea->HasBase()) - { - leaSrcCount++; - } - if (lea->HasIndex()) - { - leaSrcCount++; - } - lea->gtLsraInfo.srcCount = leaSrcCount; - lea->gtLsraInfo.dstCount = 1; - } + // In the case where we are doing a helper assignment, we need to actually instantiate the + // address in a register. + assert(!addr->isContained()); + tree->gtLsraInfo.srcCount = 1 + GetIndirSourceCount(dst); + assert(tree->gtLsraInfo.dstCount == 0); bool useOptimizedWriteBarrierHelper = false; // By default, assume no optimized write barriers. @@ -2695,32 +2484,48 @@ void Lowering::TreeNodeInfoInitIndir(GenTreeIndir* indirTree) return; } - ContainCheckIndir(indirTree); - - GenTree* addr = indirTree->gtGetOp1(); TreeNodeInfo* info = &(indirTree->gtLsraInfo); - GenTreePtr base = nullptr; - GenTreePtr index = nullptr; - unsigned mul, cns; - bool rev; - info->srcCount = GetIndirSourceCount(indirTree); if (indirTree->gtOper == GT_STOREIND) { GenTree* source = indirTree->gtOp.gtOp2; if (indirTree->AsStoreInd()->IsRMWMemoryOp()) { + // Because 'source' is contained, we haven't yet determined its special register requirements, if any. + // As it happens, the Shift or Rotate cases are the only ones with special requirements. 
+ assert(source->isContained() && source->OperIsRMWMemOp()); + GenTree* nonMemSource = nullptr; + + if (source->OperIsShiftOrRotate()) + { + TreeNodeInfoInitShiftRotate(source); + } if (indirTree->AsStoreInd()->IsRMWDstOp1()) { if (source->OperIsBinary()) { - info->srcCount += GetOperandSourceCount(source->gtOp.gtOp2); + nonMemSource = source->gtOp.gtOp2; } } else if (indirTree->AsStoreInd()->IsRMWDstOp2()) { - info->srcCount += GetOperandSourceCount(source->gtOp.gtOp1); + nonMemSource = source->gtOp.gtOp1; + } + if (nonMemSource != nullptr) + { + info->srcCount += GetOperandSourceCount(nonMemSource); + assert(!nonMemSource->isContained() || (!nonMemSource->isMemoryOp() && !nonMemSource->IsLocal())); +#ifdef _TARGET_X86_ + if (varTypeIsByte(indirTree) && !nonMemSource->isContained()) + { + // If storeInd is of TYP_BYTE, set source to byteable registers. + regMaskTP regMask = nonMemSource->gtLsraInfo.getSrcCandidates(m_lsra); + regMask &= ~RBM_NON_BYTE_REGS; + assert(regMask != RBM_NONE); + nonMemSource->gtLsraInfo.setSrcCandidates(m_lsra, regMask); + } +#endif } } else @@ -2778,11 +2583,17 @@ void Lowering::TreeNodeInfoInitIndir(GenTreeIndir* indirTree) void Lowering::TreeNodeInfoInitCmp(GenTreePtr tree) { assert(tree->OperIsCompare() || tree->OperIs(GT_CMP)); - ContainCheckCompare(tree->AsOp()); TreeNodeInfo* info = &(tree->gtLsraInfo); info->srcCount = 0; - info->dstCount = tree->OperIs(GT_CMP) ? 0 : 1; + if (info->isNoRegCompare) + { + info->dstCount = 0; + } + else + { + assert((info->dstCount == 1) || tree->OperIs(GT_CMP)); + } #ifdef _TARGET_X86_ // If the compare is used by a jump, we just need to set the condition codes. If not, then we need @@ -2798,7 +2609,10 @@ void Lowering::TreeNodeInfoInitCmp(GenTreePtr tree) var_types op1Type = op1->TypeGet(); var_types op2Type = op2->TypeGet(); - info->srcCount += GetOperandSourceCount(op1); + if (!op1->gtLsraInfo.isNoRegCompare) + { + info->srcCount += GetOperandSourceCount(op1); + } info->srcCount += GetOperandSourceCount(op2); #if !defined(_TARGET_64BIT_) @@ -2816,166 +2630,6 @@ void Lowering::TreeNodeInfoInitCmp(GenTreePtr tree) #endif // !defined(_TARGET_64BIT_) } -//-------------------------------------------------------------------------------------------- -// TreeNodeInfoInitIfRMWMemOp: Checks to see if there is a RMW memory operation rooted at -// GT_STOREIND node and if so will mark register requirements for nodes under storeInd so -// that CodeGen will generate a single instruction of the form: -// -// binOp [addressing mode], reg -// -// Parameters -// storeInd - GT_STOREIND node -// -// Return value -// True, if RMW memory op tree pattern is recognized and op counts are set. -// False otherwise. -// -bool Lowering::TreeNodeInfoInitIfRMWMemOp(GenTreePtr storeInd) -{ - assert(storeInd->OperGet() == GT_STOREIND); - - // SSE2 doesn't support RMW on float values - assert(!varTypeIsFloating(storeInd)); - - // Terminology: - // indirDst = memory write of an addr mode (i.e. storeind destination) - // indirSrc = value being written to memory (i.e. storeind source which could a binary/unary op) - // indirCandidate = memory read i.e. a gtInd of an addr mode - // indirOpSource = source operand used in binary/unary op (i.e. 
source operand of indirSrc node) - - GenTreePtr indirCandidate = nullptr; - GenTreePtr indirOpSource = nullptr; - - if (!IsRMWMemOpRootedAtStoreInd(storeInd, &indirCandidate, &indirOpSource)) - { - JITDUMP("Lower of StoreInd didn't mark the node as self contained for reason: %d\n", - storeInd->AsStoreInd()->GetRMWStatus()); - DISPTREERANGE(BlockRange(), storeInd); - return false; - } - - GenTreePtr indirDst = storeInd->gtGetOp1(); - GenTreePtr indirSrc = storeInd->gtGetOp2(); - genTreeOps oper = indirSrc->OperGet(); - - // At this point we have successfully detected a RMW memory op of one of the following forms - // storeInd(indirDst, indirSrc(indirCandidate, indirOpSource)) OR - // storeInd(indirDst, indirSrc(indirOpSource, indirCandidate) in case of commutative operations OR - // storeInd(indirDst, indirSrc(indirCandidate) in case of unary operations - // - // Here indirSrc = one of the supported binary or unary operation for RMW of memory - // indirCandidate = a GT_IND node - // indirCandidateChild = operand of GT_IND indirCandidate - // - // The logic below essentially does the following - // Make indirOpSource contained. - // Make indirSrc contained. - // Make indirCandidate contained. - // Make indirCandidateChild contained. - // Make indirDst contained except when it is a GT_LCL_VAR or GT_CNS_INT that doesn't fit within addr - // base. - // Note that due to the way containment is supported, we accomplish some of the above by clearing operand counts - // and directly propagating them upward. - // - - TreeNodeInfo* info = &(storeInd->gtLsraInfo); - info->dstCount = 0; - - if (GenTree::OperIsBinary(oper)) - { - // On Xarch RMW operations require that the non-rmw operand be an immediate or in a register. - // Therefore, if we have previously marked the indirOpSource as a contained memory op while lowering - // the binary node, we need to reset that now. - if (IsContainableMemoryOp(indirOpSource, true)) - { - indirOpSource->ClearContained(); - } - assert(!indirOpSource->isContained() || indirOpSource->OperIsConst()); - JITDUMP("Lower succesfully detected an assignment of the form: *addrMode BinOp= source\n"); - info->srcCount = indirOpSource->gtLsraInfo.dstCount; - } - else - { - assert(GenTree::OperIsUnary(oper)); - JITDUMP("Lower succesfully detected an assignment of the form: *addrMode = UnaryOp(*addrMode)\n"); - info->srcCount = 0; - } - DISPTREERANGE(BlockRange(), storeInd); - - m_lsra->clearOperandCounts(indirSrc); - indirSrc->SetContained(); - m_lsra->clearOperandCounts(indirCandidate); - indirCandidate->SetContained(); - - GenTreePtr indirCandidateChild = indirCandidate->gtGetOp1(); - if (indirCandidateChild->OperGet() == GT_LEA) - { - GenTreeAddrMode* addrMode = indirCandidateChild->AsAddrMode(); - - if (addrMode->HasBase()) - { - assert(addrMode->Base()->OperIsLeaf()); - m_lsra->clearOperandCounts(addrMode->Base()); - addrMode->Base()->SetContained(); - info->srcCount++; - } - - if (addrMode->HasIndex()) - { - assert(addrMode->Index()->OperIsLeaf()); - m_lsra->clearOperandCounts(addrMode->Index()); - addrMode->Index()->SetContained(); - info->srcCount++; - } - - m_lsra->clearOperandCounts(indirDst); - indirDst->SetContained(); - } - else - { - assert(indirCandidateChild->OperGet() == GT_LCL_VAR || indirCandidateChild->OperGet() == GT_LCL_VAR_ADDR || - indirCandidateChild->OperGet() == GT_CLS_VAR_ADDR || indirCandidateChild->OperGet() == GT_CNS_INT); - - // If it is a GT_LCL_VAR, it still needs the reg to hold the address. 
- // We would still need a reg for GT_CNS_INT if it doesn't fit within addressing mode base. - // For GT_CLS_VAR_ADDR, we don't need a reg to hold the address, because field address value is known at jit - // time. Also, we don't need a reg for GT_CLS_VAR_ADDR. - if (indirCandidateChild->OperGet() == GT_LCL_VAR_ADDR || indirCandidateChild->OperGet() == GT_CLS_VAR_ADDR) - { - m_lsra->clearOperandCounts(indirDst); - indirDst->SetContained(); - } - else if (indirCandidateChild->IsCnsIntOrI() && indirCandidateChild->AsIntConCommon()->FitsInAddrBase(comp)) - { - m_lsra->clearOperandCounts(indirDst); - indirDst->SetContained(); - } - else - { - // Need a reg and hence increment src count of storeind - info->srcCount += indirCandidateChild->gtLsraInfo.dstCount; - } - } - m_lsra->clearOperandCounts(indirCandidateChild); - indirCandidateChild->SetContained(); - -#ifdef _TARGET_X86_ - if (varTypeIsByte(storeInd)) - { - // If storeInd is of TYP_BYTE, set indirOpSources to byteable registers. - bool containedNode = indirOpSource->gtLsraInfo.dstCount == 0; - if (!containedNode) - { - regMaskTP regMask = indirOpSource->gtLsraInfo.getSrcCandidates(m_lsra); - assert(regMask != RBM_NONE); - indirOpSource->gtLsraInfo.setSrcCandidates(m_lsra, regMask & ~RBM_NON_BYTE_REGS); - } - } -#endif - - return true; -} - //------------------------------------------------------------------------ // TreeNodeInfoInitMul: Set the NodeInfo for a multiply. // @@ -2987,20 +2641,17 @@ bool Lowering::TreeNodeInfoInitIfRMWMemOp(GenTreePtr storeInd) // void Lowering::TreeNodeInfoInitMul(GenTreePtr tree) { - ContainCheckMul(tree->AsOp()); - #if defined(_TARGET_X86_) assert(tree->OperIs(GT_MUL, GT_MULHI, GT_MUL_LONG)); #else assert(tree->OperIs(GT_MUL, GT_MULHI)); #endif TreeNodeInfo* info = &(tree->gtLsraInfo); - - GenTree* op1 = tree->gtOp.gtOp1; - GenTree* op2 = tree->gtOp.gtOp2; - info->srcCount = GetOperandSourceCount(op1); + GenTree* op1 = tree->gtOp.gtOp1; + GenTree* op2 = tree->gtOp.gtOp2; + info->srcCount = GetOperandSourceCount(op1); info->srcCount += GetOperandSourceCount(op2); - info->dstCount = 1; + assert(info->dstCount == 1); // Case of float/double mul. if (varTypeIsFloating(tree->TypeGet())) @@ -3171,7 +2822,7 @@ bool Lowering::ExcludeNonByteableRegisters(GenTree* tree) GenTree* op1 = simdNode->gtGetOp1(); GenTree* op2 = simdNode->gtGetOp2(); var_types baseType = simdNode->gtSIMDBaseType; - if (!IsContainableMemoryOp(op1, true) && op2->IsCnsIntOrI() && varTypeIsSmallInt(baseType)) + if (!IsContainableMemoryOp(op1) && op2->IsCnsIntOrI() && varTypeIsSmallInt(baseType)) { bool ZeroOrSignExtnReqd = true; unsigned baseSize = genTypeSize(baseType); @@ -3230,9 +2881,6 @@ int Lowering::GetOperandSourceCount(GenTree* node) if (node->OperIsIndir()) { const unsigned srcCount = GetIndirSourceCount(node->AsIndir()); - // TODO-Cleanup: Once we are doing containment analysis during Lowering, this - // can be removed, or changed to an assert. - node->gtLsraInfo.srcCount = 0; return srcCount; } diff --git a/src/jit/nodeinfo.h b/src/jit/nodeinfo.h index 5f03da2776..3f8532bd37 100644 --- a/src/jit/nodeinfo.h +++ b/src/jit/nodeinfo.h @@ -32,6 +32,7 @@ public: regOptional = false; definesAnyRegisters = false; isInternalRegDelayFree = false; + isNoRegCompare = false; #ifdef DEBUG isInitialized = false; #endif @@ -144,6 +145,9 @@ public: // in which result is produced. unsigned char isInternalRegDelayFree : 1; + // True if this is a compare feeding a JTRUE that doesn't need to be generated into a register. 
+    unsigned char isNoRegCompare : 1;
+
 #ifdef DEBUG
     // isInitialized is set when the tree node is handled.
     unsigned char isInitialized : 1;
diff --git a/src/jit/rationalize.cpp b/src/jit/rationalize.cpp
index 5244c7424d..257e02d491 100644
--- a/src/jit/rationalize.cpp
+++ b/src/jit/rationalize.cpp
@@ -516,6 +516,7 @@ void Rationalizer::RewriteAssignment(LIR::Use& use)
         location->gtType = TYP_BYREF;
     assignment->SetOper(GT_STOREIND);
+    assignment->AsStoreInd()->SetRMWStatusDefault();
     // TODO: JIT dump
 }
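
The hunks above repeatedly refer to the read-modify-write (RMW) store pattern that lowering now recognizes and marks as contained: STOREIND(addr, BINOP(IND(addr), operand)), which xarch can encode as a single "binOp [addressing mode], reg" instruction. The stand-alone sketch below illustrates only the tree shape being matched; Oper, Node, sameAddress and isRMWStore are illustrative stand-ins, not the GenTree API or IsRMWMemOpRootedAtStoreInd itself.

    // Minimal sketch of RMW store recognition over a toy tree (assumed types, not JIT types).
    #include <cstdio>

    enum class Oper { LclVarAddr, IntCon, Ind, Add, StoreInd };

    struct Node
    {
        Oper  oper;
        Node* op1    = nullptr; // address for Ind/StoreInd, first operand for Add
        Node* op2    = nullptr; // value for StoreInd, second operand for Add
        int   lclNum = -1;      // which local the address names (toy stand-in)
    };

    // Toy equivalence check: two address nodes match if they name the same local.
    static bool sameAddress(const Node* a, const Node* b)
    {
        return (a->oper == Oper::LclVarAddr) && (b->oper == Oper::LclVarAddr) && (a->lclNum == b->lclNum);
    }

    // True if 'store' has the shape STOREIND(addr, ADD(IND(addr), other)),
    // i.e. the location being read is the same location being written.
    static bool isRMWStore(const Node* store)
    {
        if ((store->oper != Oper::StoreInd) || (store->op2 == nullptr) || (store->op2->oper != Oper::Add))
            return false;
        const Node* src = store->op2;
        if ((src->op1 == nullptr) || (src->op2 == nullptr))
            return false;
        const Node* indir = (src->op1->oper == Oper::Ind) ? src->op1
                          : (src->op2->oper == Oper::Ind) ? src->op2
                                                          : nullptr;
        return (indir != nullptr) && sameAddress(indir->op1, store->op1);
    }

    int main()
    {
        // Build *V01 = *V01 + 1, the canonical RMW shape ("add dword ptr [V01], 1").
        Node addr1{Oper::LclVarAddr}; addr1.lclNum = 1;
        Node addr2{Oper::LclVarAddr}; addr2.lclNum = 1;
        Node load{Oper::Ind, &addr2};
        Node one{Oper::IntCon};
        Node add{Oper::Add, &load, &one};
        Node store{Oper::StoreInd, &addr1, &add};

        std::printf("RMW store recognized: %s\n", isRMWStore(&store) ? "yes" : "no");
        return 0;
    }

Recognizing this shape during Lowering, before dataflow and register allocation rerun, is what allows the indirection and its address to stay contained instead of each occupying a register.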
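
A second sketch, under the same caveat that the types are toy stand-ins rather than the JIT's TreeNodeInfo/GetOperandSourceCount interfaces, shows the source-count rule the reworked TreeNodeInfoInit code applies: a contained operand contributes no source of its own, while a contained indirection still contributes one source per address-mode register.

    // Assumed toy types; not the JIT's GenTree/TreeNodeInfo API.
    #include <cstdio>

    struct Operand
    {
        bool isContained = false;
        bool isIndir     = false; // a memory operand folded into its parent
        bool hasBase     = false; // address mode uses a base register
        bool hasIndex    = false; // address mode uses an index register
    };

    // Number of register sources the parent must count for this operand.
    static int operandSourceCount(const Operand& op)
    {
        if (!op.isContained)
            return 1; // produced into a register and consumed normally
        if (op.isIndir)
            return (op.hasBase ? 1 : 0) + (op.hasIndex ? 1 : 0); // only the address registers
        return 0; // contained constant/leaf: encoded directly into the instruction
    }

    int main()
    {
        Operand reg;                                  // plain register operand
        Operand imm; imm.isContained = true;          // contained immediate
        Operand mem; mem.isContained = true;          // contained [base] memory operand
        mem.isIndir = true;
        mem.hasBase = true;

        std::printf("srcCount for ADD(reg, [base]) = %d\n",
                    operandSourceCount(reg) + operandSourceCount(mem)); // 2
        std::printf("srcCount for ADD(reg, imm)    = %d\n",
                    operandSourceCount(reg) + operandSourceCount(imm)); // 1
        return 0;
    }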