author    Carol Eidt <carol.eidt@microsoft.com>    2017-08-17 10:04:09 -0700
committer GitHub <noreply@github.com>              2017-08-17 10:04:09 -0700
commit    5dada64d4bc9eddb63579650e1fce4f5ffda0294 (patch)
tree      73855a6301d2596a2a616b8ffc6b4d602f185576
parent    1fbd304b5fef76d1d53ca5d886821ef334646b32 (diff)
parent    0ad49b141f1d06c5e83ec58fa7f21c7d2f3714d3 (diff)
Merge pull request #13198 from CarolEidt/LowerContain
Move containment analysis to 1st phase of Lowering
 src/jit/codegenarmarch.cpp |    3
 src/jit/codegenxarch.cpp   |    3
 src/jit/compiler.h         |    5
 src/jit/flowgraph.cpp      |   11
 src/jit/gentree.cpp        |  123
 src/jit/gentree.h          |   32
 src/jit/lclvars.cpp        |   12
 src/jit/lir.h              |    7
 src/jit/liveness.cpp       |    9
 src/jit/lower.cpp          |  587
 src/jit/lower.h            |  108
 src/jit/lowerarmarch.cpp   |  122
 src/jit/lowerxarch.cpp     | 1031
 src/jit/lsra.cpp           |    6
 src/jit/lsra.h             |   23
 src/jit/lsraarm.cpp        |  120
 src/jit/lsraarm64.cpp      |  111
 src/jit/lsraarmarch.cpp    |  283
 src/jit/lsraxarch.cpp      |  916
 src/jit/nodeinfo.h         |    4
 src/jit/rationalize.cpp    |    1
 21 files changed, 1974 insertions(+), 1543 deletions(-)
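
For context on the change itself: in RyuJIT, a node is "contained" when its value is produced as part of its user's instruction rather than being evaluated into its own register. This PR moves that decision out of the LSRA-side TreeNodeInfoInit code and into the first phase of Lowering. A minimal sketch of what a contained indirection buys, assuming x64 codegen (illustrative source, not part of this diff):

    // Illustrative only: when the load of *p is marked contained, the x64 backend can
    // fold it into the add (something like "add eax, dword ptr [rdx]") instead of first
    // loading the value into a separate register and then adding.
    int AddFromMemory(int x, const int* p)
    {
        return x + *p;
    }
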
diff --git a/src/jit/codegenarmarch.cpp b/src/jit/codegenarmarch.cpp
index 47dc419dba..61381d4c78 100644
--- a/src/jit/codegenarmarch.cpp
+++ b/src/jit/codegenarmarch.cpp
@@ -265,7 +265,8 @@ void CodeGen::genCodeForTreeNode(GenTreePtr treeNode)
case GT_LIST:
case GT_FIELD_LIST:
case GT_ARGPLACE:
- // Nothing to do
+ // Should always be marked contained.
+ assert(!"LIST, FIELD_LIST and ARGPLACE nodes should always be marked contained.");
break;
case GT_PUTARG_STK:
diff --git a/src/jit/codegenxarch.cpp b/src/jit/codegenxarch.cpp
index 116b0f30cf..5d1160ac96 100644
--- a/src/jit/codegenxarch.cpp
+++ b/src/jit/codegenxarch.cpp
@@ -1851,7 +1851,8 @@ void CodeGen::genCodeForTreeNode(GenTreePtr treeNode)
case GT_LIST:
case GT_FIELD_LIST:
case GT_ARGPLACE:
- // Nothing to do
+ // Should always be marked contained.
+ assert(!"LIST, FIELD_LIST and ARGPLACE nodes should always be marked contained.");
break;
case GT_SWAP:
diff --git a/src/jit/compiler.h b/src/jit/compiler.h
index 65573188a0..2c6f9b203e 100644
--- a/src/jit/compiler.h
+++ b/src/jit/compiler.h
@@ -2028,7 +2028,7 @@ public:
GenTree* gtNewBlkOpNode(GenTreePtr dst, GenTreePtr srcOrFillVal, unsigned size, bool isVolatile, bool isCopyBlock);
- GenTree* gtNewPutArgReg(var_types type, GenTreePtr arg);
+ GenTree* gtNewPutArgReg(var_types type, GenTreePtr arg, regNumber argReg);
protected:
void gtBlockOpInit(GenTreePtr result, GenTreePtr dst, GenTreePtr srcOrFillVal, bool isVolatile);
@@ -2444,6 +2444,9 @@ public:
DNER_DepField, // It is a field of a dependently promoted struct
DNER_NoRegVars, // opts.compFlags & CLFLG_REGVAR is not set
DNER_MinOptsGC, // It is a GC Ref and we are compiling MinOpts
+#if !defined(LEGACY_BACKEND) && !defined(_TARGET_64BIT_)
+ DNER_LongParamField, // It is a decomposed field of a long parameter.
+#endif
#ifdef JIT32_GCENCODER
DNER_PinningRef,
#endif
diff --git a/src/jit/flowgraph.cpp b/src/jit/flowgraph.cpp
index 0cec40446a..91a478ca15 100644
--- a/src/jit/flowgraph.cpp
+++ b/src/jit/flowgraph.cpp
@@ -17,6 +17,9 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
#endif
#include "allocacheck.h" // for alloca
+#ifndef LEGACY_BACKEND
+#include "lower.h" // for LowerRange()
+#endif
/*****************************************************************************/
@@ -13477,6 +13480,10 @@ bool Compiler::fgOptimizeEmptyBlock(BasicBlock* block)
if (block->IsLIR())
{
LIR::AsRange(block).InsertAtEnd(nop);
+#ifndef LEGACY_BACKEND
+ LIR::ReadOnlyRange range(nop, nop);
+ m_pLowering->LowerRange(block, range);
+#endif
}
else
{
@@ -13796,6 +13803,10 @@ bool Compiler::fgOptimizeSwitchBranches(BasicBlock* block)
if (block->IsLIR())
{
blockRange->InsertAfter(switchVal, zeroConstNode, condNode);
+#ifndef LEGACY_BACKEND
+ LIR::ReadOnlyRange range(zeroConstNode, switchTree);
+ m_pLowering->LowerRange(block, range);
+#endif // !LEGACY_BACKEND
}
else
{
diff --git a/src/jit/gentree.cpp b/src/jit/gentree.cpp
index 08ba61e012..2f023118a8 100644
--- a/src/jit/gentree.cpp
+++ b/src/jit/gentree.cpp
@@ -610,42 +610,6 @@ void Compiler::fgWalkAllTreesPre(fgWalkPreFn* visitor, void* pCallBackData)
}
}
-// ------------------------------------------------------------------------------------------
-// gtClearReg: Sets the register to the "no register assignment" value, depending upon
-// the type of the node, and whether it fits any of the special cases for register pairs
-// or multi-reg call nodes.
-//
-// Arguments:
-// compiler - compiler instance
-//
-// Return Value:
-// None
-void GenTree::gtClearReg(Compiler* compiler)
-{
-#if CPU_LONG_USES_REGPAIR
- if (isRegPairType(TypeGet()) ||
- // (IsLocal() && isRegPairType(compiler->lvaTable[gtLclVarCommon.gtLclNum].TypeGet())) ||
- (OperGet() == GT_MUL && (gtFlags & GTF_MUL_64RSLT)))
- {
- gtRegPair = REG_PAIR_NONE;
- }
- else
-#endif // CPU_LONG_USES_REGPAIR
- {
- gtRegNum = REG_NA;
- }
-
- // Also clear multi-reg state if this is a call node
- if (IsCall())
- {
- this->AsCall()->ClearOtherRegs();
- }
- else if (IsCopyOrReload())
- {
- this->AsCopyOrReload()->ClearOtherRegs();
- }
-}
-
//-----------------------------------------------------------
// CopyReg: Copy the _gtRegNum/_gtRegPair/gtRegTag fields.
//
@@ -704,56 +668,52 @@ bool GenTree::gtHasReg() const
{
assert(_gtRegNum != REG_NA);
INDEBUG(assert(gtRegTag == GT_REGTAG_REGPAIR));
- hasReg = (gtRegPair != REG_PAIR_NONE);
+ return (gtRegPair != REG_PAIR_NONE);
}
- else
+ assert(_gtRegNum != REG_PAIR_NONE);
+ INDEBUG(assert(gtRegTag == GT_REGTAG_REG));
#endif
+ if (IsMultiRegCall())
{
- assert(_gtRegNum != REG_PAIR_NONE);
- INDEBUG(assert(gtRegTag == GT_REGTAG_REG));
+ // Have to cast away const-ness because GetReturnTypeDesc() is a non-const method
+ GenTree* tree = const_cast<GenTree*>(this);
+ GenTreeCall* call = tree->AsCall();
+ unsigned regCount = call->GetReturnTypeDesc()->GetReturnRegCount();
+ hasReg = false;
- if (IsMultiRegCall())
+ // A Multi-reg call node is said to have regs, if it has
+ // reg assigned to each of its result registers.
+ for (unsigned i = 0; i < regCount; ++i)
{
- // Has to cast away const-ness because GetReturnTypeDesc() is a non-const method
- GenTree* tree = const_cast<GenTree*>(this);
- GenTreeCall* call = tree->AsCall();
- unsigned regCount = call->GetReturnTypeDesc()->GetReturnRegCount();
- hasReg = false;
-
- // A Multi-reg call node is said to have regs, if it has
- // reg assigned to each of its result registers.
- for (unsigned i = 0; i < regCount; ++i)
+ hasReg = (call->GetRegNumByIdx(i) != REG_NA);
+ if (!hasReg)
{
- hasReg = (call->GetRegNumByIdx(i) != REG_NA);
- if (!hasReg)
- {
- break;
- }
+ break;
}
}
- else if (IsCopyOrReloadOfMultiRegCall())
- {
- GenTree* tree = const_cast<GenTree*>(this);
- GenTreeCopyOrReload* copyOrReload = tree->AsCopyOrReload();
- GenTreeCall* call = copyOrReload->gtGetOp1()->AsCall();
- unsigned regCount = call->GetReturnTypeDesc()->GetReturnRegCount();
- hasReg = false;
+ }
+ else if (IsCopyOrReloadOfMultiRegCall())
+ {
+ GenTree* tree = const_cast<GenTree*>(this);
+ GenTreeCopyOrReload* copyOrReload = tree->AsCopyOrReload();
+ GenTreeCall* call = copyOrReload->gtGetOp1()->AsCall();
+ unsigned regCount = call->GetReturnTypeDesc()->GetReturnRegCount();
+ hasReg = false;
- // A Multi-reg copy or reload node is said to have regs,
- // if it has valid regs in any of the positions.
- for (unsigned i = 0; i < regCount; ++i)
+ // A Multi-reg copy or reload node is said to have regs,
+ // if it has valid regs in any of the positions.
+ for (unsigned i = 0; i < regCount; ++i)
+ {
+ hasReg = (copyOrReload->GetRegNumByIdx(i) != REG_NA);
+ if (hasReg)
{
- hasReg = (copyOrReload->GetRegNumByIdx(i) != REG_NA);
- if (hasReg)
- {
- break;
- }
+ break;
}
}
- else
- {
- hasReg = (gtRegNum != REG_NA);
- }
+ }
+ else
+ {
+ hasReg = (gtRegNum != REG_NA);
}
return hasReg;
@@ -7006,8 +6966,9 @@ GenTree* Compiler::gtNewBlkOpNode(
// gtNewPutArgReg: Creates a new PutArgReg node.
//
// Arguments:
-// type - The actual type of the argument
-// arg - The argument node
+// type - The actual type of the argument
+// arg - The argument node
+// argReg - The register that the argument will be passed in
//
// Return Value:
// Returns the newly created PutArgReg node.
@@ -7015,7 +6976,7 @@ GenTree* Compiler::gtNewBlkOpNode(
// Notes:
// The node is generated as GenTreeMultiRegOp on armel, as GenTreeOp on all the other archs
//
-GenTreePtr Compiler::gtNewPutArgReg(var_types type, GenTreePtr arg)
+GenTreePtr Compiler::gtNewPutArgReg(var_types type, GenTreePtr arg, regNumber argReg)
{
assert(arg != nullptr);
@@ -7031,6 +6992,7 @@ GenTreePtr Compiler::gtNewPutArgReg(var_types type, GenTreePtr arg)
{
node = gtNewOperNode(GT_PUTARG_REG, type, arg);
}
+ node->gtRegNum = argReg;
return node;
}
@@ -15233,10 +15195,13 @@ bool GenTree::isContained() const
}
// these actually produce a register (the flags reg, we just don't model it)
- // and are a separate instruction from the branch that consumes the result
+ // and are a separate instruction from the branch that consumes the result.
+ // They can only produce a result if the child is a SIMD equality comparison.
else if (OperKind() & GTK_RELOP)
{
- assert(!isMarkedContained);
+ // We have to cast away const-ness since AsOp() method is non-const.
+ GenTree* childNode = const_cast<GenTree*>(this)->AsOp()->gtOp1;
+ assert((isMarkedContained == false) || childNode->IsSIMDEqualityOrInequality());
}
// these either produce a result in register or set flags reg.
diff --git a/src/jit/gentree.h b/src/jit/gentree.h
index afb835e775..6a87dfbee6 100644
--- a/src/jit/gentree.h
+++ b/src/jit/gentree.h
@@ -727,9 +727,6 @@ public:
// Copy the _gtRegNum/_gtRegPair/gtRegTag fields
void CopyReg(GenTreePtr from);
-
- void gtClearReg(Compiler* compiler);
-
bool gtHasReg() const;
regMaskTP gtGetRegMask() const;
@@ -1437,6 +1434,20 @@ public:
|| OperIsShiftOrRotate(op);
}
+#ifdef _TARGET_XARCH_
+ static bool OperIsRMWMemOp(genTreeOps gtOper)
+ {
+ // Return if binary op is one of the supported operations for RMW of memory.
+ return (gtOper == GT_ADD || gtOper == GT_SUB || gtOper == GT_AND || gtOper == GT_OR || gtOper == GT_XOR ||
+ gtOper == GT_NOT || gtOper == GT_NEG || OperIsShiftOrRotate(gtOper));
+ }
+ bool OperIsRMWMemOp() const
+ {
+ // Return if binary op is one of the supported operations for RMW of memory.
+ return OperIsRMWMemOp(gtOper);
+ }
+#endif // _TARGET_XARCH_
+
#if !defined(LEGACY_BACKEND) && !defined(_TARGET_64BIT_)
static bool OperIsHigh(genTreeOps gtOper)
{
@@ -1998,13 +2009,15 @@ public:
void SetContained()
{
+ assert(IsValue());
gtFlags |= GTF_CONTAINED;
}
void ClearContained()
{
+ assert(IsValue());
gtFlags &= ~GTF_CONTAINED;
- gtLsraInfo.regOptional = false;
+ ClearRegOptional();
}
#endif // !LEGACY_BACKEND
@@ -2114,6 +2127,12 @@ public:
// that codegen can still generate code even if it wasn't allocated a
// register.
bool IsRegOptional() const;
+#ifndef LEGACY_BACKEND
+ void ClearRegOptional()
+ {
+ gtLsraInfo.regOptional = false;
+ }
+#endif
// Returns "true" iff "this" is a phi-related node (i.e. a GT_PHI_ARG, GT_PHI, or a PhiDefn).
bool IsPhiNode();
@@ -3112,6 +3131,11 @@ struct GenTreeFieldList : public GenTreeArgList
{
prevList->gtOp2 = this;
}
+#ifndef LEGACY_BACKEND
+ // A GT_FIELD_LIST is always contained. Note that this should only matter for the head node, but
+ // the list may be reordered.
+ gtFlags |= GTF_CONTAINED;
+#endif
}
};
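
The OperIsRMWMemOp helper added to gentree.h above classifies the operators that xarch can perform directly on a memory operand. A hedged sketch of the source pattern it is aimed at (illustrative, not part of this diff):

    // Illustrative only: a read-modify-write store of the form
    // STOREIND(addr, ADD(IND(addr), value)) can be emitted as a single
    // "add dword ptr [rcx], edx" on xarch instead of a load, add, and store.
    void RmwAdd(int* p, int v)
    {
        *p += v;
    }
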
diff --git a/src/jit/lclvars.cpp b/src/jit/lclvars.cpp
index 065753af1a..fe8aaac35c 100644
--- a/src/jit/lclvars.cpp
+++ b/src/jit/lclvars.cpp
@@ -2035,7 +2035,12 @@ void Compiler::lvaPromoteLongVars()
fieldVarDsc->lvFldOffset = (unsigned char)(index * genTypeSize(TYP_INT));
fieldVarDsc->lvFldOrdinal = (unsigned char)index;
fieldVarDsc->lvParentLcl = lclNum;
- fieldVarDsc->lvIsParam = isParam;
+ // Currently we do not support enregistering incoming promoted aggregates with more than one field.
+ if (isParam)
+ {
+ fieldVarDsc->lvIsParam = true;
+ lvaSetVarDoNotEnregister(varNum DEBUGARG(DNER_LongParamField));
+ }
}
}
@@ -2170,6 +2175,11 @@ void Compiler::lvaSetVarDoNotEnregister(unsigned varNum DEBUGARG(DoNotEnregister
assert(varDsc->lvPinned);
break;
#endif
+#if !defined(LEGACY_BACKEND) && !defined(_TARGET_64BIT_)
+ case DNER_LongParamField:
+ JITDUMP("it is a decomposed field of a long parameter\n");
+ break;
+#endif
default:
unreached();
break;
diff --git a/src/jit/lir.h b/src/jit/lir.h
index 762c79c3c3..4a71947be7 100644
--- a/src/jit/lir.h
+++ b/src/jit/lir.h
@@ -112,12 +112,12 @@ public:
GenTree* m_firstNode;
GenTree* m_lastNode;
- ReadOnlyRange(GenTree* firstNode, GenTree* lastNode);
-
ReadOnlyRange(const ReadOnlyRange& other) = delete;
ReadOnlyRange& operator=(const ReadOnlyRange& other) = delete;
public:
+ ReadOnlyRange(GenTree* firstNode, GenTree* lastNode);
+
class Iterator
{
friend class ReadOnlyRange;
@@ -312,6 +312,9 @@ public:
inline void GenTree::SetUnusedValue()
{
gtLIRFlags |= LIR::Flags::UnusedValue;
+#ifndef LEGACY_BACKEND
+ ClearContained();
+#endif
}
inline void GenTree::ClearUnusedValue()
diff --git a/src/jit/liveness.cpp b/src/jit/liveness.cpp
index 4b8d602aac..089666c83b 100644
--- a/src/jit/liveness.cpp
+++ b/src/jit/liveness.cpp
@@ -1031,7 +1031,7 @@ void Compiler::fgExtendDbgLifetimes()
#if !defined(_TARGET_64BIT_)
DecomposeLongs::DecomposeRange(this, blockWeight, initRange);
#endif // !defined(_TARGET_64BIT_)
- m_pLowering->LowerRange(std::move(initRange));
+ m_pLowering->LowerRange(block, initRange);
#endif // !LEGACY_BACKEND
// Naively inserting the initializer at the end of the block may add code after the block's
@@ -2348,6 +2348,8 @@ bool Compiler::fgTryRemoveDeadLIRStore(LIR::Range& blockRange, GenTree* node, Ge
store = addrUse.User();
value = store->gtGetOp2();
}
+ JITDUMP("Liveness is removing a dead store:\n");
+ DISPNODE(store);
bool isClosed = false;
unsigned sideEffects = 0;
@@ -2357,6 +2359,8 @@ bool Compiler::fgTryRemoveDeadLIRStore(LIR::Range& blockRange, GenTree* node, Ge
{
// If the range of the operands contains unrelated code or if it contains any side effects,
// do not remove it. Instead, just remove the store.
+ JITDUMP(" Marking operands as unused:\n");
+ DISPRANGE(operandsRange);
store->VisitOperands([](GenTree* operand) -> GenTree::VisitResult {
operand->SetUnusedValue();
@@ -2372,6 +2376,8 @@ bool Compiler::fgTryRemoveDeadLIRStore(LIR::Range& blockRange, GenTree* node, Ge
// Compute the next node to process. Note that we must be careful not to set the next node to
// process to a node that we are about to remove.
+ JITDUMP(" Deleting operands:\n");
+ DISPRANGE(operandsRange);
if (node->OperIsLocalStore())
{
assert(node == store);
@@ -2385,6 +2391,7 @@ bool Compiler::fgTryRemoveDeadLIRStore(LIR::Range& blockRange, GenTree* node, Ge
blockRange.Delete(this, compCurBB, std::move(operandsRange));
}
+ JITDUMP("\n");
// If the store is marked as a late argument, it is referenced by a call. Instead of removing it,
// bash it to a NOP.
diff --git a/src/jit/lower.cpp b/src/jit/lower.cpp
index 1d23349d29..f40c4aa5a9 100644
--- a/src/jit/lower.cpp
+++ b/src/jit/lower.cpp
@@ -44,7 +44,6 @@ void Lowering::MakeSrcContained(GenTreePtr parentNode, GenTreePtr childNode)
assert(!parentNode->OperIsLeaf());
assert(childNode->canBeContained());
childNode->SetContained();
- m_lsra->clearOperandCounts(childNode);
}
//------------------------------------------------------------------------
@@ -103,7 +102,6 @@ bool Lowering::IsSafeToContainMem(GenTree* parentNode, GenTree* childNode)
//
// Arguments:
// node - the node of interest.
-// useTracked - true if this is being called after liveness so lvTracked is correct
//
// Return value:
// True if this will definitely be a memory reference that could be contained.
@@ -113,11 +111,11 @@ bool Lowering::IsSafeToContainMem(GenTree* parentNode, GenTree* childNode)
// the case of doNotEnregister local. This won't include locals that
// for some other reason do not become register candidates, nor those that get
// spilled.
-// Also, if we call this before we redo liveness analysis, any new lclVars
+// Also, because we usually call this before we redo dataflow, any new lclVars
// introduced after the last dataflow analysis will not yet be marked lvTracked,
// so we don't use that.
//
-bool Lowering::IsContainableMemoryOp(GenTree* node, bool useTracked)
+bool Lowering::IsContainableMemoryOp(GenTree* node)
{
#ifdef _TARGET_XARCH_
if (node->isMemoryOp())
@@ -131,7 +129,7 @@ bool Lowering::IsContainableMemoryOp(GenTree* node, bool useTracked)
return true;
}
LclVarDsc* varDsc = &comp->lvaTable[node->AsLclVar()->gtLclNum];
- return (varDsc->lvDoNotEnregister || (useTracked && !varDsc->lvTracked));
+ return varDsc->lvDoNotEnregister;
}
#endif // _TARGET_XARCH_
return false;
@@ -147,18 +145,64 @@ GenTree* Lowering::LowerNode(GenTree* node)
{
case GT_IND:
TryCreateAddrMode(LIR::Use(BlockRange(), &node->gtOp.gtOp1, node), true);
+ ContainCheckIndir(node->AsIndir());
break;
case GT_STOREIND:
- LowerStoreInd(node);
+ TryCreateAddrMode(LIR::Use(BlockRange(), &node->gtOp.gtOp1, node), true);
+ if (!comp->codeGen->gcInfo.gcIsWriteBarrierAsgNode(node))
+ {
+ LowerStoreIndir(node->AsIndir());
+ }
break;
case GT_ADD:
- return LowerAdd(node);
+ {
+ GenTree* afterTransform = LowerAdd(node);
+ if (afterTransform != nullptr)
+ {
+ return afterTransform;
+ }
+ __fallthrough;
+ }
+
+#if !defined(_TARGET_64BIT_)
+ case GT_ADD_LO:
+ case GT_ADD_HI:
+ case GT_SUB_LO:
+ case GT_SUB_HI:
+#endif
+ case GT_SUB:
+ case GT_AND:
+ case GT_OR:
+ case GT_XOR:
+ ContainCheckBinary(node->AsOp());
+ break;
+
+#ifdef _TARGET_XARCH_
+ case GT_NEG:
+ // Codegen of this tree node sets ZF and SF flags.
+ if (!varTypeIsFloating(node))
+ {
+ node->gtFlags |= GTF_ZSF_SET;
+ }
+ break;
+#endif // _TARGET_XARCH_
+
+ case GT_MUL:
+ case GT_MULHI:
+#if defined(_TARGET_X86_) && !defined(LEGACY_BACKEND)
+ case GT_MUL_LONG:
+#endif
+ ContainCheckMul(node->AsOp());
+ break;
case GT_UDIV:
case GT_UMOD:
- return LowerUnsignedDivOrMod(node->AsOp());
+ if (!LowerUnsignedDivOrMod(node->AsOp()))
+ {
+ ContainCheckDivOrMod(node->AsOp());
+ }
break;
case GT_DIV:
@@ -178,9 +222,16 @@ GenTree* Lowering::LowerNode(GenTree* node)
case GT_GE:
case GT_EQ:
case GT_NE:
+ case GT_TEST_EQ:
+ case GT_TEST_NE:
+ case GT_CMP:
LowerCompare(node);
break;
+ case GT_JTRUE:
+ ContainCheckJTrue(node->AsOp());
+ break;
+
case GT_JMP:
LowerJmpMethod(node);
break;
@@ -189,68 +240,76 @@ GenTree* Lowering::LowerNode(GenTree* node)
LowerRet(node);
break;
+ case GT_RETURNTRAP:
+ ContainCheckReturnTrap(node->AsOp());
+ break;
+
case GT_CAST:
LowerCast(node);
break;
+#ifdef _TARGET_XARCH_
+ case GT_ARR_BOUNDS_CHECK:
+#ifdef FEATURE_SIMD
+ case GT_SIMD_CHK:
+#endif // FEATURE_SIMD
+ ContainCheckBoundsChk(node->AsBoundsChk());
+ break;
+#endif // _TARGET_XARCH_
case GT_ARR_ELEM:
return LowerArrElem(node);
+ case GT_ARR_OFFSET:
+ ContainCheckArrOffset(node->AsArrOffs());
+ break;
+
case GT_ROL:
case GT_ROR:
LowerRotate(node);
break;
-#ifdef _TARGET_XARCH_
+#ifndef _TARGET_64BIT_
+ case GT_LSH_HI:
+ case GT_RSH_LO:
+ ContainCheckShiftRotate(node->AsOp());
+ break;
+#endif // !_TARGET_64BIT_
+
case GT_LSH:
case GT_RSH:
case GT_RSZ:
+#ifdef _TARGET_XARCH_
LowerShift(node->AsOp());
- break;
+#else
+ ContainCheckShiftRotate(node->AsOp());
#endif
+ break;
case GT_STORE_BLK:
case GT_STORE_OBJ:
case GT_STORE_DYN_BLK:
{
- // TODO-Cleanup: Consider moving this code to LowerBlockStore, which is currently
- // called from TreeNodeInfoInitBlockStore, and calling that method here.
GenTreeBlk* blkNode = node->AsBlk();
TryCreateAddrMode(LIR::Use(BlockRange(), &blkNode->Addr(), blkNode), false);
+ LowerBlockStore(blkNode);
}
break;
-#ifdef FEATURE_SIMD
- case GT_SIMD:
- if (node->TypeGet() == TYP_SIMD12)
- {
- // GT_SIMD node requiring to produce TYP_SIMD12 in fact
- // produces a TYP_SIMD16 result
- node->gtType = TYP_SIMD16;
- }
+ case GT_LCLHEAP:
+ ContainCheckLclHeap(node->AsOp());
+ break;
#ifdef _TARGET_XARCH_
- if ((node->AsSIMD()->gtSIMDIntrinsicID == SIMDIntrinsicGetItem) && (node->gtGetOp1()->OperGet() == GT_IND))
- {
- // If SIMD vector is already in memory, we force its
- // addr to be evaluated into a reg. This would allow
- // us to generate [regBase] or [regBase+offset] or
- // [regBase+sizeOf(SIMD vector baseType)*regIndex]
- // to access the required SIMD vector element directly
- // from memory.
- //
- // TODO-CQ-XARCH: If addr of GT_IND is GT_LEA, we
- // might be able update GT_LEA to fold the regIndex
- // or offset in some cases. Instead with this
- // approach we always evaluate GT_LEA into a reg.
- // Ideally, we should be able to lower GetItem intrinsic
- // into GT_IND(newAddr) where newAddr combines
- // the addr of SIMD vector with the given index.
- node->gtOp.gtOp1->gtFlags |= GTF_IND_REQ_ADDR_IN_REG;
- }
-#endif
+ case GT_INTRINSIC:
+ ContainCheckIntrinsic(node->AsOp());
break;
-#endif // FEATURE_SIMD
+#endif // _TARGET_XARCH_
+
+#ifdef FEATURE_SIMD
+ case GT_SIMD:
+ LowerSIMD(node->AsSIMD());
+ break;
+#endif // FEATURE_SIMD
case GT_LCL_VAR:
WidenSIMD12IfNecessary(node->AsLclVarCommon());
@@ -266,7 +325,6 @@ GenTree* Lowering::LowerNode(GenTree* node)
new (comp, GT_BITCAST) GenTreeOp(GT_BITCAST, store->TypeGet(), store->gtOp1, nullptr);
store->gtOp1 = bitcast;
BlockRange().InsertBefore(store, bitcast);
- break;
}
}
#endif // _TARGET_AMD64_
@@ -289,6 +347,10 @@ GenTree* Lowering::LowerNode(GenTree* node)
LowerStoreLoc(node->AsLclVarCommon());
break;
+ case GT_LOCKADD:
+ CheckImmedAndMakeContained(node, node->gtOp.gtOp2);
+ break;
+
default:
break;
}
@@ -445,7 +507,7 @@ GenTree* Lowering::LowerSwitch(GenTree* node)
unsigned blockWeight = originalSwitchBB->getBBWeight(comp);
LIR::Use use(switchBBRange, &(node->gtOp.gtOp1), node);
- use.ReplaceWithLclVar(comp, blockWeight);
+ ReplaceWithLclVar(use);
// GT_SWITCH(indexExpression) is now two statements:
// 1. a statement containing 'asg' (for temp = indexExpression)
@@ -907,7 +969,7 @@ GenTreePtr Lowering::NewPutArg(GenTreeCall* call, GenTreePtr arg, fgArgTabEntryP
//
// clang-format on
- putArg = comp->gtNewPutArgReg(type, arg);
+ putArg = comp->gtNewPutArgReg(type, arg, info->regNum);
}
else if (info->structDesc.eightByteCount == 2)
{
@@ -953,10 +1015,13 @@ GenTreePtr Lowering::NewPutArg(GenTreeCall* call, GenTreePtr arg, fgArgTabEntryP
GenTreePtr newOper = comp->gtNewPutArgReg(
comp->GetTypeFromClassificationAndSizes(info->structDesc.eightByteClassifications[ctr],
info->structDesc.eightByteSizes[ctr]),
- fieldListPtr->gtOp.gtOp1);
+ fieldListPtr->gtOp.gtOp1, (ctr == 0) ? info->regNum : info->otherRegNum);
// Splice in the new GT_PUTARG_REG node in the GT_FIELD_LIST
ReplaceArgWithPutArgOrCopy(&fieldListPtr->gtOp.gtOp1, newOper);
+
+ // Initialize all the gtRegNum's since the list won't be traversed in an LIR traversal.
+ fieldListPtr->gtRegNum = REG_NA;
}
// Just return arg. The GT_FIELD_LIST is not replaced.
@@ -979,16 +1044,31 @@ GenTreePtr Lowering::NewPutArg(GenTreeCall* call, GenTreePtr arg, fgArgTabEntryP
GenTreeFieldList* fieldListPtr = arg->AsFieldList();
assert(fieldListPtr->IsFieldListHead());
+ // There could be up to 2-4 PUTARG_REGs in the list (3 or 4 can only occur for HFAs)
+ regNumber argReg = info->regNum;
for (unsigned ctr = 0; fieldListPtr != nullptr; fieldListPtr = fieldListPtr->Rest(), ctr++)
{
GenTreePtr curOp = fieldListPtr->gtOp.gtOp1;
var_types curTyp = curOp->TypeGet();
// Create a new GT_PUTARG_REG node with op1
- GenTreePtr newOper = comp->gtNewPutArgReg(curTyp, curOp);
+ GenTreePtr newOper = comp->gtNewPutArgReg(curTyp, curOp, argReg);
// Splice in the new GT_PUTARG_REG node in the GT_FIELD_LIST
ReplaceArgWithPutArgOrCopy(&fieldListPtr->gtOp.gtOp1, newOper);
+
+ // Update argReg for the next putarg_reg (if any)
+ argReg = genRegArgNext(argReg);
+
+#if defined(_TARGET_ARM_)
+ // A double register is modelled as an even-numbered single one
+ if (fieldListPtr->Current()->TypeGet() == TYP_DOUBLE)
+ {
+ argReg = genRegArgNext(argReg);
+ }
+#endif // _TARGET_ARM_
+ // Initialize all the gtRegNum's since the list won't be traversed in an LIR traversal.
+ fieldListPtr->gtRegNum = REG_NA;
}
// Just return arg. The GT_FIELD_LIST is not replaced.
@@ -999,7 +1079,7 @@ GenTreePtr Lowering::NewPutArg(GenTreeCall* call, GenTreePtr arg, fgArgTabEntryP
#endif // FEATURE_MULTIREG_ARGS
#endif // not defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
{
- putArg = comp->gtNewPutArgReg(type, arg);
+ putArg = comp->gtNewPutArgReg(type, arg, info->regNum);
}
}
else
@@ -1195,7 +1275,8 @@ void Lowering::LowerArg(GenTreeCall* call, GenTreePtr* ppArg)
GenTreeFieldList* fieldList = new (comp, GT_FIELD_LIST) GenTreeFieldList(argLo, 0, TYP_INT, nullptr);
(void)new (comp, GT_FIELD_LIST) GenTreeFieldList(argHi, 4, TYP_INT, fieldList);
- putArg = NewPutArg(call, fieldList, info, TYP_VOID);
+ putArg = NewPutArg(call, fieldList, info, TYP_VOID);
+ putArg->gtRegNum = info->regNum;
BlockRange().InsertBefore(arg, putArg);
BlockRange().Remove(arg);
@@ -1214,7 +1295,8 @@ void Lowering::LowerArg(GenTreeCall* call, GenTreePtr* ppArg)
GenTreeFieldList* fieldList = new (comp, GT_FIELD_LIST) GenTreeFieldList(argLo, 0, TYP_INT, nullptr);
// Only the first fieldList node (GTF_FIELD_LIST_HEAD) is in the instruction sequence.
(void)new (comp, GT_FIELD_LIST) GenTreeFieldList(argHi, 4, TYP_INT, fieldList);
- putArg = NewPutArg(call, fieldList, info, TYP_VOID);
+ putArg = NewPutArg(call, fieldList, info, TYP_VOID);
+ putArg->gtRegNum = info->regNum;
// We can't call ReplaceArgWithPutArgOrCopy here because it presumes that we are keeping the original arg.
BlockRange().InsertBefore(arg, fieldList, putArg);
@@ -1318,6 +1400,7 @@ void Lowering::LowerCall(GenTree* node)
DISPTREERANGE(BlockRange(), call);
JITDUMP("\n");
+ call->ClearOtherRegs();
LowerArgsForCall(call);
// note that everything generated from this point on runs AFTER the outgoing args are placed
@@ -1420,6 +1503,7 @@ void Lowering::LowerCall(GenTree* node)
}
}
+ ContainCheckRange(resultRange);
BlockRange().InsertBefore(insertionPoint, std::move(resultRange));
call->gtControlExpr = result;
@@ -1430,6 +1514,7 @@ void Lowering::LowerCall(GenTree* node)
CheckVSQuirkStackPaddingNeeded(call);
}
+ ContainCheckCallOperands(call);
JITDUMP("lowering call (after):\n");
DISPTREERANGE(BlockRange(), call);
JITDUMP("\n");
@@ -1817,6 +1902,7 @@ void Lowering::LowerFastTailCall(GenTreeCall* call)
GenTreeLclVar* local =
new (comp, GT_LCL_VAR) GenTreeLclVar(GT_LCL_VAR, tmpType, callerArgLclNum, BAD_IL_OFFSET);
GenTree* assignExpr = comp->gtNewTempAssign(tmpLclNum, local);
+ ContainCheckRange(local, assignExpr);
BlockRange().InsertBefore(firstPutArgStk, LIR::SeqTree(comp, assignExpr));
}
}
@@ -1959,6 +2045,7 @@ GenTree* Lowering::LowerTailCallViaHelper(GenTreeCall* call, GenTree* callTarget
assert(argEntry->node->gtOper == GT_PUTARG_REG);
GenTree* secondArg = argEntry->node->gtOp.gtOp1;
+ ContainCheckRange(callTargetRange);
BlockRange().InsertAfter(secondArg, std::move(callTargetRange));
bool isClosed;
@@ -1987,6 +2074,7 @@ GenTree* Lowering::LowerTailCallViaHelper(GenTreeCall* call, GenTree* callTarget
assert(argEntry->node->gtOper == GT_PUTARG_STK);
GenTree* arg0 = argEntry->node->gtOp.gtOp1;
+ ContainCheckRange(callTargetRange);
BlockRange().InsertAfter(arg0, std::move(callTargetRange));
bool isClosed;
@@ -2116,6 +2204,7 @@ void Lowering::LowerCompare(GenTree* cmp)
{
loCmp = comp->gtNewOperNode(GT_XOR, TYP_INT, loSrc1, loSrc2);
BlockRange().InsertBefore(cmp, loCmp);
+ ContainCheckBinary(loCmp->AsOp());
}
if (hiSrc1->OperIs(GT_CNS_INT))
@@ -2132,10 +2221,12 @@ void Lowering::LowerCompare(GenTree* cmp)
{
hiCmp = comp->gtNewOperNode(GT_XOR, TYP_INT, hiSrc1, hiSrc2);
BlockRange().InsertBefore(cmp, hiCmp);
+ ContainCheckBinary(hiCmp->AsOp());
}
hiCmp = comp->gtNewOperNode(GT_OR, TYP_INT, loCmp, hiCmp);
BlockRange().InsertBefore(cmp, hiCmp);
+ ContainCheckBinary(hiCmp->AsOp());
}
else
{
@@ -2220,12 +2311,15 @@ void Lowering::LowerCompare(GenTree* cmp)
hiCmp = comp->gtNewOperNode(GT_CMP, TYP_VOID, hiSrc1, hiSrc2);
BlockRange().InsertBefore(cmp, hiCmp);
+ ContainCheckCompare(hiCmp->AsOp());
}
else
{
loCmp = comp->gtNewOperNode(GT_CMP, TYP_VOID, loSrc1, loSrc2);
hiCmp = comp->gtNewOperNode(GT_SUB_HI, TYP_INT, hiSrc1, hiSrc2);
BlockRange().InsertBefore(cmp, loCmp, hiCmp);
+ ContainCheckCompare(loCmp->AsOp());
+ ContainCheckBinary(hiCmp->AsOp());
//
// Try to move the first SUB_HI operands right in front of it, this allows using
@@ -2311,6 +2405,7 @@ void Lowering::LowerCompare(GenTree* cmp)
GenTree* cast = comp->gtNewCastNode(TYP_LONG, *smallerOpUse, TYP_LONG);
*smallerOpUse = cast;
BlockRange().InsertAfter(cast->gtGetOp1(), cast);
+ ContainCheckCast(cast->AsCast());
}
}
}
@@ -2323,7 +2418,7 @@ void Lowering::LowerCompare(GenTree* cmp)
GenTreeIntCon* op2 = cmp->gtGetOp2()->AsIntCon();
ssize_t op2Value = op2->IconValue();
- if (IsContainableMemoryOp(op1, false) && varTypeIsSmall(op1Type) && genTypeCanRepresentValue(op1Type, op2Value))
+ if (IsContainableMemoryOp(op1) && varTypeIsSmall(op1Type) && genTypeCanRepresentValue(op1Type, op2Value))
{
//
// If op1's type is small then try to narrow op2 so it has the same type as op1.
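
As an illustration of the case this comment describes (assumed example, not from this change): once op2 is narrowed to op1's small type, a small-typed memory operand can be compared in place, e.g. with a byte-sized compare against the constant.

    // Illustrative only: op1 is a TYP_UBYTE memory operand and the constant 7 fits
    // in that type, so the compare can be done directly on the byte in memory.
    bool IsSeven(const unsigned char* p)
    {
        return *p == 7;
    }
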
@@ -2353,12 +2448,25 @@ void Lowering::LowerCompare(GenTree* cmp)
// the result of bool returning calls.
//
- if (castOp->OperIs(GT_CALL, GT_LCL_VAR) || castOp->OperIsLogical() ||
- IsContainableMemoryOp(castOp, false))
+ if (castOp->OperIs(GT_CALL, GT_LCL_VAR) || castOp->OperIsLogical() || IsContainableMemoryOp(castOp))
{
assert(!castOp->gtOverflowEx()); // Must not be an overflow checking operation
- castOp->gtType = castToType;
+ castOp->gtType = castToType;
+ // If we have any contained memory ops on castOp, they must now not be contained.
+ if (castOp->OperIsLogical())
+ {
+ GenTree* op1 = castOp->gtGetOp1();
+ if ((op1 != nullptr) && !op1->IsCnsIntOrI())
+ {
+ op1->ClearContained();
+ }
+ GenTree* op2 = castOp->gtGetOp2();
+ if ((op2 != nullptr) && !op2->IsCnsIntOrI())
+ {
+ op2->ClearContained();
+ }
+ }
cmp->gtOp.gtOp1 = castOp;
op2->gtType = castToType;
@@ -2398,8 +2506,11 @@ void Lowering::LowerCompare(GenTree* cmp)
cmp->SetOperRaw(cmp->OperIs(GT_EQ) ? GT_TEST_EQ : GT_TEST_NE);
cmp->gtOp.gtOp1 = andOp1;
cmp->gtOp.gtOp2 = andOp2;
+ // We will re-evaluate containment below
+ andOp1->ClearContained();
+ andOp2->ClearContained();
- if (IsContainableMemoryOp(andOp1, false) && andOp2->IsIntegralConst())
+ if (IsContainableMemoryOp(andOp1) && andOp2->IsIntegralConst())
{
//
// For "test" we only care about the bits that are set in the second operand (mask).
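
A worked example of the EQ/NE-of-AND pattern that is rewritten to GT_TEST_EQ/GT_TEST_NE just above (illustrative source, not part of this diff):

    // Illustrative only: on xarch this can be emitted as a single "test" instruction
    // instead of an "and" followed by a "cmp".
    bool Bit4Clear(int x)
    {
        return (x & 0x10) == 0;   // EQ(AND(x, 0x10), 0)  ->  TEST_EQ(x, 0x10)
    }
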
@@ -2450,6 +2561,7 @@ void Lowering::LowerCompare(GenTree* cmp)
}
}
#endif // _TARGET_XARCH_
+ ContainCheckCompare(cmp->AsOp());
}
// Lower "jmp <method>" tail call to insert PInvoke method epilog if required.
@@ -2493,6 +2605,7 @@ void Lowering::LowerRet(GenTree* ret)
{
InsertPInvokeMethodEpilog(comp->compCurBB DEBUGARG(ret));
}
+ ContainCheckRet(ret->AsOp());
}
GenTree* Lowering::LowerDirectCall(GenTreeCall* call)
@@ -2648,6 +2761,7 @@ GenTree* Lowering::LowerDelegateInvoke(GenTreeCall* call)
assert(thisArgNode->gtOper == GT_PUTARG_REG);
GenTree* originalThisExpr = thisArgNode->gtOp.gtOp1;
+ GenTree* thisExpr = originalThisExpr;
// We're going to use the 'this' expression multiple times, so make a local to copy it.
@@ -2670,21 +2784,21 @@ GenTree* Lowering::LowerDelegateInvoke(GenTreeCall* call)
unsigned delegateInvokeTmp = comp->lvaGrabTemp(true DEBUGARG("delegate invoke call"));
LIR::Use thisExprUse(BlockRange(), &thisArgNode->gtOp.gtOp1, thisArgNode);
- thisExprUse.ReplaceWithLclVar(comp, m_block->getBBWeight(comp), delegateInvokeTmp);
+ ReplaceWithLclVar(thisExprUse, delegateInvokeTmp);
- originalThisExpr = thisExprUse.Def(); // it's changed; reload it.
- lclNum = delegateInvokeTmp;
+ thisExpr = thisExprUse.Def(); // it's changed; reload it.
+ lclNum = delegateInvokeTmp;
}
// replace original expression feeding into thisPtr with
// [originalThis + offsetOfDelegateInstance]
GenTree* newThisAddr = new (comp, GT_LEA)
- GenTreeAddrMode(TYP_REF, originalThisExpr, nullptr, 0, comp->eeGetEEInfo()->offsetOfDelegateInstance);
+ GenTreeAddrMode(TYP_REF, thisExpr, nullptr, 0, comp->eeGetEEInfo()->offsetOfDelegateInstance);
GenTree* newThis = comp->gtNewOperNode(GT_IND, TYP_REF, newThisAddr);
- BlockRange().InsertAfter(originalThisExpr, newThisAddr, newThis);
+ BlockRange().InsertAfter(thisExpr, newThisAddr, newThis);
thisArgNode->gtOp.gtOp1 = newThis;
@@ -2779,11 +2893,9 @@ GenTree* Lowering::SetGCState(int state)
GenTree* base = new (comp, GT_LCL_VAR) GenTreeLclVar(TYP_I_IMPL, comp->info.compLvFrameListRoot, -1);
- GenTree* storeGcState = new (comp, GT_STOREIND)
- GenTreeStoreInd(TYP_BYTE,
- new (comp, GT_LEA) GenTreeAddrMode(TYP_I_IMPL, base, nullptr, 1, pInfo->offsetOfGCState),
- new (comp, GT_CNS_INT) GenTreeIntCon(TYP_BYTE, state));
-
+ GenTree* stateNode = new (comp, GT_CNS_INT) GenTreeIntCon(TYP_BYTE, state);
+ GenTree* addr = new (comp, GT_LEA) GenTreeAddrMode(TYP_I_IMPL, base, nullptr, 1, pInfo->offsetOfGCState);
+ GenTree* storeGcState = new (comp, GT_STOREIND) GenTreeStoreInd(TYP_BYTE, addr, stateNode);
return storeGcState;
}
@@ -2966,6 +3078,7 @@ void Lowering::InsertPInvokeMethodProlog()
// The init routine sets InlinedCallFrame's m_pNext, so we just set the thread's top-of-stack
GenTree* frameUpd = CreateFrameLinkUpdate(PushFrame);
firstBlockRange.InsertBefore(insertionPoint, LIR::SeqTree(comp, frameUpd));
+ ContainCheckStoreIndir(frameUpd->AsIndir());
DISPTREERANGE(firstBlockRange, frameUpd);
}
#endif // _TARGET_64BIT_
@@ -3030,6 +3143,7 @@ void Lowering::InsertPInvokeMethodEpilog(BasicBlock* returnBB DEBUGARG(GenTreePt
// That is [tcb + offsetOfGcState] = 1
GenTree* storeGCState = SetGCState(1);
returnBlockRange.InsertBefore(insertionPoint, LIR::SeqTree(comp, storeGCState));
+ ContainCheckStoreIndir(storeGCState->AsIndir());
// Pop the frame if necessary. This always happens in the epilog on 32-bit targets. For 64-bit targets, we only do
// this in the epilog for IL stubs; for non-IL stubs the frame is popped after every PInvoke call.
@@ -3041,6 +3155,7 @@ void Lowering::InsertPInvokeMethodEpilog(BasicBlock* returnBB DEBUGARG(GenTreePt
{
GenTree* frameUpd = CreateFrameLinkUpdate(PopFrame);
returnBlockRange.InsertBefore(insertionPoint, LIR::SeqTree(comp, frameUpd));
+ ContainCheckStoreIndir(frameUpd->AsIndir());
}
}
@@ -3148,7 +3263,7 @@ void Lowering::InsertPInvokeCallProlog(GenTreeCall* call)
callFrameInfo.offsetOfCallTarget);
store->gtOp1 = src;
- BlockRange().InsertBefore(insertBefore, LIR::SeqTree(comp, store));
+ InsertTreeBeforeAndContainCheck(insertBefore, store);
}
#ifdef _TARGET_X86_
@@ -3161,7 +3276,7 @@ void Lowering::InsertPInvokeCallProlog(GenTreeCall* call)
storeCallSiteSP->gtOp1 = PhysReg(REG_SPBASE);
- BlockRange().InsertBefore(insertBefore, LIR::SeqTree(comp, storeCallSiteSP));
+ InsertTreeBeforeAndContainCheck(insertBefore, storeCallSiteSP);
#endif
@@ -3178,7 +3293,7 @@ void Lowering::InsertPInvokeCallProlog(GenTreeCall* call)
labelRef->gtType = TYP_I_IMPL;
storeLab->gtOp1 = labelRef;
- BlockRange().InsertBefore(insertBefore, LIR::SeqTree(comp, storeLab));
+ InsertTreeBeforeAndContainCheck(insertBefore, storeLab);
// Push the PInvoke frame if necessary. On 32-bit targets this only happens in the method prolog if a method
// contains PInvokes; on 64-bit targets this is necessary in non-stubs.
@@ -3194,6 +3309,7 @@ void Lowering::InsertPInvokeCallProlog(GenTreeCall* call)
// Stubs do this once per stub, not once per call.
GenTree* frameUpd = CreateFrameLinkUpdate(PushFrame);
BlockRange().InsertBefore(insertBefore, LIR::SeqTree(comp, frameUpd));
+ ContainCheckStoreIndir(frameUpd->AsIndir());
}
#endif // _TARGET_64BIT_
@@ -3204,6 +3320,7 @@ void Lowering::InsertPInvokeCallProlog(GenTreeCall* call)
GenTree* storeGCState = SetGCState(0);
BlockRange().InsertBefore(insertBefore, LIR::SeqTree(comp, storeGCState));
+ ContainCheckStoreIndir(storeGCState->AsIndir());
}
//------------------------------------------------------------------------
@@ -3229,11 +3346,12 @@ void Lowering::InsertPInvokeCallEpilog(GenTreeCall* call)
frameAddr->SetOperRaw(GT_LCL_VAR_ADDR);
// Insert call to CORINFO_HELP_JIT_PINVOKE_END
- GenTree* helperCall =
+ GenTreeCall* helperCall =
comp->gtNewHelperCallNode(CORINFO_HELP_JIT_PINVOKE_END, TYP_VOID, 0, comp->gtNewArgList(frameAddr));
comp->fgMorphTree(helperCall);
BlockRange().InsertAfter(call, LIR::SeqTree(comp, helperCall));
+ ContainCheckCallOperands(helperCall);
return;
}
@@ -3242,9 +3360,11 @@ void Lowering::InsertPInvokeCallEpilog(GenTreeCall* call)
GenTree* tree = SetGCState(1);
BlockRange().InsertBefore(insertionPoint, LIR::SeqTree(comp, tree));
+ ContainCheckStoreIndir(tree->AsIndir());
tree = CreateReturnTrapSeq();
BlockRange().InsertBefore(insertionPoint, LIR::SeqTree(comp, tree));
+ ContainCheckReturnTrap(tree->AsOp());
// Pop the frame if necessary. On 32-bit targets this only happens in the method epilog; on 64-bit targets this
// happens after every PInvoke call in non-stubs. 32-bit targets instead mark the frame as inactive.
@@ -3255,6 +3375,7 @@ void Lowering::InsertPInvokeCallEpilog(GenTreeCall* call)
{
tree = CreateFrameLinkUpdate(PopFrame);
BlockRange().InsertBefore(insertionPoint, LIR::SeqTree(comp, tree));
+ ContainCheckStoreIndir(tree->AsIndir());
}
#else
const CORINFO_EE_INFO::InlinedCallFrameInfo& callFrameInfo = comp->eeGetEEInfo()->inlinedCallFrameInfo;
@@ -3271,6 +3392,7 @@ void Lowering::InsertPInvokeCallEpilog(GenTreeCall* call)
storeCallSiteTracker->gtOp1 = constantZero;
BlockRange().InsertBefore(insertionPoint, constantZero, storeCallSiteTracker);
+ ContainCheckStoreLoc(storeCallSiteTracker);
#endif // _TARGET_64BIT_
}
@@ -3438,7 +3560,7 @@ GenTree* Lowering::LowerVirtualVtableCall(GenTreeCall* call)
}
LIR::Use thisPtrUse(BlockRange(), &(argEntry->node->gtOp.gtOp1), argEntry->node);
- thisPtrUse.ReplaceWithLclVar(comp, m_block->getBBWeight(comp), vtableCallTemp);
+ ReplaceWithLclVar(thisPtrUse, vtableCallTemp);
lclNum = vtableCallTemp;
}
@@ -3582,6 +3704,7 @@ GenTree* Lowering::LowerVirtualStubCall(GenTreeCall* call)
ind->gtFlags |= GTF_IND_REQ_ADDR_IN_REG;
BlockRange().InsertAfter(call->gtCallAddr, ind);
+ ContainCheckIndir(ind->AsIndir());
call->gtCallAddr = ind;
}
else
@@ -3845,6 +3968,15 @@ GenTree* Lowering::TryCreateAddrMode(LIR::Use&& use, bool isIndir)
GenTreeAddrMode* addrMode = new (comp, GT_LEA) GenTreeAddrMode(addrModeType, base, index, scale, offset);
+ // Neither the base nor the index should now be contained.
+ if (base != nullptr)
+ {
+ base->ClearContained();
+ }
+ if (index != nullptr)
+ {
+ index->ClearContained();
+ }
addrMode->gtRsvdRegs = addr->gtRsvdRegs;
addrMode->gtFlags |= (addr->gtFlags & GTF_IND_FLAGS);
addrMode->gtFlags &= ~GTF_ALL_EFFECT; // LEAs are side-effect-free.
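
For reference, the kind of address expression TryCreateAddrMode folds into a GT_LEA, whose base and index must then no longer be marked contained (hedged example, not part of this diff):

    // Illustrative only: base + index*scale + offset can become a single addressing
    // mode such as [rcx + rdx*4 + 16] on x64.
    int ReadElem(const int* base, long long i)
    {
        return base[i + 4];
    }
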
@@ -3871,44 +4003,34 @@ GenTree* Lowering::TryCreateAddrMode(LIR::Use&& use, bool isIndir)
// node - the node we care about
//
// Returns:
-// The next node to lower.
+// The next node to lower if we have transformed the ADD; nullptr otherwise.
//
GenTree* Lowering::LowerAdd(GenTree* node)
{
GenTree* next = node->gtNext;
-#ifdef _TARGET_ARMARCH_
- // For ARM architectures we don't have the LEA instruction
- // therefore we won't get much benefit from doing this.
- return next;
-#else // _TARGET_ARMARCH_
- if (!varTypeIsIntegralOrI(node))
+#ifndef _TARGET_ARMARCH_
+ if (varTypeIsIntegralOrI(node))
{
- return next;
- }
-
- LIR::Use use;
- if (!BlockRange().TryGetUse(node, &use))
- {
- return next;
- }
-
- // if this is a child of an indir, let the parent handle it.
- GenTree* parent = use.User();
- if (parent->OperIsIndir())
- {
- return next;
- }
-
- // if there is a chain of adds, only look at the topmost one
- if (parent->gtOper == GT_ADD)
- {
- return next;
+ LIR::Use use;
+ if (BlockRange().TryGetUse(node, &use))
+ {
+ // If this is a child of an indir, let the parent handle it.
+ // If there is a chain of adds, only look at the topmost one.
+ GenTree* parent = use.User();
+ if (!parent->OperIsIndir() && (parent->gtOper != GT_ADD))
+ {
+ GenTree* addr = TryCreateAddrMode(std::move(use), false);
+ if (addr != node)
+ {
+ return addr->gtNext;
+ }
+ }
+ }
}
-
- GenTree* addr = TryCreateAddrMode(std::move(use), false);
- return addr->gtNext;
#endif // !_TARGET_ARMARCH_
+
+ return nullptr;
}
//------------------------------------------------------------------------
@@ -3917,12 +4039,16 @@ GenTree* Lowering::LowerAdd(GenTree* node)
// Arguments:
// divMod - pointer to the GT_UDIV/GT_UMOD node to be lowered
//
+// Return Value:
+// Returns a boolean indicating whether the node was transformed.
+//
// Notes:
// - Transform UDIV/UMOD by power of 2 into RSZ/AND
// - Transform UDIV by constant >= 2^(N-1) into GE
// - Transform UDIV/UMOD by constant >= 3 into "magic division"
+//
-GenTree* Lowering::LowerUnsignedDivOrMod(GenTreeOp* divMod)
+bool Lowering::LowerUnsignedDivOrMod(GenTreeOp* divMod)
{
assert(divMod->OperIs(GT_UDIV, GT_UMOD));
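
The three transforms listed in the LowerUnsignedDivOrMod comment above correspond roughly to the following source-level equivalences (a sketch assuming 32-bit unsigned operands; the magic constant shown is the standard one for division by 3 and is not taken from this diff):

    #include <cstdint>

    uint32_t UdivBy8(uint32_t x) { return x >> 3; }                      // power of 2 -> RSZ
    uint32_t UmodBy8(uint32_t x) { return x & 7; }                       // power of 2 -> AND
    uint32_t UdivBig(uint32_t x) { return x >= 0x80000000u ? 1u : 0u; }  // divisor >= 2^31 -> GE
    uint32_t UdivBy3(uint32_t x)                                         // "magic division"
    {
        return (uint32_t)(((uint64_t)x * 0xAAAAAAABull) >> 33);          // equals x / 3 for all x
    }
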
@@ -3933,13 +4059,13 @@ GenTree* Lowering::LowerUnsignedDivOrMod(GenTreeOp* divMod)
#if !defined(_TARGET_64BIT_)
if (dividend->OperIs(GT_LONG))
{
- return next;
+ return false;
}
#endif
if (!divisor->IsCnsIntOrI())
{
- return next;
+ return false;
}
if (dividend->IsCnsIntOrI())
@@ -3947,7 +4073,7 @@ GenTree* Lowering::LowerUnsignedDivOrMod(GenTreeOp* divMod)
// We shouldn't see a divmod with constant operands here but if we do then it's likely
// because optimizations are disabled or it's a case that's supposed to throw an exception.
// Don't optimize this.
- return next;
+ return false;
}
const var_types type = divMod->TypeGet();
@@ -3964,7 +4090,7 @@ GenTree* Lowering::LowerUnsignedDivOrMod(GenTreeOp* divMod)
if (divisorValue == 0)
{
- return next;
+ return false;
}
const bool isDiv = divMod->OperIs(GT_UDIV);
@@ -3985,11 +4111,10 @@ GenTree* Lowering::LowerUnsignedDivOrMod(GenTreeOp* divMod)
}
divMod->SetOper(newOper);
- divisor->AsIntCon()->SetIconValue(divisorValue);
-
- return next;
+ divisor->gtIntCon.SetIconValue(divisorValue);
+ ContainCheckNode(divMod);
+ return true;
}
-
if (isDiv)
{
// If the divisor is greater or equal than 2^(N - 1) then the result is 1
@@ -3999,7 +4124,8 @@ GenTree* Lowering::LowerUnsignedDivOrMod(GenTreeOp* divMod)
{
divMod->SetOper(GT_GE);
divMod->gtFlags |= GTF_UNSIGNED;
- return next;
+ ContainCheckNode(divMod);
+ return true;
}
}
@@ -4038,7 +4164,7 @@ GenTree* Lowering::LowerUnsignedDivOrMod(GenTreeOp* divMod)
if (requiresDividendMultiuse)
{
LIR::Use dividendUse(BlockRange(), &divMod->gtOp1, divMod);
- dividendLclNum = dividendUse.ReplaceWithLclVar(comp, curBBWeight);
+ dividendLclNum = ReplaceWithLclVar(dividendUse);
dividend = divMod->gtGetOp1();
}
@@ -4050,6 +4176,7 @@ GenTree* Lowering::LowerUnsignedDivOrMod(GenTreeOp* divMod)
mulhi->gtFlags |= GTF_UNSIGNED;
divisor->AsIntCon()->SetIconValue(magic);
BlockRange().InsertBefore(divMod, mulhi);
+ GenTree* firstNode = mulhi;
if (requiresAdjustment)
{
@@ -4063,7 +4190,7 @@ GenTree* Lowering::LowerUnsignedDivOrMod(GenTreeOp* divMod)
BlockRange().InsertBefore(divMod, one, rsz);
LIR::Use mulhiUse(BlockRange(), &sub->gtOp.gtOp2, sub);
- unsigned mulhiLclNum = mulhiUse.ReplaceWithLclVar(comp, curBBWeight);
+ unsigned mulhiLclNum = ReplaceWithLclVar(mulhiUse);
GenTree* mulhiCopy = comp->gtNewLclvNode(mulhiLclNum, type);
GenTree* add = comp->gtNewOperNode(GT_ADD, type, rsz, mulhiCopy);
@@ -4099,31 +4226,30 @@ GenTree* Lowering::LowerUnsignedDivOrMod(GenTreeOp* divMod)
BlockRange().InsertBefore(divMod, div, divisor, mul, dividend);
comp->lvaTable[dividendLclNum].incRefCnts(curBBWeight, comp);
}
+ ContainCheckRange(firstNode, divMod);
- return mulhi;
+ return true;
}
#endif
-
- return next;
+ return false;
}
-//------------------------------------------------------------------------
-// LowerSignedDivOrMod: transform integer GT_DIV/GT_MOD nodes with a power of 2
-// const divisor into equivalent but faster sequences.
+// LowerConstIntDivOrMod: Transform integer GT_DIV/GT_MOD nodes with a power of 2
+// const divisor into equivalent but faster sequences.
//
// Arguments:
-// node - pointer to node we care about
+// node - pointer to the DIV or MOD node
//
// Returns:
// The next node to lower.
//
-GenTree* Lowering::LowerSignedDivOrMod(GenTreePtr node)
+GenTree* Lowering::LowerConstIntDivOrMod(GenTree* node)
{
assert((node->OperGet() == GT_DIV) || (node->OperGet() == GT_MOD));
-
- GenTree* next = node->gtNext;
- GenTree* divMod = node;
- GenTree* divisor = divMod->gtGetOp2();
+ GenTree* next = node->gtNext;
+ GenTree* divMod = node;
+ GenTree* dividend = divMod->gtGetOp1();
+ GenTree* divisor = divMod->gtGetOp2();
if (!divisor->IsCnsIntOrI())
{
@@ -4133,8 +4259,6 @@ GenTree* Lowering::LowerSignedDivOrMod(GenTreePtr node)
const var_types type = divMod->TypeGet();
assert((type == TYP_INT) || (type == TYP_LONG));
- GenTree* dividend = divMod->gtGetOp1();
-
if (dividend->IsCnsIntOrI())
{
// We shouldn't see a divmod with constant operands here but if we do then it's likely
@@ -4168,6 +4292,7 @@ GenTree* Lowering::LowerSignedDivOrMod(GenTreePtr node)
// If the divisor is the minimum representable integer value then we can use a compare,
// the result is 1 iff the dividend equals divisor.
divMod->SetOper(GT_EQ);
+ ContainCheckCompare(divMod->AsOp());
return next;
}
}
@@ -4229,7 +4354,7 @@ GenTree* Lowering::LowerSignedDivOrMod(GenTreePtr node)
if (requiresDividendMultiuse)
{
LIR::Use dividendUse(BlockRange(), &mulhi->gtOp.gtOp2, mulhi);
- dividendLclNum = dividendUse.ReplaceWithLclVar(comp, curBBWeight);
+ dividendLclNum = ReplaceWithLclVar(dividendUse);
}
GenTree* adjusted;
@@ -4252,7 +4377,7 @@ GenTree* Lowering::LowerSignedDivOrMod(GenTreePtr node)
BlockRange().InsertBefore(divMod, shiftBy, signBit);
LIR::Use adjustedUse(BlockRange(), &signBit->gtOp.gtOp1, signBit);
- unsigned adjustedLclNum = adjustedUse.ReplaceWithLclVar(comp, curBBWeight);
+ unsigned adjustedLclNum = ReplaceWithLclVar(adjustedUse);
adjusted = comp->gtNewLclvNode(adjustedLclNum, type);
comp->lvaTable[adjustedLclNum].incRefCnts(curBBWeight, comp);
BlockRange().InsertBefore(divMod, adjusted);
@@ -4307,7 +4432,7 @@ GenTree* Lowering::LowerSignedDivOrMod(GenTreePtr node)
unsigned curBBWeight = comp->compCurBB->getBBWeight(comp);
LIR::Use opDividend(BlockRange(), &divMod->gtOp.gtOp1, divMod);
- opDividend.ReplaceWithLclVar(comp, curBBWeight);
+ ReplaceWithLclVar(opDividend);
dividend = divMod->gtGetOp1();
assert(dividend->OperGet() == GT_LCL_VAR);
@@ -4340,11 +4465,13 @@ GenTree* Lowering::LowerSignedDivOrMod(GenTreePtr node)
divisor->gtIntCon.SetIconValue(genLog2(absDivisorValue));
newDivMod = comp->gtNewOperNode(GT_RSH, type, adjustedDividend, divisor);
+ ContainCheckShiftRotate(newDivMod->AsOp());
if (divisorValue < 0)
{
// negate the result if the divisor is negative
newDivMod = comp->gtNewOperNode(GT_NEG, type, newDivMod);
+ ContainCheckNode(newDivMod);
}
}
else
@@ -4356,6 +4483,7 @@ GenTree* Lowering::LowerSignedDivOrMod(GenTreePtr node)
newDivMod = comp->gtNewOperNode(GT_SUB, type, comp->gtNewLclvNode(dividendLclNum, type),
comp->gtNewOperNode(GT_AND, type, adjustedDividend, divisor));
+ ContainCheckBinary(newDivMod->AsOp());
comp->lvaTable[dividendLclNum].incRefCnts(curBBWeight, comp);
}
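
For the power-of-two signed case handled here, the "equivalent but faster sequence" amounts to adjusting the dividend so that the arithmetic shift truncates toward zero (illustrative sketch assuming an arithmetic right shift, as on the JIT's targets; not code from this diff):

    int SdivBy8(int x)
    {
        int adjusted = x + ((x >> 31) & 7);   // add (divisor - 1) only when x is negative
        return adjusted >> 3;                 // GT_RSH by genLog2(8)
    }

    int SmodBy8(int x)
    {
        int adjusted = x + ((x >> 31) & 7);
        return x - (adjusted & ~7);           // dividend - (adjusted & -8), cf. the GT_SUB/GT_AND path
    }
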
@@ -4366,7 +4494,7 @@ GenTree* Lowering::LowerSignedDivOrMod(GenTreePtr node)
BlockRange().Remove(dividend);
// linearize and insert the new tree before the original divMod node
- BlockRange().InsertBefore(divMod, LIR::SeqTree(comp, newDivMod));
+ InsertTreeBeforeAndContainCheck(divMod, newDivMod);
BlockRange().Remove(divMod);
// replace the original divmod node with the new divmod tree
@@ -4374,24 +4502,37 @@ GenTree* Lowering::LowerSignedDivOrMod(GenTreePtr node)
return newDivMod->gtNext;
}
-
//------------------------------------------------------------------------
-// LowerStoreInd: attempt to transform an indirect store to use an
-// addressing mode
+// LowerSignedDivOrMod: transform integer GT_DIV/GT_MOD nodes with a power of 2
+// const divisor into equivalent but faster sequences.
//
// Arguments:
-// node - the node we care about
+// node - the DIV or MOD node
+//
+// Returns:
+// The next node to lower.
//
-void Lowering::LowerStoreInd(GenTree* node)
+GenTree* Lowering::LowerSignedDivOrMod(GenTreePtr node)
{
- assert(node != nullptr);
- assert(node->OperGet() == GT_STOREIND);
+ assert((node->OperGet() == GT_DIV) || (node->OperGet() == GT_MOD));
+ GenTree* next = node->gtNext;
+ GenTree* divMod = node;
+ GenTree* dividend = divMod->gtGetOp1();
+ GenTree* divisor = divMod->gtGetOp2();
+
+#ifdef _TARGET_XARCH_
+ if (!varTypeIsFloating(node->TypeGet()))
+#endif // _TARGET_XARCH_
+ {
+ next = LowerConstIntDivOrMod(node);
+ }
- TryCreateAddrMode(LIR::Use(BlockRange(), &node->gtOp.gtOp1, node), true);
+ if ((node->OperGet() == GT_DIV) || (node->OperGet() == GT_MOD))
+ {
+ ContainCheckDivOrMod(node->AsOp());
+ }
- // Mark all GT_STOREIND nodes to indicate that it is not known
- // whether it represents a RMW memory op.
- node->AsStoreInd()->SetRMWStatusDefault();
+ return next;
}
void Lowering::WidenSIMD12IfNecessary(GenTreeLclVarCommon* node)
@@ -4503,7 +4644,7 @@ GenTree* Lowering::LowerArrElem(GenTree* node)
if (!arrElem->gtArrObj->IsLocal())
{
LIR::Use arrObjUse(BlockRange(), &arrElem->gtArrObj, arrElem);
- arrObjUse.ReplaceWithLclVar(comp, blockWeight);
+ ReplaceWithLclVar(arrObjUse);
}
GenTree* arrObjNode = arrElem->gtArrObj;
@@ -4514,6 +4655,7 @@ GenTree* Lowering::LowerArrElem(GenTree* node)
// The first ArrOffs node will have 0 for the offset of the previous dimension.
GenTree* prevArrOffs = new (comp, GT_CNS_INT) GenTreeIntCon(TYP_I_IMPL, 0);
BlockRange().InsertBefore(insertionPoint, prevArrOffs);
+ GenTree* nextToLower = prevArrOffs;
for (unsigned char dim = 0; dim < rank; dim++)
{
@@ -4589,7 +4731,7 @@ GenTree* Lowering::LowerArrElem(GenTree* node)
DISPTREERANGE(BlockRange(), leaNode);
JITDUMP("\n\n");
- return leaNode;
+ return nextToLower;
}
void Lowering::DoPhase()
@@ -4711,10 +4853,13 @@ void Lowering::DoPhase()
#ifdef DEBUG
node->gtSeqNum = currentLoc;
+ // In DEBUG, we want to set the gtRegTag to GT_REGTAG_REG, so that subsequent dumps will show the register
+ // value.
+ // Although this looks like a no-op it sets the tag.
+ node->gtRegNum = node->gtRegNum;
#endif
node->gtLsraInfo.Initialize(m_lsra, node, currentLoc);
- node->gtClearReg(comp);
currentLoc += 2;
@@ -4771,6 +4916,7 @@ void Lowering::CheckCallArg(GenTree* arg)
case GT_FIELD_LIST:
{
GenTreeFieldList* list = arg->AsFieldList();
+ assert(list->isContained());
assert(list->IsFieldListHead());
for (; list != nullptr; list = list->Rest())
@@ -5133,6 +5279,122 @@ void Lowering::getCastDescription(GenTreePtr treeNode, CastInfo* castInfo)
}
//------------------------------------------------------------------------
+// Containment Analysis
+//------------------------------------------------------------------------
+void Lowering::ContainCheckNode(GenTree* node)
+{
+ switch (node->gtOper)
+ {
+ case GT_STORE_LCL_VAR:
+ case GT_STORE_LCL_FLD:
+ ContainCheckStoreLoc(node->AsLclVarCommon());
+ break;
+
+ case GT_EQ:
+ case GT_NE:
+ case GT_LT:
+ case GT_LE:
+ case GT_GE:
+ case GT_GT:
+ case GT_TEST_EQ:
+ case GT_TEST_NE:
+ case GT_CMP:
+ ContainCheckCompare(node->AsOp());
+ break;
+
+ case GT_JTRUE:
+ ContainCheckJTrue(node->AsOp());
+ break;
+
+ case GT_ADD:
+ case GT_SUB:
+#if !defined(_TARGET_64BIT_)
+ case GT_ADD_LO:
+ case GT_ADD_HI:
+ case GT_SUB_LO:
+ case GT_SUB_HI:
+#endif
+ case GT_AND:
+ case GT_OR:
+ case GT_XOR:
+ ContainCheckBinary(node->AsOp());
+ break;
+
+#ifdef _TARGET_XARCH_
+ case GT_NEG:
+ // Codegen of this tree node sets ZF and SF flags.
+ if (!varTypeIsFloating(node))
+ {
+ node->gtFlags |= GTF_ZSF_SET;
+ }
+ break;
+#endif // _TARGET_XARCH_
+
+#if defined(_TARGET_X86_)
+ case GT_MUL_LONG:
+#endif
+ case GT_MUL:
+ case GT_MULHI:
+ ContainCheckMul(node->AsOp());
+ break;
+ case GT_DIV:
+ case GT_MOD:
+ case GT_UDIV:
+ case GT_UMOD:
+ ContainCheckDivOrMod(node->AsOp());
+ break;
+ case GT_LSH:
+ case GT_RSH:
+ case GT_RSZ:
+ case GT_ROL:
+ case GT_ROR:
+#ifndef _TARGET_64BIT_
+ case GT_LSH_HI:
+ case GT_RSH_LO:
+#endif
+ ContainCheckShiftRotate(node->AsOp());
+ break;
+ case GT_ARR_OFFSET:
+ ContainCheckArrOffset(node->AsArrOffs());
+ break;
+ case GT_LCLHEAP:
+ ContainCheckLclHeap(node->AsOp());
+ break;
+ case GT_RETURN:
+ ContainCheckRet(node->AsOp());
+ break;
+ case GT_RETURNTRAP:
+ ContainCheckReturnTrap(node->AsOp());
+ break;
+ case GT_STOREIND:
+ ContainCheckStoreIndir(node->AsIndir());
+ case GT_IND:
+ ContainCheckIndir(node->AsIndir());
+ break;
+ case GT_PUTARG_REG:
+ case GT_PUTARG_STK:
+#ifdef _TARGET_ARM_
+ case GT_PUTARG_SPLIT:
+#endif
+ // The regNum must have been set by the lowering of the call.
+ assert(node->gtRegNum != REG_NA);
+ break;
+#ifdef _TARGET_XARCH_
+ case GT_INTRINSIC:
+ ContainCheckIntrinsic(node->AsOp());
+ break;
+#endif // _TARGET_XARCH_
+#ifdef FEATURE_SIMD
+ case GT_SIMD:
+ ContainCheckSIMD(node->AsSIMD());
+ break;
+#endif // FEATURE_SIMD
+ default:
+ break;
+ }
+}
+
+//------------------------------------------------------------------------
// GetIndirSourceCount: Get the source registers for an indirection that might be contained.
//
// Arguments:
@@ -5189,7 +5451,7 @@ void Lowering::ContainCheckDivOrMod(GenTreeOp* node)
// everything is made explicit by adding casts.
assert(dividend->TypeGet() == divisor->TypeGet());
- if (IsContainableMemoryOp(divisor, true) || divisor->IsCnsNonZeroFltOrDbl())
+ if (IsContainableMemoryOp(divisor) || divisor->IsCnsNonZeroFltOrDbl())
{
MakeSrcContained(node, divisor);
}
@@ -5211,7 +5473,7 @@ void Lowering::ContainCheckDivOrMod(GenTreeOp* node)
#endif
// divisor can be an r/m, but the memory indirection must be of the same size as the divide
- if (IsContainableMemoryOp(divisor, true) && (divisor->TypeGet() == node->TypeGet()))
+ if (IsContainableMemoryOp(divisor) && (divisor->TypeGet() == node->TypeGet()))
{
MakeSrcContained(node, divisor);
}
@@ -5232,12 +5494,14 @@ void Lowering::ContainCheckDivOrMod(GenTreeOp* node)
//
void Lowering::ContainCheckReturnTrap(GenTreeOp* node)
{
+#ifdef _TARGET_XARCH_
assert(node->OperIs(GT_RETURNTRAP));
// This just turns into a compare of its child with an int + a conditional call
if (node->gtOp1->isIndir())
{
MakeSrcContained(node, node->gtOp1);
}
+#endif // _TARGET_XARCH_
}
//------------------------------------------------------------------------
@@ -5311,7 +5575,6 @@ void Lowering::ContainCheckRet(GenTreeOp* ret)
#endif // FEATURE_MULTIREG_RET
}
-#ifdef FEATURE_SIMD
//------------------------------------------------------------------------
// ContainCheckJTrue: determine whether the source of a JTRUE should be contained.
//
@@ -5320,6 +5583,11 @@ void Lowering::ContainCheckRet(GenTreeOp* ret)
//
void Lowering::ContainCheckJTrue(GenTreeOp* node)
{
+ // The compare does not need to be generated into a register.
+ GenTree* cmp = node->gtGetOp1();
+ cmp->gtLsraInfo.isNoRegCompare = true;
+
+#ifdef FEATURE_SIMD
assert(node->OperIs(GT_JTRUE));
// Say we have the following IR
@@ -5329,7 +5597,6 @@ void Lowering::ContainCheckJTrue(GenTreeOp* node)
//
// In this case we don't need to generate code for GT_EQ_/NE, since SIMD (In)Equality
// intrinsic will set or clear the Zero flag.
- GenTree* cmp = node->gtGetOp1();
genTreeOps cmpOper = cmp->OperGet();
if (cmpOper == GT_EQ || cmpOper == GT_NE)
{
@@ -5340,12 +5607,36 @@ void Lowering::ContainCheckJTrue(GenTreeOp* node)
{
// We always generate code for a SIMD equality comparison, though it produces no value.
// Neither the GT_JTRUE nor the immediate need to be evaluated.
- m_lsra->clearOperandCounts(cmp);
MakeSrcContained(cmp, cmpOp2);
+ cmpOp1->gtLsraInfo.isNoRegCompare = true;
+ // We have to reverse compare oper in the following cases:
+ // 1) SIMD Equality: Sets Zero flag on equal otherwise clears it.
+ // Therefore, if compare oper is == or != against false(0), we will
+ // be checking opposite of what is required.
+ //
+ // 2) SIMD inEquality: Clears Zero flag on true otherwise sets it.
+ // Therefore, if compare oper is == or != against true(1), we will
+ // be checking opposite of what is required.
+ GenTreeSIMD* simdNode = cmpOp1->AsSIMD();
+ if (simdNode->gtSIMDIntrinsicID == SIMDIntrinsicOpEquality)
+ {
+ if (cmpOp2->IsIntegralConst(0))
+ {
+ cmp->SetOper(GenTree::ReverseRelop(cmpOper));
+ }
+ }
+ else
+ {
+ assert(simdNode->gtSIMDIntrinsicID == SIMDIntrinsicOpInEquality);
+ if (cmpOp2->IsIntegralConst(1))
+ {
+ cmp->SetOper(GenTree::ReverseRelop(cmpOper));
+ }
+ }
}
}
-}
#endif // FEATURE_SIMD
+}
#ifdef DEBUG
void Lowering::DumpNodeInfoMap()
diff --git a/src/jit/lower.h b/src/jit/lower.h
index f09df7a836..ebee64117b 100644
--- a/src/jit/lower.h
+++ b/src/jit/lower.h
@@ -47,35 +47,77 @@ public:
static void getCastDescription(GenTreePtr treeNode, CastInfo* castInfo);
+ // This variant of LowerRange is called from outside of the main Lowering pass,
+ // so it creates its own instance of Lowering to do so.
+ void LowerRange(BasicBlock* block, LIR::ReadOnlyRange& range)
+ {
+ Lowering lowerer(comp, m_lsra);
+ lowerer.m_block = block;
+
+ lowerer.LowerRange(range);
+ }
+
+private:
// LowerRange handles new code that is introduced by or after Lowering.
- void LowerRange(LIR::Range&& range)
+ void LowerRange(LIR::ReadOnlyRange& range)
{
for (GenTree* newNode : range)
{
LowerNode(newNode);
}
}
+ void LowerRange(GenTree* firstNode, GenTree* lastNode)
+ {
+ LIR::ReadOnlyRange range(firstNode, lastNode);
+ LowerRange(range);
+ }
+
+ // ContainCheckRange handles new code that is introduced by or after Lowering,
+ // and that is known to be already in Lowered form.
+ void ContainCheckRange(LIR::ReadOnlyRange& range)
+ {
+ for (GenTree* newNode : range)
+ {
+ ContainCheckNode(newNode);
+ }
+ }
+ void ContainCheckRange(GenTree* firstNode, GenTree* lastNode)
+ {
+ LIR::ReadOnlyRange range(firstNode, lastNode);
+ ContainCheckRange(range);
+ }
+
+ void InsertTreeBeforeAndContainCheck(GenTree* insertionPoint, GenTree* tree)
+ {
+ LIR::Range range = LIR::SeqTree(comp, tree);
+ ContainCheckRange(range);
+ BlockRange().InsertBefore(insertionPoint, std::move(range));
+ }
+
+ void ContainCheckNode(GenTree* node);
-private:
void ContainCheckDivOrMod(GenTreeOp* node);
void ContainCheckReturnTrap(GenTreeOp* node);
void ContainCheckArrOffset(GenTreeArrOffs* node);
void ContainCheckLclHeap(GenTreeOp* node);
void ContainCheckRet(GenTreeOp* node);
- void ContainCheckBinary(GenTreeOp* node);
+ void ContainCheckJTrue(GenTreeOp* node);
+
+ void ContainCheckCallOperands(GenTreeCall* call);
+ void ContainCheckIndir(GenTreeIndir* indirNode);
+ void ContainCheckStoreIndir(GenTreeIndir* indirNode);
void ContainCheckMul(GenTreeOp* node);
void ContainCheckShiftRotate(GenTreeOp* node);
void ContainCheckStoreLoc(GenTreeLclVarCommon* storeLoc);
- void ContainCheckIndir(GenTreeIndir* indirNode);
void ContainCheckCast(GenTreeCast* node);
void ContainCheckCompare(GenTreeOp* node);
+ void ContainCheckBinary(GenTreeOp* node);
void ContainCheckBoundsChk(GenTreeBoundsChk* node);
#ifdef _TARGET_XARCH_
void ContainCheckFloatBinary(GenTreeOp* node);
void ContainCheckIntrinsic(GenTreeOp* node);
#endif // _TARGET_XARCH_
#ifdef FEATURE_SIMD
- void ContainCheckJTrue(GenTreeOp* node);
void ContainCheckSIMD(GenTreeSIMD* simdNode);
#endif // FEATURE_SIMD
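A sketch of the intended usage of the new range helpers, mirroring the InsertTreeBeforeAndContainCheck body above ('newTree' and 'insertionPoint' are placeholders):

    LIR::Range range = LIR::SeqTree(comp, newTree);       // sequence IR created during Lowering
    ContainCheckRange(range);                             // the new nodes are already in lowered form
    BlockRange().InsertBefore(insertionPoint, std::move(range));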
@@ -153,6 +195,21 @@ private:
return new (comp, GT_LEA) GenTreeAddrMode(resultType, base, index, 0, 0);
}
+ // Replace the definition of the given use with a lclVar, allocating a new temp
+ // if 'tempNum' is BAD_VAR_NUM.
+ unsigned ReplaceWithLclVar(LIR::Use& use, unsigned tempNum = BAD_VAR_NUM)
+ {
+ GenTree* oldUseNode = use.Def();
+ if ((oldUseNode->gtOper != GT_LCL_VAR) || (tempNum != BAD_VAR_NUM))
+ {
+ unsigned newLclNum = use.ReplaceWithLclVar(comp, m_block->getBBWeight(comp), tempNum);
+ GenTree* newUseNode = use.Def();
+ ContainCheckRange(oldUseNode->gtNext, newUseNode);
+ return newLclNum;
+ }
+ return oldUseNode->AsLclVarCommon()->gtLclNum;
+ }
+
// returns true if the tree can use the read-modify-write memory instruction form
bool isRMWRegOper(GenTreePtr tree);
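A rough sketch of the LIR effect of ReplaceWithLclVar when the def is not already a lclVar (temp number V05 is hypothetical):

    //   before:  t1 = MUL(x, y)           ; t1 used by some node N
    //   after :  t1 = MUL(x, y)
    //            STORE_LCL_VAR<V05>(t1)   ; new temp from use.ReplaceWithLclVar
    //            t2 = LCL_VAR<V05>        ; N now uses t2
    // The nodes from oldUseNode->gtNext through the new use are then passed to
    // ContainCheckRange, since they were created after the main containment pass.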
@@ -236,12 +293,12 @@ private:
int GetOperandSourceCount(GenTree* node);
int GetIndirSourceCount(GenTreeIndir* indirTree);
+ void HandleFloatVarArgs(GenTreeCall* call, GenTree* argNode, bool* callHasFloatRegArgs);
void TreeNodeInfoInitStoreLoc(GenTree* tree);
void TreeNodeInfoInitReturn(GenTree* tree);
void TreeNodeInfoInitShiftRotate(GenTree* tree);
- void TreeNodeInfoInitPutArgReg(
- GenTreeUnOp* node, regNumber argReg, TreeNodeInfo& info, bool isVarArgs, bool* callHasFloatRegArgs);
+ void TreeNodeInfoInitPutArgReg(GenTreeUnOp* node);
void TreeNodeInfoInitCall(GenTreeCall* call);
void TreeNodeInfoInitCmp(GenTreePtr tree);
void TreeNodeInfoInitStructArg(GenTreePtr structArg);
@@ -251,36 +308,37 @@ private:
void TreeNodeInfoInitStoreLoc(GenTreeLclVarCommon* tree);
void TreeNodeInfoInitIndir(GenTreeIndir* indirTree);
void TreeNodeInfoInitGCWriteBarrier(GenTree* tree);
-#if !CPU_LOAD_STORE_ARCH
- bool TreeNodeInfoInitIfRMWMemOp(GenTreePtr storeInd);
-#endif
+ void TreeNodeInfoInitCast(GenTree* tree);
+
+#if defined(_TARGET_XARCH_)
+ void TreeNodeInfoInitMul(GenTreePtr tree);
+ void SetContainsAVXFlags(bool isFloatingPointType = true, unsigned sizeOfSIMDVector = 0);
+#endif // defined(_TARGET_XARCH_)
+
#ifdef FEATURE_SIMD
- void TreeNodeInfoInitSIMD(GenTree* tree);
+ void TreeNodeInfoInitSIMD(GenTreeSIMD* tree);
#endif // FEATURE_SIMD
- void TreeNodeInfoInitCast(GenTree* tree);
+
+ void TreeNodeInfoInitPutArgStk(GenTreePutArgStk* argNode);
#ifdef _TARGET_ARM64_
void LowerPutArgStk(GenTreePutArgStk* argNode, fgArgTabEntryPtr info);
- void TreeNodeInfoInitPutArgStk(GenTreePutArgStk* argNode, fgArgTabEntryPtr info);
#endif // _TARGET_ARM64_
#ifdef _TARGET_ARM_
void LowerPutArgStk(GenTreePutArgStk* argNode, fgArgTabEntryPtr info);
- void TreeNodeInfoInitPutArgStk(GenTreePutArgStk* argNode, fgArgTabEntryPtr info);
#endif // _TARGET_ARM64_
-#ifdef FEATURE_PUT_STRUCT_ARG_STK
void LowerPutArgStk(GenTreePutArgStk* tree);
- void TreeNodeInfoInitPutArgStk(GenTreePutArgStk* tree);
#ifdef _TARGET_ARM_
- void TreeNodeInfoInitPutArgSplit(GenTreePutArgSplit* tree, TreeNodeInfo& info, fgArgTabEntryPtr argInfo);
+ void TreeNodeInfoInitPutArgSplit(GenTreePutArgSplit* tree);
#endif
-#endif // FEATURE_PUT_STRUCT_ARG_STK
void TreeNodeInfoInitLclHeap(GenTree* tree);
void DumpNodeInfoMap();
// Per tree node member functions
- void LowerStoreInd(GenTree* node);
+ void LowerStoreIndir(GenTreeIndir* node);
GenTree* LowerAdd(GenTree* node);
- GenTree* LowerUnsignedDivOrMod(GenTreeOp* divMod);
+ bool LowerUnsignedDivOrMod(GenTreeOp* divMod);
+ GenTree* LowerConstIntDivOrMod(GenTree* node);
GenTree* LowerSignedDivOrMod(GenTree* node);
void LowerBlockStore(GenTreeBlk* blkNode);
@@ -290,11 +348,6 @@ private:
GenTree* LowerSwitch(GenTree* node);
void LowerCast(GenTree* node);
-#if defined(_TARGET_XARCH_)
- void TreeNodeInfoInitMul(GenTreePtr tree);
- void SetContainsAVXFlags(bool isFloatingPointType = true, unsigned sizeOfSIMDVector = 0);
-#endif // defined(_TARGET_XARCH_)
-
#if !CPU_LOAD_STORE_ARCH
bool IsRMWIndirCandidate(GenTree* operand, GenTree* storeInd);
bool IsBinOpInRMWStoreInd(GenTreePtr tree);
@@ -307,6 +360,9 @@ private:
GenTree* LowerArrElem(GenTree* node);
void LowerRotate(GenTree* tree);
void LowerShift(GenTreeOp* shift);
+#ifdef FEATURE_SIMD
+ void LowerSIMD(GenTreeSIMD* simdNode);
+#endif // FEATURE_SIMD
// Utility functions
void MorphBlkIntoHelperCall(GenTreePtr pTree, GenTreePtr treeStmt);
@@ -325,7 +381,7 @@ private:
bool IsContainableImmed(GenTree* parentNode, GenTree* childNode);
// Return true if 'node' is a containable memory op.
- bool IsContainableMemoryOp(GenTree* node, bool useTracked);
+ bool IsContainableMemoryOp(GenTree* node);
// Makes 'childNode' contained in the 'parentNode'
void MakeSrcContained(GenTreePtr parentNode, GenTreePtr childNode);
diff --git a/src/jit/lowerarmarch.cpp b/src/jit/lowerarmarch.cpp
index 7104577839..f944b42a05 100644
--- a/src/jit/lowerarmarch.cpp
+++ b/src/jit/lowerarmarch.cpp
@@ -218,6 +218,20 @@ void Lowering::LowerStoreLoc(GenTreeLclVarCommon* storeLoc)
}
//------------------------------------------------------------------------
+// LowerStoreIndir: Determine addressing mode for an indirection, and whether operands are contained.
+//
+// Arguments:
+// node - The indirect store node (GT_STORE_IND) of interest
+//
+// Return Value:
+// None.
+//
+void Lowering::LowerStoreIndir(GenTreeIndir* node)
+{
+ ContainCheckStoreIndir(node);
+}
+
+//------------------------------------------------------------------------
// LowerBlockStore: Set block store type
//
// Arguments:
@@ -255,6 +269,7 @@ void Lowering::LowerBlockStore(GenTreeBlk* blkNode)
GenTreePtr initVal = source;
if (initVal->OperIsInitVal())
{
+ initVal->SetContained();
initVal = initVal->gtGetOp1();
}
srcAddrOrFill = initVal;
@@ -276,7 +291,11 @@ void Lowering::LowerBlockStore(GenTreeBlk* blkNode)
// the largest width store of the desired inline expansion.
ssize_t fill = initVal->gtIntCon.gtIconVal & 0xFF;
- if (size < REGSIZE_BYTES)
+ if (fill == 0)
+ {
+ MakeSrcContained(blkNode, source);
+ }
+ else if (size < REGSIZE_BYTES)
{
initVal->gtIntCon.gtIconVal = 0x01010101 * fill;
}
@@ -348,6 +367,16 @@ void Lowering::LowerBlockStore(GenTreeBlk* blkNode)
blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindHelper;
}
}
+ // CopyObj or CopyBlk
+ if (source->gtOper == GT_IND)
+ {
+ MakeSrcContained(blkNode, source);
+ }
+ else if (!source->IsMultiRegCall() && !source->OperIsSIMD())
+ {
+ assert(source->IsLocal());
+ MakeSrcContained(blkNode, source);
+ }
}
}
@@ -453,6 +482,7 @@ void Lowering::LowerRotate(GenTreePtr tree)
}
tree->ChangeOper(GT_ROR);
}
+ ContainCheckShiftRotate(tree->AsOp());
}
//------------------------------------------------------------------------
@@ -460,31 +490,97 @@ void Lowering::LowerRotate(GenTreePtr tree)
//------------------------------------------------------------------------
//------------------------------------------------------------------------
-// ContainCheckIndir: Determine whether operands of an indir should be contained.
+// ContainCheckCallOperands: Determine whether operands of a call should be contained.
//
// Arguments:
-// node - The indirection node of interest
-//
-// Notes:
-// This is called for both store and load indirections.
+// call - The call node of interest
//
// Return Value:
// None.
//
-void Lowering::ContainCheckIndir(GenTreeIndir* indirNode)
+void Lowering::ContainCheckCallOperands(GenTreeCall* call)
{
-#ifdef _TARGET_ARM64_
- if (indirNode->OperIs(GT_STOREIND))
+ GenTree* ctrlExpr = call->gtControlExpr;
+ // If there is an explicit this pointer, we don't want that node to produce anything
+ // as it is redundant
+ if (call->gtCallObjp != nullptr)
{
- GenTree* src = indirNode->gtOp.gtOp2;
- if (!varTypeIsFloating(src->TypeGet()) && src->IsIntegralConst(0))
+ GenTreePtr thisPtrNode = call->gtCallObjp;
+
+ if (thisPtrNode->canBeContained())
+ {
+ MakeSrcContained(call, thisPtrNode);
+ if (thisPtrNode->gtOper == GT_PUTARG_REG)
+ {
+ MakeSrcContained(call, thisPtrNode->gtOp.gtOp1);
+ }
+ }
+ }
+ GenTreePtr args = call->gtCallArgs;
+ while (args)
+ {
+ GenTreePtr arg = args->gtOp.gtOp1;
+ if (!(args->gtFlags & GTF_LATE_ARG))
{
- // an integer zero for 'src' can be contained.
- MakeSrcContained(indirNode, src);
+ TreeNodeInfo* argInfo = &(arg->gtLsraInfo);
+ if (arg->gtOper == GT_PUTARG_STK)
+ {
+ GenTreePtr putArgChild = arg->gtOp.gtOp1;
+ if (putArgChild->OperGet() == GT_FIELD_LIST)
+ {
+ MakeSrcContained(arg, putArgChild);
+ }
+ else if (putArgChild->OperGet() == GT_OBJ)
+ {
+ MakeSrcContained(arg, putArgChild);
+ GenTreePtr objChild = putArgChild->gtOp.gtOp1;
+ if (objChild->OperGet() == GT_LCL_VAR_ADDR)
+ {
+ // We will generate all of the code for the GT_PUTARG_STK, the GT_OBJ and the GT_LCL_VAR_ADDR
+ // as one contained operation
+ //
+ MakeSrcContained(putArgChild, objChild);
+ }
+ }
+ }
}
+ args = args->gtOp.gtOp2;
+ }
+}
+
+//------------------------------------------------------------------------
+// ContainCheckStoreIndir: determine whether the sources of a STOREIND node should be contained.
+//
+// Arguments:
+// node - pointer to the node
+//
+void Lowering::ContainCheckStoreIndir(GenTreeIndir* node)
+{
+#ifdef _TARGET_ARM64_
+ GenTree* src = node->gtOp.gtOp2;
+ if (!varTypeIsFloating(src->TypeGet()) && src->IsIntegralConst(0))
+ {
+ // an integer zero for 'src' can be contained.
+ MakeSrcContained(node, src);
}
#endif // _TARGET_ARM64_
+ ContainCheckIndir(node);
+}
+//------------------------------------------------------------------------
+// ContainCheckIndir: Determine whether operands of an indir should be contained.
+//
+// Arguments:
+// indirNode - The indirection node of interest
+//
+// Notes:
+// This is called for both store and load indirections.
+//
+// Return Value:
+// None.
+//
+void Lowering::ContainCheckIndir(GenTreeIndir* indirNode)
+{
// If this is the rhs of a block copy it will be handled when we handle the store.
if (indirNode->TypeGet() == TYP_STRUCT)
{
diff --git a/src/jit/lowerxarch.cpp b/src/jit/lowerxarch.cpp
index 1c679478aa..b8f9b9d72c 100644
--- a/src/jit/lowerxarch.cpp
+++ b/src/jit/lowerxarch.cpp
@@ -30,8 +30,9 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
#include "lower.h"
// xarch supports both ROL and ROR instructions so no lowering is required.
-void Lowering::LowerRotate(GenTreePtr tree)
+void Lowering::LowerRotate(GenTree* tree)
{
+ ContainCheckShiftRotate(tree->AsOp());
}
//------------------------------------------------------------------------
@@ -76,6 +77,7 @@ void Lowering::LowerShift(GenTreeOp* shift)
BlockRange().Remove(andOp);
BlockRange().Remove(maskOp);
}
+ ContainCheckShiftRotate(shift);
}
//------------------------------------------------------------------------
@@ -86,12 +88,11 @@ void Lowering::LowerShift(GenTreeOp* shift)
//
// Notes:
// This involves:
+// - Handling of contained immediates.
// - Widening operations of unsigneds.
void Lowering::LowerStoreLoc(GenTreeLclVarCommon* storeLoc)
{
- GenTree* op1 = storeLoc->gtGetOp1();
-
// Try to widen the ops if they are going into a local var.
if ((storeLoc->gtOper == GT_STORE_LCL_VAR) && (storeLoc->gtOp1->gtOper == GT_CNS_INT))
{
@@ -140,6 +141,39 @@ void Lowering::LowerStoreLoc(GenTreeLclVarCommon* storeLoc)
}
}
}
+ ContainCheckStoreLoc(storeLoc);
+}
+
+//------------------------------------------------------------------------
+// LowerStoreIndir: Determine addressing mode for an indirection, and whether operands are contained.
+//
+// Arguments:
+// node - The indirect store node (GT_STORE_IND) of interest
+//
+// Return Value:
+// None.
+//
+void Lowering::LowerStoreIndir(GenTreeIndir* node)
+{
+ // Mark the GT_STOREIND node to indicate that it is not yet known
+ // whether it represents a RMW memory op.
+ node->AsStoreInd()->SetRMWStatusDefault();
+
+ if (!varTypeIsFloating(node))
+ {
+ // Perform recognition of trees with the following structure:
+ // StoreInd(addr, BinOp(expr, GT_IND(addr)))
+ // to be able to fold this into an instruction of the form
+ // BINOP [addr], register
+ // where register is the actual place where 'expr' is computed.
+ //
+ // SSE2 doesn't support RMW form of instructions.
+ if (LowerRMWMemOp(node))
+ {
+ return;
+ }
+ }
+ ContainCheckStoreIndir(node);
}
//------------------------------------------------------------------------
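An illustrative instance of the RMW shape recognized above (field offset and registers are hypothetical):

    //   IR:   STOREIND(addr, ADD(IND(addr), t))
    //   x64:  add dword ptr [rcx+8], eax     ; one read-modify-write instruction
    // instead of a separate load, add, and store through a register.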
@@ -178,6 +212,7 @@ void Lowering::LowerBlockStore(GenTreeBlk* blkNode)
GenTree* initVal = source;
if (initVal->OperIsInitVal())
{
+ initVal->SetContained();
initVal = initVal->gtGetOp1();
}
srcAddrOrFill = initVal;
@@ -218,11 +253,19 @@ void Lowering::LowerBlockStore(GenTreeBlk* blkNode)
{
initVal->gtIntCon.gtIconVal = 0x0101010101010101LL * fill;
initVal->gtType = TYP_LONG;
+ if ((fill == 0) && ((size & 0xf) == 0))
+ {
+ MakeSrcContained(blkNode, source);
+ }
}
#else // !_TARGET_AMD64_
initVal->gtIntCon.gtIconVal = 0x01010101 * fill;
#endif // !_TARGET_AMD64_
+ if ((fill == 0) && ((size & 0xf) == 0))
+ {
+ MakeSrcContained(blkNode, source);
+ }
blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindUnroll;
}
else
@@ -239,134 +282,165 @@ void Lowering::LowerBlockStore(GenTreeBlk* blkNode)
#endif // !_TARGET_AMD64_
}
}
- else if (blkNode->gtOper == GT_STORE_OBJ)
+ else
{
- // CopyObj
+ if (blkNode->gtOper == GT_STORE_OBJ)
+ {
+ // CopyObj
- GenTreeObj* cpObjNode = blkNode->AsObj();
+ GenTreeObj* cpObjNode = blkNode->AsObj();
- unsigned slots = cpObjNode->gtSlots;
+ unsigned slots = cpObjNode->gtSlots;
#ifdef DEBUG
- // CpObj must always have at least one GC-Pointer as a member.
- assert(cpObjNode->gtGcPtrCount > 0);
-
- assert(dstAddr->gtType == TYP_BYREF || dstAddr->gtType == TYP_I_IMPL);
-
- CORINFO_CLASS_HANDLE clsHnd = cpObjNode->gtClass;
- size_t classSize = comp->info.compCompHnd->getClassSize(clsHnd);
- size_t blkSize = roundUp(classSize, TARGET_POINTER_SIZE);
-
- // Currently, the EE always round up a class data structure so
- // we are not handling the case where we have a non multiple of pointer sized
- // struct. This behavior may change in the future so in order to keeps things correct
- // let's assert it just to be safe. Going forward we should simply
- // handle this case.
- assert(classSize == blkSize);
- assert((blkSize / TARGET_POINTER_SIZE) == slots);
- assert(cpObjNode->HasGCPtr());
+ // CpObj must always have at least one GC-Pointer as a member.
+ assert(cpObjNode->gtGcPtrCount > 0);
+
+ assert(dstAddr->gtType == TYP_BYREF || dstAddr->gtType == TYP_I_IMPL);
+
+ CORINFO_CLASS_HANDLE clsHnd = cpObjNode->gtClass;
+ size_t classSize = comp->info.compCompHnd->getClassSize(clsHnd);
+ size_t blkSize = roundUp(classSize, TARGET_POINTER_SIZE);
+
+ // Currently, the EE always rounds up a class data structure so
+ // we are not handling the case where we have a non-multiple-of-pointer-size
+ // struct. This behavior may change in the future so in order to keep things correct
+ // let's assert it just to be safe. Going forward we should simply
+ // handle this case.
+ assert(classSize == blkSize);
+ assert((blkSize / TARGET_POINTER_SIZE) == slots);
+ assert(cpObjNode->HasGCPtr());
#endif
- bool IsRepMovsProfitable = false;
-
- // If the destination is not on the stack, let's find out if we
- // can improve code size by using rep movsq instead of generating
- // sequences of movsq instructions.
- if (!dstAddr->OperIsLocalAddr())
- {
- // Let's inspect the struct/class layout and determine if it's profitable
- // to use rep movsq for copying non-gc memory instead of using single movsq
- // instructions for each memory slot.
- unsigned i = 0;
- BYTE* gcPtrs = cpObjNode->gtGcPtrs;
+ bool IsRepMovsProfitable = false;
- do
+ // If the destination is not on the stack, let's find out if we
+ // can improve code size by using rep movsq instead of generating
+ // sequences of movsq instructions.
+ if (!dstAddr->OperIsLocalAddr())
{
- unsigned nonGCSlots = 0;
- // Measure a contiguous non-gc area inside the struct and note the maximum.
- while (i < slots && gcPtrs[i] == TYPE_GC_NONE)
- {
- nonGCSlots++;
- i++;
- }
+ // Let's inspect the struct/class layout and determine if it's profitable
+ // to use rep movsq for copying non-gc memory instead of using single movsq
+ // instructions for each memory slot.
+ unsigned i = 0;
+ BYTE* gcPtrs = cpObjNode->gtGcPtrs;
- while (i < slots && gcPtrs[i] != TYPE_GC_NONE)
+ do
{
- i++;
- }
+ unsigned nonGCSlots = 0;
+ // Measure a contiguous non-gc area inside the struct and note the maximum.
+ while (i < slots && gcPtrs[i] == TYPE_GC_NONE)
+ {
+ nonGCSlots++;
+ i++;
+ }
- if (nonGCSlots >= CPOBJ_NONGC_SLOTS_LIMIT)
- {
- IsRepMovsProfitable = true;
- break;
- }
- } while (i < slots);
- }
- else if (slots >= CPOBJ_NONGC_SLOTS_LIMIT)
- {
- IsRepMovsProfitable = true;
- }
+ while (i < slots && gcPtrs[i] != TYPE_GC_NONE)
+ {
+ i++;
+ }
- // There are two cases in which we need to materialize the
- // struct size:
- // a) When the destination is on the stack we don't need to use the
- // write barrier, we can just simply call rep movsq and get a win in codesize.
- // b) If we determine we have contiguous non-gc regions in the struct where it's profitable
- // to use rep movsq instead of a sequence of single movsq instructions. According to the
- // Intel Manual, the sweet spot for small structs is between 4 to 12 slots of size where
- // the entire operation takes 20 cycles and encodes in 5 bytes (moving RCX, and calling rep movsq).
- if (IsRepMovsProfitable)
- {
- // We need the size of the contiguous Non-GC-region to be in RCX to call rep movsq.
- blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindRepInstr;
+ if (nonGCSlots >= CPOBJ_NONGC_SLOTS_LIMIT)
+ {
+ IsRepMovsProfitable = true;
+ break;
+ }
+ } while (i < slots);
+ }
+ else if (slots >= CPOBJ_NONGC_SLOTS_LIMIT)
+ {
+ IsRepMovsProfitable = true;
+ }
+
+ // There are two cases in which we need to materialize the
+ // struct size:
+ // a) When the destination is on the stack we don't need to use the
+ // write barrier, we can just simply call rep movsq and get a win in codesize.
+ // b) If we determine we have contiguous non-gc regions in the struct where it's profitable
+ // to use rep movsq instead of a sequence of single movsq instructions. According to the
+ // Intel Manual, the sweet spot for small structs is between 4 to 12 slots of size where
+ // the entire operation takes 20 cycles and encodes in 5 bytes (moving RCX, and calling rep movsq).
+ if (IsRepMovsProfitable)
+ {
+ // We need the size of the contiguous Non-GC-region to be in RCX to call rep movsq.
+ blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindRepInstr;
+ }
+ else
+ {
+ blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindUnroll;
+ }
}
else
{
- blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindUnroll;
- }
- }
- else
- {
- assert((blkNode->OperGet() == GT_STORE_BLK) || (blkNode->OperGet() == GT_STORE_DYN_BLK));
- // CopyBlk
- // In case of a CpBlk with a constant size and less than CPBLK_MOVS_LIMIT size
- // we can use rep movs to generate code instead of the helper call.
+ assert((blkNode->OperGet() == GT_STORE_BLK) || (blkNode->OperGet() == GT_STORE_DYN_BLK));
+ // CopyBlk
+ // In case of a CpBlk with a constant size and less than CPBLK_MOVS_LIMIT size
+ // we can use rep movs to generate code instead of the helper call.
- // This threshold will decide between using the helper or let the JIT decide to inline
- // a code sequence of its choice.
- unsigned helperThreshold = max(CPBLK_MOVS_LIMIT, CPBLK_UNROLL_LIMIT);
+ // This threshold will decide between using the helper or letting the JIT decide to inline
+ // a code sequence of its choice.
+ unsigned helperThreshold = max(CPBLK_MOVS_LIMIT, CPBLK_UNROLL_LIMIT);
- // TODO-X86-CQ: Investigate whether a helper call would be beneficial on x86
- if ((size != 0) && (size <= helperThreshold))
- {
- // If we have a buffer between XMM_REGSIZE_BYTES and CPBLK_UNROLL_LIMIT bytes, we'll use SSE2.
- // Structs and buffer with sizes <= CPBLK_UNROLL_LIMIT bytes are occurring in more than 95% of
- // our framework assemblies, so this is the main code generation scheme we'll use.
- if (size <= CPBLK_UNROLL_LIMIT)
+ // TODO-X86-CQ: Investigate whether a helper call would be beneficial on x86
+ if ((size != 0) && (size <= helperThreshold))
{
- blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindUnroll;
+ // If we have a buffer between XMM_REGSIZE_BYTES and CPBLK_UNROLL_LIMIT bytes, we'll use SSE2.
+ // Structs and buffers with sizes <= CPBLK_UNROLL_LIMIT bytes occur in more than 95% of
+ // our framework assemblies, so this is the main code generation scheme we'll use.
+ if (size <= CPBLK_UNROLL_LIMIT)
+ {
+ blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindUnroll;
+
+ // If src or dst are on stack, we don't have to generate the address
+ // into a register because it's just some constant+SP.
+ if ((srcAddrOrFill != nullptr) && srcAddrOrFill->OperIsLocalAddr())
+ {
+ MakeSrcContained(blkNode, srcAddrOrFill);
+ }
+
+ if (dstAddr->OperIsLocalAddr())
+ {
+ MakeSrcContained(blkNode, dstAddr);
+ }
+ }
+ else
+ {
+ blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindRepInstr;
+ }
}
+#ifdef _TARGET_AMD64_
+ else
+ {
+ blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindHelper;
+ }
+#elif defined(_TARGET_X86_)
else
{
blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindRepInstr;
}
+#endif // _TARGET_X86_
+ assert(blkNode->gtBlkOpKind != GenTreeBlk::BlkOpKindInvalid);
}
-#ifdef _TARGET_AMD64_
- else
+
+ // CopyObj or CopyBlk
+ if (source->gtOper == GT_IND)
{
- blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindHelper;
+ // The GT_IND is contained, but the address must be in a register unless it is local.
+ MakeSrcContained(blkNode, source);
+ GenTree* addr = source->AsIndir()->Addr();
+ if (!addr->OperIsLocalAddr())
+ {
+ addr->ClearContained();
+ }
}
-#elif defined(_TARGET_X86_)
- else
+ else if (!source->IsMultiRegCall() && !source->OperIsSIMD())
{
- blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindRepInstr;
+ assert(source->IsLocal());
+ MakeSrcContained(blkNode, source);
}
-#endif // _TARGET_X86_
- assert(blkNode->gtBlkOpKind != GenTreeBlk::BlkOpKindInvalid);
}
}
-#ifdef FEATURE_PUT_STRUCT_ARG_STK
//------------------------------------------------------------------------
// LowerPutArgStk: Lower a GT_PUTARG_STK.
//
@@ -441,7 +515,6 @@ void Lowering::LowerPutArgStk(GenTreePutArgStk* putArgStk)
#endif // DEBUG
head->gtLsraInfo = fieldList->gtLsraInfo;
- head->gtClearReg(comp);
BlockRange().InsertAfter(fieldList, head);
BlockRange().Remove(fieldList);
@@ -473,6 +546,38 @@ void Lowering::LowerPutArgStk(GenTreePutArgStk* putArgStk)
putArgStk->gtNumberReferenceSlots++;
}
+ // For x86 we must mark all integral fields as contained or reg-optional, and handle them
+ // accordingly in code generation, since we may have up to 8 fields, which cannot all be in
+ // registers to be consumed atomically by the call.
+ if (varTypeIsIntegralOrI(fieldNode))
+ {
+ if (fieldNode->OperGet() == GT_LCL_VAR)
+ {
+ LclVarDsc* varDsc = &(comp->lvaTable[fieldNode->AsLclVarCommon()->gtLclNum]);
+ if (!varDsc->lvDoNotEnregister)
+ {
+ SetRegOptional(fieldNode);
+ }
+ else
+ {
+ MakeSrcContained(putArgStk, fieldNode);
+ }
+ }
+ else if (fieldNode->IsIntCnsFitsInI32())
+ {
+ MakeSrcContained(putArgStk, fieldNode);
+ }
+ else
+ {
+ // For the case where we cannot directly push the value, if we run out of registers,
+ // it would be better to defer computation until we are pushing the arguments rather
+ // than spilling, but this situation is not all that common, as most cases of promoted
+ // structs do not have a large number of fields, and of those most are lclVars or
+ // copy-propagated constants.
+ SetRegOptional(fieldNode);
+ }
+ }
+
prevOffset = fieldOffset;
}
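A sketch of the x86 code this enables for a promoted struct passed on the stack (offsets and values hypothetical):

    //   push dword ptr [ebp-8]    ; reg-optional GT_LCL_VAR field, pushed from its home location
    //   push 16                   ; contained GT_CNS_INT field
    //   push eax                  ; a field that did get a register
    // so the fields never all need to be live in registers at once before the call.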
@@ -494,15 +599,55 @@ void Lowering::LowerPutArgStk(GenTreePutArgStk* putArgStk)
}
#endif // _TARGET_X86_
+ GenTreePtr src = putArgStk->gtOp1;
+
+#ifdef FEATURE_PUT_STRUCT_ARG_STK
if (putArgStk->TypeGet() != TYP_STRUCT)
+#endif // FEATURE_PUT_STRUCT_ARG_STK
{
+ // If the child of GT_PUTARG_STK is a constant, we don't need a register to
+ // move it to memory (stack location).
+ //
+ // On AMD64, we don't want to make 0 contained, because we can generate smaller code
+ // by zeroing a register and then storing it. E.g.:
+ // xor rdx, rdx
+ // mov gword ptr [rsp+28H], rdx
+ // is 2 bytes smaller than:
+ // mov gword ptr [rsp+28H], 0
+ //
+ // On x86, we push stack arguments; we don't use 'mov'. So:
+ // push 0
+ // is 1 byte smaller than:
+ // xor rdx, rdx
+ // push rdx
+
+ if (IsContainableImmed(putArgStk, src)
+#if defined(_TARGET_AMD64_)
+ && !src->IsIntegralConst(0)
+#endif // _TARGET_AMD64_
+ )
+ {
+ MakeSrcContained(putArgStk, src);
+ }
return;
}
+#ifdef FEATURE_PUT_STRUCT_ARG_STK
GenTreePtr dst = putArgStk;
- GenTreePtr src = putArgStk->gtOp1;
GenTreePtr srcAddr = nullptr;
+ bool haveLocalAddr = false;
+ if ((src->OperGet() == GT_OBJ) || (src->OperGet() == GT_IND))
+ {
+ srcAddr = src->gtOp.gtOp1;
+ assert(srcAddr != nullptr);
+ haveLocalAddr = srcAddr->OperIsLocalAddr();
+ }
+ else
+ {
+ assert(varTypeIsSIMD(putArgStk));
+ }
+
// In case of a CpBlk we could use a helper call. In case of putarg_stk we
// can't do that since the helper call could kill some already set up outgoing args.
// TODO-Amd64-Unix: converge the code for putarg_stk with cpyblk/cpyobj.
@@ -545,8 +690,17 @@ void Lowering::LowerPutArgStk(GenTreePutArgStk* putArgStk)
{
putArgStk->gtPutArgStkKind = GenTreePutArgStk::Kind::RepInstr;
}
-}
+ // Always mark the OBJ and ADDR as contained trees by the putarg_stk. The codegen will deal with this tree.
+ MakeSrcContained(putArgStk, src);
+ if (haveLocalAddr)
+ {
+ // If the source address is the address of a lclVar, make the source address contained to avoid unnecessary
+ // copies.
+ //
+ MakeSrcContained(putArgStk, srcAddr);
+ }
#endif // FEATURE_PUT_STRUCT_ARG_STK
+}
/* Lower GT_CAST(srcType, DstType) nodes.
*
@@ -587,10 +741,10 @@ void Lowering::LowerCast(GenTree* tree)
{
assert(tree->OperGet() == GT_CAST);
- GenTreePtr op1 = tree->gtOp.gtOp1;
- var_types dstType = tree->CastToType();
- var_types srcType = op1->TypeGet();
- var_types tmpType = TYP_UNDEF;
+ GenTreePtr castOp = tree->gtCast.CastOp();
+ var_types castToType = tree->CastToType();
+ var_types srcType = castOp->TypeGet();
+ var_types tmpType = TYP_UNDEF;
// force the srcType to unsigned if GT_UNSIGNED flag is set
if (tree->gtFlags & GTF_UNSIGNED)
@@ -600,52 +754,96 @@ void Lowering::LowerCast(GenTree* tree)
// We should never see the following casts as they are expected to be lowered
// apropriately or converted into helper calls by front-end.
- // srcType = float/double dstType = * and overflow detecting cast
+ // srcType = float/double castToType = * and overflow detecting cast
// Reason: must be converted to a helper call
- // srcType = float/double, dstType = ulong
+ // srcType = float/double, castToType = ulong
// Reason: must be converted to a helper call
- // srcType = uint dstType = float/double
+ // srcType = uint castToType = float/double
// Reason: uint -> float/double = uint -> long -> float/double
- // srcType = ulong dstType = float
+ // srcType = ulong castToType = float
// Reason: ulong -> float = ulong -> double -> float
if (varTypeIsFloating(srcType))
{
noway_assert(!tree->gtOverflow());
- noway_assert(dstType != TYP_ULONG);
+ noway_assert(castToType != TYP_ULONG);
}
else if (srcType == TYP_UINT)
{
- noway_assert(!varTypeIsFloating(dstType));
+ noway_assert(!varTypeIsFloating(castToType));
}
else if (srcType == TYP_ULONG)
{
- noway_assert(dstType != TYP_FLOAT);
+ noway_assert(castToType != TYP_FLOAT);
}
// Case of src is a small type and dst is a floating point type.
- if (varTypeIsSmall(srcType) && varTypeIsFloating(dstType))
+ if (varTypeIsSmall(srcType) && varTypeIsFloating(castToType))
{
// These conversions can never be overflow detecting ones.
noway_assert(!tree->gtOverflow());
tmpType = TYP_INT;
}
// case of src is a floating point type and dst is a small type.
- else if (varTypeIsFloating(srcType) && varTypeIsSmall(dstType))
+ else if (varTypeIsFloating(srcType) && varTypeIsSmall(castToType))
{
tmpType = TYP_INT;
}
if (tmpType != TYP_UNDEF)
{
- GenTreePtr tmp = comp->gtNewCastNode(tmpType, op1, tmpType);
+ GenTreePtr tmp = comp->gtNewCastNode(tmpType, castOp, tmpType);
tmp->gtFlags |= (tree->gtFlags & (GTF_UNSIGNED | GTF_OVERFLOW | GTF_EXCEPT));
tree->gtFlags &= ~GTF_UNSIGNED;
tree->gtOp.gtOp1 = tmp;
- BlockRange().InsertAfter(op1, tmp);
+ BlockRange().InsertAfter(castOp, tmp);
+ ContainCheckCast(tmp->AsCast());
}
+
+ // Now determine if we have operands that should be contained.
+ ContainCheckCast(tree->AsCast());
}
+#ifdef FEATURE_SIMD
+//----------------------------------------------------------------------------------------------
+// Lowering::LowerSIMD: Perform containment analysis for a SIMD intrinsic node.
+//
+// Arguments:
+// simdNode - The SIMD intrinsic node.
+//
+void Lowering::LowerSIMD(GenTreeSIMD* simdNode)
+{
+ if (simdNode->TypeGet() == TYP_SIMD12)
+ {
+ // GT_SIMD node requiring to produce TYP_SIMD12 in fact
+ // produces a TYP_SIMD16 result
+ simdNode->gtType = TYP_SIMD16;
+ }
+
+#ifdef _TARGET_XARCH_
+ if ((simdNode->gtSIMDIntrinsicID == SIMDIntrinsicGetItem) && (simdNode->gtGetOp1()->OperGet() == GT_IND))
+ {
+ // If SIMD vector is already in memory, we force its
+ // addr to be evaluated into a reg. This would allow
+ // us to generate [regBase] or [regBase+offset] or
+ // [regBase+sizeOf(SIMD vector baseType)*regIndex]
+ // to access the required SIMD vector element directly
+ // from memory.
+ //
+ // TODO-CQ-XARCH: If addr of GT_IND is GT_LEA, we
+ // might be able update GT_LEA to fold the regIndex
+ // or offset in some cases. Instead with this
+ // approach we always evaluate GT_LEA into a reg.
+ // Ideally, we should be able to lower GetItem intrinsic
+ // into GT_IND(newAddr) where newAddr combines
+ // the addr of SIMD vector with the given index.
+ simdNode->gtOp1->gtFlags |= GTF_IND_REQ_ADDR_IN_REG;
+ }
+#endif
+ ContainCheckSIMD(simdNode);
+}
+#endif // FEATURE_SIMD
+
//----------------------------------------------------------------------------------------------
// Lowering::IsRMWIndirCandidate:
// Returns true if the given operand is a candidate indirection for a read-modify-write
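An illustrative consequence of forcing the GetItem address into a register, as described in LowerSIMD above (registers and element type hypothetical):

    //   C#:   float f = v[i];                      // v is a Vector<float> already in memory
    //   x64:  movss xmm0, dword ptr [rax+rdx*4]    ; base address in rax, index in rdx
    // i.e. the element is loaded directly rather than after loading the whole vector into an xmm register.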
@@ -905,8 +1103,7 @@ bool Lowering::IsRMWMemOpRootedAtStoreInd(GenTreePtr tree, GenTreePtr* outIndirC
if (GenTree::OperIsBinary(oper))
{
// Return if binary op is not one of the supported operations for RMW of memory.
- if (oper != GT_ADD && oper != GT_SUB && oper != GT_AND && oper != GT_OR && oper != GT_XOR &&
- !GenTree::OperIsShiftOrRotate(oper))
+ if (!GenTree::OperIsRMWMemOp(oper))
{
storeInd->SetRMWStatus(STOREIND_RMW_UNSUPPORTED_OPER);
return false;
@@ -1088,15 +1285,19 @@ GenTree* Lowering::PreferredRegOptionalOperand(GenTree* tree)
assert(GenTree::OperIsBinary(tree->OperGet()));
assert(tree->OperIsCommutative() || tree->OperIsCompare() || tree->OperIs(GT_CMP));
- GenTree* op1 = tree->gtGetOp1();
- GenTree* op2 = tree->gtGetOp2();
- GenTree* preferredOp = nullptr;
+ GenTree* op1 = tree->gtGetOp1();
+ GenTree* op2 = tree->gtGetOp2();
+ assert(!op1->IsRegOptional() && !op2->IsRegOptional());
+
+ // We default to op1, as op2 is likely to have the shorter lifetime.
+ GenTree* preferredOp = op1;
// This routine uses the following heuristics:
//
// a) If both are register candidates, marking the one with lower weighted
// ref count as reg-optional would likely be beneficial as it has
- // higher probability of not getting a register.
+ // higher probability of not getting a register. Note that we use !lvDoNotEnregister
+ // here because this is being done while we are adding lclVars for Lowering.
//
// b) op1 = tracked local and op2 = untracked local: LSRA creates two
// ref positions for op2: a def and use position. op2's def position
@@ -1131,51 +1332,25 @@ GenTree* Lowering::PreferredRegOptionalOperand(GenTree* tree)
LclVarDsc* v1 = comp->lvaTable + op1->AsLclVarCommon()->GetLclNum();
LclVarDsc* v2 = comp->lvaTable + op2->AsLclVarCommon()->GetLclNum();
- bool v1IsRegCandidate = !v1->lvDoNotEnregister && v1->lvTracked;
- bool v2IsRegCandidate = !v2->lvDoNotEnregister && v2->lvTracked;
+ bool v1IsRegCandidate = !v1->lvDoNotEnregister;
+ bool v2IsRegCandidate = !v2->lvDoNotEnregister;
if (v1IsRegCandidate && v2IsRegCandidate)
{
- // Both are tracked enregisterable locals. The one with lower weight is less likely
+ // Both are enregisterable locals. The one with lower weight is less likely
// to get a register and hence beneficial to mark the one with lower
// weight as reg optional.
- if (v1->lvRefCntWtd < v2->lvRefCntWtd)
- {
- preferredOp = op1;
- }
- else
+ // If either is not tracked, it may be that it was introduced after liveness
+ // was run, in which case we will always prefer op1 (should we use raw refcnt??).
+ if (v1->lvTracked && v2->lvTracked && (v1->lvRefCntWtd >= v2->lvRefCntWtd))
{
preferredOp = op2;
}
}
- else if (v2IsRegCandidate)
- {
- // v1 is not a reg candidate and its use position is less likely to get a register.
- preferredOp = op1;
- }
- else if (v1IsRegCandidate)
- {
- // v2 is not a reg candidate and its def position always
- // needs a reg. Hence it is better to mark v1 as
- // reg optional.
- preferredOp = op1;
- }
- else
- {
- preferredOp = op1;
- }
}
- else if (op1->OperGet() == GT_LCL_VAR)
- {
- preferredOp = op1;
- }
- else if (op2->OperGet() == GT_LCL_VAR)
+ else if (!(op1->OperGet() == GT_LCL_VAR) && (op2->OperGet() == GT_LCL_VAR))
{
preferredOp = op2;
}
- else
- {
- preferredOp = op1;
- }
return preferredOp;
}
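A worked instance of heuristic (a), with hypothetical weights:

    //   ADD(V01, V02) where V01.lvRefCntWtd == 8 and V02.lvRefCntWtd == 2:
    //   V02 is the less referenced local and thus less likely to get a register,
    //   so it is the operand returned as the preferred reg-optional candidate.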
@@ -1185,121 +1360,102 @@ GenTree* Lowering::PreferredRegOptionalOperand(GenTree* tree)
//------------------------------------------------------------------------
//------------------------------------------------------------------------
-// LowerRMWMemOp: Determine if this is a valid RMW mem op, and if so lower it accordingly
+// ContainCheckCallOperands: Determine whether operands of a call should be contained.
//
// Arguments:
-// node - The indirect store node (GT_STORE_IND) of interest
+// call - The call node of interest
//
// Return Value:
-// Returns true if 'node' is a valid RMW mem op; false otherwise.
+// None.
//
-bool Lowering::LowerRMWMemOp(GenTreeIndir* storeInd)
+void Lowering::ContainCheckCallOperands(GenTreeCall* call)
{
- assert(storeInd->OperGet() == GT_STOREIND);
-
- // SSE2 doesn't support RMW on float values
- assert(!varTypeIsFloating(storeInd));
-
- // Terminology:
- // indirDst = memory write of an addr mode (i.e. storeind destination)
- // indirSrc = value being written to memory (i.e. storeind source which could a binary/unary op)
- // indirCandidate = memory read i.e. a gtInd of an addr mode
- // indirOpSource = source operand used in binary/unary op (i.e. source operand of indirSrc node)
-
- GenTreePtr indirCandidate = nullptr;
- GenTreePtr indirOpSource = nullptr;
-
- if (!IsRMWMemOpRootedAtStoreInd(storeInd, &indirCandidate, &indirOpSource))
+ GenTree* ctrlExpr = call->gtControlExpr;
+ if (call->gtCallType == CT_INDIRECT)
{
- JITDUMP("Lower of StoreInd didn't mark the node as self contained for reason: %d\n",
- storeInd->AsStoreInd()->GetRMWStatus());
- DISPTREERANGE(BlockRange(), storeInd);
- return false;
- }
+ // either gtControlExpr != null or gtCallAddr != null.
+ // Both cannot be non-null at the same time.
+ assert(ctrlExpr == nullptr);
+ assert(call->gtCallAddr != nullptr);
+ ctrlExpr = call->gtCallAddr;
- GenTreePtr indirDst = storeInd->gtGetOp1();
- GenTreePtr indirSrc = storeInd->gtGetOp2();
- genTreeOps oper = indirSrc->OperGet();
-
- // At this point we have successfully detected a RMW memory op of one of the following forms
- // storeInd(indirDst, indirSrc(indirCandidate, indirOpSource)) OR
- // storeInd(indirDst, indirSrc(indirOpSource, indirCandidate) in case of commutative operations OR
- // storeInd(indirDst, indirSrc(indirCandidate) in case of unary operations
- //
- // Here indirSrc = one of the supported binary or unary operation for RMW of memory
- // indirCandidate = a GT_IND node
- // indirCandidateChild = operand of GT_IND indirCandidate
- //
- // The logic below does the following
- // Make indirOpSource contained.
- // Make indirSrc contained.
- // Make indirCandidate contained.
- // Make indirCandidateChild contained.
- // Make indirDst contained except when it is a GT_LCL_VAR or GT_CNS_INT that doesn't fit within addr
- // base.
- //
+#ifdef _TARGET_X86_
+ // Fast tail calls aren't currently supported on x86, but if they ever are, the code
+ // below that handles indirect VSD calls will need to be fixed.
+ assert(!call->IsFastTailCall() || !call->IsVirtualStub());
+#endif // _TARGET_X86_
+ }
- if (GenTree::OperIsBinary(oper))
+ // set reg requirements on call target represented as control sequence.
+ if (ctrlExpr != nullptr)
{
- // On Xarch RMW operations require the source to be an immediate or in a register.
- // Therefore, if we have previously marked the indirOpSource as contained while lowering
- // the binary node, we need to reset that now.
- if (IsContainableMemoryOp(indirOpSource, true))
+ // we should never see a gtControlExpr whose type is void.
+ assert(ctrlExpr->TypeGet() != TYP_VOID);
+
+ // In case of fast tail implemented as jmp, make sure that gtControlExpr is
+ // computed into a register.
+ if (!call->IsFastTailCall())
{
- indirOpSource->ClearContained();
+#ifdef _TARGET_X86_
+ // On x86, we need to generate a very specific pattern for indirect VSD calls:
+ //
+ // 3-byte nop
+ // call dword ptr [eax]
+ //
+ // Where EAX is also used as an argument to the stub dispatch helper. Make
+ // sure that the call target address is computed into EAX in this case.
+ if (call->IsVirtualStub() && (call->gtCallType == CT_INDIRECT))
+ {
+ assert(ctrlExpr->isIndir());
+ MakeSrcContained(call, ctrlExpr);
+ }
+ else
+#endif // _TARGET_X86_
+ if (ctrlExpr->isIndir())
+ {
+ MakeSrcContained(call, ctrlExpr);
+ // We may have cases where we have set a register target on the ctrlExpr, but if it
+ // contained we must clear it.
+ ctrlExpr->gtRegNum = REG_NA;
+ }
}
- JITDUMP("Lower succesfully detected an assignment of the form: *addrMode BinOp= source\n");
}
- else
+ // If there is an explicit this pointer, we don't want that node to produce anything
+ // as it is redundant
+ if (call->gtCallObjp != nullptr)
{
- assert(GenTree::OperIsUnary(oper));
- JITDUMP("Lower succesfully detected an assignment of the form: *addrMode = UnaryOp(*addrMode)\n");
- }
- DISPTREERANGE(BlockRange(), storeInd);
-
- indirSrc->SetContained();
- indirCandidate->SetContained();
+ GenTreePtr thisPtrNode = call->gtCallObjp;
- GenTreePtr indirCandidateChild = indirCandidate->gtGetOp1();
- indirCandidateChild->SetContained();
-
- if (indirCandidateChild->OperGet() == GT_LEA)
- {
- GenTreeAddrMode* addrMode = indirCandidateChild->AsAddrMode();
-
- if (addrMode->HasBase())
+ if (thisPtrNode->canBeContained())
{
- assert(addrMode->Base()->OperIsLeaf());
- addrMode->Base()->SetContained();
+ MakeSrcContained(call, thisPtrNode);
+ if (thisPtrNode->gtOper == GT_PUTARG_REG)
+ {
+ MakeSrcContained(call, thisPtrNode->gtOp.gtOp1);
+ }
}
+ }
- if (addrMode->HasIndex())
+ GenTree* args = call->gtCallArgs;
+ while (args)
+ {
+ GenTree* arg = args->gtOp.gtOp1;
+ if (arg->gtOper == GT_PUTARG_STK)
{
- assert(addrMode->Index()->OperIsLeaf());
- addrMode->Index()->SetContained();
+ LowerPutArgStk(arg->AsPutArgStk());
}
-
- indirDst->SetContained();
+ args = args->gtOp.gtOp2;
}
- else
+ args = call->gtCallLateArgs;
+ while (args)
{
- assert(indirCandidateChild->OperGet() == GT_LCL_VAR || indirCandidateChild->OperGet() == GT_LCL_VAR_ADDR ||
- indirCandidateChild->OperGet() == GT_CLS_VAR_ADDR || indirCandidateChild->OperGet() == GT_CNS_INT);
-
- // If it is a GT_LCL_VAR, it still needs the reg to hold the address.
- // We would still need a reg for GT_CNS_INT if it doesn't fit within addressing mode base.
- // For GT_CLS_VAR_ADDR, we don't need a reg to hold the address, because field address value is known at jit
- // time. Also, we don't need a reg for GT_CLS_VAR_ADDR.
- if (indirCandidateChild->OperGet() == GT_LCL_VAR_ADDR || indirCandidateChild->OperGet() == GT_CLS_VAR_ADDR)
- {
- indirDst->SetContained();
- }
- else if (indirCandidateChild->IsCnsIntOrI() && indirCandidateChild->AsIntConCommon()->FitsInAddrBase(comp))
+ GenTree* arg = args->gtOp.gtOp1;
+ if (arg->gtOper == GT_PUTARG_STK)
{
- indirDst->SetContained();
+ LowerPutArgStk(arg->AsPutArgStk());
}
+ args = args->gtOp.gtOp2;
}
- return true;
}
//------------------------------------------------------------------------
@@ -1380,74 +1536,23 @@ void Lowering::ContainCheckIndir(GenTreeIndir* node)
}
//------------------------------------------------------------------------
-// ContainCheckBinary: Determine whether a binary op's operands should be contained.
+// ContainCheckStoreIndir: determine whether the sources of a STOREIND node should be contained.
//
// Arguments:
-// node - the node we care about
+// node - pointer to the node
//
-void Lowering::ContainCheckBinary(GenTreeOp* node)
+void Lowering::ContainCheckStoreIndir(GenTreeIndir* node)
{
- assert(node->OperIsBinary() && !varTypeIsFloating(node));
-
- // We're not marking a constant hanging on the left of an add
- // as containable so we assign it to a register having CQ impact.
- // TODO-XArch-CQ: Detect this case and support both generating a single instruction
- // for GT_ADD(Constant, SomeTree)
-
- GenTree* op1 = node->gtOp1;
- GenTree* op2 = node->gtOp2;
-
- // We can directly encode the second operand if it is either a containable constant or a memory-op.
- // In case of memory-op, we can encode it directly provided its type matches with 'tree' type.
- // This is because during codegen, type of 'tree' is used to determine emit Type size. If the types
- // do not match, they get normalized (i.e. sign/zero extended) on load into a register.
- bool directlyEncodable = false;
- bool binOpInRMW = false;
- GenTreePtr operand = nullptr;
-
- if (IsContainableImmed(node, op2))
- {
- directlyEncodable = true;
- operand = op2;
- }
- else
- {
- binOpInRMW = IsBinOpInRMWStoreInd(node);
- if (!binOpInRMW)
- {
- const unsigned operatorSize = genTypeSize(node->TypeGet());
- if (IsContainableMemoryOp(op2, true) && (genTypeSize(op2->TypeGet()) == operatorSize))
- {
- directlyEncodable = true;
- operand = op2;
- }
- else if (node->OperIsCommutative())
- {
- if (IsContainableImmed(node, op1) ||
- (IsContainableMemoryOp(op1, true) && (genTypeSize(op1->TypeGet()) == operatorSize) &&
- IsSafeToContainMem(node, op1)))
- {
- // If it is safe, we can reverse the order of operands of commutative operations for efficient
- // codegen
- directlyEncodable = true;
- operand = op1;
- }
- }
- }
- }
-
- if (directlyEncodable)
+ // If the source is a containable immediate, make it contained, unless it is
+ // an int-size or larger store of zero to memory, because we can generate smaller code
+ // by zeroing a register and then storing it.
+ GenTree* src = node->gtOp.gtOp2;
+ if (IsContainableImmed(node, src) &&
+ (!src->IsIntegralConst(0) || varTypeIsSmall(node) || node->gtGetOp1()->OperGet() == GT_CLS_VAR_ADDR))
{
- assert(operand != nullptr);
- MakeSrcContained(node, operand);
- }
- else if (!binOpInRMW)
- {
- // If this binary op neither has contained operands, nor is a
- // Read-Modify-Write (RMW) operation, we can mark its operands
- // as reg optional.
- SetRegOptionalForBinOp(node);
+ MakeSrcContained(node, src);
}
+ ContainCheckIndir(node);
}
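A rough size comparison behind the zero-store exception above (encodings approximate, address hypothetical):

    //   xor  eax, eax
    //   mov  qword ptr [rcx+16], rax    ; ~6 bytes total
    //   mov  qword ptr [rcx+16], 0      ; ~8 bytes, needs a 4-byte immediate
    // so an int-sized or larger integral zero stays un-contained, while small stores and
    // GT_CLS_VAR_ADDR destinations still contain it.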
//------------------------------------------------------------------------
@@ -1471,11 +1576,11 @@ void Lowering::ContainCheckMul(GenTreeOp* node)
{
assert(node->OperGet() == GT_MUL);
- if (IsContainableMemoryOp(op2, true) || op2->IsCnsNonZeroFltOrDbl())
+ if (IsContainableMemoryOp(op2) || op2->IsCnsNonZeroFltOrDbl())
{
MakeSrcContained(node, op2);
}
- else if (op1->IsCnsNonZeroFltOrDbl() || (IsContainableMemoryOp(op1, true) && IsSafeToContainMem(node, op1)))
+ else if (op1->IsCnsNonZeroFltOrDbl() || (IsContainableMemoryOp(op1) && IsSafeToContainMem(node, op1)))
{
// Since GT_MUL is commutative, we will try to re-order operands if it is safe to
// generate more efficient code sequence for the case of GT_MUL(op1=memOp, op2=non-memOp)
@@ -1539,7 +1644,7 @@ void Lowering::ContainCheckMul(GenTreeOp* node)
}
MakeSrcContained(node, imm); // The imm is always contained
- if (IsContainableMemoryOp(other, true))
+ if (IsContainableMemoryOp(other))
{
memOp = other; // memOp may be contained below
}
@@ -1552,12 +1657,11 @@ void Lowering::ContainCheckMul(GenTreeOp* node)
//
if (memOp == nullptr)
{
- if (IsContainableMemoryOp(op2, true) && (op2->TypeGet() == node->TypeGet()) && IsSafeToContainMem(node, op2))
+ if (IsContainableMemoryOp(op2) && (op2->TypeGet() == node->TypeGet()) && IsSafeToContainMem(node, op2))
{
memOp = op2;
}
- else if (IsContainableMemoryOp(op1, true) && (op1->TypeGet() == node->TypeGet()) &&
- IsSafeToContainMem(node, op1))
+ else if (IsContainableMemoryOp(op1) && (op1->TypeGet() == node->TypeGet()) && IsSafeToContainMem(node, op1))
{
memOp = op1;
}
@@ -1699,7 +1803,7 @@ void Lowering::ContainCheckCast(GenTreeCast* node)
// U8 -> R8 conversion requires that the operand be in a register.
if (srcType != TYP_ULONG)
{
- if (IsContainableMemoryOp(castOp, true) || castOp->IsCnsNonZeroFltOrDbl())
+ if (IsContainableMemoryOp(castOp) || castOp->IsCnsNonZeroFltOrDbl())
{
MakeSrcContained(node, castOp);
}
@@ -1774,7 +1878,7 @@ void Lowering::ContainCheckCompare(GenTreeOp* cmp)
{
MakeSrcContained(cmp, otherOp);
}
- else if (IsContainableMemoryOp(otherOp, true) && ((otherOp == op2) || IsSafeToContainMem(cmp, otherOp)))
+ else if (IsContainableMemoryOp(otherOp) && ((otherOp == op2) || IsSafeToContainMem(cmp, otherOp)))
{
MakeSrcContained(cmp, otherOp);
}
@@ -1797,7 +1901,7 @@ void Lowering::ContainCheckCompare(GenTreeOp* cmp)
// we can treat the MemoryOp as contained.
if (op1Type == op2Type)
{
- if (IsContainableMemoryOp(op1, true))
+ if (IsContainableMemoryOp(op1))
{
MakeSrcContained(cmp, op1);
}
@@ -1846,11 +1950,11 @@ void Lowering::ContainCheckCompare(GenTreeOp* cmp)
// Note that TEST does not have a r,rm encoding like CMP has but we can still
// contain the second operand because the emitter maps both r,rm and rm,r to
// the same instruction code. This avoids the need to special case TEST here.
- if (IsContainableMemoryOp(op2, true))
+ if (IsContainableMemoryOp(op2))
{
MakeSrcContained(cmp, op2);
}
- else if (IsContainableMemoryOp(op1, true) && IsSafeToContainMem(cmp, op1))
+ else if (IsContainableMemoryOp(op1) && IsSafeToContainMem(cmp, op1))
{
MakeSrcContained(cmp, op1);
}
@@ -1872,72 +1976,206 @@ void Lowering::ContainCheckCompare(GenTreeOp* cmp)
}
//------------------------------------------------------------------------
-// ContainCheckFloatBinary: determine whether the sources of a floating point binary node should be contained.
+// LowerRMWMemOp: Determine if this is a valid RMW mem op, and if so lower it accordingly
//
// Arguments:
-// node - pointer to the node
+// node - The indirect store node (GT_STORE_IND) of interest
//
-void Lowering::ContainCheckFloatBinary(GenTreeOp* node)
+// Return Value:
+// Returns true if 'node' is a valid RMW mem op; false otherwise.
+//
+bool Lowering::LowerRMWMemOp(GenTreeIndir* storeInd)
{
- assert(node->OperIsBinary() && varTypeIsFloating(node));
+ assert(storeInd->OperGet() == GT_STOREIND);
- // overflow operations aren't supported on float/double types.
- assert(!node->gtOverflow());
+ // SSE2 doesn't support RMW on float values
+ assert(!varTypeIsFloating(storeInd));
- GenTree* op1 = node->gtGetOp1();
- GenTree* op2 = node->gtGetOp2();
+ // Terminology:
+ // indirDst = memory write of an addr mode (i.e. storeind destination)
+ // indirSrc = value being written to memory (i.e. storeind source which could be a binary/unary op)
+ // indirCandidate = memory read i.e. a gtInd of an addr mode
+ // indirOpSource = source operand used in binary/unary op (i.e. source operand of indirSrc node)
- // No implicit conversions at this stage as the expectation is that
- // everything is made explicit by adding casts.
- assert(op1->TypeGet() == op2->TypeGet());
+ GenTreePtr indirCandidate = nullptr;
+ GenTreePtr indirOpSource = nullptr;
- if (IsContainableMemoryOp(op2, true) || op2->IsCnsNonZeroFltOrDbl())
+ if (!IsRMWMemOpRootedAtStoreInd(storeInd, &indirCandidate, &indirOpSource))
{
- MakeSrcContained(node, op2);
+ JITDUMP("Lower of StoreInd didn't mark the node as self contained for reason: %d\n",
+ storeInd->AsStoreInd()->GetRMWStatus());
+ DISPTREERANGE(BlockRange(), storeInd);
+ return false;
}
- else if (node->OperIsCommutative() &&
- (op1->IsCnsNonZeroFltOrDbl() || (IsContainableMemoryOp(op1, true) && IsSafeToContainMem(node, op1))))
+
+ GenTreePtr indirDst = storeInd->gtGetOp1();
+ GenTreePtr indirSrc = storeInd->gtGetOp2();
+ genTreeOps oper = indirSrc->OperGet();
+
+ // At this point we have successfully detected a RMW memory op of one of the following forms
+ // storeInd(indirDst, indirSrc(indirCandidate, indirOpSource)) OR
+ // storeInd(indirDst, indirSrc(indirOpSource, indirCandidate) in case of commutative operations OR
+ // storeInd(indirDst, indirSrc(indirCandidate) in case of unary operations
+ //
+ // Here indirSrc = one of the supported binary or unary operation for RMW of memory
+ // indirCandidate = a GT_IND node
+ // indirCandidateChild = operand of GT_IND indirCandidate
+ //
+ // The logic below does the following
+ // Make indirOpSource contained.
+ // Make indirSrc contained.
+ // Make indirCandidate contained.
+ // Make indirCandidateChild contained.
+ // Make indirDst contained except when it is a GT_LCL_VAR or GT_CNS_INT that doesn't fit within addr
+ // base.
+ //
+
+ // We have already done containment analysis on the indirSrc op.
+ // If any of its operands are marked regOptional, reset that now.
+ indirSrc->AsOp()->gtOp1->ClearRegOptional();
+ if (GenTree::OperIsBinary(oper))
{
- // Though we have GT_ADD(op1=memOp, op2=non-memOp, we try to reorder the operands
- // as long as it is safe so that the following efficient code sequence is generated:
- // addss/sd targetReg, memOp (if op1Reg == targetReg) OR
- // movaps targetReg, op2Reg; addss/sd targetReg, [memOp]
- //
- // Instead of
- // movss op1Reg, [memOp]; addss/sd targetReg, Op2Reg (if op1Reg == targetReg) OR
- // movss op1Reg, [memOp]; movaps targetReg, op1Reg, addss/sd targetReg, Op2Reg
- MakeSrcContained(node, op1);
+ // On Xarch RMW operations require the source to be an immediate or in a register.
+ // Therefore, if we have previously marked the indirOpSource as contained while lowering
+ // the binary node, we need to reset that now.
+ if (IsContainableMemoryOp(indirOpSource))
+ {
+ indirOpSource->ClearContained();
+ }
+ indirSrc->AsOp()->gtOp2->ClearRegOptional();
+ JITDUMP("Lower succesfully detected an assignment of the form: *addrMode BinOp= source\n");
}
else
{
- // If there are no containable operands, we can make an operand reg optional.
- SetRegOptionalForBinOp(node);
+ assert(GenTree::OperIsUnary(oper));
+ JITDUMP("Lower succesfully detected an assignment of the form: *addrMode = UnaryOp(*addrMode)\n");
+ }
+ DISPTREERANGE(BlockRange(), storeInd);
+
+ indirSrc->SetContained();
+ indirCandidate->SetContained();
+
+ GenTreePtr indirCandidateChild = indirCandidate->gtGetOp1();
+ indirCandidateChild->SetContained();
+
+ if (indirCandidateChild->OperGet() == GT_LEA)
+ {
+ GenTreeAddrMode* addrMode = indirCandidateChild->AsAddrMode();
+
+ if (addrMode->HasBase())
+ {
+ assert(addrMode->Base()->OperIsLeaf());
+ addrMode->Base()->SetContained();
+ }
+
+ if (addrMode->HasIndex())
+ {
+ assert(addrMode->Index()->OperIsLeaf());
+ addrMode->Index()->SetContained();
+ }
+
+ indirDst->SetContained();
}
+ else
+ {
+ assert(indirCandidateChild->OperGet() == GT_LCL_VAR || indirCandidateChild->OperGet() == GT_LCL_VAR_ADDR ||
+ indirCandidateChild->OperGet() == GT_CLS_VAR_ADDR || indirCandidateChild->OperGet() == GT_CNS_INT);
+
+ // If it is a GT_LCL_VAR, it still needs the reg to hold the address.
+ // We would still need a reg for GT_CNS_INT if it doesn't fit within addressing mode base.
+ // For GT_CLS_VAR_ADDR, we don't need a reg to hold the address, because field address value is known at jit
+ // time. Also, we don't need a reg for GT_CLS_VAR_ADDR.
+ if (indirCandidateChild->OperGet() == GT_LCL_VAR_ADDR || indirCandidateChild->OperGet() == GT_CLS_VAR_ADDR)
+ {
+ indirDst->SetContained();
+ }
+ else if (indirCandidateChild->IsCnsIntOrI() && indirCandidateChild->AsIntConCommon()->FitsInAddrBase(comp))
+ {
+ indirDst->SetContained();
+ }
+ }
+ return true;
}
//------------------------------------------------------------------------
-// ContainCheckIntrinsic: determine whether the source of an INTRINSIC node should be contained.
+// ContainCheckBinary: Determine whether a binary op's operands should be contained.
//
// Arguments:
-// node - pointer to the node
+// node - the node we care about
//
-void Lowering::ContainCheckIntrinsic(GenTreeOp* node)
+void Lowering::ContainCheckBinary(GenTreeOp* node)
{
- assert(node->OperIs(GT_INTRINSIC));
- if (node->gtIntrinsic.gtIntrinsicId == CORINFO_INTRINSIC_Sqrt)
+ assert(node->OperIsBinary());
+
+ if (varTypeIsFloating(node))
{
- GenTree* op1 = node->gtGetOp1();
- if (IsContainableMemoryOp(op1, true) || op1->IsCnsNonZeroFltOrDbl())
- {
- MakeSrcContained(node, op1);
- }
- else
+ assert(node->OperIs(GT_ADD, GT_SUB));
+ ContainCheckFloatBinary(node);
+ return;
+ }
+
+ // Codegen of these tree nodes sets ZF and SF flags.
+ node->gtFlags |= GTF_ZSF_SET;
+
+ // We're not marking a constant hanging on the left of an add
+ // as containable so we assign it to a register having CQ impact.
+ // TODO-XArch-CQ: Detect this case and support both generating a single instruction
+ // for GT_ADD(Constant, SomeTree)
+
+ GenTree* op1 = node->gtOp1;
+ GenTree* op2 = node->gtOp2;
+
+ // We can directly encode the second operand if it is either a containable constant or a memory-op.
+ // In case of memory-op, we can encode it directly provided its type matches with 'tree' type.
+ // This is because during codegen, type of 'tree' is used to determine emit Type size. If the types
+ // do not match, they get normalized (i.e. sign/zero extended) on load into a register.
+ bool directlyEncodable = false;
+ bool binOpInRMW = false;
+ GenTreePtr operand = nullptr;
+
+ if (IsContainableImmed(node, op2))
+ {
+ directlyEncodable = true;
+ operand = op2;
+ }
+ else
+ {
+ binOpInRMW = IsBinOpInRMWStoreInd(node);
+ if (!binOpInRMW)
{
- // Mark the operand as reg optional since codegen can still
- // generate code if op1 is on stack.
- SetRegOptional(op1);
+ const unsigned operatorSize = genTypeSize(node->TypeGet());
+ if (IsContainableMemoryOp(op2) && (genTypeSize(op2->TypeGet()) == operatorSize))
+ {
+ directlyEncodable = true;
+ operand = op2;
+ }
+ else if (node->OperIsCommutative())
+ {
+ if (IsContainableImmed(node, op1) ||
+ (IsContainableMemoryOp(op1) && (genTypeSize(op1->TypeGet()) == operatorSize) &&
+ IsSafeToContainMem(node, op1)))
+ {
+ // If it is safe, we can reverse the order of operands of commutative operations for efficient
+ // codegen
+ directlyEncodable = true;
+ operand = op1;
+ }
+ }
}
}
+
+ if (directlyEncodable)
+ {
+ assert(operand != nullptr);
+ MakeSrcContained(node, operand);
+ }
+ else if (!binOpInRMW)
+ {
+ // If this binary op neither has contained operands, nor is a
+ // Read-Modify-Write (RMW) operation, we can mark its operands
+ // as reg optional.
+ SetRegOptionalForBinOp(node);
+ }
}
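
The operand-selection order implemented above can be summarized in a free-standing sketch. The types and the helper below are invented for the example and do not mirror the JIT's real data structures; they only make the preference order visible: a containable immediate first, then a same-sized memory operand, then (for commutative operators) the swapped first operand.

    #include <cstddef>

    // Hypothetical stand-ins for the sketch only.
    struct OperandInfo
    {
        bool   isContainableImmediate;
        bool   isContainableMemory;
        size_t sizeInBytes;
    };

    // Returns which operand (2 or 1) can be directly encoded, or 0 for neither,
    // in which case the caller may mark an operand register-optional instead.
    int chooseContainedOperand(const OperandInfo& op1, const OperandInfo& op2, size_t opSize, bool isCommutative)
    {
        if (op2.isContainableImmediate)
            return 2;
        if (op2.isContainableMemory && (op2.sizeInBytes == opSize))
            return 2;
        if (isCommutative && (op1.isContainableImmediate || (op1.isContainableMemory && (op1.sizeInBytes == opSize))))
            return 1; // safe to swap the operands of a commutative operation
        return 0;
    }
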
//------------------------------------------------------------------------
@@ -1958,7 +2196,7 @@ void Lowering::ContainCheckBoundsChk(GenTreeBoundsChk* node)
{
other = node->gtIndex;
}
- else if (IsContainableMemoryOp(node->gtIndex, true))
+ else if (IsContainableMemoryOp(node->gtIndex))
{
other = node->gtIndex;
}
@@ -1969,7 +2207,7 @@ void Lowering::ContainCheckBoundsChk(GenTreeBoundsChk* node)
if (node->gtIndex->TypeGet() == node->gtArrLen->TypeGet())
{
- if (IsContainableMemoryOp(other, true))
+ if (IsContainableMemoryOp(other))
{
MakeSrcContained(node, other);
}
@@ -1981,6 +2219,31 @@ void Lowering::ContainCheckBoundsChk(GenTreeBoundsChk* node)
}
}
+//------------------------------------------------------------------------
+// ContainCheckIntrinsic: determine whether the source of an INTRINSIC node should be contained.
+//
+// Arguments:
+// node - pointer to the node
+//
+void Lowering::ContainCheckIntrinsic(GenTreeOp* node)
+{
+ assert(node->OperIs(GT_INTRINSIC));
+ if (node->gtIntrinsic.gtIntrinsicId == CORINFO_INTRINSIC_Sqrt)
+ {
+ GenTree* op1 = node->gtGetOp1();
+ if (IsContainableMemoryOp(op1) || op1->IsCnsNonZeroFltOrDbl())
+ {
+ MakeSrcContained(node, op1);
+ }
+ else
+ {
+ // Mark the operand as reg optional since codegen can still
+ // generate code if op1 is on stack.
+ SetRegOptional(op1);
+ }
+ }
+}
+
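
As an aside, a tiny example of the kind of call site the Sqrt containment above improves; the function is invented for illustration. With op1 contained, codegen can emit sqrtsd against the memory operand instead of loading the value into a register first.

    #include <cmath>

    // Illustration only: *p can stay in memory and feed "sqrtsd xmm0, [rcx]" directly.
    double sqrtOfLoad(const double* p)
    {
        return std::sqrt(*p);
    }
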
#ifdef FEATURE_SIMD
//----------------------------------------------------------------------------------------------
// ContainCheckSIMD: Perform containment analysis for a SIMD intrinsic node.
@@ -2066,7 +2329,7 @@ void Lowering::ContainCheckSIMD(GenTreeSIMD* simdNode)
// If the index is a constant, mark it as contained.
CheckImmedAndMakeContained(simdNode, op2);
- if (IsContainableMemoryOp(op1, true))
+ if (IsContainableMemoryOp(op1))
{
MakeSrcContained(simdNode, op1);
if (op1->OperGet() == GT_IND)
@@ -2089,6 +2352,50 @@ void Lowering::ContainCheckSIMD(GenTreeSIMD* simdNode)
}
#endif // FEATURE_SIMD
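
For the SIMD GetItem case touched above, a plain C++ illustration (not JIT code): when the vector lives in memory and the index is a constant, the extraction is a single scalar load at a fixed offset, which is what containing op1 enables.

    // Illustration only: with the vector in memory and a constant index, the
    // element comes from a load at [base + index * sizeof(float)].
    float thirdLane(const float (&vec)[4])
    {
        return vec[2];
    }
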
+//------------------------------------------------------------------------
+// ContainCheckFloatBinary: determine whether the sources of a floating point binary node should be contained.
+//
+// Arguments:
+// node - pointer to the node
+//
+void Lowering::ContainCheckFloatBinary(GenTreeOp* node)
+{
+ assert(node->OperIsBinary() && varTypeIsFloating(node));
+
+ // overflow operations aren't supported on float/double types.
+ assert(!node->gtOverflow());
+
+ GenTree* op1 = node->gtGetOp1();
+ GenTree* op2 = node->gtGetOp2();
+
+ // No implicit conversions at this stage as the expectation is that
+ // everything is made explicit by adding casts.
+ assert(op1->TypeGet() == op2->TypeGet());
+
+ if (IsContainableMemoryOp(op2) || op2->IsCnsNonZeroFltOrDbl())
+ {
+ MakeSrcContained(node, op2);
+ }
+ else if (node->OperIsCommutative() &&
+ (op1->IsCnsNonZeroFltOrDbl() || (IsContainableMemoryOp(op1) && IsSafeToContainMem(node, op1))))
+ {
+    // Though we have GT_ADD(op1=memOp, op2=non-memOp), we try to reorder the operands
+ // as long as it is safe so that the following efficient code sequence is generated:
+ // addss/sd targetReg, memOp (if op1Reg == targetReg) OR
+ // movaps targetReg, op2Reg; addss/sd targetReg, [memOp]
+ //
+ // Instead of
+    //       movss op1Reg, [memOp]; addss/sd targetReg, op2Reg  (if op1Reg == targetReg) OR
+    //       movss op1Reg, [memOp]; movaps targetReg, op1Reg; addss/sd targetReg, op2Reg
+ MakeSrcContained(node, op1);
+ }
+ else
+ {
+ // If there are no containable operands, we can make an operand reg optional.
+ SetRegOptionalForBinOp(node);
+ }
+}
+
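
A small illustration of why ContainCheckFloatBinary reorders commutative operands. The function below is hypothetical, but once the memory operand is contained, codegen can emit the instruction sequences described in the comments above rather than a separate load followed by the add.

    // Illustration only: "*p + acc" can become "addsd xmm0, qword ptr [rcx]".
    double accumulate(double acc, const double* p)
    {
        return *p + acc;
    }
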
#endif // _TARGET_XARCH_
#endif // !LEGACY_BACKEND
diff --git a/src/jit/lsra.cpp b/src/jit/lsra.cpp
index a586066b5c..b38bb2ae38 100644
--- a/src/jit/lsra.cpp
+++ b/src/jit/lsra.cpp
@@ -10717,7 +10717,11 @@ void TreeNodeInfo::Initialize(LinearScan* lsra, GenTree* node, LsraLocation loca
// if there is a reg indicated on the tree node, use that for dstCandidates
// the exception is the NOP, which sometimes show up around late args.
// TODO-Cleanup: get rid of those NOPs.
- if (node->gtRegNum == REG_NA || node->gtOper == GT_NOP)
+ if (node->gtRegNum == REG_STK)
+ {
+ dstCandidates = RBM_NONE;
+ }
+ else if (node->gtRegNum == REG_NA || node->gtOper == GT_NOP)
{
#ifdef ARM_SOFTFP
if (node->OperGet() == GT_PUTARG_REG)
diff --git a/src/jit/lsra.h b/src/jit/lsra.h
index 47f5da9cf5..fa7da8d2f9 100644
--- a/src/jit/lsra.h
+++ b/src/jit/lsra.h
@@ -749,29 +749,6 @@ private:
// Update reg state for an incoming register argument
void updateRegStateForArg(LclVarDsc* argDsc);
- inline void setTreeNodeInfo(GenTree* tree, TreeNodeInfo info)
- {
- tree->gtLsraInfo = info;
- tree->gtClearReg(compiler);
-
- DBEXEC(VERBOSE, info.dump(this));
- }
-
- inline void clearDstCount(GenTree* tree)
- {
- tree->gtLsraInfo.dstCount = 0;
- }
-
- inline void clearOperandCounts(GenTree* tree)
- {
- TreeNodeInfo& info = tree->gtLsraInfo;
- info.srcCount = 0;
- info.dstCount = 0;
-
- info.internalIntCount = 0;
- info.internalFloatCount = 0;
- }
-
inline bool isLocalDefUse(GenTree* tree)
{
return tree->gtLsraInfo.isLocalDefUse;
diff --git a/src/jit/lsraarm.cpp b/src/jit/lsraarm.cpp
index 053b593e20..53da45b1cf 100644
--- a/src/jit/lsraarm.cpp
+++ b/src/jit/lsraarm.cpp
@@ -47,6 +47,7 @@ void Lowering::TreeNodeInfoInitReturn(GenTree* tree)
Compiler* compiler = comp;
GenTree* op1 = tree->gtGetOp1();
+ assert(info->dstCount == 0);
if (tree->TypeGet() == TYP_LONG)
{
assert((op1->OperGet() == GT_LONG) && op1->isContained());
@@ -55,14 +56,12 @@ void Lowering::TreeNodeInfoInitReturn(GenTree* tree)
info->srcCount = 2;
loVal->gtLsraInfo.setSrcCandidates(l, RBM_LNGRET_LO);
hiVal->gtLsraInfo.setSrcCandidates(l, RBM_LNGRET_HI);
- info->dstCount = 0;
}
else
{
regMaskTP useCandidates = RBM_NONE;
info->srcCount = ((tree->TypeGet() == TYP_VOID) || op1->isContained()) ? 0 : 1;
- info->dstCount = 0;
if (varTypeIsStruct(tree))
{
@@ -114,7 +113,7 @@ void Lowering::TreeNodeInfoInitLclHeap(GenTree* tree)
LinearScan* l = m_lsra;
Compiler* compiler = comp;
- info->dstCount = 1;
+ assert(info->dstCount == 1);
// Need a variable number of temp regs (see genLclHeap() in codegenarm.cpp):
// Here '-' means don't care.
@@ -218,8 +217,15 @@ void Lowering::TreeNodeInfoInit(GenTree* tree)
TreeNodeInfo* info = &(tree->gtLsraInfo);
RegisterType registerType = TypeGet(tree);
- JITDUMP("TreeNodeInfoInit for: ");
- DISPNODE(tree);
+ if (tree->isContained())
+ {
+ info->dstCount = 0;
+ assert(info->srcCount == 0);
+ return;
+ }
+
+ // Set the default dstCount. This may be modified below.
+ info->dstCount = tree->IsValue() ? 1 : 0;
switch (tree->OperGet())
{
@@ -238,11 +244,11 @@ void Lowering::TreeNodeInfoInit(GenTree* tree)
info->srcCount = 0;
if (tree->TypeGet() != TYP_VOID && tree->gtOp.gtOp1 == nullptr)
{
- info->dstCount = 1;
+ assert(info->dstCount == 1);
}
else
{
- info->dstCount = 0;
+ assert(info->dstCount == 0);
}
break;
@@ -259,7 +265,7 @@ void Lowering::TreeNodeInfoInit(GenTree* tree)
case CORINFO_INTRINSIC_Abs:
case CORINFO_INTRINSIC_Sqrt:
info->srcCount = 1;
- info->dstCount = 1;
+ assert(info->dstCount == 1);
break;
default:
NYI_ARM("Lowering::TreeNodeInfoInit for GT_INTRINSIC");
@@ -272,7 +278,7 @@ void Lowering::TreeNodeInfoInit(GenTree* tree)
{
ContainCheckCast(tree->AsCast());
info->srcCount = 1;
- info->dstCount = 1;
+ assert(info->dstCount == 1);
// Non-overflow casts to/from float/double are done using SSE2 instructions
// and that allow the source operand to be either a reg or memop. Given the
@@ -355,31 +361,29 @@ void Lowering::TreeNodeInfoInit(GenTree* tree)
case GT_JTRUE:
info->srcCount = 0;
- info->dstCount = 0;
- l->clearDstCount(tree->gtOp.gtOp1);
+ assert(info->dstCount == 0);
break;
case GT_JMP:
info->srcCount = 0;
- info->dstCount = 0;
+ assert(info->dstCount == 0);
break;
case GT_SWITCH:
// This should never occur since switch nodes must not be visible at this
// point in the JIT.
info->srcCount = 0;
- info->dstCount = 0; // To avoid getting uninit errors.
noway_assert(!"Switch must be lowered at this point");
break;
case GT_JMPTABLE:
info->srcCount = 0;
- info->dstCount = 1;
+ assert(info->dstCount == 1);
break;
case GT_SWITCH_TABLE:
info->srcCount = 2;
- info->dstCount = 0;
+ assert(info->dstCount == 0);
break;
case GT_ASG:
@@ -387,7 +391,6 @@ void Lowering::TreeNodeInfoInit(GenTree* tree)
case GT_ASG_SUB:
noway_assert(!"We should never hit any assignment operator in lowering");
info->srcCount = 0;
- info->dstCount = 0;
break;
case GT_ADD_LO:
@@ -406,7 +409,7 @@ void Lowering::TreeNodeInfoInit(GenTree* tree)
assert(tree->gtOp.gtOp1->TypeGet() == tree->gtOp.gtOp2->TypeGet());
info->srcCount = 2;
- info->dstCount = 1;
+ assert(info->dstCount == 1);
break;
}
@@ -418,14 +421,14 @@ void Lowering::TreeNodeInfoInit(GenTree* tree)
case GT_XOR:
ContainCheckBinary(tree->AsOp());
info->srcCount = tree->gtOp.gtOp2->isContained() ? 1 : 2;
- info->dstCount = 1;
+ assert(info->dstCount == 1);
break;
case GT_RETURNTRAP:
// this just turns into a compare of its child with an int
// + a conditional call
info->srcCount = 1;
- info->dstCount = 0;
+ assert(info->dstCount == 0);
break;
case GT_MUL:
@@ -442,7 +445,7 @@ void Lowering::TreeNodeInfoInit(GenTree* tree)
case GT_UDIV:
{
info->srcCount = 2;
- info->dstCount = 1;
+ assert(info->dstCount == 1);
}
break;
@@ -458,7 +461,7 @@ void Lowering::TreeNodeInfoInit(GenTree* tree)
case GT_START_NONGC:
case GT_PROF_HOOK:
info->srcCount = 0;
- info->dstCount = 0;
+ assert(info->dstCount == 0);
break;
case GT_LONG:
@@ -473,12 +476,12 @@ void Lowering::TreeNodeInfoInit(GenTree* tree)
info->srcCount = 0;
}
- info->dstCount = 0;
+ assert(info->dstCount == 0);
break;
case GT_CNS_DBL:
info->srcCount = 0;
- info->dstCount = 1;
+ assert(info->dstCount == 1);
if (tree->TypeGet() == TYP_FLOAT)
{
// An int register for float constant
@@ -499,18 +502,16 @@ void Lowering::TreeNodeInfoInit(GenTree* tree)
break;
case GT_RETFILT:
+ assert(info->dstCount == 0);
if (tree->TypeGet() == TYP_VOID)
{
info->srcCount = 0;
- info->dstCount = 0;
}
else
{
assert(tree->TypeGet() == TYP_INT);
info->srcCount = 1;
- info->dstCount = 0;
-
info->setSrcCandidates(l, RBM_INTRET);
tree->gtOp.gtOp1->gtLsraInfo.setSrcCandidates(l, RBM_INTRET);
}
@@ -523,7 +524,7 @@ void Lowering::TreeNodeInfoInit(GenTree* tree)
{
// Consumes arrLen & index - has no result
info->srcCount = 2;
- info->dstCount = 0;
+ assert(info->dstCount == 0);
}
break;
@@ -531,12 +532,12 @@ void Lowering::TreeNodeInfoInit(GenTree* tree)
// These must have been lowered to GT_ARR_INDEX
noway_assert(!"We should never see a GT_ARR_ELEM in lowering");
info->srcCount = 0;
- info->dstCount = 0;
+ assert(info->dstCount == 0);
break;
case GT_ARR_INDEX:
- info->srcCount = 2;
- info->dstCount = 1;
+ info->srcCount = 2;
+ assert(info->dstCount == 1);
info->internalIntCount = 1;
info->isInternalRegDelayFree = true;
@@ -550,7 +551,7 @@ void Lowering::TreeNodeInfoInit(GenTree* tree)
ContainCheckArrOffset(tree->AsArrOffs());
// This consumes the offset, if any, the arrObj and the effective index,
// and produces the flattened offset for this dimension.
- info->dstCount = 1;
+ assert(info->dstCount == 1);
if (tree->gtArrOffs.gtOffset->isContained())
{
@@ -580,7 +581,7 @@ void Lowering::TreeNodeInfoInit(GenTree* tree)
{
info->srcCount++;
}
- info->dstCount = 1;
+ assert(info->dstCount == 1);
// An internal register may be needed too; the logic here should be in sync with the
// genLeaInstruction()'s requirements for a such register.
@@ -605,12 +606,12 @@ void Lowering::TreeNodeInfoInit(GenTree* tree)
case GT_NEG:
info->srcCount = 1;
- info->dstCount = 1;
+ assert(info->dstCount == 1);
break;
case GT_NOT:
info->srcCount = 1;
- info->dstCount = 1;
+ assert(info->dstCount == 1);
break;
case GT_LSH:
@@ -633,8 +634,8 @@ void Lowering::TreeNodeInfoInit(GenTree* tree)
break;
case GT_CKFINITE:
- info->srcCount = 1;
- info->dstCount = 1;
+ info->srcCount = 1;
+ assert(info->dstCount == 1);
info->internalIntCount = 1;
break;
@@ -651,8 +652,7 @@ void Lowering::TreeNodeInfoInit(GenTree* tree)
case GT_INIT_VAL:
// Always a passthrough of its child's value.
- info->srcCount = 0;
- info->dstCount = 0;
+ assert(!"INIT_VAL should always be contained");
break;
case GT_LCLHEAP:
@@ -661,8 +661,8 @@ void Lowering::TreeNodeInfoInit(GenTree* tree)
case GT_STOREIND:
{
- info->dstCount = 0;
- GenTree* src = tree->gtOp.gtOp2;
+ assert(info->dstCount == 0);
+ GenTree* src = tree->gtOp.gtOp2;
if (compiler->codeGen->gcInfo.gcIsWriteBarrierAsgNode(tree))
{
@@ -678,7 +678,7 @@ void Lowering::TreeNodeInfoInit(GenTree* tree)
break;
case GT_NULLCHECK:
- info->dstCount = 0;
+ assert(info->dstCount == 0);
info->srcCount = 1;
info->isLocalDefUse = true;
// null check is an indirection on an addr
@@ -686,14 +686,14 @@ void Lowering::TreeNodeInfoInit(GenTree* tree)
break;
case GT_IND:
- info->dstCount = 1;
+ assert(info->dstCount == 1);
info->srcCount = 1;
TreeNodeInfoInitIndir(tree->AsIndir());
break;
case GT_CATCH_ARG:
info->srcCount = 0;
- info->dstCount = 1;
+ assert(info->dstCount == 1);
info->setDstCandidates(l, RBM_EXCEPTION_OBJECT);
break;
@@ -704,14 +704,13 @@ void Lowering::TreeNodeInfoInit(GenTree* tree)
// It will produce a result of the type of the
// node, and use an internal register for the address.
- info->dstCount = 1;
+ assert(info->dstCount == 1);
assert((tree->gtFlags & (GTF_VAR_DEF | GTF_VAR_USEASG)) == 0);
info->internalIntCount = 1;
break;
case GT_COPY:
info->srcCount = 1;
- info->dstCount = 1;
#ifdef ARM_SOFTFP
// This case currently only occurs for double types that are passed as TYP_LONG;
// actual long types would have been decomposed by now.
@@ -719,23 +718,24 @@ void Lowering::TreeNodeInfoInit(GenTree* tree)
{
info->dstCount = 2;
}
+ else
+#else
+ {
+ assert(info->dstCount == 1);
+ }
#endif
+ break;
+
+ case GT_PUTARG_SPLIT:
+ TreeNodeInfoInitPutArgSplit(tree->AsPutArgSplit());
+ break;
+
+ case GT_PUTARG_STK:
+ TreeNodeInfoInitPutArgStk(tree->AsPutArgStk());
break;
case GT_PUTARG_REG:
-#ifdef ARM_SOFTFP
- // This case currently only occurs for double types that are passed as TYP_LONG;
- // actual long types would have been decomposed by now.
- if (tree->TypeGet() == TYP_LONG)
- {
- info->srcCount = 2;
- }
- else
-#endif
- {
- info->srcCount = 1;
- }
- info->dstCount = info->srcCount;
+ TreeNodeInfoInitPutArgReg(tree->AsUnOp());
break;
default:
@@ -755,15 +755,13 @@ void Lowering::TreeNodeInfoInit(GenTree* tree)
case GT_CLS_VAR_ADDR:
case GT_IL_OFFSET:
case GT_CNS_INT:
- case GT_PUTARG_STK:
case GT_LABEL:
case GT_PINVOKE_PROLOG:
case GT_JCC:
case GT_SETCC:
case GT_MEMORYBARRIER:
case GT_OBJ:
- case GT_PUTARG_SPLIT:
- info->dstCount = tree->IsValue() ? 1 : 0;
+ assert(info->dstCount == (tree->IsValue() ? 1 : 0));
if (kind & (GTK_CONST | GTK_LEAF))
{
info->srcCount = 0;
diff --git a/src/jit/lsraarm64.cpp b/src/jit/lsraarm64.cpp
index 37391675b0..0e0c2c60c3 100644
--- a/src/jit/lsraarm64.cpp
+++ b/src/jit/lsraarm64.cpp
@@ -53,9 +53,15 @@ void Lowering::TreeNodeInfoInit(GenTree* tree)
TreeNodeInfo* info = &(tree->gtLsraInfo);
RegisterType registerType = TypeGet(tree);
- JITDUMP("TreeNodeInfoInit for: ");
- DISPNODE(tree);
- JITDUMP("\n");
+ if (tree->isContained())
+ {
+ info->dstCount = 0;
+ assert(info->srcCount == 0);
+ return;
+ }
+
+ // Set the default dstCount. This may be modified below.
+ info->dstCount = tree->IsValue() ? 1 : 0;
switch (tree->OperGet())
{
@@ -63,7 +69,6 @@ void Lowering::TreeNodeInfoInit(GenTree* tree)
GenTree* op2;
default:
- info->dstCount = tree->IsValue() ? 1 : 0;
if (kind & (GTK_CONST | GTK_LEAF))
{
info->srcCount = 0;
@@ -88,7 +93,7 @@ void Lowering::TreeNodeInfoInit(GenTree* tree)
case GT_STORE_LCL_FLD:
case GT_STORE_LCL_VAR:
info->srcCount = 1;
- info->dstCount = 0;
+ assert(info->dstCount == 0);
TreeNodeInfoInitStoreLoc(tree->AsLclVarCommon());
break;
@@ -99,12 +104,12 @@ void Lowering::TreeNodeInfoInit(GenTree* tree)
case GT_START_NONGC:
case GT_PROF_HOOK:
info->srcCount = 0;
- info->dstCount = 0;
+ assert(info->dstCount == 0);
break;
case GT_CNS_DBL:
info->srcCount = 0;
- info->dstCount = 1;
+ assert(info->dstCount == 1);
{
GenTreeDblCon* dblConst = tree->AsDblCon();
double constValue = dblConst->gtDblCon.gtDconVal;
@@ -126,7 +131,7 @@ void Lowering::TreeNodeInfoInit(GenTree* tree)
case GT_QMARK:
case GT_COLON:
info->srcCount = 0;
- info->dstCount = 0;
+ assert(info->dstCount == 0);
unreached();
break;
@@ -138,14 +143,14 @@ void Lowering::TreeNodeInfoInit(GenTree* tree)
if (tree->TypeGet() == TYP_VOID)
{
info->srcCount = 0;
- info->dstCount = 0;
+ assert(info->dstCount == 0);
}
else
{
assert(tree->TypeGet() == TYP_INT);
info->srcCount = 1;
- info->dstCount = 0;
+ assert(info->dstCount == 0);
info->setSrcCandidates(l, RBM_INTRET);
tree->gtOp.gtOp1->gtLsraInfo.setSrcCandidates(l, RBM_INTRET);
@@ -159,42 +164,40 @@ void Lowering::TreeNodeInfoInit(GenTree* tree)
info->srcCount = 0;
if (tree->TypeGet() != TYP_VOID && tree->gtOp.gtOp1 == nullptr)
{
- info->dstCount = 1;
+ assert(info->dstCount == 1);
}
else
{
- info->dstCount = 0;
+ assert(info->dstCount == 0);
}
break;
case GT_JTRUE:
info->srcCount = 0;
- info->dstCount = 0;
- l->clearDstCount(tree->gtOp.gtOp1);
+ assert(info->dstCount == 0);
break;
case GT_JMP:
info->srcCount = 0;
- info->dstCount = 0;
+ assert(info->dstCount == 0);
break;
case GT_SWITCH:
// This should never occur since switch nodes must not be visible at this
// point in the JIT.
info->srcCount = 0;
- info->dstCount = 0; // To avoid getting uninit errors.
noway_assert(!"Switch must be lowered at this point");
break;
case GT_JMPTABLE:
info->srcCount = 0;
- info->dstCount = 1;
+ assert(info->dstCount == 1);
break;
case GT_SWITCH_TABLE:
info->srcCount = 2;
info->internalIntCount = 1;
- info->dstCount = 0;
+ assert(info->dstCount == 0);
break;
case GT_ASG:
@@ -202,7 +205,6 @@ void Lowering::TreeNodeInfoInit(GenTree* tree)
case GT_ASG_SUB:
noway_assert(!"We should never hit any assignment operator in lowering");
info->srcCount = 0;
- info->dstCount = 0;
break;
case GT_ADD:
@@ -217,9 +219,6 @@ void Lowering::TreeNodeInfoInit(GenTree* tree)
assert(tree->gtOp.gtOp1->TypeGet() == tree->gtOp.gtOp2->TypeGet());
info->srcCount = 2;
- info->dstCount = 1;
-
- break;
}
__fallthrough;
@@ -227,16 +226,15 @@ void Lowering::TreeNodeInfoInit(GenTree* tree)
case GT_AND:
case GT_OR:
case GT_XOR:
- ContainCheckBinary(tree->AsOp());
info->srcCount = tree->gtOp.gtOp2->isContained() ? 1 : 2;
- info->dstCount = 1;
+ assert(info->dstCount == 1);
break;
case GT_RETURNTRAP:
// this just turns into a compare of its child with an int
// + a conditional call
info->srcCount = 1;
- info->dstCount = 0;
+ assert(info->dstCount == 0);
break;
case GT_MOD:
@@ -259,7 +257,7 @@ void Lowering::TreeNodeInfoInit(GenTree* tree)
case GT_UDIV:
{
info->srcCount = 2;
- info->dstCount = 1;
+ assert(info->dstCount == 1);
}
break;
@@ -277,7 +275,7 @@ void Lowering::TreeNodeInfoInit(GenTree* tree)
assert(op1->TypeGet() == tree->TypeGet());
info->srcCount = 1;
- info->dstCount = 1;
+ assert(info->dstCount == 1);
}
break;
@@ -294,7 +292,7 @@ void Lowering::TreeNodeInfoInit(GenTree* tree)
// see CodeGen::genIntToIntCast()
info->srcCount = 1;
- info->dstCount = 1;
+ assert(info->dstCount == 1);
// Non-overflow casts to/from float/double are done using SSE2 instructions
// and that allow the source operand to be either a reg or memop. Given the
@@ -347,12 +345,12 @@ void Lowering::TreeNodeInfoInit(GenTree* tree)
case GT_NEG:
info->srcCount = 1;
- info->dstCount = 1;
+ assert(info->dstCount == 1);
break;
case GT_NOT:
info->srcCount = 1;
- info->dstCount = 1;
+ assert(info->dstCount == 1);
break;
case GT_LSH:
@@ -372,14 +370,14 @@ void Lowering::TreeNodeInfoInit(GenTree* tree)
break;
case GT_CKFINITE:
- info->srcCount = 1;
- info->dstCount = 1;
+ info->srcCount = 1;
+ assert(info->dstCount == 1);
info->internalIntCount = 1;
break;
case GT_CMPXCHG:
info->srcCount = 3;
- info->dstCount = 1;
+ assert(info->dstCount == 1);
// TODO-ARM64-NYI
NYI("CMPXCHG");
@@ -388,7 +386,15 @@ void Lowering::TreeNodeInfoInit(GenTree* tree)
case GT_LOCKADD:
ContainCheckBinary(tree->AsOp());
info->srcCount = tree->gtOp.gtOp2->isContained() ? 1 : 2;
- info->dstCount = 1;
+ assert(info->dstCount == 1);
+ break;
+
+ case GT_PUTARG_STK:
+ TreeNodeInfoInitPutArgStk(tree->AsPutArgStk());
+ break;
+
+ case GT_PUTARG_REG:
+ TreeNodeInfoInitPutArgReg(tree->AsUnOp());
break;
case GT_CALL:
@@ -402,7 +408,7 @@ void Lowering::TreeNodeInfoInit(GenTree* tree)
assert(!l->isCandidateLocalRef(child));
MakeSrcContained(tree, child);
info->srcCount = 0;
- info->dstCount = 1;
+ assert(info->dstCount == 1);
}
break;
@@ -411,7 +417,6 @@ void Lowering::TreeNodeInfoInit(GenTree* tree)
// These should all be eliminated prior to Lowering.
assert(!"Non-store block node in Lowering");
info->srcCount = 0;
- info->dstCount = 0;
break;
case GT_STORE_BLK:
@@ -423,14 +428,13 @@ void Lowering::TreeNodeInfoInit(GenTree* tree)
case GT_INIT_VAL:
// Always a passthrough of its child's value.
- info->srcCount = 0;
- info->dstCount = 0;
+ assert(!"INIT_VAL should always be contained");
break;
case GT_LCLHEAP:
{
ContainCheckLclHeap(tree->AsOp());
- info->dstCount = 1;
+ assert(info->dstCount == 1);
// Need a variable number of temp regs (see genLclHeap() in codegenamd64.cpp):
// Here '-' means don't care.
@@ -536,7 +540,7 @@ void Lowering::TreeNodeInfoInit(GenTree* tree)
GenTreeBoundsChk* node = tree->AsBoundsChk();
// Consumes arrLen & index - has no result
info->srcCount = 2;
- info->dstCount = 0;
+ assert(info->dstCount == 0);
GenTree* intCns = nullptr;
GenTree* other = nullptr;
@@ -555,12 +559,12 @@ void Lowering::TreeNodeInfoInit(GenTree* tree)
// These must have been lowered to GT_ARR_INDEX
noway_assert(!"We should never see a GT_ARR_ELEM in lowering");
info->srcCount = 0;
- info->dstCount = 0;
+ assert(info->dstCount == 0);
break;
case GT_ARR_INDEX:
- info->srcCount = 2;
- info->dstCount = 1;
+ info->srcCount = 2;
+ assert(info->dstCount == 1);
info->internalIntCount = 1;
info->isInternalRegDelayFree = true;
@@ -574,8 +578,8 @@ void Lowering::TreeNodeInfoInit(GenTree* tree)
ContainCheckArrOffset(tree->AsArrOffs());
// This consumes the offset, if any, the arrObj and the effective index,
// and produces the flattened offset for this dimension.
- info->srcCount = tree->gtArrOffs.gtOffset->isContained() ? 2 : 3;
- info->dstCount = 1;
+ info->srcCount = tree->gtArrOffs.gtOffset->isContained() ? 2 : 3;
+ assert(info->dstCount == 1);
info->internalIntCount = 1;
break;
@@ -587,8 +591,7 @@ void Lowering::TreeNodeInfoInit(GenTree* tree)
GenTree* index = lea->Index();
unsigned cns = lea->gtOffset;
- // This LEA is instantiating an address,
- // so we set up the srcCount and dstCount here.
+ // This LEA is instantiating an address, so we set up the srcCount here.
info->srcCount = 0;
if (base != nullptr)
{
@@ -598,7 +601,7 @@ void Lowering::TreeNodeInfoInit(GenTree* tree)
{
info->srcCount++;
}
- info->dstCount = 1;
+ assert(info->dstCount == 1);
// On ARM64 we may need a single internal register
// (when both conditions are true then we still only need a single internal register)
@@ -617,7 +620,7 @@ void Lowering::TreeNodeInfoInit(GenTree* tree)
case GT_STOREIND:
{
- info->dstCount = 0;
+ assert(info->dstCount == 0);
if (compiler->codeGen->gcInfo.gcIsWriteBarrierAsgNode(tree))
{
@@ -635,7 +638,7 @@ void Lowering::TreeNodeInfoInit(GenTree* tree)
break;
case GT_NULLCHECK:
- info->dstCount = 0;
+ assert(info->dstCount == 0);
info->srcCount = 1;
info->isLocalDefUse = true;
// null check is an indirection on an addr
@@ -643,14 +646,14 @@ void Lowering::TreeNodeInfoInit(GenTree* tree)
break;
case GT_IND:
- info->dstCount = 1;
+ assert(info->dstCount == 1);
info->srcCount = 1;
TreeNodeInfoInitIndir(tree->AsIndir());
break;
case GT_CATCH_ARG:
info->srcCount = 0;
- info->dstCount = 1;
+ assert(info->dstCount == 1);
info->setDstCandidates(l, RBM_EXCEPTION_OBJECT);
break;
@@ -661,7 +664,7 @@ void Lowering::TreeNodeInfoInit(GenTree* tree)
// It will produce a result of the type of the
// node, and use an internal register for the address.
- info->dstCount = 1;
+ assert(info->dstCount == 1);
assert((tree->gtFlags & (GTF_VAR_DEF | GTF_VAR_USEASG)) == 0);
info->internalIntCount = 1;
break;
@@ -692,7 +695,7 @@ void Lowering::TreeNodeInfoInitReturn(GenTree* tree)
regMaskTP useCandidates = RBM_NONE;
info->srcCount = ((tree->TypeGet() == TYP_VOID) || op1->isContained()) ? 0 : 1;
- info->dstCount = 0;
+ assert(info->dstCount == 0);
if (varTypeIsStruct(tree))
{
diff --git a/src/jit/lsraarmarch.cpp b/src/jit/lsraarmarch.cpp
index 2aca40f982..08fb4ba409 100644
--- a/src/jit/lsraarmarch.cpp
+++ b/src/jit/lsraarmarch.cpp
@@ -41,11 +41,10 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
//
void Lowering::TreeNodeInfoInitStoreLoc(GenTreeLclVarCommon* storeLoc)
{
- ContainCheckStoreLoc(storeLoc);
TreeNodeInfo* info = &(storeLoc->gtLsraInfo);
GenTree* op1 = storeLoc->gtGetOp1();
- info->dstCount = 0;
+ assert(info->dstCount == 0);
#ifdef _TARGET_ARM_
if (varTypeIsLong(op1))
{
@@ -91,12 +90,17 @@ void Lowering::TreeNodeInfoInitStoreLoc(GenTreeLclVarCommon* storeLoc)
//
void Lowering::TreeNodeInfoInitCmp(GenTreePtr tree)
{
- ContainCheckCompare(tree->AsOp());
-
TreeNodeInfo* info = &(tree->gtLsraInfo);
info->srcCount = tree->gtOp.gtOp2->isContained() ? 1 : 2;
- info->dstCount = tree->OperIs(GT_CMP) ? 0 : 1;
+ if (info->isNoRegCompare)
+ {
+ info->dstCount = 0;
+ }
+ else
+ {
+ assert((info->dstCount == 1) || tree->OperIs(GT_CMP));
+ }
}
void Lowering::TreeNodeInfoInitGCWriteBarrier(GenTree* tree)
@@ -159,8 +163,6 @@ void Lowering::TreeNodeInfoInitGCWriteBarrier(GenTree* tree)
//
void Lowering::TreeNodeInfoInitIndir(GenTreeIndir* indirTree)
{
- ContainCheckIndir(indirTree);
-
// If this is the rhs of a block copy (i.e. non-enregisterable struct),
// it has no register requirements.
if (indirTree->TypeGet() == TYP_STRUCT)
@@ -284,16 +286,14 @@ void Lowering::TreeNodeInfoInitShiftRotate(GenTree* tree)
// Return Value:
// None.
//
-void Lowering::TreeNodeInfoInitPutArgReg(
- GenTreeUnOp* node, regNumber argReg, TreeNodeInfo& info, bool isVarArgs, bool* callHasFloatRegArgs)
+void Lowering::TreeNodeInfoInitPutArgReg(GenTreeUnOp* node)
{
assert(node != nullptr);
assert(node->OperIsPutArgReg());
+ node->gtLsraInfo.srcCount = 1;
+ regNumber argReg = node->gtRegNum;
assert(argReg != REG_NA);
- // Each register argument corresponds to one source.
- info.srcCount++;
-
// Set the register requirements for the node.
regMaskTP argMask = genRegMask(argReg);
#ifdef ARM_SOFTFP
@@ -301,7 +301,7 @@ void Lowering::TreeNodeInfoInitPutArgReg(
// The actual `long` types must have been transformed as a field list with two fields.
if (node->TypeGet() == TYP_LONG)
{
- info.srcCount++;
+ node->gtLsraInfo.srcCount++;
assert(genRegArgNext(argReg) == REG_NEXT(argReg));
argMask |= genRegMask(REG_NEXT(argReg));
}
@@ -312,8 +312,37 @@ void Lowering::TreeNodeInfoInitPutArgReg(
// To avoid redundant moves, have the argument operand computed in the
// register in which the argument is passed to the call.
node->gtOp.gtOp1->gtLsraInfo.setSrcCandidates(m_lsra, m_lsra->getUseCandidates(node));
+}
+
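
To make the register preferencing above concrete, a hypothetical call for illustration, assuming the standard ARM32 calling convention: each PUTARG_REG pins its operand to the register the argument travels in, so no extra moves are needed at the call site.

    // Illustration only: 'a' is computed directly into r0 and 'b' into r1,
    // the registers recorded on their PUTARG_REG nodes.
    int add2(int a, int b);

    int callAdd2()
    {
        return add2(1, 2);
    }
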
+//------------------------------------------------------------------------
+// HandleFloatVarArgs: Handle additional register requirements for a varargs call
+//
+// Arguments:
+// call - The call node of interest
+// argNode - The current argument
+//
+// Return Value:
+// None.
+//
+// Notes:
+// In the case of a varargs call, the ABI dictates that if we have floating point args,
+// we must pass the enregistered arguments in both the integer and floating point registers.
+// Since the integer register is not associated with the arg node, we will reserve it as
+// an internal register on the call so that it is not used during the evaluation of the call node
+// (e.g. for the target).
+void Lowering::HandleFloatVarArgs(GenTreeCall* call, GenTree* argNode, bool* callHasFloatRegArgs)
+{
+#if FEATURE_VARARG
+ if (call->IsVarargs() && varTypeIsFloating(argNode))
+ {
+ *callHasFloatRegArgs = true;
- *callHasFloatRegArgs |= varTypeIsFloating(node->TypeGet());
+ regNumber argReg = argNode->gtRegNum;
+ regNumber targetReg = comp->getCallArgIntRegister(argReg);
+ call->gtLsraInfo.setInternalIntCount(call->gtLsraInfo.internalIntCount + 1);
+ call->gtLsraInfo.addInternalCandidates(m_lsra, genRegMask(targetReg));
+ }
+#endif // FEATURE_VARARG
}
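
A minimal usage illustration of the varargs situation described in the Notes above. The snippet is ordinary C++ and only shows the kind of call involved, assuming an ABI (such as the Windows varargs convention) where enregistered floating point arguments must also be mirrored in the corresponding integer argument registers.

    #include <cstdio>

    // Illustration only: the double passed to the varargs callee is the case in
    // which HandleFloatVarArgs reserves the matching integer argument register.
    void printValue(double d)
    {
        std::printf("%f\n", d);
    }
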
//------------------------------------------------------------------------
@@ -422,25 +451,8 @@ void Lowering::TreeNodeInfoInitCall(GenTreeCall* call)
info->setDstCandidates(l, RBM_INTRET);
}
- // If there is an explicit this pointer, we don't want that node to produce anything
- // as it is redundant
- if (call->gtCallObjp != nullptr)
- {
- GenTreePtr thisPtrNode = call->gtCallObjp;
-
- if (thisPtrNode->gtOper == GT_PUTARG_REG)
- {
- l->clearOperandCounts(thisPtrNode);
- thisPtrNode->SetContained();
- l->clearDstCount(thisPtrNode->gtOp.gtOp1);
- }
- else
- {
- l->clearDstCount(thisPtrNode);
- }
- }
-
// First, count reg args
+ // Each register argument corresponds to one source.
bool callHasFloatRegArgs = false;
for (GenTreePtr list = call->gtCallLateArgs; list; list = list->MoveNext())
@@ -449,29 +461,62 @@ void Lowering::TreeNodeInfoInitCall(GenTreeCall* call)
GenTreePtr argNode = list->Current();
+#ifdef DEBUG
+ // During TreeNodeInfoInit, we only use the ArgTabEntry for validation,
+ // as getting it is rather expensive.
fgArgTabEntryPtr curArgTabEntry = compiler->gtArgEntryByNode(call, argNode);
+ regNumber argReg = curArgTabEntry->regNum;
assert(curArgTabEntry);
+#endif
- if (curArgTabEntry->regNum == REG_STK)
+ if (argNode->gtOper == GT_PUTARG_STK)
{
// late arg that is not passed in a register
- assert(argNode->gtOper == GT_PUTARG_STK);
+ assert(curArgTabEntry->regNum == REG_STK);
+ GenTree* putArgChild = argNode->gtGetOp1();
+ if (!varTypeIsStruct(putArgChild) && !putArgChild->OperIs(GT_FIELD_LIST))
+ {
+#ifdef ARM_SOFTFP
+ // The `double` types have been transformed to `long` on armel, while the actual longs
+ // have been decomposed.
+ const bool isDouble = putArgChild->TypeGet() == TYP_LONG;
+ if (isDouble)
+ {
+ argNode->gtLsraInfo.srcCount = 2;
+ }
+#endif // ARM_SOFTFP
+
+#ifdef DEBUG
+// Validate the slot count for this arg.
+#ifdef _TARGET_ARM_
+#ifndef ARM_SOFTFP
+ const bool isDouble = (curArgTabEntry->numSlots == 2) && (putArgChild->TypeGet() == TYP_DOUBLE);
+#endif // !ARM_SOFTFP
+
+ // We must not have a multi-reg struct; double uses 2 slots and isn't a multi-reg struct
+ assert((curArgTabEntry->numSlots == 1) || isDouble);
- TreeNodeInfoInitPutArgStk(argNode->AsPutArgStk(), curArgTabEntry);
+#else // !_TARGET_ARM_
+ // We must not have a multi-reg struct
+ assert(curArgTabEntry->numSlots == 1);
+#endif // !_TARGET_ARM_
+#endif
+ }
continue;
}
// A GT_FIELD_LIST has a TYP_VOID, but is used to represent a multireg struct
if (argNode->OperGet() == GT_FIELD_LIST)
{
- argNode->SetContained();
+ assert(argNode->isContained());
// There could be up to 2-4 PUTARG_REGs in the list (3 or 4 can only occur for HFAs)
- regNumber argReg = curArgTabEntry->regNum;
for (GenTreeFieldList* entry = argNode->AsFieldList(); entry != nullptr; entry = entry->Rest())
{
- TreeNodeInfoInitPutArgReg(entry->Current()->AsUnOp(), argReg, *info, false, &callHasFloatRegArgs);
-
+ info->srcCount++;
+#ifdef DEBUG
+ assert(entry->Current()->OperIs(GT_PUTARG_REG));
+ assert(entry->Current()->gtRegNum == argReg);
// Update argReg for the next putarg_reg (if any)
argReg = genRegArgNext(argReg);
@@ -482,18 +527,21 @@ void Lowering::TreeNodeInfoInitCall(GenTreeCall* call)
argReg = genRegArgNext(argReg);
}
#endif // _TARGET_ARM_
+#endif
}
}
#ifdef _TARGET_ARM_
else if (argNode->OperGet() == GT_PUTARG_SPLIT)
{
fgArgTabEntryPtr curArgTabEntry = compiler->gtArgEntryByNode(call, argNode);
- TreeNodeInfoInitPutArgSplit(argNode->AsPutArgSplit(), *info, curArgTabEntry);
}
#endif
else
{
- TreeNodeInfoInitPutArgReg(argNode->AsUnOp(), curArgTabEntry->regNum, *info, false, &callHasFloatRegArgs);
+ assert(argNode->OperIs(GT_PUTARG_REG));
+ assert(argNode->gtRegNum == argReg);
+ HandleFloatVarArgs(call, argNode, &callHasFloatRegArgs);
+ info->srcCount++;
}
}
@@ -518,25 +566,21 @@ void Lowering::TreeNodeInfoInitCall(GenTreeCall* call)
assert(curArgTabEntry);
assert(curArgTabEntry->regNum == REG_STK);
-
- TreeNodeInfoInitPutArgStk(arg->AsPutArgStk(), curArgTabEntry);
}
#ifdef _TARGET_ARM_
else if (arg->OperGet() == GT_PUTARG_SPLIT)
{
+#ifdef DEBUG
fgArgTabEntryPtr curArgTabEntry = compiler->gtArgEntryByNode(call, arg);
- TreeNodeInfoInitPutArgSplit(arg->AsPutArgSplit(), *info, curArgTabEntry);
+ assert(arg->AsPutArgSplit()->gtNumRegs == curArgTabEntry->numRegs);
+#endif
+ info->srcCount += arg->gtLsraInfo.dstCount;
}
#endif
else
{
TreeNodeInfo* argInfo = &(arg->gtLsraInfo);
- if (argInfo->dstCount != 0)
- {
- argInfo->isLocalDefUse = true;
- }
-
- argInfo->dstCount = 0;
+ assert((argInfo->dstCount == 0) || (argInfo->isLocalDefUse));
}
}
args = args->gtOp.gtOp2;
@@ -576,16 +620,13 @@ void Lowering::TreeNodeInfoInitCall(GenTreeCall* call)
// Notes:
// Set the child node(s) to be contained when we have a multireg arg
//
-void Lowering::TreeNodeInfoInitPutArgStk(GenTreePutArgStk* argNode, fgArgTabEntryPtr info)
+void Lowering::TreeNodeInfoInitPutArgStk(GenTreePutArgStk* argNode)
{
assert(argNode->gtOper == GT_PUTARG_STK);
GenTreePtr putArgChild = argNode->gtOp.gtOp1;
- // Initialize 'argNode' as not contained, as this is both the default case
- // and how MakeSrcContained expects to find things setup.
- //
- argNode->gtLsraInfo.srcCount = 1;
+ argNode->gtLsraInfo.srcCount = 0;
argNode->gtLsraInfo.dstCount = 0;
// Do we have a TYP_STRUCT argument (or a GT_FIELD_LIST), if so it must be a multireg pass-by-value struct
@@ -595,9 +636,12 @@ void Lowering::TreeNodeInfoInitPutArgStk(GenTreePutArgStk* argNode, fgArgTabEntr
if (putArgChild->OperGet() == GT_FIELD_LIST)
{
+ assert(putArgChild->isContained());
// We consume all of the items in the GT_FIELD_LIST
- argNode->gtLsraInfo.srcCount = info->numSlots;
- putArgChild->SetContained();
+ for (GenTreeFieldList* current = putArgChild->AsFieldList(); current != nullptr; current = current->Rest())
+ {
+ argNode->gtLsraInfo.srcCount++;
+ }
}
else
{
@@ -617,8 +661,7 @@ void Lowering::TreeNodeInfoInitPutArgStk(GenTreePutArgStk* argNode, fgArgTabEntr
// We will generate all of the code for the GT_PUTARG_STK, the GT_OBJ and the GT_LCL_VAR_ADDR
// as one contained operation
//
- MakeSrcContained(putArgChild, objChild);
- putArgChild->gtLsraInfo.srcCount--;
+ assert(objChild->isContained());
}
}
@@ -626,31 +669,24 @@ void Lowering::TreeNodeInfoInitPutArgStk(GenTreePutArgStk* argNode, fgArgTabEntr
// as one contained operation
//
argNode->gtLsraInfo.srcCount = putArgChild->gtLsraInfo.srcCount;
- MakeSrcContained(argNode, putArgChild);
+ assert(putArgChild->isContained());
}
}
else
{
-#ifdef _TARGET_ARM_
-
-#ifdef ARM_SOFTFP
- // The `double` types have been transformed to `long` on armel.
- const bool isDouble = (info->numSlots == 2) && (putArgChild->TypeGet() == TYP_LONG);
+#if defined(_TARGET_ARM_) && defined(ARM_SOFTFP)
+ // The `double` types have been transformed to `long` on armel,
+ // while the actual long types have been decomposed.
+ const bool isDouble = (putArgChild->TypeGet() == TYP_LONG);
if (isDouble)
{
argNode->gtLsraInfo.srcCount = 2;
}
-#else // !ARM_SOFTFP
- const bool isDouble = (info->numSlots == 2) && (putArgChild->TypeGet() == TYP_DOUBLE);
-#endif // !ARM_SOFTFP
-
- // We must not have a multi-reg struct; double uses 2 slots and isn't a multi-reg struct
- assert((info->numSlots == 1) || isDouble);
-
-#else // !_TARGET_ARM_
- // We must not have a multi-reg struct
- assert(info->numSlots == 1);
-#endif // !_TARGET_ARM_
+ else
+#endif // defined(_TARGET_ARM_) && defined(ARM_SOFTFP)
+ {
+ argNode->gtLsraInfo.srcCount = 1;
+ }
}
}
@@ -667,19 +703,18 @@ void Lowering::TreeNodeInfoInitPutArgStk(GenTreePutArgStk* argNode, fgArgTabEntr
// Notes:
// Set the child node(s) to be contained
//
-void Lowering::TreeNodeInfoInitPutArgSplit(GenTreePutArgSplit* argNode, TreeNodeInfo& info, fgArgTabEntryPtr argInfo)
+void Lowering::TreeNodeInfoInitPutArgSplit(GenTreePutArgSplit* argNode)
{
assert(argNode->gtOper == GT_PUTARG_SPLIT);
GenTreePtr putArgChild = argNode->gtOp.gtOp1;
// Registers for split argument corresponds to source
- argNode->gtLsraInfo.dstCount = argInfo->numRegs;
- info.srcCount += argInfo->numRegs;
+ argNode->gtLsraInfo.dstCount = argNode->gtNumRegs;
- regNumber argReg = argInfo->regNum;
+ regNumber argReg = argNode->gtRegNum;
regMaskTP argMask = RBM_NONE;
- for (unsigned i = 0; i < argInfo->numRegs; i++)
+ for (unsigned i = 0; i < argNode->gtNumRegs; i++)
{
argMask |= genRegMask((regNumber)((unsigned)argReg + i));
}
@@ -692,29 +727,32 @@ void Lowering::TreeNodeInfoInitPutArgSplit(GenTreePutArgSplit* argNode, TreeNode
// 1. Consume all of the items in the GT_FIELD_LIST (source)
// 2. Store to target slot and move to target registers (destination) from source
//
- argNode->gtLsraInfo.srcCount = argInfo->numRegs + argInfo->numSlots;
+ unsigned slotCount = 0;
// To avoid redundant moves, have the argument operand computed in the
// register in which the argument is passed to the call.
GenTreeFieldList* fieldListPtr = putArgChild->AsFieldList();
for (unsigned idx = 0; fieldListPtr != nullptr; fieldListPtr = fieldListPtr->Rest(), idx++)
{
- if (idx < argInfo->numRegs)
+ if (idx < argNode->gtNumRegs)
{
GenTreePtr node = fieldListPtr->gtGetOp1();
node->gtLsraInfo.setSrcCandidates(m_lsra, genRegMask((regNumber)((unsigned)argReg + idx)));
}
+ else
+ {
+ slotCount++;
+ }
}
-
- putArgChild->SetContained();
+ argNode->gtLsraInfo.srcCount = argNode->gtNumRegs + slotCount;
+ assert(putArgChild->isContained());
}
else
{
assert(putArgChild->TypeGet() == TYP_STRUCT);
assert(putArgChild->OperGet() == GT_OBJ);
- // We could use a ldr/str sequence so we need a internal register
- argNode->gtLsraInfo.srcCount = 1;
+ // We can use a ldr/str sequence so we need an internal register
argNode->gtLsraInfo.internalIntCount = 1;
regMaskTP internalMask = RBM_ALLINT & ~argMask;
argNode->gtLsraInfo.setInternalCandidates(m_lsra, internalMask);
@@ -725,11 +763,13 @@ void Lowering::TreeNodeInfoInitPutArgSplit(GenTreePutArgSplit* argNode, TreeNode
// We will generate all of the code for the GT_PUTARG_SPLIT, the GT_OBJ and the GT_LCL_VAR_ADDR
// as one contained operation
//
- MakeSrcContained(putArgChild, objChild);
- putArgChild->gtLsraInfo.srcCount--;
+ assert(objChild->isContained());
+ }
+ else
+ {
+ argNode->gtLsraInfo.srcCount = GetIndirSourceCount(putArgChild->AsIndir());
}
- argNode->gtLsraInfo.srcCount = putArgChild->gtLsraInfo.srcCount;
- MakeSrcContained(argNode, putArgChild);
+ assert(putArgChild->isContained());
}
}
#endif // _TARGET_ARM_
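
For context on TreeNodeInfoInitPutArgSplit above, a hypothetical ARM32 example of an argument that gets split: the struct is larger than the argument registers that remain, so its leading words are passed in registers and the rest in stack slots, which is exactly the register-plus-slot source count computed above.

    // Illustration only: with r0 and r1 taken by the first two parameters,
    // 'big' may be passed partly in r2/r3 and partly on the stack (PUTARG_SPLIT).
    struct Big
    {
        int a, b, c, d, e, f;
    };

    int consume(int x, int y, Big big);
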
@@ -753,43 +793,17 @@ void Lowering::TreeNodeInfoInitBlockStore(GenTreeBlk* blkNode)
// Sources are dest address and initVal or source.
// We may require an additional source or temp register for the size.
- blkNode->gtLsraInfo.srcCount = 2;
- blkNode->gtLsraInfo.dstCount = 0;
- GenTreePtr srcAddrOrFill = nullptr;
- bool isInitBlk = blkNode->OperIsInitBlkOp();
-
- if (!isInitBlk)
- {
- // CopyObj or CopyBlk
- if (source->gtOper == GT_IND)
- {
- srcAddrOrFill = blkNode->Data()->gtGetOp1();
- // We're effectively setting source as contained, but can't call MakeSrcContained, because the
- // "inheritance" of the srcCount is to a child not a parent - it would "just work" but could be misleading.
- // If srcAddr is already non-contained, we don't need to change it.
- if (srcAddrOrFill->gtLsraInfo.getDstCount() == 0)
- {
- srcAddrOrFill->gtLsraInfo.setDstCount(1);
- srcAddrOrFill->gtLsraInfo.setSrcCount(source->gtLsraInfo.srcCount);
- }
- m_lsra->clearOperandCounts(source);
- source->SetContained();
- source->AsIndir()->Addr()->ClearContained();
- }
- else if (!source->IsMultiRegCall() && !source->OperIsSIMD())
- {
- assert(source->IsLocal());
- MakeSrcContained(blkNode, source);
- blkNode->gtLsraInfo.srcCount--;
- }
- }
+ blkNode->gtLsraInfo.srcCount = GetOperandSourceCount(dstAddr);
+ assert(blkNode->gtLsraInfo.dstCount == 0);
+ GenTreePtr srcAddrOrFill = nullptr;
+ bool isInitBlk = blkNode->OperIsInitBlkOp();
if (isInitBlk)
{
GenTreePtr initVal = source;
if (initVal->OperIsInitVal())
{
- initVal->SetContained();
+ assert(initVal->isContained());
initVal = initVal->gtGetOp1();
}
srcAddrOrFill = initVal;
@@ -801,22 +815,18 @@ void Lowering::TreeNodeInfoInitBlockStore(GenTreeBlk* blkNode)
// code sequences to improve CQ.
// For reference see the code in lsraxarch.cpp.
NYI_ARM("initblk loop unrolling is currently not implemented.");
-
-#ifdef _TARGET_ARM64_
- // No additional temporaries required
- ssize_t fill = initVal->gtIntCon.gtIconVal & 0xFF;
- if (fill == 0)
+ if (!initVal->isContained())
{
- MakeSrcContained(blkNode, source);
- blkNode->gtLsraInfo.srcCount--;
+ blkNode->gtLsraInfo.srcCount++;
}
-#endif // _TARGET_ARM64_
}
else
{
assert(blkNode->gtBlkOpKind == GenTreeBlk::BlkOpKindHelper);
// The helper follows the regular ABI.
dstAddr->gtLsraInfo.setSrcCandidates(l, RBM_ARG_0);
+ assert(!initVal->isContained());
+ blkNode->gtLsraInfo.srcCount++;
initVal->gtLsraInfo.setSrcCandidates(l, RBM_ARG_1);
if (size != 0)
{
@@ -838,6 +848,10 @@ void Lowering::TreeNodeInfoInitBlockStore(GenTreeBlk* blkNode)
{
// CopyObj or CopyBlk
// Sources are src and dest and size if not constant.
+ if (source->gtOper == GT_IND)
+ {
+ srcAddrOrFill = blkNode->Data()->gtGetOp1();
+ }
if (blkNode->OperGet() == GT_STORE_OBJ)
{
// CopyObj
@@ -913,7 +927,7 @@ void Lowering::TreeNodeInfoInitBlockStore(GenTreeBlk* blkNode)
else
{
// The block size argument is a third argument to GT_STORE_DYN_BLK
- noway_assert(blkNode->gtOper == GT_STORE_DYN_BLK);
+ assert(blkNode->gtOper == GT_STORE_DYN_BLK);
blkNode->gtLsraInfo.setSrcCount(3);
GenTree* blockSize = blkNode->AsDynBlk()->gtDynamicSize;
blockSize->gtLsraInfo.setSrcCandidates(l, RBM_ARG_2);
@@ -925,6 +939,7 @@ void Lowering::TreeNodeInfoInitBlockStore(GenTreeBlk* blkNode)
blkNode->gtLsraInfo.setInternalCandidates(l, internalIntCandidates);
}
}
+ blkNode->gtLsraInfo.srcCount += GetOperandSourceCount(source);
}
}
@@ -951,6 +966,12 @@ int Lowering::GetOperandSourceCount(GenTree* node)
}
#endif // !defined(_TARGET_64BIT_)
+ if (node->OperIsIndir())
+ {
+ const unsigned srcCount = GetIndirSourceCount(node->AsIndir());
+ return srcCount;
+ }
+
return 0;
}
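
A simplified sketch of the rule GetOperandSourceCount now follows, using invented types: a non-contained operand supplies one source register, while a contained indirection contributes however many registers its address mode needs.

    // Hypothetical stand-in type for the sketch only.
    struct SimpleOperand
    {
        bool isContained;
        int  addressModeSources; // e.g. base and/or index registers of a contained indirection
    };

    int operandSourceCount(const SimpleOperand& op)
    {
        return op.isContained ? op.addressModeSources : 1;
    }
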
diff --git a/src/jit/lsraxarch.cpp b/src/jit/lsraxarch.cpp
index e90be05573..28d345ddfb 100644
--- a/src/jit/lsraxarch.cpp
+++ b/src/jit/lsraxarch.cpp
@@ -41,11 +41,9 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
//
void Lowering::TreeNodeInfoInitStoreLoc(GenTreeLclVarCommon* storeLoc)
{
- ContainCheckStoreLoc(storeLoc);
-
TreeNodeInfo* info = &(storeLoc->gtLsraInfo);
- info->dstCount = 0;
- GenTree* op1 = storeLoc->gtGetOp1();
+ assert(info->dstCount == 0);
+ GenTree* op1 = storeLoc->gtGetOp1();
#ifdef _TARGET_X86_
if (op1->OperGet() == GT_LONG)
@@ -117,6 +115,17 @@ void Lowering::TreeNodeInfoInit(GenTree* tree)
TreeNodeInfo* info = &(tree->gtLsraInfo);
+ if (tree->isContained())
+ {
+ info->dstCount = 0;
+ assert(info->srcCount == 0);
+ TreeNodeInfoInitCheckByteable(tree);
+ return;
+ }
+
+ // Set the default dstCount. This may be modified below.
+ info->dstCount = tree->IsValue() ? 1 : 0;
+
// floating type generates AVX instruction (vmovss etc.), set the flag
SetContainsAVXFlags(varTypeIsFloating(tree->TypeGet()));
switch (tree->OperGet())
@@ -128,10 +137,28 @@ void Lowering::TreeNodeInfoInit(GenTree* tree)
TreeNodeInfoInitSimple(tree);
break;
- case GT_LCL_FLD:
case GT_LCL_VAR:
+ // Because we do containment analysis before we redo dataflow and identify register
+            // candidates, the containment analysis only uses !lvDoNotEnregister to estimate register
+ // candidates.
+            // If there is a lclVar that is estimated to be a register candidate but
+            // is not, and it was marked regOptional, it should now be marked contained instead.
+ // TODO-XArch-CQ: When this is being called while RefPositions are being created,
+ // use lvLRACandidate here instead.
+ if (info->regOptional)
+ {
+ if (!compiler->lvaTable[tree->AsLclVarCommon()->gtLclNum].lvTracked ||
+ compiler->lvaTable[tree->AsLclVarCommon()->gtLclNum].lvDoNotEnregister)
+ {
+ info->regOptional = false;
+ tree->SetContained();
+ info->dstCount = 0;
+ }
+ }
+ __fallthrough;
+
+ case GT_LCL_FLD:
info->srcCount = 0;
- info->dstCount = 1;
#ifdef FEATURE_SIMD
// Need an additional register to read upper 4 bytes of Vector3.
@@ -158,12 +185,12 @@ void Lowering::TreeNodeInfoInit(GenTree* tree)
case GT_START_NONGC:
case GT_PROF_HOOK:
info->srcCount = 0;
- info->dstCount = 0;
+ assert(info->dstCount == 0);
break;
case GT_CNS_DBL:
info->srcCount = 0;
- info->dstCount = 1;
+ assert(info->dstCount == 1);
break;
#if !defined(_TARGET_64BIT_)
@@ -173,14 +200,14 @@ void Lowering::TreeNodeInfoInit(GenTree* tree)
{
// An unused GT_LONG node needs to consume its sources.
info->srcCount = 2;
+ info->dstCount = 0;
}
else
{
- // Passthrough
+ // Passthrough. Should have been marked contained.
info->srcCount = 0;
+ assert(info->dstCount == 0);
}
-
- info->dstCount = 0;
break;
#endif // !defined(_TARGET_64BIT_)
@@ -190,7 +217,7 @@ void Lowering::TreeNodeInfoInit(GenTree* tree)
case GT_QMARK:
case GT_COLON:
info->srcCount = 0;
- info->dstCount = 0;
+ assert(info->dstCount == 0);
unreached();
break;
@@ -199,17 +226,16 @@ void Lowering::TreeNodeInfoInit(GenTree* tree)
break;
case GT_RETFILT:
+ assert(info->dstCount == 0);
if (tree->TypeGet() == TYP_VOID)
{
info->srcCount = 0;
- info->dstCount = 0;
}
else
{
assert(tree->TypeGet() == TYP_INT);
info->srcCount = 1;
- info->dstCount = 0;
info->setSrcCandidates(l, RBM_INTRET);
tree->gtOp.gtOp1->gtLsraInfo.setSrcCandidates(l, RBM_INTRET);
@@ -223,24 +249,23 @@ void Lowering::TreeNodeInfoInit(GenTree* tree)
info->srcCount = 0;
if (tree->TypeGet() != TYP_VOID && tree->gtOp.gtOp1 == nullptr)
{
- info->dstCount = 1;
+ assert(info->dstCount == 1);
}
else
{
- info->dstCount = 0;
+ assert(info->dstCount == 0);
}
break;
case GT_JTRUE:
{
info->srcCount = 0;
- info->dstCount = 0;
+ assert(info->dstCount == 0);
GenTree* cmp = tree->gtGetOp1();
- l->clearDstCount(cmp);
+ assert(cmp->gtLsraInfo.dstCount == 0);
#ifdef FEATURE_SIMD
- ContainCheckJTrue(tree->AsOp());
GenTree* cmpOp1 = cmp->gtGetOp1();
GenTree* cmpOp2 = cmp->gtGetOp2();
if (cmpOp1->IsSIMDEqualityOrInequality() && cmpOp2->isContained())
@@ -249,46 +274,8 @@ void Lowering::TreeNodeInfoInit(GenTree* tree)
// We always generate code for a SIMD equality comparison, but the compare itself produces no value.
// Neither the SIMD node nor the immediate need to be evaluated into a register.
- l->clearOperandCounts(cmp);
- l->clearDstCount(cmpOp1);
- l->clearOperandCounts(cmpOp2);
-
- // Codegen of SIMD (in)Equality uses target integer reg only for setting flags.
- // A target reg is not needed on AVX when comparing against Vector Zero.
- // In all other cases we need to reserve an int type internal register, since we
- // have cleared dstCount.
- if (!compiler->canUseAVX() || !cmpOp1->gtGetOp2()->IsIntegralConstVector(0))
- {
- ++(cmpOp1->gtLsraInfo.internalIntCount);
- regMaskTP internalCandidates = cmpOp1->gtLsraInfo.getInternalCandidates(l);
- internalCandidates |= l->allRegs(TYP_INT);
- cmpOp1->gtLsraInfo.setInternalCandidates(l, internalCandidates);
- }
-
- // We have to reverse compare oper in the following cases:
- // 1) SIMD Equality: Sets Zero flag on equal otherwise clears it.
- // Therefore, if compare oper is == or != against false(0), we will
- // be checking opposite of what is required.
- //
- // 2) SIMD inEquality: Clears Zero flag on true otherwise sets it.
- // Therefore, if compare oper is == or != against true(1), we will
- // be checking opposite of what is required.
- GenTreeSIMD* simdNode = cmpOp1->AsSIMD();
- if (simdNode->gtSIMDIntrinsicID == SIMDIntrinsicOpEquality)
- {
- if (cmpOp2->IsIntegralConst(0))
- {
- cmp->SetOper(GenTree::ReverseRelop(cmpOper));
- }
- }
- else
- {
- assert(simdNode->gtSIMDIntrinsicID == SIMDIntrinsicOpInEquality);
- if (cmpOp2->IsIntegralConst(1))
- {
- cmp->SetOper(GenTree::ReverseRelop(cmpOper));
- }
- }
+ assert(cmpOp1->gtLsraInfo.dstCount == 0);
+ assert(cmpOp2->gtLsraInfo.dstCount == 0);
}
#endif // FEATURE_SIMD
}
@@ -296,12 +283,12 @@ void Lowering::TreeNodeInfoInit(GenTree* tree)
case GT_JCC:
info->srcCount = 0;
- info->dstCount = 0;
+ assert(info->dstCount == 0);
break;
case GT_SETCC:
info->srcCount = 0;
- info->dstCount = 1;
+ assert(info->dstCount == 1);
#ifdef _TARGET_X86_
info->setDstCandidates(m_lsra, RBM_BYTE_REGS);
#endif // _TARGET_X86_
@@ -309,26 +296,25 @@ void Lowering::TreeNodeInfoInit(GenTree* tree)
case GT_JMP:
info->srcCount = 0;
- info->dstCount = 0;
+ assert(info->dstCount == 0);
break;
case GT_SWITCH:
// This should never occur since switch nodes must not be visible at this
// point in the JIT.
info->srcCount = 0;
- info->dstCount = 0; // To avoid getting uninit errors.
noway_assert(!"Switch must be lowered at this point");
break;
case GT_JMPTABLE:
info->srcCount = 0;
- info->dstCount = 1;
+ assert(info->dstCount == 1);
break;
case GT_SWITCH_TABLE:
info->srcCount = 2;
info->internalIntCount = 1;
- info->dstCount = 0;
+ assert(info->dstCount == 0);
break;
case GT_ASG:
@@ -336,7 +322,6 @@ void Lowering::TreeNodeInfoInit(GenTree* tree)
case GT_ASG_SUB:
noway_assert(!"We should never hit any assignment operator in lowering");
info->srcCount = 0;
- info->dstCount = 0;
break;
#if !defined(_TARGET_64BIT_)
@@ -351,10 +336,8 @@ void Lowering::TreeNodeInfoInit(GenTree* tree)
// Rather they only support "op xmm, mem/xmm" form.
if (varTypeIsFloating(tree->TypeGet()))
{
- ContainCheckFloatBinary(tree->AsOp());
- info->srcCount += GetOperandSourceCount(tree->gtOp.gtOp1);
+ info->srcCount = GetOperandSourceCount(tree->gtOp.gtOp1);
info->srcCount += GetOperandSourceCount(tree->gtOp.gtOp2);
- info->dstCount = 1;
break;
}
@@ -363,19 +346,14 @@ void Lowering::TreeNodeInfoInit(GenTree* tree)
case GT_AND:
case GT_OR:
case GT_XOR:
- ContainCheckBinary(tree->AsOp());
- info->srcCount += GetOperandSourceCount(tree->gtOp.gtOp1);
+ info->srcCount = GetOperandSourceCount(tree->gtOp.gtOp1);
info->srcCount += GetOperandSourceCount(tree->gtOp.gtOp2);
- info->dstCount = 1;
- // Codegen of this tree node sets ZF and SF flags.
- tree->gtFlags |= GTF_ZSF_SET;
break;
case GT_RETURNTRAP:
// This just turns into a compare of its child with an int + a conditional call
- ContainCheckReturnTrap(tree->AsOp());
- info->srcCount = tree->gtOp.gtOp1->isContained() ? 0 : 1;
- info->dstCount = 0;
+ info->srcCount = tree->gtOp.gtOp1->isContained() ? 0 : 1;
+ assert(info->dstCount == 0);
info->internalIntCount = 1;
info->setInternalCandidates(l, l->allRegs(TYP_INT));
break;
@@ -406,7 +384,6 @@ void Lowering::TreeNodeInfoInit(GenTree* tree)
#endif // FEATURE_SIMD
case GT_CAST:
- ContainCheckCast(tree->AsCast());
TreeNodeInfoInitCast(tree);
break;
@@ -417,8 +394,7 @@ void Lowering::TreeNodeInfoInit(GenTree* tree)
break;
case GT_NEG:
- info->srcCount = 1;
- info->dstCount = 1;
+ info->srcCount = GetOperandSourceCount(tree->gtOp.gtOp1);
// TODO-XArch-CQ:
// SSE instruction set doesn't have an instruction to negate a number.
@@ -441,16 +417,10 @@ void Lowering::TreeNodeInfoInit(GenTree* tree)
info->internalFloatCount = 1;
info->setInternalCandidates(l, l->internalFloatRegCandidates());
}
- else
- {
- // Codegen of this tree node sets ZF and SF flags.
- tree->gtFlags |= GTF_ZSF_SET;
- }
break;
case GT_NOT:
- info->srcCount = 1;
- info->dstCount = 1;
+ info->srcCount = GetOperandSourceCount(tree->gtOp.gtOp1);
break;
case GT_LSH:
@@ -478,14 +448,14 @@ void Lowering::TreeNodeInfoInit(GenTree* tree)
break;
case GT_CKFINITE:
- info->srcCount = 1;
- info->dstCount = 1;
+ info->srcCount = 1;
+ assert(info->dstCount == 1);
info->internalIntCount = 1;
break;
case GT_CMPXCHG:
info->srcCount = 3;
- info->dstCount = 1;
+ assert(info->dstCount == 1);
// comparand is preferenced to RAX.
// Remaining two operands can be in any reg other than RAX.
@@ -496,16 +466,13 @@ void Lowering::TreeNodeInfoInit(GenTree* tree)
break;
case GT_LOCKADD:
- info->dstCount = (tree->TypeGet() == TYP_VOID) ? 0 : 1;
+ op2 = tree->gtOp.gtOp2;
+ info->srcCount = op2->isContained() ? 1 : 2;
+ assert(info->dstCount == (tree->TypeGet() == TYP_VOID) ? 0 : 1);
+ break;
- if (CheckImmedAndMakeContained(tree, tree->gtOp.gtOp2))
- {
- info->srcCount = 1;
- }
- else
- {
- info->srcCount = 2;
- }
+ case GT_PUTARG_REG:
+ TreeNodeInfoInitPutArgReg(tree->AsUnOp());
break;
case GT_CALL:
@@ -517,9 +484,9 @@ void Lowering::TreeNodeInfoInit(GenTree* tree)
// For a GT_ADDR, the child node should not be evaluated into a register
GenTreePtr child = tree->gtOp.gtOp1;
assert(!l->isCandidateLocalRef(child));
- MakeSrcContained(tree, child);
+ assert(child->isContained());
+ assert(info->dstCount == 1);
info->srcCount = 0;
- info->dstCount = 1;
}
break;
@@ -531,12 +498,10 @@ void Lowering::TreeNodeInfoInit(GenTree* tree)
// These should all be eliminated prior to Lowering.
assert(!"Non-store block node in Lowering");
info->srcCount = 0;
- info->dstCount = 0;
break;
#ifdef FEATURE_PUT_STRUCT_ARG_STK
case GT_PUTARG_STK:
- LowerPutArgStk(tree->AsPutArgStk());
TreeNodeInfoInitPutArgStk(tree->AsPutArgStk());
break;
#endif // FEATURE_PUT_STRUCT_ARG_STK
@@ -544,14 +509,12 @@ void Lowering::TreeNodeInfoInit(GenTree* tree)
case GT_STORE_BLK:
case GT_STORE_OBJ:
case GT_STORE_DYN_BLK:
- LowerBlockStore(tree->AsBlk());
TreeNodeInfoInitBlockStore(tree->AsBlk());
break;
case GT_INIT_VAL:
// Always a passthrough of its child's value.
- info->srcCount = 0;
- info->dstCount = 0;
+ assert(!"INIT_VAL should always be contained");
break;
case GT_LCLHEAP:
@@ -562,23 +525,21 @@ void Lowering::TreeNodeInfoInit(GenTree* tree)
#ifdef FEATURE_SIMD
case GT_SIMD_CHK:
#endif // FEATURE_SIMD
- ContainCheckBoundsChk(tree->AsBoundsChk());
// Consumes arrLen & index - has no result
info->srcCount = GetOperandSourceCount(tree->AsBoundsChk()->gtIndex);
info->srcCount += GetOperandSourceCount(tree->AsBoundsChk()->gtArrLen);
- info->dstCount = 0;
+ assert(info->dstCount == 0);
break;
case GT_ARR_ELEM:
// These must have been lowered to GT_ARR_INDEX
- noway_assert(!"We should never see a GT_ARR_ELEM in lowering");
+ noway_assert(!"We should never see a GT_ARR_ELEM after Lowering.");
info->srcCount = 0;
- info->dstCount = 0;
break;
case GT_ARR_INDEX:
info->srcCount = 2;
- info->dstCount = 1;
+ assert(info->dstCount == 1);
// For GT_ARR_INDEX, the lifetime of the arrObj must be extended because it is actually used multiple
// times while the result is being computed.
tree->AsArrIndex()->ArrObj()->gtLsraInfo.isDelayFree = true;
@@ -588,27 +549,26 @@ void Lowering::TreeNodeInfoInit(GenTree* tree)
case GT_ARR_OFFSET:
// This consumes the offset, if any, the arrObj and the effective index,
// and produces the flattened offset for this dimension.
- info->srcCount = 2;
- info->dstCount = 1;
-
- if (tree->gtArrOffs.gtOffset->IsIntegralConst(0))
+ assert(info->dstCount == 1);
+ if (tree->gtArrOffs.gtOffset->isContained())
{
- MakeSrcContained(tree, tree->gtArrOffs.gtOffset);
+ info->srcCount = 2;
}
else
{
info->srcCount++;
// Here we simply need an internal register, which must be different
// from any of the operand's registers, but may be the same as targetReg.
+ info->srcCount = 3;
info->internalIntCount = 1;
}
break;
case GT_LEA:
- // The LEA usually passes its operands through to the GT_IND, in which case we'll
- // clear the info->srcCount and info->dstCount later, but we may be instantiating an address,
- // so we set them here.
+ // The LEA usually passes its operands through to the GT_IND, in which case it will
+ // be contained, but we may be instantiating an address, in which case we set them here.
info->srcCount = 0;
+ assert(info->dstCount == 1);
if (tree->AsAddrMode()->HasBase())
{
info->srcCount++;
@@ -617,70 +577,38 @@ void Lowering::TreeNodeInfoInit(GenTree* tree)
{
info->srcCount++;
}
- info->dstCount = 1;
break;
case GT_STOREIND:
- {
- info->srcCount = 2;
- info->dstCount = 0;
- GenTree* src = tree->gtOp.gtOp2;
-
if (compiler->codeGen->gcInfo.gcIsWriteBarrierAsgNode(tree))
{
TreeNodeInfoInitGCWriteBarrier(tree);
break;
}
-
- // If the source is a containable immediate, make it contained, unless it is
- // an int-size or larger store of zero to memory, because we can generate smaller code
- // by zeroing a register and then storing it.
- if (IsContainableImmed(tree, src) &&
- (!src->IsIntegralConst(0) || varTypeIsSmall(tree) || tree->gtGetOp1()->OperGet() == GT_CLS_VAR_ADDR))
- {
- MakeSrcContained(tree, src);
- }
- else if (!varTypeIsFloating(tree))
- {
- // Perform recognition of trees with the following structure:
- // StoreInd(addr, BinOp(expr, GT_IND(addr)))
- // to be able to fold this into an instruction of the form
- // BINOP [addr], register
- // where register is the actual place where 'expr' is computed.
- //
- // SSE2 doesn't support RMW form of instructions.
- if (TreeNodeInfoInitIfRMWMemOp(tree))
- {
- break;
- }
- }
-
TreeNodeInfoInitIndir(tree->AsIndir());
- }
- break;
+ break;
case GT_NULLCHECK:
- info->dstCount = 0;
+ assert(info->dstCount == 0);
info->srcCount = 1;
info->isLocalDefUse = true;
break;
case GT_IND:
- info->dstCount = 1;
- info->srcCount = 1;
TreeNodeInfoInitIndir(tree->AsIndir());
+ assert(info->dstCount == 1);
break;
case GT_CATCH_ARG:
info->srcCount = 0;
- info->dstCount = 1;
+ assert(info->dstCount == 1);
info->setDstCandidates(l, RBM_EXCEPTION_OBJECT);
break;
#if !FEATURE_EH_FUNCLETS
case GT_END_LFIN:
info->srcCount = 0;
- info->dstCount = 0;
+ assert(info->dstCount == 0);
break;
#endif
@@ -715,8 +643,6 @@ void Lowering::TreeNodeInfoInit(GenTree* tree)
op1->gtLsraInfo.isTgtPref = true;
// Is this a non-commutative operator, or is op2 a contained memory op?
- // (Note that we can't call IsContained() at this point because it uses exactly the
- // same information we're currently computing.)
// In either case, we need to make op2 remain live until the op is complete, by marking
// the source(s) associated with op2 as "delayFree".
// Note that if op2 of a binary RMW operator is a memory op, even if the operator
@@ -760,8 +686,8 @@ void Lowering::TreeNodeInfoInit(GenTree* tree)
delayUseSrc = op1;
}
- else if ((op2 != nullptr) && (!tree->OperIsCommutative() ||
- (IsContainableMemoryOp(op2, true) && (op2->gtLsraInfo.srcCount == 0))))
+ else if ((op2 != nullptr) &&
+ (!tree->OperIsCommutative() || (IsContainableMemoryOp(op2) && (op2->gtLsraInfo.srcCount == 0))))
{
delayUseSrc = op2;
}
@@ -836,24 +762,24 @@ void Lowering::TreeNodeInfoInitCheckByteable(GenTree* tree)
info->setDstCandidates(l, regMask & ~RBM_NON_BYTE_REGS);
}
- if (tree->OperIsSimple() && (info->srcCount > 0))
+ if (tree->OperIsSimple())
{
- // No need to set src candidates on a contained child operand.
GenTree* op = tree->gtOp.gtOp1;
- assert(op != nullptr);
- bool containedNode = (op->gtLsraInfo.srcCount == 0) && (op->gtLsraInfo.dstCount == 0);
- if (!containedNode)
+ if (op != nullptr)
{
- regMask = op->gtLsraInfo.getSrcCandidates(l);
- assert(regMask != RBM_NONE);
- op->gtLsraInfo.setSrcCandidates(l, regMask & ~RBM_NON_BYTE_REGS);
+ // No need to set src candidates on a contained child operand.
+ if (!op->isContained())
+ {
+ regMask = op->gtLsraInfo.getSrcCandidates(l);
+ assert(regMask != RBM_NONE);
+ op->gtLsraInfo.setSrcCandidates(l, regMask & ~RBM_NON_BYTE_REGS);
+ }
}
if (tree->OperIsBinary() && (tree->gtOp.gtOp2 != nullptr))
{
- op = tree->gtOp.gtOp2;
- containedNode = (op->gtLsraInfo.srcCount == 0) && (op->gtLsraInfo.dstCount == 0);
- if (!containedNode)
+ op = tree->gtOp.gtOp2;
+ if (!op->isContained())
{
regMask = op->gtLsraInfo.getSrcCandidates(l);
assert(regMask != RBM_NONE);
@@ -866,7 +792,7 @@ void Lowering::TreeNodeInfoInitCheckByteable(GenTree* tree)
}
//------------------------------------------------------------------------
-// TreeNodeInfoInitSimple: Sets the srcCount and dstCount for all the trees
+// TreeNodeInfoInitSimple: Sets the srcCount for all the trees
// without special handling based on the tree node type.
//
// Arguments:
@@ -878,8 +804,12 @@ void Lowering::TreeNodeInfoInitCheckByteable(GenTree* tree)
void Lowering::TreeNodeInfoInitSimple(GenTree* tree)
{
TreeNodeInfo* info = &(tree->gtLsraInfo);
- unsigned kind = tree->OperKind();
- info->dstCount = tree->IsValue() ? 1 : 0;
+ if (tree->isContained())
+ {
+ info->srcCount = 0;
+ return;
+ }
+ unsigned kind = tree->OperKind();
if (kind & (GTK_CONST | GTK_LEAF))
{
info->srcCount = 0;
@@ -888,12 +818,9 @@ void Lowering::TreeNodeInfoInitSimple(GenTree* tree)
{
if (tree->gtGetOp2IfPresent() != nullptr)
{
- info->srcCount = 2;
- }
- else
- {
- info->srcCount = 1;
+ info->srcCount += GetOperandSourceCount(tree->gtOp.gtOp2);
}
+ info->srcCount += GetOperandSourceCount(tree->gtOp.gtOp1);
}
else
{
@@ -912,8 +839,6 @@ void Lowering::TreeNodeInfoInitSimple(GenTree* tree)
//
void Lowering::TreeNodeInfoInitReturn(GenTree* tree)
{
- ContainCheckRet(tree->AsOp());
-
TreeNodeInfo* info = &(tree->gtLsraInfo);
LinearScan* l = m_lsra;
Compiler* compiler = comp;
@@ -928,7 +853,7 @@ void Lowering::TreeNodeInfoInitReturn(GenTree* tree)
info->srcCount = 2;
loVal->gtLsraInfo.setSrcCandidates(l, RBM_LNGRET_LO);
hiVal->gtLsraInfo.setSrcCandidates(l, RBM_LNGRET_HI);
- info->dstCount = 0;
+ assert(info->dstCount == 0);
}
else
#endif // !defined(_TARGET_64BIT_)
@@ -936,7 +861,7 @@ void Lowering::TreeNodeInfoInitReturn(GenTree* tree)
regMaskTP useCandidates = RBM_NONE;
info->srcCount = ((tree->TypeGet() == TYP_VOID) || op1->isContained()) ? 0 : 1;
- info->dstCount = 0;
+ assert(info->dstCount == 0);
#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
if (varTypeIsStruct(tree))
@@ -996,10 +921,6 @@ void Lowering::TreeNodeInfoInitShiftRotate(GenTree* tree)
{
TreeNodeInfo* info = &(tree->gtLsraInfo);
LinearScan* l = m_lsra;
- ContainCheckShiftRotate(tree->AsOp());
-
- info->srcCount = 2;
- info->dstCount = 1;
// For shift operations, we need that the number
// of bits moved gets stored in CL in case
@@ -1076,16 +997,14 @@ void Lowering::TreeNodeInfoInitShiftRotate(GenTree* tree)
// Return Value:
// None.
//
-void Lowering::TreeNodeInfoInitPutArgReg(
- GenTreeUnOp* node, regNumber argReg, TreeNodeInfo& info, bool isVarArgs, bool* callHasFloatRegArgs)
+void Lowering::TreeNodeInfoInitPutArgReg(GenTreeUnOp* node)
{
assert(node != nullptr);
assert(node->OperIsPutArgReg());
+ node->gtLsraInfo.srcCount = 1;
+ regNumber argReg = node->gtRegNum;
assert(argReg != REG_NA);
- // Each register argument corresponds to one source.
- info.srcCount++;
-
// Set the register requirements for the node.
const regMaskTP argMask = genRegMask(argReg);
node->gtLsraInfo.setDstCandidates(m_lsra, argMask);
@@ -1094,20 +1013,35 @@ void Lowering::TreeNodeInfoInitPutArgReg(
// To avoid redundant moves, have the argument operand computed in the
// register in which the argument is passed to the call.
node->gtOp.gtOp1->gtLsraInfo.setSrcCandidates(m_lsra, m_lsra->getUseCandidates(node));
+}
+//------------------------------------------------------------------------
+// HandleFloatVarArgs: Handle additional register requirements for a varargs call
+//
+// Arguments:
+// call - The call node of interest
+// argNode - The current argument
+//
+// Return Value:
+// None.
+//
+// Notes:
+// In the case of a varargs call, the ABI dictates that if we have floating point args,
+// we must pass the enregistered arguments in both the integer and floating point registers.
+// Since the integer register is not associated with the arg node, we will reserve it as
+// an internal register on the call so that it is not used during the evaluation of the call node
+// (e.g. for the target).
+void Lowering::HandleFloatVarArgs(GenTreeCall* call, GenTree* argNode, bool* callHasFloatRegArgs)
+{
#if FEATURE_VARARG
- *callHasFloatRegArgs |= varTypeIsFloating(node->TypeGet());
-
- // In the case of a varargs call, the ABI dictates that if we have floating point args,
- // we must pass the enregistered arguments in both the integer and floating point registers.
- // Since the integer register is not associated with this arg node, we will reserve it as
- // an internal register so that it is not used during the evaluation of the call node
- // (e.g. for the target).
- if (isVarArgs && varTypeIsFloating(node))
+ if (call->IsVarargs() && varTypeIsFloating(argNode))
{
+ *callHasFloatRegArgs = true;
+
+ regNumber argReg = argNode->gtRegNum;
regNumber targetReg = comp->getCallArgIntRegister(argReg);
- info.setInternalIntCount(info.internalIntCount + 1);
- info.addInternalCandidates(m_lsra, genRegMask(targetReg));
+ call->gtLsraInfo.setInternalIntCount(call->gtLsraInfo.internalIntCount + 1);
+ call->gtLsraInfo.addInternalCandidates(m_lsra, genRegMask(targetReg));
}
#endif // FEATURE_VARARG
}
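// Illustrative sketch, not part of this diff: why a floating point vararg needs the extra integer
// register reserved above. The callee walks its arguments with va_arg and chooses the type itself, so
// (on Windows x64) the caller must make the value available in the corresponding general-purpose
// register as well as the XMM register.
#include <cstdarg>
#include <cstdio>
double SumDoubles(int count, ...)
{
    va_list args;
    va_start(args, count);
    double total = 0.0;
    for (int i = 0; i < count; i++)
    {
        total += va_arg(args, double); // the callee, not the call site, decides the argument type
    }
    va_end(args);
    return total;
}
int main()
{
    printf("%f\n", SumDoubles(2, 1.5, 2.5)); // prints 4.000000
    return 0;
}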
@@ -1129,6 +1063,7 @@ void Lowering::TreeNodeInfoInitCall(GenTreeCall* call)
bool hasMultiRegRetVal = false;
ReturnTypeDesc* retTypeDesc = nullptr;
+ assert(!call->isContained());
info->srcCount = 0;
if (call->TypeGet() != TYP_VOID)
{
@@ -1141,43 +1076,36 @@ void Lowering::TreeNodeInfoInitCall(GenTreeCall* call)
}
else
{
- info->dstCount = 1;
+ assert(info->dstCount == 1);
}
}
else
{
- info->dstCount = 0;
+ assert(info->dstCount == 0);
}
GenTree* ctrlExpr = call->gtControlExpr;
if (call->gtCallType == CT_INDIRECT)
{
- // either gtControlExpr != null or gtCallAddr != null.
- // Both cannot be non-null at the same time.
- assert(ctrlExpr == nullptr);
- assert(call->gtCallAddr != nullptr);
ctrlExpr = call->gtCallAddr;
-
-#ifdef _TARGET_X86_
- // Fast tail calls aren't currently supported on x86, but if they ever are, the code
- // below that handles indirect VSD calls will need to be fixed.
- assert(!call->IsFastTailCall() || !call->IsVirtualStub());
-#endif // _TARGET_X86_
}
// set reg requirements on call target represented as control sequence.
if (ctrlExpr != nullptr)
{
- // we should never see a gtControlExpr whose type is void.
- assert(ctrlExpr->TypeGet() != TYP_VOID);
-
- // call can take a Rm op on x64
-
// In case of fast tail implemented as jmp, make sure that gtControlExpr is
// computed into a register.
- if (!call->IsFastTailCall())
+ if (call->IsFastTailCall())
{
+ {
+ // Fast tail call - make sure that call target is always computed in RAX
+ // so that epilog sequence can generate "jmp rax" to achieve fast tail call.
+ ctrlExpr->gtLsraInfo.setSrcCandidates(l, RBM_RAX);
+ }
+ }
#ifdef _TARGET_X86_
+ else
+ {
// On x86, we need to generate a very specific pattern for indirect VSD calls:
//
// 3-byte nop
@@ -1187,24 +1115,11 @@ void Lowering::TreeNodeInfoInitCall(GenTreeCall* call)
// sure that the call target address is computed into EAX in this case.
if (call->IsVirtualStub() && (call->gtCallType == CT_INDIRECT))
{
- assert(ctrlExpr->isIndir());
-
+ assert(ctrlExpr->isIndir() && ctrlExpr->isContained());
ctrlExpr->gtGetOp1()->gtLsraInfo.setSrcCandidates(l, RBM_VIRTUAL_STUB_TARGET);
- MakeSrcContained(call, ctrlExpr);
}
- else
-#endif // _TARGET_X86_
- if (ctrlExpr->isIndir())
- {
- MakeSrcContained(call, ctrlExpr);
- }
- }
- else
- {
- // Fast tail call - make sure that call target is always computed in RAX
- // so that epilog sequence can generate "jmp rax" to achieve fast tail call.
- ctrlExpr->gtLsraInfo.setSrcCandidates(l, RBM_RAX);
}
+#endif // _TARGET_X86_
info->srcCount += GetOperandSourceCount(ctrlExpr);
}
@@ -1259,24 +1174,6 @@ void Lowering::TreeNodeInfoInitCall(GenTreeCall* call)
// callRegArgs + (callargs - placeholders, setup, etc)
// there is an explicit thisPtr but it is redundant
- // If there is an explicit this pointer, we don't want that node to produce anything
- // as it is redundant
- if (call->gtCallObjp != nullptr)
- {
- GenTreePtr thisPtrNode = call->gtCallObjp;
-
- if (thisPtrNode->gtOper == GT_PUTARG_REG)
- {
- l->clearOperandCounts(thisPtrNode);
- thisPtrNode->SetContained();
- l->clearDstCount(thisPtrNode->gtOp.gtOp1);
- }
- else
- {
- l->clearDstCount(thisPtrNode);
- }
- }
-
bool callHasFloatRegArgs = false;
bool isVarArgs = call->IsVarargs();
@@ -1296,16 +1193,34 @@ void Lowering::TreeNodeInfoInitCall(GenTreeCall* call)
// Note that this property is statically checked by Lowering::CheckBlock.
GenTreePtr argNode = list->Current();
+ // Each register argument corresponds to one source.
+ if (argNode->OperIsPutArgReg())
+ {
+ info->srcCount++;
+ HandleFloatVarArgs(call, argNode, &callHasFloatRegArgs);
+ }
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ else if (argNode->OperGet() == GT_FIELD_LIST)
+ {
+ for (GenTreeFieldList* entry = argNode->AsFieldList(); entry != nullptr; entry = entry->Rest())
+ {
+ assert(entry->Current()->OperIsPutArgReg());
+ info->srcCount++;
+ HandleFloatVarArgs(call, argNode, &callHasFloatRegArgs);
+ }
+ }
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+
+#ifdef DEBUG
+ // In DEBUG only, check validity with respect to the arg table entry.
+
fgArgTabEntryPtr curArgTabEntry = compiler->gtArgEntryByNode(call, argNode);
assert(curArgTabEntry);
if (curArgTabEntry->regNum == REG_STK)
{
// late arg that is not passed in a register
- DISPNODE(argNode);
assert(argNode->gtOper == GT_PUTARG_STK);
- argNode->gtLsraInfo.srcCount = 1;
- argNode->gtLsraInfo.dstCount = 0;
#ifdef FEATURE_PUT_STRUCT_ARG_STK
// If the node is TYP_STRUCT and it is put on stack with
@@ -1316,35 +1231,33 @@ void Lowering::TreeNodeInfoInitCall(GenTreeCall* call)
if (argNode->TypeGet() == TYP_STRUCT)
{
assert(argNode->gtOp.gtOp1 != nullptr && argNode->gtOp.gtOp1->OperGet() == GT_OBJ);
- argNode->gtOp.gtOp1->gtLsraInfo.dstCount = 0;
- argNode->gtLsraInfo.srcCount = 0;
+ assert(argNode->gtLsraInfo.srcCount == 0);
}
#endif // FEATURE_PUT_STRUCT_ARG_STK
-
continue;
}
-
#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
if (argNode->OperGet() == GT_FIELD_LIST)
{
- argNode->SetContained();
+ assert(argNode->isContained());
assert(varTypeIsStruct(argNode) || curArgTabEntry->isStruct);
- unsigned eightbyte = 0;
+ int i = 0;
for (GenTreeFieldList* entry = argNode->AsFieldList(); entry != nullptr; entry = entry->Rest())
{
- const regNumber argReg = eightbyte == 0 ? curArgTabEntry->regNum : curArgTabEntry->otherRegNum;
- TreeNodeInfoInitPutArgReg(entry->Current()->AsUnOp(), argReg, *info, isVarArgs, &callHasFloatRegArgs);
-
- eightbyte++;
+ const regNumber argReg = (i == 0) ? curArgTabEntry->regNum : curArgTabEntry->otherRegNum;
+ assert(entry->Current()->gtRegNum == argReg);
+ assert(i < 2);
+ i++;
}
}
else
#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
{
- TreeNodeInfoInitPutArgReg(argNode->AsUnOp(), curArgTabEntry->regNum, *info, isVarArgs,
- &callHasFloatRegArgs);
+ const regNumber argReg = curArgTabEntry->regNum;
+ assert(argNode->gtRegNum == argReg);
}
+#endif // DEBUG
}
// Now, count stack args
@@ -1361,41 +1274,11 @@ void Lowering::TreeNodeInfoInitCall(GenTreeCall* call)
if (!(args->gtFlags & GTF_LATE_ARG))
{
TreeNodeInfo* argInfo = &(arg->gtLsraInfo);
- if (argInfo->dstCount != 0)
+ if ((argInfo->dstCount != 0) && !arg->IsArgPlaceHolderNode() && !arg->isContained())
{
argInfo->isLocalDefUse = true;
}
-
- // If the child of GT_PUTARG_STK is a constant, we don't need a register to
- // move it to memory (stack location).
- //
- // On AMD64, we don't want to make 0 contained, because we can generate smaller code
- // by zeroing a register and then storing it. E.g.:
- // xor rdx, rdx
- // mov gword ptr [rsp+28H], rdx
- // is 2 bytes smaller than:
- // mov gword ptr [rsp+28H], 0
- //
- // On x86, we push stack arguments; we don't use 'mov'. So:
- // push 0
- // is 1 byte smaller than:
- // xor rdx, rdx
- // push rdx
-
- argInfo->dstCount = 0;
- if (arg->gtOper == GT_PUTARG_STK)
- {
- GenTree* op1 = arg->gtOp.gtOp1;
- if (IsContainableImmed(arg, op1)
-#if defined(_TARGET_AMD64_)
- && !op1->IsIntegralConst(0)
-#endif // _TARGET_AMD64_
- )
- {
- MakeSrcContained(arg, op1);
- arg->gtLsraInfo.srcCount--;
- }
- }
+ assert(argInfo->dstCount == 0);
}
args = args->gtOp.gtOp2;
}
@@ -1432,8 +1315,8 @@ void Lowering::TreeNodeInfoInitBlockStore(GenTreeBlk* blkNode)
// Sources are dest address, initVal or source.
// We may require an additional source or temp register for the size.
- blkNode->gtLsraInfo.srcCount = 0;
- blkNode->gtLsraInfo.dstCount = 0;
+ blkNode->gtLsraInfo.srcCount = GetOperandSourceCount(dstAddr);
+ assert(blkNode->gtLsraInfo.dstCount == 0);
blkNode->gtLsraInfo.setInternalCandidates(l, RBM_NONE);
GenTreePtr srcAddrOrFill = nullptr;
bool isInitBlk = blkNode->OperIsInitBlkOp();
@@ -1447,10 +1330,14 @@ void Lowering::TreeNodeInfoInitBlockStore(GenTreeBlk* blkNode)
GenTree* initVal = source;
if (initVal->OperIsInitVal())
{
- initVal->SetContained();
+ assert(initVal->isContained());
initVal = initVal->gtGetOp1();
}
srcAddrOrFill = initVal;
+ if (!initVal->isContained())
+ {
+ blkNode->gtLsraInfo.srcCount++;
+ }
switch (blkNode->gtBlkOpKind)
{
@@ -1458,22 +1345,12 @@ void Lowering::TreeNodeInfoInitBlockStore(GenTreeBlk* blkNode)
assert(initVal->IsCnsIntOrI());
if (size >= XMM_REGSIZE_BYTES)
{
- // Reserve an XMM register to fill it with
- // a pack of 16 init value constants.
- ssize_t fill = initVal->gtIntCon.gtIconVal & 0xFF;
+ // Reserve an XMM register to fill it with a pack of 16 init value constants.
blkNode->gtLsraInfo.internalFloatCount = 1;
blkNode->gtLsraInfo.setInternalCandidates(l, l->internalFloatRegCandidates());
- if ((fill == 0) && ((size & 0xf) == 0))
- {
- MakeSrcContained(blkNode, initVal);
- }
- // Use an XMM register to fill with constants; it's an AVX instruction, so set the flags.
+                    // Use an XMM register to fill with constants; it's an AVX instruction, so set the flag.
SetContainsAVXFlags();
}
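// Illustrative sketch, not part of this diff: what the reserved XMM register holds for an unrolled
// init block - the single fill byte replicated into a 16-byte pattern that is then stored 16 bytes
// at a time. A scalar equivalent of one such store:
#include <cstdint>
#include <cstring>
void FillBlock16(uint8_t* dst, uint8_t fillValue)
{
    uint8_t pack[16];
    memset(pack, fillValue, sizeof(pack)); // the "pack of 16 init value constants"
    memcpy(dst, pack, sizeof(pack));       // one 16-byte store of the unrolled sequence
}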
- if (!initVal->isContained())
- {
- blkNode->gtLsraInfo.srcCount++;
- }
#ifdef _TARGET_X86_
if ((size & 1) != 0)
{
@@ -1491,7 +1368,6 @@ void Lowering::TreeNodeInfoInitBlockStore(GenTreeBlk* blkNode)
// a) The memory address to be in RDI.
// b) The fill value has to be in RAX.
// c) The buffer size will go in RCX.
- blkNode->gtLsraInfo.srcCount++;
dstAddrRegMask = RBM_RDI;
srcAddrOrFill = initVal;
sourceRegMask = RBM_RAX;
@@ -1501,7 +1377,6 @@ void Lowering::TreeNodeInfoInitBlockStore(GenTreeBlk* blkNode)
case GenTreeBlk::BlkOpKindHelper:
#ifdef _TARGET_AMD64_
// The helper follows the regular AMD64 ABI.
- blkNode->gtLsraInfo.srcCount++;
dstAddrRegMask = RBM_ARG_0;
sourceRegMask = RBM_ARG_1;
blkSizeRegMask = RBM_ARG_2;
@@ -1521,23 +1396,7 @@ void Lowering::TreeNodeInfoInitBlockStore(GenTreeBlk* blkNode)
// CopyObj or CopyBlk
if (source->gtOper == GT_IND)
{
- srcAddrOrFill = blkNode->Data()->gtGetOp1();
- // We're effectively setting source as contained, but can't call MakeSrcContained, because the
- // "inheritance" of the srcCount is to a child not a parent - it would "just work" but could be misleading.
- // If srcAddr is already non-contained, we don't need to change it.
- if (srcAddrOrFill->gtLsraInfo.getDstCount() == 0)
- {
- srcAddrOrFill->gtLsraInfo.setDstCount(1);
- srcAddrOrFill->gtLsraInfo.setSrcCount(source->gtLsraInfo.srcCount);
- }
- m_lsra->clearOperandCounts(source);
- source->SetContained();
- source->AsIndir()->Addr()->ClearContained();
- }
- else if (!source->IsMultiRegCall() && !source->OperIsSIMD())
- {
- assert(source->IsLocal());
- MakeSrcContained(blkNode, source);
+ srcAddrOrFill = source->gtGetOp1();
}
if (blkNode->OperGet() == GT_STORE_OBJ)
{
@@ -1586,18 +1445,6 @@ void Lowering::TreeNodeInfoInitBlockStore(GenTreeBlk* blkNode)
// are used for codegen, set ContainsAVX flag
SetContainsAVXFlags();
}
- // If src or dst are on stack, we don't have to generate the address
- // into a register because it's just some constant+SP.
- if ((srcAddrOrFill != nullptr) && srcAddrOrFill->OperIsLocalAddr())
- {
- MakeSrcContained(blkNode, srcAddrOrFill);
- }
-
- if (dstAddr->OperIsLocalAddr())
- {
- MakeSrcContained(blkNode, dstAddr);
- }
-
break;
case GenTreeBlk::BlkOpKindRepInstr:
@@ -1630,7 +1477,6 @@ void Lowering::TreeNodeInfoInitBlockStore(GenTreeBlk* blkNode)
blkNode->gtLsraInfo.srcCount += GetOperandSourceCount(source);
}
- blkNode->gtLsraInfo.srcCount += GetOperandSourceCount(dstAddr);
if (dstAddrRegMask != RBM_NONE)
{
dstAddr->gtLsraInfo.setSrcCandidates(l, dstAddrRegMask);
@@ -1659,7 +1505,7 @@ void Lowering::TreeNodeInfoInitBlockStore(GenTreeBlk* blkNode)
else
{
// The block size argument is a third argument to GT_STORE_DYN_BLK
- noway_assert(blkNode->gtOper == GT_STORE_DYN_BLK);
+ assert(blkNode->gtOper == GT_STORE_DYN_BLK);
blkNode->gtLsraInfo.setSrcCount(3);
GenTree* blockSize = blkNode->AsDynBlk()->gtDynamicSize;
blockSize->gtLsraInfo.setSrcCandidates(l, blkSizeRegMask);
@@ -1682,6 +1528,7 @@ void Lowering::TreeNodeInfoInitPutArgStk(GenTreePutArgStk* putArgStk)
TreeNodeInfo* info = &(putArgStk->gtLsraInfo);
LinearScan* l = m_lsra;
info->srcCount = 0;
+ assert(info->dstCount == 0);
if (putArgStk->gtOp1->gtOper == GT_FIELD_LIST)
{
@@ -1699,50 +1546,15 @@ void Lowering::TreeNodeInfoInitPutArgStk(GenTreePutArgStk* putArgStk)
const unsigned fieldOffset = current->gtFieldOffset;
assert(fieldType != TYP_LONG);
- // For x86 we must mark all integral fields as contained or reg-optional, and handle them
- // accordingly in code generation, since we may have up to 8 fields, which cannot all be in
- // registers to be consumed atomically by the call.
- if (varTypeIsIntegralOrI(fieldNode))
- {
- if (fieldNode->OperGet() == GT_LCL_VAR)
- {
- LclVarDsc* varDsc = &(comp->lvaTable[fieldNode->AsLclVarCommon()->gtLclNum]);
- if (varDsc->lvTracked && !varDsc->lvDoNotEnregister)
- {
- SetRegOptional(fieldNode);
- }
- else
- {
- MakeSrcContained(putArgStk, fieldNode);
- }
- }
- else if (fieldNode->IsIntCnsFitsInI32())
- {
- MakeSrcContained(putArgStk, fieldNode);
- }
- else
- {
- // For the case where we cannot directly push the value, if we run out of registers,
- // it would be better to defer computation until we are pushing the arguments rather
- // than spilling, but this situation is not all that common, as most cases of promoted
- // structs do not have a large number of fields, and of those most are lclVars or
- // copy-propagated constants.
- SetRegOptional(fieldNode);
- }
- }
#if defined(FEATURE_SIMD)
- // Note that we need to check the GT_FIELD_LIST type, not the fieldType. This is because the
+ // Note that we need to check the GT_FIELD_LIST type, not 'fieldType'. This is because the
// GT_FIELD_LIST will be TYP_SIMD12 whereas the fieldType might be TYP_SIMD16 for lclVar, where
// we "round up" to 16.
- else if (current->gtFieldType == TYP_SIMD12)
+ if (current->gtFieldType == TYP_SIMD12)
{
needsSimdTemp = true;
}
#endif // defined(FEATURE_SIMD)
- else
- {
- assert(varTypeIsFloating(fieldNode) || varTypeIsSIMD(fieldNode));
- }
// We can treat as a slot any field that is stored at a slot boundary, where the previous
// field is not in the same slot. (Note that we store the fields in reverse order.)
@@ -1771,8 +1583,6 @@ void Lowering::TreeNodeInfoInitPutArgStk(GenTreePutArgStk* putArgStk)
}
}
- info->dstCount = 0;
-
if (putArgStk->gtPutArgStkKind == GenTreePutArgStk::Kind::Push)
{
// If any of the fields cannot be stored with an actual push, we may need a temporary
@@ -1790,6 +1600,8 @@ void Lowering::TreeNodeInfoInitPutArgStk(GenTreePutArgStk* putArgStk)
// For PutArgStk of a TYP_SIMD12, we need a SIMD temp register.
if (needsSimdTemp)
{
+ info->srcCount = putArgStk->gtOp1->gtLsraInfo.dstCount;
+ assert(info->dstCount == 0);
info->internalFloatCount += 1;
info->addInternalCandidates(l, l->allSIMDRegs());
}
@@ -1804,7 +1616,6 @@ void Lowering::TreeNodeInfoInitPutArgStk(GenTreePutArgStk* putArgStk)
if (putArgStk->TypeGet() == TYP_SIMD12)
{
info->srcCount = putArgStk->gtOp1->gtLsraInfo.dstCount;
- info->dstCount = 0;
info->internalFloatCount = 1;
info->setInternalCandidates(l, l->allSIMDRegs());
return;
@@ -1821,19 +1632,7 @@ void Lowering::TreeNodeInfoInitPutArgStk(GenTreePutArgStk* putArgStk)
GenTreePtr src = putArgStk->gtOp1;
GenTreePtr srcAddr = nullptr;
- bool haveLocalAddr = false;
- if ((src->OperGet() == GT_OBJ) || (src->OperGet() == GT_IND))
- {
- srcAddr = src->gtOp.gtOp1;
- assert(srcAddr != nullptr);
- haveLocalAddr = srcAddr->OperIsLocalAddr();
- }
- else
- {
- assert(varTypeIsSIMD(putArgStk));
- }
-
- info->dstCount = 0;
+ info->srcCount = GetOperandSourceCount(src);
// If we have a buffer between XMM_REGSIZE_BYTES and CPBLK_UNROLL_LIMIT bytes, we'll use SSE2.
// Structs and buffer with sizes <= CPBLK_UNROLL_LIMIT bytes are occurring in more than 95% of
@@ -1886,17 +1685,6 @@ void Lowering::TreeNodeInfoInitPutArgStk(GenTreePutArgStk* putArgStk)
default:
unreached();
}
-
- // Always mark the OBJ and ADDR as contained trees by the putarg_stk. The codegen will deal with this tree.
- MakeSrcContained(putArgStk, src);
-
- if (haveLocalAddr)
- {
- // If the source address is the address of a lclVar, make the source address contained to avoid unnecessary
- // copies.
- MakeSrcContained(putArgStk, srcAddr);
- }
- info->srcCount = GetOperandSourceCount(src);
}
#endif // FEATURE_PUT_STRUCT_ARG_STK
@@ -1911,13 +1699,12 @@ void Lowering::TreeNodeInfoInitPutArgStk(GenTreePutArgStk* putArgStk)
//
void Lowering::TreeNodeInfoInitLclHeap(GenTree* tree)
{
- ContainCheckLclHeap(tree->AsOp());
TreeNodeInfo* info = &(tree->gtLsraInfo);
LinearScan* l = m_lsra;
Compiler* compiler = comp;
info->srcCount = 1;
- info->dstCount = 1;
+ assert(info->dstCount == 1);
// Need a variable number of temp regs (see genLclHeap() in codegenamd64.cpp):
// Here '-' means don't care.
@@ -2009,7 +1796,6 @@ void Lowering::TreeNodeInfoInitLclHeap(GenTree* tree)
//
void Lowering::TreeNodeInfoInitModDiv(GenTree* tree)
{
- ContainCheckDivOrMod(tree->AsOp());
TreeNodeInfo* info = &(tree->gtLsraInfo);
LinearScan* l = m_lsra;
@@ -2018,7 +1804,7 @@ void Lowering::TreeNodeInfoInitModDiv(GenTree* tree)
info->srcCount = GetOperandSourceCount(op1);
info->srcCount += GetOperandSourceCount(op2);
- info->dstCount = 1;
+ assert(info->dstCount == 1);
if (varTypeIsFloating(tree->TypeGet()))
{
@@ -2085,8 +1871,6 @@ void Lowering::TreeNodeInfoInitModDiv(GenTree* tree)
//
void Lowering::TreeNodeInfoInitIntrinsic(GenTree* tree)
{
- ContainCheckIntrinsic(tree->AsOp());
-
TreeNodeInfo* info = &(tree->gtLsraInfo);
LinearScan* l = m_lsra;
@@ -2096,7 +1880,7 @@ void Lowering::TreeNodeInfoInitIntrinsic(GenTree* tree)
assert(op1->TypeGet() == tree->TypeGet());
info->srcCount = GetOperandSourceCount(op1);
- info->dstCount = 1;
+ assert(info->dstCount == 1);
switch (tree->gtIntrinsic.gtIntrinsicId)
{
@@ -2149,14 +1933,21 @@ void Lowering::TreeNodeInfoInitIntrinsic(GenTree* tree)
// Return Value:
// None.
-void Lowering::TreeNodeInfoInitSIMD(GenTree* tree)
+void Lowering::TreeNodeInfoInitSIMD(GenTreeSIMD* simdTree)
{
- GenTreeSIMD* simdTree = tree->AsSIMD();
- ContainCheckSIMD(simdTree);
-
- TreeNodeInfo* info = &(tree->gtLsraInfo);
+ TreeNodeInfo* info = &(simdTree->gtLsraInfo);
LinearScan* lsra = m_lsra;
- info->dstCount = 1;
+    // Only SIMDIntrinsicInit can be contained. Apart from that case,
+    // only SIMDIntrinsicOpEquality and SIMDIntrinsicOpInEquality can have a dstCount of 0.
+ if (simdTree->isContained())
+ {
+ assert(simdTree->gtSIMDIntrinsicID == SIMDIntrinsicInit);
+ }
+ else if (info->dstCount != 1)
+ {
+ assert((simdTree->gtSIMDIntrinsicID == SIMDIntrinsicOpEquality) ||
+ (simdTree->gtSIMDIntrinsicID == SIMDIntrinsicOpInEquality));
+ }
SetContainsAVXFlags(true, simdTree->gtSIMDSize);
switch (simdTree->gtSIMDIntrinsicID)
{
@@ -2165,7 +1956,7 @@ void Lowering::TreeNodeInfoInitSIMD(GenTree* tree)
case SIMDIntrinsicInit:
{
- op1 = tree->gtOp.gtOp1;
+ op1 = simdTree->gtOp.gtOp1;
#if !defined(_TARGET_64BIT_)
if (op1->OperGet() == GT_LONG)
@@ -2297,13 +2088,11 @@ void Lowering::TreeNodeInfoInitSIMD(GenTree* tree)
case SIMDIntrinsicOpEquality:
case SIMDIntrinsicOpInEquality:
- info->srcCount = 2;
// On SSE4/AVX, we can generate optimal code for (in)equality
// against zero using ptest. We can safely do this optimization
// for integral vectors but not for floating-point for the reason
// that we have +0.0 and -0.0 and +0.0 == -0.0
- op2 = tree->gtGetOp2();
if (simdTree->gtGetOp2()->isContained())
{
info->srcCount = 1;
@@ -2321,6 +2110,19 @@ void Lowering::TreeNodeInfoInitSIMD(GenTree* tree)
info->internalFloatCount = 1;
info->setInternalCandidates(lsra, lsra->allSIMDRegs());
}
+ if (info->isNoRegCompare)
+ {
+ info->dstCount = 0;
+ // Codegen of SIMD (in)Equality uses target integer reg only for setting flags.
+ // A target reg is not needed on AVX when comparing against Vector Zero.
+ // In all other cases we need to reserve an int type internal register if we
+ // don't have a target register on the compare.
+ if (!comp->canUseAVX() || !simdTree->gtGetOp2()->IsIntegralConstVector(0))
+ {
+ info->internalIntCount = 1;
+ info->addInternalCandidates(lsra, lsra->allRegs(TYP_INT));
+ }
+ }
break;
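// Illustrative sketch, not part of this diff: why the ptest-against-zero shortcut is only safe for
// integral vectors. For floating point, value equality and bitwise equality disagree at zero, since
// +0.0 == -0.0 even though their bit patterns differ.
#include <cstdio>
#include <cstring>
#include <cstdint>
int main()
{
    float    negZero = -0.0f;
    float    posZero = 0.0f;
    uint32_t negBits, posBits;
    memcpy(&negBits, &negZero, sizeof(negBits));
    memcpy(&posBits, &posZero, sizeof(posBits));
    printf("value equal: %d, bits: 0x%08x vs 0x%08x\n", (negZero == posZero) ? 1 : 0, (unsigned)negBits,
           (unsigned)posBits); // value equal: 1, bits: 0x80000000 vs 0x00000000
    return 0;
}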
case SIMDIntrinsicDotProduct:
@@ -2367,9 +2169,10 @@ void Lowering::TreeNodeInfoInitSIMD(GenTree* tree)
// - the source SIMD struct
// - index (which element to get)
// The result is baseType of SIMD struct.
+ // op1 may be a contained memory op, but if so we will consume its address.
info->srcCount = 0;
- op1 = tree->gtOp.gtOp1;
- op2 = tree->gtOp.gtOp2;
+ op1 = simdTree->gtOp.gtOp1;
+ op2 = simdTree->gtOp.gtOp2;
// op2 may be a contained constant.
if (!op2->isContained())
@@ -2579,7 +2382,7 @@ void Lowering::TreeNodeInfoInitCast(GenTree* tree)
var_types castOpType = castOp->TypeGet();
info->srcCount = GetOperandSourceCount(castOp);
- info->dstCount = 1;
+ assert(info->dstCount == 1);
if (tree->gtFlags & GTF_UNSIGNED)
{
castOpType = genUnsignedType(castOpType);
@@ -2615,25 +2418,11 @@ void Lowering::TreeNodeInfoInitGCWriteBarrier(GenTree* tree)
GenTreePtr addr = dst->Addr();
GenTreePtr src = dst->Data();
- if (addr->OperGet() == GT_LEA)
- {
- // In the case where we are doing a helper assignment, if the dst
- // is an indir through an lea, we need to actually instantiate the
- // lea in a register
- GenTreeAddrMode* lea = addr->AsAddrMode();
-
- int leaSrcCount = 0;
- if (lea->HasBase())
- {
- leaSrcCount++;
- }
- if (lea->HasIndex())
- {
- leaSrcCount++;
- }
- lea->gtLsraInfo.srcCount = leaSrcCount;
- lea->gtLsraInfo.dstCount = 1;
- }
+ // In the case where we are doing a helper assignment, we need to actually instantiate the
+ // address in a register.
+ assert(!addr->isContained());
+ tree->gtLsraInfo.srcCount = 1 + GetIndirSourceCount(dst);
+ assert(tree->gtLsraInfo.dstCount == 0);
bool useOptimizedWriteBarrierHelper = false; // By default, assume no optimized write barriers.
@@ -2695,32 +2484,48 @@ void Lowering::TreeNodeInfoInitIndir(GenTreeIndir* indirTree)
return;
}
- ContainCheckIndir(indirTree);
-
- GenTree* addr = indirTree->gtGetOp1();
TreeNodeInfo* info = &(indirTree->gtLsraInfo);
- GenTreePtr base = nullptr;
- GenTreePtr index = nullptr;
- unsigned mul, cns;
- bool rev;
-
info->srcCount = GetIndirSourceCount(indirTree);
if (indirTree->gtOper == GT_STOREIND)
{
GenTree* source = indirTree->gtOp.gtOp2;
if (indirTree->AsStoreInd()->IsRMWMemoryOp())
{
+ // Because 'source' is contained, we haven't yet determined its special register requirements, if any.
+ // As it happens, the Shift or Rotate cases are the only ones with special requirements.
+ assert(source->isContained() && source->OperIsRMWMemOp());
+ GenTree* nonMemSource = nullptr;
+
+ if (source->OperIsShiftOrRotate())
+ {
+ TreeNodeInfoInitShiftRotate(source);
+ }
if (indirTree->AsStoreInd()->IsRMWDstOp1())
{
if (source->OperIsBinary())
{
- info->srcCount += GetOperandSourceCount(source->gtOp.gtOp2);
+ nonMemSource = source->gtOp.gtOp2;
}
}
else if (indirTree->AsStoreInd()->IsRMWDstOp2())
{
- info->srcCount += GetOperandSourceCount(source->gtOp.gtOp1);
+ nonMemSource = source->gtOp.gtOp1;
+ }
+ if (nonMemSource != nullptr)
+ {
+ info->srcCount += GetOperandSourceCount(nonMemSource);
+ assert(!nonMemSource->isContained() || (!nonMemSource->isMemoryOp() && !nonMemSource->IsLocal()));
+#ifdef _TARGET_X86_
+ if (varTypeIsByte(indirTree) && !nonMemSource->isContained())
+ {
+ // If storeInd is of TYP_BYTE, set source to byteable registers.
+ regMaskTP regMask = nonMemSource->gtLsraInfo.getSrcCandidates(m_lsra);
+ regMask &= ~RBM_NON_BYTE_REGS;
+ assert(regMask != RBM_NONE);
+ nonMemSource->gtLsraInfo.setSrcCandidates(m_lsra, regMask);
+ }
+#endif
}
}
else
@@ -2778,11 +2583,17 @@ void Lowering::TreeNodeInfoInitIndir(GenTreeIndir* indirTree)
void Lowering::TreeNodeInfoInitCmp(GenTreePtr tree)
{
assert(tree->OperIsCompare() || tree->OperIs(GT_CMP));
- ContainCheckCompare(tree->AsOp());
TreeNodeInfo* info = &(tree->gtLsraInfo);
info->srcCount = 0;
- info->dstCount = tree->OperIs(GT_CMP) ? 0 : 1;
+ if (info->isNoRegCompare)
+ {
+ info->dstCount = 0;
+ }
+ else
+ {
+ assert((info->dstCount == 1) || tree->OperIs(GT_CMP));
+ }
#ifdef _TARGET_X86_
// If the compare is used by a jump, we just need to set the condition codes. If not, then we need
@@ -2798,7 +2609,10 @@ void Lowering::TreeNodeInfoInitCmp(GenTreePtr tree)
var_types op1Type = op1->TypeGet();
var_types op2Type = op2->TypeGet();
- info->srcCount += GetOperandSourceCount(op1);
+ if (!op1->gtLsraInfo.isNoRegCompare)
+ {
+ info->srcCount += GetOperandSourceCount(op1);
+ }
info->srcCount += GetOperandSourceCount(op2);
#if !defined(_TARGET_64BIT_)
@@ -2816,166 +2630,6 @@ void Lowering::TreeNodeInfoInitCmp(GenTreePtr tree)
#endif // !defined(_TARGET_64BIT_)
}
-//--------------------------------------------------------------------------------------------
-// TreeNodeInfoInitIfRMWMemOp: Checks to see if there is a RMW memory operation rooted at
-// GT_STOREIND node and if so will mark register requirements for nodes under storeInd so
-// that CodeGen will generate a single instruction of the form:
-//
-// binOp [addressing mode], reg
-//
-// Parameters
-// storeInd - GT_STOREIND node
-//
-// Return value
-// True, if RMW memory op tree pattern is recognized and op counts are set.
-// False otherwise.
-//
-bool Lowering::TreeNodeInfoInitIfRMWMemOp(GenTreePtr storeInd)
-{
- assert(storeInd->OperGet() == GT_STOREIND);
-
- // SSE2 doesn't support RMW on float values
- assert(!varTypeIsFloating(storeInd));
-
- // Terminology:
- // indirDst = memory write of an addr mode (i.e. storeind destination)
- // indirSrc = value being written to memory (i.e. storeind source which could a binary/unary op)
- // indirCandidate = memory read i.e. a gtInd of an addr mode
- // indirOpSource = source operand used in binary/unary op (i.e. source operand of indirSrc node)
-
- GenTreePtr indirCandidate = nullptr;
- GenTreePtr indirOpSource = nullptr;
-
- if (!IsRMWMemOpRootedAtStoreInd(storeInd, &indirCandidate, &indirOpSource))
- {
- JITDUMP("Lower of StoreInd didn't mark the node as self contained for reason: %d\n",
- storeInd->AsStoreInd()->GetRMWStatus());
- DISPTREERANGE(BlockRange(), storeInd);
- return false;
- }
-
- GenTreePtr indirDst = storeInd->gtGetOp1();
- GenTreePtr indirSrc = storeInd->gtGetOp2();
- genTreeOps oper = indirSrc->OperGet();
-
- // At this point we have successfully detected a RMW memory op of one of the following forms
- // storeInd(indirDst, indirSrc(indirCandidate, indirOpSource)) OR
- // storeInd(indirDst, indirSrc(indirOpSource, indirCandidate) in case of commutative operations OR
- // storeInd(indirDst, indirSrc(indirCandidate) in case of unary operations
- //
- // Here indirSrc = one of the supported binary or unary operation for RMW of memory
- // indirCandidate = a GT_IND node
- // indirCandidateChild = operand of GT_IND indirCandidate
- //
- // The logic below essentially does the following
- // Make indirOpSource contained.
- // Make indirSrc contained.
- // Make indirCandidate contained.
- // Make indirCandidateChild contained.
- // Make indirDst contained except when it is a GT_LCL_VAR or GT_CNS_INT that doesn't fit within addr
- // base.
- // Note that due to the way containment is supported, we accomplish some of the above by clearing operand counts
- // and directly propagating them upward.
- //
-
- TreeNodeInfo* info = &(storeInd->gtLsraInfo);
- info->dstCount = 0;
-
- if (GenTree::OperIsBinary(oper))
- {
- // On Xarch RMW operations require that the non-rmw operand be an immediate or in a register.
- // Therefore, if we have previously marked the indirOpSource as a contained memory op while lowering
- // the binary node, we need to reset that now.
- if (IsContainableMemoryOp(indirOpSource, true))
- {
- indirOpSource->ClearContained();
- }
- assert(!indirOpSource->isContained() || indirOpSource->OperIsConst());
- JITDUMP("Lower succesfully detected an assignment of the form: *addrMode BinOp= source\n");
- info->srcCount = indirOpSource->gtLsraInfo.dstCount;
- }
- else
- {
- assert(GenTree::OperIsUnary(oper));
- JITDUMP("Lower succesfully detected an assignment of the form: *addrMode = UnaryOp(*addrMode)\n");
- info->srcCount = 0;
- }
- DISPTREERANGE(BlockRange(), storeInd);
-
- m_lsra->clearOperandCounts(indirSrc);
- indirSrc->SetContained();
- m_lsra->clearOperandCounts(indirCandidate);
- indirCandidate->SetContained();
-
- GenTreePtr indirCandidateChild = indirCandidate->gtGetOp1();
- if (indirCandidateChild->OperGet() == GT_LEA)
- {
- GenTreeAddrMode* addrMode = indirCandidateChild->AsAddrMode();
-
- if (addrMode->HasBase())
- {
- assert(addrMode->Base()->OperIsLeaf());
- m_lsra->clearOperandCounts(addrMode->Base());
- addrMode->Base()->SetContained();
- info->srcCount++;
- }
-
- if (addrMode->HasIndex())
- {
- assert(addrMode->Index()->OperIsLeaf());
- m_lsra->clearOperandCounts(addrMode->Index());
- addrMode->Index()->SetContained();
- info->srcCount++;
- }
-
- m_lsra->clearOperandCounts(indirDst);
- indirDst->SetContained();
- }
- else
- {
- assert(indirCandidateChild->OperGet() == GT_LCL_VAR || indirCandidateChild->OperGet() == GT_LCL_VAR_ADDR ||
- indirCandidateChild->OperGet() == GT_CLS_VAR_ADDR || indirCandidateChild->OperGet() == GT_CNS_INT);
-
- // If it is a GT_LCL_VAR, it still needs the reg to hold the address.
- // We would still need a reg for GT_CNS_INT if it doesn't fit within addressing mode base.
- // For GT_CLS_VAR_ADDR, we don't need a reg to hold the address, because field address value is known at jit
- // time. Also, we don't need a reg for GT_CLS_VAR_ADDR.
- if (indirCandidateChild->OperGet() == GT_LCL_VAR_ADDR || indirCandidateChild->OperGet() == GT_CLS_VAR_ADDR)
- {
- m_lsra->clearOperandCounts(indirDst);
- indirDst->SetContained();
- }
- else if (indirCandidateChild->IsCnsIntOrI() && indirCandidateChild->AsIntConCommon()->FitsInAddrBase(comp))
- {
- m_lsra->clearOperandCounts(indirDst);
- indirDst->SetContained();
- }
- else
- {
- // Need a reg and hence increment src count of storeind
- info->srcCount += indirCandidateChild->gtLsraInfo.dstCount;
- }
- }
- m_lsra->clearOperandCounts(indirCandidateChild);
- indirCandidateChild->SetContained();
-
-#ifdef _TARGET_X86_
- if (varTypeIsByte(storeInd))
- {
- // If storeInd is of TYP_BYTE, set indirOpSources to byteable registers.
- bool containedNode = indirOpSource->gtLsraInfo.dstCount == 0;
- if (!containedNode)
- {
- regMaskTP regMask = indirOpSource->gtLsraInfo.getSrcCandidates(m_lsra);
- assert(regMask != RBM_NONE);
- indirOpSource->gtLsraInfo.setSrcCandidates(m_lsra, regMask & ~RBM_NON_BYTE_REGS);
- }
- }
-#endif
-
- return true;
-}
-
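// Illustrative sketch, not part of this diff: the source pattern this RMW memory-op recognition
// targets. Code like the function below lowers to StoreInd(addr, BinOp(IND(addr), value)), which
// xarch can encode as a single "add dword ptr [mem], reg" instead of a load/add/store sequence.
void AddInPlace(int* counter, int value)
{
    *counter += value; // StoreInd(addr, ADD(IND(addr), value)) - an RMW memory op
}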
//------------------------------------------------------------------------
// TreeNodeInfoInitMul: Set the NodeInfo for a multiply.
//
@@ -2987,20 +2641,17 @@ bool Lowering::TreeNodeInfoInitIfRMWMemOp(GenTreePtr storeInd)
//
void Lowering::TreeNodeInfoInitMul(GenTreePtr tree)
{
- ContainCheckMul(tree->AsOp());
-
#if defined(_TARGET_X86_)
assert(tree->OperIs(GT_MUL, GT_MULHI, GT_MUL_LONG));
#else
assert(tree->OperIs(GT_MUL, GT_MULHI));
#endif
TreeNodeInfo* info = &(tree->gtLsraInfo);
-
- GenTree* op1 = tree->gtOp.gtOp1;
- GenTree* op2 = tree->gtOp.gtOp2;
- info->srcCount = GetOperandSourceCount(op1);
+ GenTree* op1 = tree->gtOp.gtOp1;
+ GenTree* op2 = tree->gtOp.gtOp2;
+ info->srcCount = GetOperandSourceCount(op1);
info->srcCount += GetOperandSourceCount(op2);
- info->dstCount = 1;
+ assert(info->dstCount == 1);
// Case of float/double mul.
if (varTypeIsFloating(tree->TypeGet()))
@@ -3171,7 +2822,7 @@ bool Lowering::ExcludeNonByteableRegisters(GenTree* tree)
GenTree* op1 = simdNode->gtGetOp1();
GenTree* op2 = simdNode->gtGetOp2();
var_types baseType = simdNode->gtSIMDBaseType;
- if (!IsContainableMemoryOp(op1, true) && op2->IsCnsIntOrI() && varTypeIsSmallInt(baseType))
+ if (!IsContainableMemoryOp(op1) && op2->IsCnsIntOrI() && varTypeIsSmallInt(baseType))
{
bool ZeroOrSignExtnReqd = true;
unsigned baseSize = genTypeSize(baseType);
@@ -3230,9 +2881,6 @@ int Lowering::GetOperandSourceCount(GenTree* node)
if (node->OperIsIndir())
{
const unsigned srcCount = GetIndirSourceCount(node->AsIndir());
- // TODO-Cleanup: Once we are doing containment analysis during Lowering, this
- // can be removed, or changed to an assert.
- node->gtLsraInfo.srcCount = 0;
return srcCount;
}
diff --git a/src/jit/nodeinfo.h b/src/jit/nodeinfo.h
index 5f03da2776..3f8532bd37 100644
--- a/src/jit/nodeinfo.h
+++ b/src/jit/nodeinfo.h
@@ -32,6 +32,7 @@ public:
regOptional = false;
definesAnyRegisters = false;
isInternalRegDelayFree = false;
+ isNoRegCompare = false;
#ifdef DEBUG
isInitialized = false;
#endif
@@ -144,6 +145,9 @@ public:
// in which result is produced.
unsigned char isInternalRegDelayFree : 1;
+    // True if this is a compare feeding a JTRUE; its result doesn't need to be generated into a register.
+ unsigned char isNoRegCompare : 1;
+
#ifdef DEBUG
// isInitialized is set when the tree node is handled.
unsigned char isInitialized : 1;
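// Illustrative sketch, not part of this diff: the shape of compare that isNoRegCompare describes.
// In code like the function below, the '<' only needs to set the condition flags consumed by the
// branch; no register ever has to materialize the 0/1 result of the comparison.
int PickLarger(int a, int b)
{
    if (a < b) // compare feeds the JTRUE directly
    {
        return b;
    }
    return a;
}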
diff --git a/src/jit/rationalize.cpp b/src/jit/rationalize.cpp
index 5244c7424d..257e02d491 100644
--- a/src/jit/rationalize.cpp
+++ b/src/jit/rationalize.cpp
@@ -516,6 +516,7 @@ void Rationalizer::RewriteAssignment(LIR::Use& use)
location->gtType = TYP_BYREF;
assignment->SetOper(GT_STOREIND);
+ assignment->AsStoreInd()->SetRMWStatusDefault();
// TODO: JIT dump
}