Diffstat (limited to 'src/jit/lsraxarch.cpp')
-rw-r--r--  src/jit/lsraxarch.cpp | 1274
1 file changed, 489 insertions(+), 785 deletions(-)
diff --git a/src/jit/lsraxarch.cpp b/src/jit/lsraxarch.cpp
index 6613fc3ba3..a42d8ec3fe 100644
--- a/src/jit/lsraxarch.cpp
+++ b/src/jit/lsraxarch.cpp
@@ -39,13 +39,11 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
// - Setting the appropriate candidates for a store of a multi-reg call return value.
// - Requesting an internal register for SIMD12 stores.
//
-void Lowering::TreeNodeInfoInitStoreLoc(GenTreeLclVarCommon* storeLoc)
+void LinearScan::TreeNodeInfoInitStoreLoc(GenTreeLclVarCommon* storeLoc)
{
- ContainCheckStoreLoc(storeLoc);
-
TreeNodeInfo* info = &(storeLoc->gtLsraInfo);
- info->dstCount = 0;
- GenTree* op1 = storeLoc->gtGetOp1();
+ assert(info->dstCount == 0);
+ GenTree* op1 = storeLoc->gtGetOp1();
#ifdef _TARGET_X86_
if (op1->OperGet() == GT_LONG)
@@ -72,8 +70,8 @@ void Lowering::TreeNodeInfoInitStoreLoc(GenTreeLclVarCommon* storeLoc)
info->srcCount = retTypeDesc->GetReturnRegCount();
// Call node srcCandidates = Bitwise-OR(allregs(GetReturnRegType(i))) for all i=0..RetRegCount-1
- regMaskTP srcCandidates = m_lsra->allMultiRegCallNodeRegs(call);
- op1->gtLsraInfo.setSrcCandidates(m_lsra, srcCandidates);
+ regMaskTP srcCandidates = allMultiRegCallNodeRegs(call);
+ op1->gtLsraInfo.setSrcCandidates(this, srcCandidates);
return;
}
else
@@ -88,7 +86,7 @@ void Lowering::TreeNodeInfoInitStoreLoc(GenTreeLclVarCommon* storeLoc)
{
// Need an additional register to extract upper 4 bytes of Vector3.
info->internalFloatCount = 1;
- info->setInternalCandidates(m_lsra, m_lsra->allSIMDRegs());
+ info->setInternalCandidates(this, allSIMDRegs());
}
return;
}
@@ -110,13 +108,21 @@ void Lowering::TreeNodeInfoInitStoreLoc(GenTreeLclVarCommon* storeLoc)
// requirements needed by LSRA to build the Interval Table (source,
// destination and internal [temp] register counts).
//
-void Lowering::TreeNodeInfoInit(GenTree* tree)
+void LinearScan::TreeNodeInfoInit(GenTree* tree)
{
- LinearScan* l = m_lsra;
- Compiler* compiler = comp;
-
TreeNodeInfo* info = &(tree->gtLsraInfo);
+ if (tree->isContained())
+ {
+ info->dstCount = 0;
+ assert(info->srcCount == 0);
+ TreeNodeInfoInitCheckByteable(tree);
+ return;
+ }
+
+ // Set the default dstCount. This may be modified below.
+ info->dstCount = tree->IsValue() ? 1 : 0;
+
// floating type generates AVX instruction (vmovss etc.), set the flag
SetContainsAVXFlags(varTypeIsFloating(tree->TypeGet()));
switch (tree->OperGet())
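The early-out and default above are the heart of this refactor: containment is decided before TreeNodeInfoInit runs, so the counts here only record it. A minimal standalone sketch of that invariant, using a toy Node type rather than the JIT's GenTree:

    #include <cassert>

    struct Node
    {
        bool contained = false; // folded into its user's instruction
        bool isValue   = false; // produces a value its user consumes
        int  srcCount  = 0;
        int  dstCount  = 0;
    };

    void initDstCount(Node& n)
    {
        if (n.contained)
        {
            n.dstCount = 0;          // a contained node never defines a register
            assert(n.srcCount == 0); // its sources are counted by its user
            return;
        }
        n.dstCount = n.isValue ? 1 : 0; // default; multi-reg cases adjust later
    }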
@@ -128,10 +134,28 @@ void Lowering::TreeNodeInfoInit(GenTree* tree)
TreeNodeInfoInitSimple(tree);
break;
- case GT_LCL_FLD:
case GT_LCL_VAR:
+            // Because we do containment analysis before we redo dataflow and identify register
+            // candidates, the containment analysis only uses !lvDoNotEnregister to estimate register
+            // candidates.
+            // If there is a lclVar that was estimated to be a register candidate but is not,
+            // and it was marked regOptional, it should now be marked contained instead.
+ // TODO-XArch-CQ: When this is being called while RefPositions are being created,
+ // use lvLRACandidate here instead.
+ if (info->regOptional)
+ {
+ if (!compiler->lvaTable[tree->AsLclVarCommon()->gtLclNum].lvTracked ||
+ compiler->lvaTable[tree->AsLclVarCommon()->gtLclNum].lvDoNotEnregister)
+ {
+ info->regOptional = false;
+ tree->SetContained();
+ info->dstCount = 0;
+ }
+ }
+ __fallthrough;
+
+ case GT_LCL_FLD:
info->srcCount = 0;
- info->dstCount = 1;
#ifdef FEATURE_SIMD
// Need an additional register to read upper 4 bytes of Vector3.
@@ -141,7 +165,7 @@ void Lowering::TreeNodeInfoInit(GenTree* tree)
// because both targetReg and internal reg will be in use at the same time.
info->internalFloatCount = 1;
info->isInternalRegDelayFree = true;
- info->setInternalCandidates(m_lsra, m_lsra->allSIMDRegs());
+ info->setInternalCandidates(this, allSIMDRegs());
}
#endif
break;
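A rough sketch of the regOptional downgrade in the GT_LCL_VAR case above, with a toy descriptor standing in for the real LclVarDsc; as the TODO notes, this is only an estimate until lvLRACandidate is available:

    struct LclVarDsc
    {
        bool lvTracked;
        bool lvDoNotEnregister;
    };

    // Approximation used above: a local may become a register candidate only
    // if it is tracked and not marked do-not-enregister.
    bool mayBeRegCandidate(const LclVarDsc& v)
    {
        return v.lvTracked && !v.lvDoNotEnregister;
    }

When mayBeRegCandidate is false for a regOptional lclVar, the case above clears regOptional, marks the node contained, and zeroes its dstCount so the user reads the value from its stack home.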
@@ -158,12 +182,12 @@ void Lowering::TreeNodeInfoInit(GenTree* tree)
case GT_START_NONGC:
case GT_PROF_HOOK:
info->srcCount = 0;
- info->dstCount = 0;
+ assert(info->dstCount == 0);
break;
case GT_CNS_DBL:
info->srcCount = 0;
- info->dstCount = 1;
+ assert(info->dstCount == 1);
break;
#if !defined(_TARGET_64BIT_)
@@ -173,14 +197,14 @@ void Lowering::TreeNodeInfoInit(GenTree* tree)
{
// An unused GT_LONG node needs to consume its sources.
info->srcCount = 2;
+ info->dstCount = 0;
}
else
{
- // Passthrough
+ // Passthrough. Should have been marked contained.
info->srcCount = 0;
+ assert(info->dstCount == 0);
}
-
- info->dstCount = 0;
break;
#endif // !defined(_TARGET_64BIT_)
@@ -190,7 +214,7 @@ void Lowering::TreeNodeInfoInit(GenTree* tree)
case GT_QMARK:
case GT_COLON:
info->srcCount = 0;
- info->dstCount = 0;
+ assert(info->dstCount == 0);
unreached();
break;
@@ -199,20 +223,19 @@ void Lowering::TreeNodeInfoInit(GenTree* tree)
break;
case GT_RETFILT:
+ assert(info->dstCount == 0);
if (tree->TypeGet() == TYP_VOID)
{
info->srcCount = 0;
- info->dstCount = 0;
}
else
{
assert(tree->TypeGet() == TYP_INT);
info->srcCount = 1;
- info->dstCount = 0;
- info->setSrcCandidates(l, RBM_INTRET);
- tree->gtOp.gtOp1->gtLsraInfo.setSrcCandidates(l, RBM_INTRET);
+ info->setSrcCandidates(this, RBM_INTRET);
+ tree->gtOp.gtOp1->gtLsraInfo.setSrcCandidates(this, RBM_INTRET);
}
break;
@@ -223,24 +246,23 @@ void Lowering::TreeNodeInfoInit(GenTree* tree)
info->srcCount = 0;
if (tree->TypeGet() != TYP_VOID && tree->gtOp.gtOp1 == nullptr)
{
- info->dstCount = 1;
+ assert(info->dstCount == 1);
}
else
{
- info->dstCount = 0;
+ assert(info->dstCount == 0);
}
break;
case GT_JTRUE:
{
info->srcCount = 0;
- info->dstCount = 0;
+ assert(info->dstCount == 0);
GenTree* cmp = tree->gtGetOp1();
- l->clearDstCount(cmp);
+ assert(cmp->gtLsraInfo.dstCount == 0);
#ifdef FEATURE_SIMD
- ContainCheckJTrue(tree->AsOp());
GenTree* cmpOp1 = cmp->gtGetOp1();
GenTree* cmpOp2 = cmp->gtGetOp2();
if (cmpOp1->IsSIMDEqualityOrInequality() && cmpOp2->isContained())
@@ -249,46 +271,8 @@ void Lowering::TreeNodeInfoInit(GenTree* tree)
// We always generate code for a SIMD equality comparison, but the compare itself produces no value.
// Neither the SIMD node nor the immediate need to be evaluated into a register.
- l->clearOperandCounts(cmp);
- l->clearDstCount(cmpOp1);
- l->clearOperandCounts(cmpOp2);
-
- // Codegen of SIMD (in)Equality uses target integer reg only for setting flags.
- // A target reg is not needed on AVX when comparing against Vector Zero.
- // In all other cases we need to reserve an int type internal register, since we
- // have cleared dstCount.
- if (!compiler->canUseAVX() || !cmpOp1->gtGetOp2()->IsIntegralConstVector(0))
- {
- ++(cmpOp1->gtLsraInfo.internalIntCount);
- regMaskTP internalCandidates = cmpOp1->gtLsraInfo.getInternalCandidates(l);
- internalCandidates |= l->allRegs(TYP_INT);
- cmpOp1->gtLsraInfo.setInternalCandidates(l, internalCandidates);
- }
-
- // We have to reverse compare oper in the following cases:
- // 1) SIMD Equality: Sets Zero flag on equal otherwise clears it.
- // Therefore, if compare oper is == or != against false(0), we will
- // be checking opposite of what is required.
- //
- // 2) SIMD inEquality: Clears Zero flag on true otherwise sets it.
- // Therefore, if compare oper is == or != against true(1), we will
- // be checking opposite of what is required.
- GenTreeSIMD* simdNode = cmpOp1->AsSIMD();
- if (simdNode->gtSIMDIntrinsicID == SIMDIntrinsicOpEquality)
- {
- if (cmpOp2->IsIntegralConst(0))
- {
- cmp->SetOper(GenTree::ReverseRelop(cmpOper));
- }
- }
- else
- {
- assert(simdNode->gtSIMDIntrinsicID == SIMDIntrinsicOpInEquality);
- if (cmpOp2->IsIntegralConst(1))
- {
- cmp->SetOper(GenTree::ReverseRelop(cmpOper));
- }
- }
+ assert(cmpOp1->gtLsraInfo.dstCount == 0);
+ assert(cmpOp2->gtLsraInfo.dstCount == 0);
}
#endif // FEATURE_SIMD
}
@@ -296,39 +280,38 @@ void Lowering::TreeNodeInfoInit(GenTree* tree)
case GT_JCC:
info->srcCount = 0;
- info->dstCount = 0;
+ assert(info->dstCount == 0);
break;
case GT_SETCC:
info->srcCount = 0;
- info->dstCount = 1;
+ assert(info->dstCount == 1);
#ifdef _TARGET_X86_
- info->setDstCandidates(m_lsra, RBM_BYTE_REGS);
+ info->setDstCandidates(this, RBM_BYTE_REGS);
#endif // _TARGET_X86_
break;
case GT_JMP:
info->srcCount = 0;
- info->dstCount = 0;
+ assert(info->dstCount == 0);
break;
case GT_SWITCH:
// This should never occur since switch nodes must not be visible at this
// point in the JIT.
info->srcCount = 0;
- info->dstCount = 0; // To avoid getting uninit errors.
noway_assert(!"Switch must be lowered at this point");
break;
case GT_JMPTABLE:
info->srcCount = 0;
- info->dstCount = 1;
+ assert(info->dstCount == 1);
break;
case GT_SWITCH_TABLE:
info->srcCount = 2;
info->internalIntCount = 1;
- info->dstCount = 0;
+ assert(info->dstCount == 0);
break;
case GT_ASG:
@@ -336,7 +319,6 @@ void Lowering::TreeNodeInfoInit(GenTree* tree)
case GT_ASG_SUB:
noway_assert(!"We should never hit any assignment operator in lowering");
info->srcCount = 0;
- info->dstCount = 0;
break;
#if !defined(_TARGET_64BIT_)
@@ -351,10 +333,8 @@ void Lowering::TreeNodeInfoInit(GenTree* tree)
// Rather they only support "op xmm, mem/xmm" form.
if (varTypeIsFloating(tree->TypeGet()))
{
- ContainCheckFloatBinary(tree->AsOp());
- info->srcCount += GetOperandSourceCount(tree->gtOp.gtOp1);
+ info->srcCount = GetOperandSourceCount(tree->gtOp.gtOp1);
info->srcCount += GetOperandSourceCount(tree->gtOp.gtOp2);
- info->dstCount = 1;
break;
}
@@ -363,21 +343,16 @@ void Lowering::TreeNodeInfoInit(GenTree* tree)
case GT_AND:
case GT_OR:
case GT_XOR:
- ContainCheckBinary(tree->AsOp());
- info->srcCount += GetOperandSourceCount(tree->gtOp.gtOp1);
+ info->srcCount = GetOperandSourceCount(tree->gtOp.gtOp1);
info->srcCount += GetOperandSourceCount(tree->gtOp.gtOp2);
- info->dstCount = 1;
- // Codegen of this tree node sets ZF and SF flags.
- tree->gtFlags |= GTF_ZSF_SET;
break;
case GT_RETURNTRAP:
// This just turns into a compare of its child with an int + a conditional call
- ContainCheckReturnTrap(tree->AsOp());
- info->srcCount = tree->gtOp.gtOp1->isContained() ? 0 : 1;
- info->dstCount = 0;
+ info->srcCount = tree->gtOp.gtOp1->isContained() ? 0 : 1;
+ assert(info->dstCount == 0);
info->internalIntCount = 1;
- info->setInternalCandidates(l, l->allRegs(TYP_INT));
+ info->setInternalCandidates(this, allRegs(TYP_INT));
break;
case GT_MOD:
@@ -406,7 +381,6 @@ void Lowering::TreeNodeInfoInit(GenTree* tree)
#endif // FEATURE_SIMD
case GT_CAST:
- ContainCheckCast(tree->AsCast());
TreeNodeInfoInitCast(tree);
break;
@@ -417,8 +391,7 @@ void Lowering::TreeNodeInfoInit(GenTree* tree)
break;
case GT_NEG:
- info->srcCount = 1;
- info->dstCount = 1;
+ info->srcCount = GetOperandSourceCount(tree->gtOp.gtOp1);
// TODO-XArch-CQ:
// SSE instruction set doesn't have an instruction to negate a number.
@@ -439,18 +412,12 @@ void Lowering::TreeNodeInfoInit(GenTree* tree)
if (varTypeIsFloating(tree))
{
info->internalFloatCount = 1;
- info->setInternalCandidates(l, l->internalFloatRegCandidates());
- }
- else
- {
- // Codegen of this tree node sets ZF and SF flags.
- tree->gtFlags |= GTF_ZSF_SET;
+ info->setInternalCandidates(this, internalFloatRegCandidates());
}
break;
case GT_NOT:
- info->srcCount = 1;
- info->dstCount = 1;
+ info->srcCount = GetOperandSourceCount(tree->gtOp.gtOp1);
break;
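GT_NEG and GT_NOT now take their srcCount from GetOperandSourceCount. A simplified sketch of that helper's contract, inferred from its uses in this change (toy Operand type; the real helper walks GenTree operands):

    struct Operand
    {
        bool contained = false;
        bool isIndir   = false;
        bool hasBase   = false; // [base + index*scale + disp]
        bool hasIndex  = false;
    };

    int getOperandSourceCount(const Operand& op)
    {
        if (!op.contained)
            return 1;                                      // one register use
        if (op.isIndir)
            return (op.hasBase ? 1 : 0) + (op.hasIndex ? 1 : 0);
        return 0;                                          // e.g. a contained immediate
    }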
case GT_LSH:
@@ -478,34 +445,31 @@ void Lowering::TreeNodeInfoInit(GenTree* tree)
break;
case GT_CKFINITE:
- info->srcCount = 1;
- info->dstCount = 1;
+ info->srcCount = 1;
+ assert(info->dstCount == 1);
info->internalIntCount = 1;
break;
case GT_CMPXCHG:
info->srcCount = 3;
- info->dstCount = 1;
+ assert(info->dstCount == 1);
// comparand is preferenced to RAX.
// Remaining two operands can be in any reg other than RAX.
- tree->gtCmpXchg.gtOpComparand->gtLsraInfo.setSrcCandidates(l, RBM_RAX);
- tree->gtCmpXchg.gtOpLocation->gtLsraInfo.setSrcCandidates(l, l->allRegs(TYP_INT) & ~RBM_RAX);
- tree->gtCmpXchg.gtOpValue->gtLsraInfo.setSrcCandidates(l, l->allRegs(TYP_INT) & ~RBM_RAX);
- tree->gtLsraInfo.setDstCandidates(l, RBM_RAX);
+ tree->gtCmpXchg.gtOpComparand->gtLsraInfo.setSrcCandidates(this, RBM_RAX);
+ tree->gtCmpXchg.gtOpLocation->gtLsraInfo.setSrcCandidates(this, allRegs(TYP_INT) & ~RBM_RAX);
+ tree->gtCmpXchg.gtOpValue->gtLsraInfo.setSrcCandidates(this, allRegs(TYP_INT) & ~RBM_RAX);
+ tree->gtLsraInfo.setDstCandidates(this, RBM_RAX);
break;
case GT_LOCKADD:
- info->dstCount = (tree->TypeGet() == TYP_VOID) ? 0 : 1;
+ op2 = tree->gtOp.gtOp2;
+ info->srcCount = op2->isContained() ? 1 : 2;
+            assert(info->dstCount == ((tree->TypeGet() == TYP_VOID) ? 0 : 1));
+ break;
- if (CheckImmedAndMakeContained(tree, tree->gtOp.gtOp2))
- {
- info->srcCount = 1;
- }
- else
- {
- info->srcCount = 2;
- }
+ case GT_PUTARG_REG:
+ TreeNodeInfoInitPutArgReg(tree->AsUnOp());
break;
case GT_CALL:
@@ -516,10 +480,10 @@ void Lowering::TreeNodeInfoInit(GenTree* tree)
{
// For a GT_ADDR, the child node should not be evaluated into a register
GenTreePtr child = tree->gtOp.gtOp1;
- assert(!l->isCandidateLocalRef(child));
- MakeSrcContained(tree, child);
+ assert(!isCandidateLocalRef(child));
+ assert(child->isContained());
+ assert(info->dstCount == 1);
info->srcCount = 0;
- info->dstCount = 1;
}
break;
@@ -531,12 +495,10 @@ void Lowering::TreeNodeInfoInit(GenTree* tree)
// These should all be eliminated prior to Lowering.
assert(!"Non-store block node in Lowering");
info->srcCount = 0;
- info->dstCount = 0;
break;
#ifdef FEATURE_PUT_STRUCT_ARG_STK
case GT_PUTARG_STK:
- LowerPutArgStk(tree->AsPutArgStk());
TreeNodeInfoInitPutArgStk(tree->AsPutArgStk());
break;
#endif // FEATURE_PUT_STRUCT_ARG_STK
@@ -544,14 +506,12 @@ void Lowering::TreeNodeInfoInit(GenTree* tree)
case GT_STORE_BLK:
case GT_STORE_OBJ:
case GT_STORE_DYN_BLK:
- LowerBlockStore(tree->AsBlk());
TreeNodeInfoInitBlockStore(tree->AsBlk());
break;
case GT_INIT_VAL:
// Always a passthrough of its child's value.
- info->srcCount = 0;
- info->dstCount = 0;
+ assert(!"INIT_VAL should always be contained");
break;
case GT_LCLHEAP:
@@ -562,23 +522,21 @@ void Lowering::TreeNodeInfoInit(GenTree* tree)
#ifdef FEATURE_SIMD
case GT_SIMD_CHK:
#endif // FEATURE_SIMD
- ContainCheckBoundsChk(tree->AsBoundsChk());
// Consumes arrLen & index - has no result
info->srcCount = GetOperandSourceCount(tree->AsBoundsChk()->gtIndex);
info->srcCount += GetOperandSourceCount(tree->AsBoundsChk()->gtArrLen);
- info->dstCount = 0;
+ assert(info->dstCount == 0);
break;
case GT_ARR_ELEM:
// These must have been lowered to GT_ARR_INDEX
- noway_assert(!"We should never see a GT_ARR_ELEM in lowering");
+ noway_assert(!"We should never see a GT_ARR_ELEM after Lowering.");
info->srcCount = 0;
- info->dstCount = 0;
break;
case GT_ARR_INDEX:
info->srcCount = 2;
- info->dstCount = 1;
+ assert(info->dstCount == 1);
// For GT_ARR_INDEX, the lifetime of the arrObj must be extended because it is actually used multiple
// times while the result is being computed.
tree->AsArrIndex()->ArrObj()->gtLsraInfo.isDelayFree = true;
@@ -588,27 +546,26 @@ void Lowering::TreeNodeInfoInit(GenTree* tree)
case GT_ARR_OFFSET:
// This consumes the offset, if any, the arrObj and the effective index,
// and produces the flattened offset for this dimension.
- info->srcCount = 2;
- info->dstCount = 1;
-
- if (tree->gtArrOffs.gtOffset->IsIntegralConst(0))
+ assert(info->dstCount == 1);
+ if (tree->gtArrOffs.gtOffset->isContained())
{
- MakeSrcContained(tree, tree->gtArrOffs.gtOffset);
+ info->srcCount = 2;
}
else
{
-                info->srcCount++;
// Here we simply need an internal register, which must be different
// from any of the operand's registers, but may be the same as targetReg.
+ info->srcCount = 3;
info->internalIntCount = 1;
}
break;
case GT_LEA:
- // The LEA usually passes its operands through to the GT_IND, in which case we'll
- // clear the info->srcCount and info->dstCount later, but we may be instantiating an address,
- // so we set them here.
+ // The LEA usually passes its operands through to the GT_IND, in which case it will
+ // be contained, but we may be instantiating an address, in which case we set them here.
info->srcCount = 0;
+ assert(info->dstCount == 1);
if (tree->AsAddrMode()->HasBase())
{
info->srcCount++;
@@ -617,70 +574,37 @@ void Lowering::TreeNodeInfoInit(GenTree* tree)
{
info->srcCount++;
}
- info->dstCount = 1;
break;
case GT_STOREIND:
- {
- info->srcCount = 2;
- info->dstCount = 0;
- GenTree* src = tree->gtOp.gtOp2;
-
if (compiler->codeGen->gcInfo.gcIsWriteBarrierAsgNode(tree))
{
TreeNodeInfoInitGCWriteBarrier(tree);
break;
}
-
- // If the source is a containable immediate, make it contained, unless it is
- // an int-size or larger store of zero to memory, because we can generate smaller code
- // by zeroing a register and then storing it.
- if (IsContainableImmed(tree, src) &&
- (!src->IsIntegralConst(0) || varTypeIsSmall(tree) || tree->gtGetOp1()->OperGet() == GT_CLS_VAR_ADDR))
- {
- MakeSrcContained(tree, src);
- }
- else if (!varTypeIsFloating(tree))
- {
- // Perform recognition of trees with the following structure:
- // StoreInd(addr, BinOp(expr, GT_IND(addr)))
- // to be able to fold this into an instruction of the form
- // BINOP [addr], register
- // where register is the actual place where 'expr' is computed.
- //
- // SSE2 doesn't support RMW form of instructions.
- if (TreeNodeInfoInitIfRMWMemOp(tree))
- {
- break;
- }
- }
-
TreeNodeInfoInitIndir(tree->AsIndir());
- }
- break;
+ break;
case GT_NULLCHECK:
- info->dstCount = 0;
- info->srcCount = 1;
- info->isLocalDefUse = true;
+ assert(info->dstCount == 0);
+ info->srcCount = 1;
break;
case GT_IND:
- info->dstCount = 1;
- info->srcCount = 1;
TreeNodeInfoInitIndir(tree->AsIndir());
+ assert(info->dstCount == 1);
break;
case GT_CATCH_ARG:
info->srcCount = 0;
- info->dstCount = 1;
- info->setDstCandidates(l, RBM_EXCEPTION_OBJECT);
+ assert(info->dstCount == 1);
+ info->setDstCandidates(this, RBM_EXCEPTION_OBJECT);
break;
#if !FEATURE_EH_FUNCLETS
case GT_END_LFIN:
info->srcCount = 0;
- info->dstCount = 0;
+ assert(info->dstCount == 0);
break;
#endif
@@ -689,6 +613,31 @@ void Lowering::TreeNodeInfoInit(GenTree* tree)
JITDUMP("Unexpected node %s in Lower.\n", GenTree::OpName(tree->OperGet()));
unreached();
break;
+
+ case GT_INDEX_ADDR:
+ info->srcCount = 2;
+ info->dstCount = 1;
+
+ if (tree->AsIndexAddr()->Index()->TypeGet() == TYP_I_IMPL)
+ {
+ info->internalIntCount = 1;
+ }
+ else
+ {
+ switch (tree->AsIndexAddr()->gtElemSize)
+ {
+ case 1:
+ case 2:
+ case 4:
+ case 8:
+ break;
+
+ default:
+ info->internalIntCount = 1;
+ break;
+ }
+ }
+ break;
} // end switch (tree->OperGet())
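The GT_INDEX_ADDR case above reserves an internal register exactly when the element size cannot ride along in the addressing mode; a one-function restatement of that check:

    // x64 addressing encodes [base + index*scale + disp] only for these scales;
    // any other element size needs a temp register for the index * elemSize multiply.
    bool fitsSibScale(unsigned elemSize)
    {
        return (elemSize == 1) || (elemSize == 2) || (elemSize == 4) || (elemSize == 8);
    }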
// If op2 of a binary-op gets marked as contained, then binary-op srcCount will be 1.
@@ -715,8 +664,6 @@ void Lowering::TreeNodeInfoInit(GenTree* tree)
op1->gtLsraInfo.isTgtPref = true;
// Is this a non-commutative operator, or is op2 a contained memory op?
- // (Note that we can't call IsContained() at this point because it uses exactly the
- // same information we're currently computing.)
// In either case, we need to make op2 remain live until the op is complete, by marking
// the source(s) associated with op2 as "delayFree".
// Note that if op2 of a binary RMW operator is a memory op, even if the operator
@@ -760,8 +707,8 @@ void Lowering::TreeNodeInfoInit(GenTree* tree)
delayUseSrc = op1;
}
- else if ((op2 != nullptr) && (!tree->OperIsCommutative() ||
- (IsContainableMemoryOp(op2, true) && (op2->gtLsraInfo.srcCount == 0))))
+ else if ((op2 != nullptr) &&
+ (!tree->OperIsCommutative() || (isContainableMemoryOp(op2) && (op2->gtLsraInfo.srcCount == 0))))
{
delayUseSrc = op2;
}
@@ -775,11 +722,15 @@ void Lowering::TreeNodeInfoInit(GenTree* tree)
TreeNodeInfoInitCheckByteable(tree);
+ if (tree->IsUnusedValue() && (info->dstCount != 0))
+ {
+ info->isLocalDefUse = true;
+ }
// We need to be sure that we've set info->srcCount and info->dstCount appropriately
assert((info->dstCount < 2) || (tree->IsMultiRegCall() && info->dstCount == MAX_RET_REG_COUNT));
}
-void Lowering::SetDelayFree(GenTree* delayUseSrc)
+void LinearScan::SetDelayFree(GenTree* delayUseSrc)
{
// If delayUseSrc is an indirection and it doesn't produce a result, then we need to set "delayFree'
// on the base & index, if any.
@@ -813,10 +764,9 @@ void Lowering::SetDelayFree(GenTree* delayUseSrc)
// Return Value:
// None.
//
-void Lowering::TreeNodeInfoInitCheckByteable(GenTree* tree)
+void LinearScan::TreeNodeInfoInitCheckByteable(GenTree* tree)
{
#ifdef _TARGET_X86_
- LinearScan* l = m_lsra;
TreeNodeInfo* info = &(tree->gtLsraInfo);
// Exclude RBM_NON_BYTE_REGS from dst candidates of tree node and src candidates of operands
@@ -831,33 +781,33 @@ void Lowering::TreeNodeInfoInitCheckByteable(GenTree* tree)
regMaskTP regMask;
if (info->dstCount > 0)
{
- regMask = info->getDstCandidates(l);
+ regMask = info->getDstCandidates(this);
assert(regMask != RBM_NONE);
- info->setDstCandidates(l, regMask & ~RBM_NON_BYTE_REGS);
+ info->setDstCandidates(this, regMask & ~RBM_NON_BYTE_REGS);
}
- if (tree->OperIsSimple() && (info->srcCount > 0))
+ if (tree->OperIsSimple())
{
- // No need to set src candidates on a contained child operand.
GenTree* op = tree->gtOp.gtOp1;
- assert(op != nullptr);
- bool containedNode = (op->gtLsraInfo.srcCount == 0) && (op->gtLsraInfo.dstCount == 0);
- if (!containedNode)
+ if (op != nullptr)
{
- regMask = op->gtLsraInfo.getSrcCandidates(l);
- assert(regMask != RBM_NONE);
- op->gtLsraInfo.setSrcCandidates(l, regMask & ~RBM_NON_BYTE_REGS);
+ // No need to set src candidates on a contained child operand.
+ if (!op->isContained())
+ {
+ regMask = op->gtLsraInfo.getSrcCandidates(this);
+ assert(regMask != RBM_NONE);
+ op->gtLsraInfo.setSrcCandidates(this, regMask & ~RBM_NON_BYTE_REGS);
+ }
}
if (tree->OperIsBinary() && (tree->gtOp.gtOp2 != nullptr))
{
- op = tree->gtOp.gtOp2;
- containedNode = (op->gtLsraInfo.srcCount == 0) && (op->gtLsraInfo.dstCount == 0);
- if (!containedNode)
+ op = tree->gtOp.gtOp2;
+ if (!op->isContained())
{
- regMask = op->gtLsraInfo.getSrcCandidates(l);
+ regMask = op->gtLsraInfo.getSrcCandidates(this);
assert(regMask != RBM_NONE);
- op->gtLsraInfo.setSrcCandidates(l, regMask & ~RBM_NON_BYTE_REGS);
+ op->gtLsraInfo.setSrcCandidates(this, regMask & ~RBM_NON_BYTE_REGS);
}
}
}
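For background on the masking in TreeNodeInfoInitCheckByteable: on x86 only EAX/EBX/ECX/EDX have 8-bit forms (AL..DL), so byte-sized operands must avoid the others. A toy restatement, with placeholder mask values and RBM_NON_BYTE_REGS assumed to be ESI|EDI:

    using regMaskTP = unsigned;
    constexpr regMaskTP RBM_EAX = 0x01, RBM_ECX = 0x02, RBM_EDX = 0x04,
                        RBM_EBX = 0x08, RBM_ESI = 0x10, RBM_EDI = 0x20;
    constexpr regMaskTP RBM_NON_BYTE_REGS = RBM_ESI | RBM_EDI; // assumption

    regMaskTP byteableCandidates(regMaskTP candidates)
    {
        return candidates & ~RBM_NON_BYTE_REGS; // as applied to dst and src above
    }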
@@ -865,8 +815,50 @@ void Lowering::TreeNodeInfoInitCheckByteable(GenTree* tree)
#endif //_TARGET_X86_
}
+//------------------------------------------------------------------------------
+// isRMWRegOper: Can this binary tree node be used in a Read-Modify-Write format
+//
+// Arguments:
+// tree - a binary tree node
+//
+// Return Value:
+// Returns true if we can use the read-modify-write instruction form
+//
+// Notes:
+// This is used to determine whether to preference the source to the destination register.
+//
+bool LinearScan::isRMWRegOper(GenTreePtr tree)
+{
+ // TODO-XArch-CQ: Make this more accurate.
+ // For now, We assume that most binary operators are of the RMW form.
+ assert(tree->OperIsBinary());
+
+ if (tree->OperIsCompare() || tree->OperIs(GT_CMP))
+ {
+ return false;
+ }
+
+ switch (tree->OperGet())
+ {
+        // These Opers either support a three op form (e.g. GT_LEA), or do not read/write their first operand
+ case GT_LEA:
+ case GT_STOREIND:
+ case GT_ARR_INDEX:
+ case GT_STORE_BLK:
+ case GT_STORE_OBJ:
+ return false;
+
+        // x86/x64 does support a three op multiply when op1 or op2 is a contained immediate
+ case GT_MUL:
+ return (!tree->gtOp.gtOp2->isContainedIntOrIImmed() && !tree->gtOp.gtOp1->isContainedIntOrIImmed());
+
+ default:
+ return true;
+ }
+}
+
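isRMWRegOper feeds the isTgtPref logic seen earlier: for dst = op1 OP op2 in read-modify-write form, giving dst the same register as op1 lets codegen emit a single "op reg, op2" with no leading mov. A toy restatement of the preferencing decision:

    struct OperandInfo { bool isTgtPref = false; };

    void preferenceRMW(bool isRMWOper, bool op1Contained, OperandInfo& op1Info)
    {
        if (isRMWOper && !op1Contained)
        {
            op1Info.isTgtPref = true; // allocator tries regNum(op1) == regNum(dst)
        }
    }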
//------------------------------------------------------------------------
-// TreeNodeInfoInitSimple: Sets the srcCount and dstCount for all the trees
+// TreeNodeInfoInitSimple: Sets the srcCount for all the trees
// without special handling based on the tree node type.
//
// Arguments:
@@ -875,11 +867,15 @@ void Lowering::TreeNodeInfoInitCheckByteable(GenTree* tree)
// Return Value:
// None.
//
-void Lowering::TreeNodeInfoInitSimple(GenTree* tree)
+void LinearScan::TreeNodeInfoInitSimple(GenTree* tree)
{
TreeNodeInfo* info = &(tree->gtLsraInfo);
- unsigned kind = tree->OperKind();
- info->dstCount = tree->IsValue() ? 1 : 0;
+ if (tree->isContained())
+ {
+ info->srcCount = 0;
+ return;
+ }
+ unsigned kind = tree->OperKind();
if (kind & (GTK_CONST | GTK_LEAF))
{
info->srcCount = 0;
@@ -888,12 +884,9 @@ void Lowering::TreeNodeInfoInitSimple(GenTree* tree)
{
if (tree->gtGetOp2IfPresent() != nullptr)
{
- info->srcCount = 2;
- }
- else
- {
- info->srcCount = 1;
+ info->srcCount += GetOperandSourceCount(tree->gtOp.gtOp2);
}
+ info->srcCount += GetOperandSourceCount(tree->gtOp.gtOp1);
}
else
{
@@ -910,14 +903,10 @@ void Lowering::TreeNodeInfoInitSimple(GenTree* tree)
// Return Value:
// None.
//
-void Lowering::TreeNodeInfoInitReturn(GenTree* tree)
+void LinearScan::TreeNodeInfoInitReturn(GenTree* tree)
{
- ContainCheckRet(tree->AsOp());
-
- TreeNodeInfo* info = &(tree->gtLsraInfo);
- LinearScan* l = m_lsra;
- Compiler* compiler = comp;
- GenTree* op1 = tree->gtGetOp1();
+ TreeNodeInfo* info = &(tree->gtLsraInfo);
+ GenTree* op1 = tree->gtGetOp1();
#if !defined(_TARGET_64BIT_)
if (tree->TypeGet() == TYP_LONG)
@@ -926,9 +915,9 @@ void Lowering::TreeNodeInfoInitReturn(GenTree* tree)
GenTree* loVal = op1->gtGetOp1();
GenTree* hiVal = op1->gtGetOp2();
info->srcCount = 2;
- loVal->gtLsraInfo.setSrcCandidates(l, RBM_LNGRET_LO);
- hiVal->gtLsraInfo.setSrcCandidates(l, RBM_LNGRET_HI);
- info->dstCount = 0;
+ loVal->gtLsraInfo.setSrcCandidates(this, RBM_LNGRET_LO);
+ hiVal->gtLsraInfo.setSrcCandidates(this, RBM_LNGRET_HI);
+ assert(info->dstCount == 0);
}
else
#endif // !defined(_TARGET_64BIT_)
@@ -936,7 +925,7 @@ void Lowering::TreeNodeInfoInitReturn(GenTree* tree)
regMaskTP useCandidates = RBM_NONE;
info->srcCount = ((tree->TypeGet() == TYP_VOID) || op1->isContained()) ? 0 : 1;
- info->dstCount = 0;
+ assert(info->dstCount == 0);
#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
if (varTypeIsStruct(tree))
@@ -978,7 +967,7 @@ void Lowering::TreeNodeInfoInitReturn(GenTree* tree)
if (useCandidates != RBM_NONE)
{
- op1->gtLsraInfo.setSrcCandidates(l, useCandidates);
+ op1->gtLsraInfo.setSrcCandidates(this, useCandidates);
}
}
}
@@ -992,14 +981,9 @@ void Lowering::TreeNodeInfoInitReturn(GenTree* tree)
// Return Value:
// None.
//
-void Lowering::TreeNodeInfoInitShiftRotate(GenTree* tree)
+void LinearScan::TreeNodeInfoInitShiftRotate(GenTree* tree)
{
TreeNodeInfo* info = &(tree->gtLsraInfo);
- LinearScan* l = m_lsra;
- ContainCheckShiftRotate(tree->AsOp());
-
- info->srcCount = 2;
- info->dstCount = 1;
// For shift operations, we need that the number
// of bits moved gets stored in CL in case
@@ -1011,9 +995,9 @@ void Lowering::TreeNodeInfoInitShiftRotate(GenTree* tree)
// We will allow whatever can be encoded - hope you know what you are doing.
if (!shiftBy->isContained())
{
- source->gtLsraInfo.setSrcCandidates(l, l->allRegs(TYP_INT) & ~RBM_RCX);
- shiftBy->gtLsraInfo.setSrcCandidates(l, RBM_RCX);
- info->setDstCandidates(l, l->allRegs(TYP_INT) & ~RBM_RCX);
+ source->gtLsraInfo.setSrcCandidates(this, allRegs(TYP_INT) & ~RBM_RCX);
+ shiftBy->gtLsraInfo.setSrcCandidates(this, RBM_RCX);
+ info->setDstCandidates(this, allRegs(TYP_INT) & ~RBM_RCX);
if (!tree->isContained())
{
info->srcCount = 2;
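The variable-shift constraint above, collected into one helper (toy mask value; the real allInt comes from allRegs(TYP_INT)): the count must land in RCX so it can be consumed as CL, and everything else is steered away from RCX:

    using regMaskTP = unsigned;
    constexpr regMaskTP RBM_RCX = 0x2; // placeholder bit

    void variableShiftMasks(regMaskTP allInt, regMaskTP& sourceMask,
                            regMaskTP& shiftByMask, regMaskTP& dstMask)
    {
        sourceMask  = allInt & ~RBM_RCX; // shifted value: anywhere but RCX
        shiftByMask = RBM_RCX;           // shift count: RCX, read as CL
        dstMask     = allInt & ~RBM_RCX; // result: anywhere but RCX
    }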
@@ -1076,38 +1060,51 @@ void Lowering::TreeNodeInfoInitShiftRotate(GenTree* tree)
// Return Value:
// None.
//
-void Lowering::TreeNodeInfoInitPutArgReg(
- GenTreeUnOp* node, regNumber argReg, TreeNodeInfo& info, bool isVarArgs, bool* callHasFloatRegArgs)
+void LinearScan::TreeNodeInfoInitPutArgReg(GenTreeUnOp* node)
{
assert(node != nullptr);
assert(node->OperIsPutArgReg());
+ node->gtLsraInfo.srcCount = 1;
+ regNumber argReg = node->gtRegNum;
assert(argReg != REG_NA);
- // Each register argument corresponds to one source.
- info.srcCount++;
-
// Set the register requirements for the node.
const regMaskTP argMask = genRegMask(argReg);
- node->gtLsraInfo.setDstCandidates(m_lsra, argMask);
- node->gtLsraInfo.setSrcCandidates(m_lsra, argMask);
+ node->gtLsraInfo.setDstCandidates(this, argMask);
+ node->gtLsraInfo.setSrcCandidates(this, argMask);
// To avoid redundant moves, have the argument operand computed in the
// register in which the argument is passed to the call.
- node->gtOp.gtOp1->gtLsraInfo.setSrcCandidates(m_lsra, m_lsra->getUseCandidates(node));
+ node->gtOp.gtOp1->gtLsraInfo.setSrcCandidates(this, getUseCandidates(node));
+}
+//------------------------------------------------------------------------
+// HandleFloatVarArgs: Handle additional register requirements for a varargs call
+//
+// Arguments:
+// call - The call node of interest
+// argNode - The current argument
+//
+// Return Value:
+// None.
+//
+// Notes:
+// In the case of a varargs call, the ABI dictates that if we have floating point args,
+// we must pass the enregistered arguments in both the integer and floating point registers.
+// Since the integer register is not associated with the arg node, we will reserve it as
+// an internal register on the call so that it is not used during the evaluation of the call node
+// (e.g. for the target).
+void LinearScan::HandleFloatVarArgs(GenTreeCall* call, GenTree* argNode, bool* callHasFloatRegArgs)
+{
#if FEATURE_VARARG
- *callHasFloatRegArgs |= varTypeIsFloating(node->TypeGet());
-
- // In the case of a varargs call, the ABI dictates that if we have floating point args,
- // we must pass the enregistered arguments in both the integer and floating point registers.
- // Since the integer register is not associated with this arg node, we will reserve it as
- // an internal register so that it is not used during the evaluation of the call node
- // (e.g. for the target).
- if (isVarArgs && varTypeIsFloating(node))
+ if (call->IsVarargs() && varTypeIsFloating(argNode))
{
- regNumber targetReg = comp->getCallArgIntRegister(argReg);
- info.setInternalIntCount(info.internalIntCount + 1);
- info.addInternalCandidates(m_lsra, genRegMask(targetReg));
+ *callHasFloatRegArgs = true;
+
+ regNumber argReg = argNode->gtRegNum;
+ regNumber targetReg = compiler->getCallArgIntRegister(argReg);
+ call->gtLsraInfo.setInternalIntCount(call->gtLsraInfo.internalIntCount + 1);
+ call->gtLsraInfo.addInternalCandidates(this, genRegMask(targetReg));
}
#endif // FEATURE_VARARG
}
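A sketch of the slot pairing that getCallArgIntRegister presumably implements for the win-x64 varargs rule described in the notes (the XMM-to-GPR mapping here is an assumption based on the ABI's fixed argument slots, not taken from this diff):

    enum RegNumber { REG_XMM0, REG_XMM1, REG_XMM2, REG_XMM3,
                     REG_RCX, REG_RDX, REG_R8, REG_R9, REG_NA };

    RegNumber callArgIntRegister(RegNumber floatArgReg)
    {
        switch (floatArgReg)
        {
            case REG_XMM0: return REG_RCX; // argument slot 0
            case REG_XMM1: return REG_RDX; // argument slot 1
            case REG_XMM2: return REG_R8;  // argument slot 2
            case REG_XMM3: return REG_R9;  // argument slot 3
            default:       return REG_NA;
        }
    }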
@@ -1121,14 +1118,13 @@ void Lowering::TreeNodeInfoInitPutArgReg(
// Return Value:
// None.
//
-void Lowering::TreeNodeInfoInitCall(GenTreeCall* call)
+void LinearScan::TreeNodeInfoInitCall(GenTreeCall* call)
{
TreeNodeInfo* info = &(call->gtLsraInfo);
- LinearScan* l = m_lsra;
- Compiler* compiler = comp;
bool hasMultiRegRetVal = false;
ReturnTypeDesc* retTypeDesc = nullptr;
+ assert(!call->isContained());
info->srcCount = 0;
if (call->TypeGet() != TYP_VOID)
{
@@ -1141,43 +1137,36 @@ void Lowering::TreeNodeInfoInitCall(GenTreeCall* call)
}
else
{
- info->dstCount = 1;
+ assert(info->dstCount == 1);
}
}
else
{
- info->dstCount = 0;
+ assert(info->dstCount == 0);
}
GenTree* ctrlExpr = call->gtControlExpr;
if (call->gtCallType == CT_INDIRECT)
{
- // either gtControlExpr != null or gtCallAddr != null.
- // Both cannot be non-null at the same time.
- assert(ctrlExpr == nullptr);
- assert(call->gtCallAddr != nullptr);
ctrlExpr = call->gtCallAddr;
-
-#ifdef _TARGET_X86_
- // Fast tail calls aren't currently supported on x86, but if they ever are, the code
- // below that handles indirect VSD calls will need to be fixed.
- assert(!call->IsFastTailCall() || !call->IsVirtualStub());
-#endif // _TARGET_X86_
}
// set reg requirements on call target represented as control sequence.
if (ctrlExpr != nullptr)
{
- // we should never see a gtControlExpr whose type is void.
- assert(ctrlExpr->TypeGet() != TYP_VOID);
-
- // call can take a Rm op on x64
-
// In case of fast tail implemented as jmp, make sure that gtControlExpr is
// computed into a register.
- if (!call->IsFastTailCall())
+ if (call->IsFastTailCall())
{
+ {
+ // Fast tail call - make sure that call target is always computed in RAX
+ // so that epilog sequence can generate "jmp rax" to achieve fast tail call.
+ ctrlExpr->gtLsraInfo.setSrcCandidates(this, RBM_RAX);
+ }
+ }
#ifdef _TARGET_X86_
+ else
+ {
// On x86, we need to generate a very specific pattern for indirect VSD calls:
//
// 3-byte nop
@@ -1187,24 +1176,11 @@ void Lowering::TreeNodeInfoInitCall(GenTreeCall* call)
// sure that the call target address is computed into EAX in this case.
if (call->IsVirtualStub() && (call->gtCallType == CT_INDIRECT))
{
- assert(ctrlExpr->isIndir());
-
- ctrlExpr->gtGetOp1()->gtLsraInfo.setSrcCandidates(l, RBM_VIRTUAL_STUB_TARGET);
- MakeSrcContained(call, ctrlExpr);
- }
- else
-#endif // _TARGET_X86_
- if (ctrlExpr->isIndir())
- {
- MakeSrcContained(call, ctrlExpr);
+ assert(ctrlExpr->isIndir() && ctrlExpr->isContained());
+ ctrlExpr->gtGetOp1()->gtLsraInfo.setSrcCandidates(this, RBM_VIRTUAL_STUB_TARGET);
}
}
- else
- {
- // Fast tail call - make sure that call target is always computed in RAX
- // so that epilog sequence can generate "jmp rax" to achieve fast tail call.
- ctrlExpr->gtLsraInfo.setSrcCandidates(l, RBM_RAX);
- }
+#endif // _TARGET_X86_
info->srcCount += GetOperandSourceCount(ctrlExpr);
}
@@ -1214,7 +1190,7 @@ void Lowering::TreeNodeInfoInitCall(GenTreeCall* call)
// the individual specific registers will have no effect.
if (call->IsVarargs())
{
- info->setInternalCandidates(l, RBM_NONE);
+ info->setInternalCandidates(this, RBM_NONE);
}
RegisterType registerType = call->TypeGet();
@@ -1228,55 +1204,37 @@ void Lowering::TreeNodeInfoInitCall(GenTreeCall* call)
// The x86 CORINFO_HELP_INIT_PINVOKE_FRAME helper uses a custom calling convention that returns with
// TCB in REG_PINVOKE_TCB. AMD64/ARM64 use the standard calling convention. fgMorphCall() sets the
// correct argument registers.
- info->setDstCandidates(l, RBM_PINVOKE_TCB);
+ info->setDstCandidates(this, RBM_PINVOKE_TCB);
}
else
#endif // _TARGET_X86_
if (hasMultiRegRetVal)
{
assert(retTypeDesc != nullptr);
- info->setDstCandidates(l, retTypeDesc->GetABIReturnRegs());
+ info->setDstCandidates(this, retTypeDesc->GetABIReturnRegs());
}
else if (varTypeIsFloating(registerType))
{
#ifdef _TARGET_X86_
// The return value will be on the X87 stack, and we will need to move it.
- info->setDstCandidates(l, l->allRegs(registerType));
+ info->setDstCandidates(this, allRegs(registerType));
#else // !_TARGET_X86_
- info->setDstCandidates(l, RBM_FLOATRET);
+ info->setDstCandidates(this, RBM_FLOATRET);
#endif // !_TARGET_X86_
}
else if (registerType == TYP_LONG)
{
- info->setDstCandidates(l, RBM_LNGRET);
+ info->setDstCandidates(this, RBM_LNGRET);
}
else
{
- info->setDstCandidates(l, RBM_INTRET);
+ info->setDstCandidates(this, RBM_INTRET);
}
// number of args to a call =
// callRegArgs + (callargs - placeholders, setup, etc)
// there is an explicit thisPtr but it is redundant
- // If there is an explicit this pointer, we don't want that node to produce anything
- // as it is redundant
- if (call->gtCallObjp != nullptr)
- {
- GenTreePtr thisPtrNode = call->gtCallObjp;
-
- if (thisPtrNode->gtOper == GT_PUTARG_REG)
- {
- l->clearOperandCounts(thisPtrNode);
- thisPtrNode->SetContained();
- l->clearDstCount(thisPtrNode->gtOp.gtOp1);
- }
- else
- {
- l->clearDstCount(thisPtrNode);
- }
- }
-
bool callHasFloatRegArgs = false;
bool isVarArgs = call->IsVarargs();
@@ -1293,19 +1251,37 @@ void Lowering::TreeNodeInfoInitCall(GenTreeCall* call)
// - a field list
// - a put arg
//
- // Note that this property is statically checked by Lowering::CheckBlock.
+ // Note that this property is statically checked by LinearScan::CheckBlock.
GenTreePtr argNode = list->Current();
+ // Each register argument corresponds to one source.
+ if (argNode->OperIsPutArgReg())
+ {
+ info->srcCount++;
+ HandleFloatVarArgs(call, argNode, &callHasFloatRegArgs);
+ }
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ else if (argNode->OperGet() == GT_FIELD_LIST)
+ {
+ for (GenTreeFieldList* entry = argNode->AsFieldList(); entry != nullptr; entry = entry->Rest())
+ {
+ assert(entry->Current()->OperIsPutArgReg());
+ info->srcCount++;
+ HandleFloatVarArgs(call, argNode, &callHasFloatRegArgs);
+ }
+ }
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+
+#ifdef DEBUG
+ // In DEBUG only, check validity with respect to the arg table entry.
+
fgArgTabEntryPtr curArgTabEntry = compiler->gtArgEntryByNode(call, argNode);
assert(curArgTabEntry);
if (curArgTabEntry->regNum == REG_STK)
{
// late arg that is not passed in a register
- DISPNODE(argNode);
assert(argNode->gtOper == GT_PUTARG_STK);
- argNode->gtLsraInfo.srcCount = 1;
- argNode->gtLsraInfo.dstCount = 0;
#ifdef FEATURE_PUT_STRUCT_ARG_STK
// If the node is TYP_STRUCT and it is put on stack with
@@ -1316,35 +1292,33 @@ void Lowering::TreeNodeInfoInitCall(GenTreeCall* call)
if (argNode->TypeGet() == TYP_STRUCT)
{
assert(argNode->gtOp.gtOp1 != nullptr && argNode->gtOp.gtOp1->OperGet() == GT_OBJ);
- argNode->gtOp.gtOp1->gtLsraInfo.dstCount = 0;
- argNode->gtLsraInfo.srcCount = 0;
+ assert(argNode->gtLsraInfo.srcCount == 0);
}
#endif // FEATURE_PUT_STRUCT_ARG_STK
-
continue;
}
-
#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
if (argNode->OperGet() == GT_FIELD_LIST)
{
- argNode->SetContained();
+ assert(argNode->isContained());
assert(varTypeIsStruct(argNode) || curArgTabEntry->isStruct);
- unsigned eightbyte = 0;
+ int i = 0;
for (GenTreeFieldList* entry = argNode->AsFieldList(); entry != nullptr; entry = entry->Rest())
{
- const regNumber argReg = eightbyte == 0 ? curArgTabEntry->regNum : curArgTabEntry->otherRegNum;
- TreeNodeInfoInitPutArgReg(entry->Current()->AsUnOp(), argReg, *info, isVarArgs, &callHasFloatRegArgs);
-
- eightbyte++;
+ const regNumber argReg = (i == 0) ? curArgTabEntry->regNum : curArgTabEntry->otherRegNum;
+ assert(entry->Current()->gtRegNum == argReg);
+ assert(i < 2);
+ i++;
}
}
else
#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
{
- TreeNodeInfoInitPutArgReg(argNode->AsUnOp(), curArgTabEntry->regNum, *info, isVarArgs,
- &callHasFloatRegArgs);
+ const regNumber argReg = curArgTabEntry->regNum;
+ assert(argNode->gtRegNum == argReg);
}
+#endif // DEBUG
}
// Now, count stack args
@@ -1361,41 +1335,11 @@ void Lowering::TreeNodeInfoInitCall(GenTreeCall* call)
if (!(args->gtFlags & GTF_LATE_ARG))
{
TreeNodeInfo* argInfo = &(arg->gtLsraInfo);
- if (argInfo->dstCount != 0)
+ if ((argInfo->dstCount != 0) && !arg->IsArgPlaceHolderNode() && !arg->isContained())
{
argInfo->isLocalDefUse = true;
}
-
- // If the child of GT_PUTARG_STK is a constant, we don't need a register to
- // move it to memory (stack location).
- //
- // On AMD64, we don't want to make 0 contained, because we can generate smaller code
- // by zeroing a register and then storing it. E.g.:
- // xor rdx, rdx
- // mov gword ptr [rsp+28H], rdx
- // is 2 bytes smaller than:
- // mov gword ptr [rsp+28H], 0
- //
- // On x86, we push stack arguments; we don't use 'mov'. So:
- // push 0
- // is 1 byte smaller than:
- // xor rdx, rdx
- // push rdx
-
- argInfo->dstCount = 0;
- if (arg->gtOper == GT_PUTARG_STK)
- {
- GenTree* op1 = arg->gtOp.gtOp1;
- if (IsContainableImmed(arg, op1)
-#if defined(_TARGET_AMD64_)
- && !op1->IsIntegralConst(0)
-#endif // _TARGET_AMD64_
- )
- {
- MakeSrcContained(arg, op1);
- arg->gtLsraInfo.srcCount--;
- }
- }
+ assert(argInfo->dstCount == 0);
}
args = args->gtOp.gtOp2;
}
@@ -1408,7 +1352,7 @@ void Lowering::TreeNodeInfoInitCall(GenTreeCall* call)
// Don't assign the call target to any of the argument registers because
// we will use them to also pass floating point arguments as required
// by Amd64 ABI.
- ctrlExpr->gtLsraInfo.setSrcCandidates(l, l->allRegs(TYP_INT) & ~(RBM_ARG_REGS));
+ ctrlExpr->gtLsraInfo.setSrcCandidates(this, allRegs(TYP_INT) & ~(RBM_ARG_REGS));
}
#endif // !FEATURE_VARARG
}
@@ -1422,19 +1366,17 @@ void Lowering::TreeNodeInfoInitCall(GenTreeCall* call)
// Return Value:
// None.
//
-void Lowering::TreeNodeInfoInitBlockStore(GenTreeBlk* blkNode)
+void LinearScan::TreeNodeInfoInitBlockStore(GenTreeBlk* blkNode)
{
- GenTree* dstAddr = blkNode->Addr();
- unsigned size = blkNode->gtBlkSize;
- GenTree* source = blkNode->Data();
- LinearScan* l = m_lsra;
- Compiler* compiler = comp;
+ GenTree* dstAddr = blkNode->Addr();
+ unsigned size = blkNode->gtBlkSize;
+ GenTree* source = blkNode->Data();
// Sources are dest address, initVal or source.
// We may require an additional source or temp register for the size.
- blkNode->gtLsraInfo.srcCount = 0;
- blkNode->gtLsraInfo.dstCount = 0;
- blkNode->gtLsraInfo.setInternalCandidates(l, RBM_NONE);
+ blkNode->gtLsraInfo.srcCount = GetOperandSourceCount(dstAddr);
+ assert(blkNode->gtLsraInfo.dstCount == 0);
+ blkNode->gtLsraInfo.setInternalCandidates(this, RBM_NONE);
GenTreePtr srcAddrOrFill = nullptr;
bool isInitBlk = blkNode->OperIsInitBlkOp();
@@ -1447,10 +1389,14 @@ void Lowering::TreeNodeInfoInitBlockStore(GenTreeBlk* blkNode)
GenTree* initVal = source;
if (initVal->OperIsInitVal())
{
- initVal->SetContained();
+ assert(initVal->isContained());
initVal = initVal->gtGetOp1();
}
srcAddrOrFill = initVal;
+ if (!initVal->isContained())
+ {
+ blkNode->gtLsraInfo.srcCount++;
+ }
switch (blkNode->gtBlkOpKind)
{
@@ -1458,22 +1404,12 @@ void Lowering::TreeNodeInfoInitBlockStore(GenTreeBlk* blkNode)
assert(initVal->IsCnsIntOrI());
if (size >= XMM_REGSIZE_BYTES)
{
- // Reserve an XMM register to fill it with
- // a pack of 16 init value constants.
- ssize_t fill = initVal->gtIntCon.gtIconVal & 0xFF;
+ // Reserve an XMM register to fill it with a pack of 16 init value constants.
blkNode->gtLsraInfo.internalFloatCount = 1;
- blkNode->gtLsraInfo.setInternalCandidates(l, l->internalFloatRegCandidates());
- if ((fill == 0) && ((size & 0xf) == 0))
- {
- MakeSrcContained(blkNode, initVal);
- }
- // Use an XMM register to fill with constants; it's an AVX instruction, so set the flags.
+ blkNode->gtLsraInfo.setInternalCandidates(this, internalFloatRegCandidates());
+                    // Use an XMM register to fill with constants; it's an AVX instruction, so set the flag
SetContainsAVXFlags();
}
- if (!initVal->isContained())
- {
- blkNode->gtLsraInfo.srcCount++;
- }
#ifdef _TARGET_X86_
if ((size & 1) != 0)
{
@@ -1491,7 +1427,6 @@ void Lowering::TreeNodeInfoInitBlockStore(GenTreeBlk* blkNode)
// a) The memory address to be in RDI.
// b) The fill value has to be in RAX.
// c) The buffer size will go in RCX.
- blkNode->gtLsraInfo.srcCount++;
dstAddrRegMask = RBM_RDI;
srcAddrOrFill = initVal;
sourceRegMask = RBM_RAX;
@@ -1501,7 +1436,6 @@ void Lowering::TreeNodeInfoInitBlockStore(GenTreeBlk* blkNode)
case GenTreeBlk::BlkOpKindHelper:
#ifdef _TARGET_AMD64_
// The helper follows the regular AMD64 ABI.
- blkNode->gtLsraInfo.srcCount++;
dstAddrRegMask = RBM_ARG_0;
sourceRegMask = RBM_ARG_1;
blkSizeRegMask = RBM_ARG_2;
@@ -1521,23 +1455,7 @@ void Lowering::TreeNodeInfoInitBlockStore(GenTreeBlk* blkNode)
// CopyObj or CopyBlk
if (source->gtOper == GT_IND)
{
- srcAddrOrFill = blkNode->Data()->gtGetOp1();
- // We're effectively setting source as contained, but can't call MakeSrcContained, because the
- // "inheritance" of the srcCount is to a child not a parent - it would "just work" but could be misleading.
- // If srcAddr is already non-contained, we don't need to change it.
- if (srcAddrOrFill->gtLsraInfo.getDstCount() == 0)
- {
- srcAddrOrFill->gtLsraInfo.setDstCount(1);
- srcAddrOrFill->gtLsraInfo.setSrcCount(source->gtLsraInfo.srcCount);
- }
- m_lsra->clearOperandCounts(source);
- source->SetContained();
- source->AsIndir()->Addr()->ClearContained();
- }
- else if (!source->IsMultiRegCall() && !source->OperIsSIMD())
- {
- assert(source->IsLocal());
- MakeSrcContained(blkNode, source);
+ srcAddrOrFill = source->gtGetOp1();
}
if (blkNode->OperGet() == GT_STORE_OBJ)
{
@@ -1564,7 +1482,7 @@ void Lowering::TreeNodeInfoInitBlockStore(GenTreeBlk* blkNode)
if ((size & (XMM_REGSIZE_BYTES - 1)) != 0)
{
blkNode->gtLsraInfo.internalIntCount++;
- regMaskTP regMask = l->allRegs(TYP_INT);
+ regMaskTP regMask = allRegs(TYP_INT);
#ifdef _TARGET_X86_
if ((size & 1) != 0)
@@ -1572,7 +1490,7 @@ void Lowering::TreeNodeInfoInitBlockStore(GenTreeBlk* blkNode)
regMask &= ~RBM_NON_BYTE_REGS;
}
#endif
- blkNode->gtLsraInfo.setInternalCandidates(l, regMask);
+ blkNode->gtLsraInfo.setInternalCandidates(this, regMask);
}
if (size >= XMM_REGSIZE_BYTES)
@@ -1581,23 +1499,11 @@ void Lowering::TreeNodeInfoInitBlockStore(GenTreeBlk* blkNode)
// reserve an XMM register to use it for a
// series of 16-byte loads and stores.
blkNode->gtLsraInfo.internalFloatCount = 1;
- blkNode->gtLsraInfo.addInternalCandidates(l, l->internalFloatRegCandidates());
+ blkNode->gtLsraInfo.addInternalCandidates(this, internalFloatRegCandidates());
// Uses XMM reg for load and store and hence check to see whether AVX instructions
// are used for codegen, set ContainsAVX flag
SetContainsAVXFlags();
}
- // If src or dst are on stack, we don't have to generate the address
- // into a register because it's just some constant+SP.
- if ((srcAddrOrFill != nullptr) && srcAddrOrFill->OperIsLocalAddr())
- {
- MakeSrcContained(blkNode, srcAddrOrFill);
- }
-
- if (dstAddr->OperIsLocalAddr())
- {
- MakeSrcContained(blkNode, dstAddr);
- }
-
break;
case GenTreeBlk::BlkOpKindRepInstr:
@@ -1630,21 +1536,20 @@ void Lowering::TreeNodeInfoInitBlockStore(GenTreeBlk* blkNode)
blkNode->gtLsraInfo.srcCount += GetOperandSourceCount(source);
}
- blkNode->gtLsraInfo.srcCount += GetOperandSourceCount(dstAddr);
if (dstAddrRegMask != RBM_NONE)
{
- dstAddr->gtLsraInfo.setSrcCandidates(l, dstAddrRegMask);
+ dstAddr->gtLsraInfo.setSrcCandidates(this, dstAddrRegMask);
}
if (sourceRegMask != RBM_NONE)
{
if (srcAddrOrFill != nullptr)
{
- srcAddrOrFill->gtLsraInfo.setSrcCandidates(l, sourceRegMask);
+ srcAddrOrFill->gtLsraInfo.setSrcCandidates(this, sourceRegMask);
}
else
{
// This is a local source; we'll use a temp register for its address.
- blkNode->gtLsraInfo.addInternalCandidates(l, sourceRegMask);
+ blkNode->gtLsraInfo.addInternalCandidates(this, sourceRegMask);
blkNode->gtLsraInfo.internalIntCount++;
}
}
@@ -1653,16 +1558,16 @@ void Lowering::TreeNodeInfoInitBlockStore(GenTreeBlk* blkNode)
if (size != 0)
{
// Reserve a temp register for the block size argument.
- blkNode->gtLsraInfo.addInternalCandidates(l, blkSizeRegMask);
+ blkNode->gtLsraInfo.addInternalCandidates(this, blkSizeRegMask);
blkNode->gtLsraInfo.internalIntCount++;
}
else
{
// The block size argument is a third argument to GT_STORE_DYN_BLK
- noway_assert(blkNode->gtOper == GT_STORE_DYN_BLK);
+ assert(blkNode->gtOper == GT_STORE_DYN_BLK);
blkNode->gtLsraInfo.setSrcCount(3);
GenTree* blockSize = blkNode->AsDynBlk()->gtDynamicSize;
- blockSize->gtLsraInfo.setSrcCandidates(l, blkSizeRegMask);
+ blockSize->gtLsraInfo.setSrcCandidates(this, blkSizeRegMask);
}
}
}
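The fixed-register contracts TreeNodeInfoInitBlockStore wires up, gathered into one table (toy mask values): rep stos wants the destination in RDI, the fill value in RAX and the count in RCX, while the AMD64 helper path uses the first three ABI argument registers:

    using regMaskTP = unsigned;
    constexpr regMaskTP RBM_RAX = 0x01, RBM_RCX = 0x02, RBM_RDI = 0x04,
                        RBM_ARG_0 = 0x08, RBM_ARG_1 = 0x10, RBM_ARG_2 = 0x20;

    struct BlockOpRegs { regMaskTP dstAddr, source, blkSize; };

    constexpr BlockOpRegs repStosRegs() { return { RBM_RDI, RBM_RAX, RBM_RCX }; }
    constexpr BlockOpRegs helperRegs()  { return { RBM_ARG_0, RBM_ARG_1, RBM_ARG_2 }; }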
@@ -1677,11 +1582,11 @@ void Lowering::TreeNodeInfoInitBlockStore(GenTreeBlk* blkNode)
// Return Value:
// None.
//
-void Lowering::TreeNodeInfoInitPutArgStk(GenTreePutArgStk* putArgStk)
+void LinearScan::TreeNodeInfoInitPutArgStk(GenTreePutArgStk* putArgStk)
{
TreeNodeInfo* info = &(putArgStk->gtLsraInfo);
- LinearScan* l = m_lsra;
info->srcCount = 0;
+ assert(info->dstCount == 0);
if (putArgStk->gtOp1->gtOper == GT_FIELD_LIST)
{
@@ -1699,50 +1604,15 @@ void Lowering::TreeNodeInfoInitPutArgStk(GenTreePutArgStk* putArgStk)
const unsigned fieldOffset = current->gtFieldOffset;
assert(fieldType != TYP_LONG);
- // For x86 we must mark all integral fields as contained or reg-optional, and handle them
- // accordingly in code generation, since we may have up to 8 fields, which cannot all be in
- // registers to be consumed atomically by the call.
- if (varTypeIsIntegralOrI(fieldNode))
- {
- if (fieldNode->OperGet() == GT_LCL_VAR)
- {
- LclVarDsc* varDsc = &(comp->lvaTable[fieldNode->AsLclVarCommon()->gtLclNum]);
- if (varDsc->lvTracked && !varDsc->lvDoNotEnregister)
- {
- SetRegOptional(fieldNode);
- }
- else
- {
- MakeSrcContained(putArgStk, fieldNode);
- }
- }
- else if (fieldNode->IsIntCnsFitsInI32())
- {
- MakeSrcContained(putArgStk, fieldNode);
- }
- else
- {
- // For the case where we cannot directly push the value, if we run out of registers,
- // it would be better to defer computation until we are pushing the arguments rather
- // than spilling, but this situation is not all that common, as most cases of promoted
- // structs do not have a large number of fields, and of those most are lclVars or
- // copy-propagated constants.
- SetRegOptional(fieldNode);
- }
- }
#if defined(FEATURE_SIMD)
- // Note that we need to check the GT_FIELD_LIST type, not the fieldType. This is because the
+ // Note that we need to check the GT_FIELD_LIST type, not 'fieldType'. This is because the
// GT_FIELD_LIST will be TYP_SIMD12 whereas the fieldType might be TYP_SIMD16 for lclVar, where
// we "round up" to 16.
- else if (current->gtFieldType == TYP_SIMD12)
+ if (current->gtFieldType == TYP_SIMD12)
{
needsSimdTemp = true;
}
#endif // defined(FEATURE_SIMD)
- else
- {
- assert(varTypeIsFloating(fieldNode) || varTypeIsSIMD(fieldNode));
- }
// We can treat as a slot any field that is stored at a slot boundary, where the previous
// field is not in the same slot. (Note that we store the fields in reverse order.)
@@ -1771,27 +1641,27 @@ void Lowering::TreeNodeInfoInitPutArgStk(GenTreePutArgStk* putArgStk)
}
}
- info->dstCount = 0;
-
if (putArgStk->gtPutArgStkKind == GenTreePutArgStk::Kind::Push)
{
// If any of the fields cannot be stored with an actual push, we may need a temporary
// register to load the value before storing it to the stack location.
info->internalIntCount = 1;
- regMaskTP regMask = l->allRegs(TYP_INT);
+ regMaskTP regMask = allRegs(TYP_INT);
if (needsByteTemp)
{
regMask &= ~RBM_NON_BYTE_REGS;
}
- info->setInternalCandidates(l, regMask);
+ info->setInternalCandidates(this, regMask);
}
#if defined(FEATURE_SIMD)
// For PutArgStk of a TYP_SIMD12, we need a SIMD temp register.
if (needsSimdTemp)
{
+ info->srcCount = putArgStk->gtOp1->gtLsraInfo.dstCount;
+ assert(info->dstCount == 0);
info->internalFloatCount += 1;
- info->addInternalCandidates(l, l->allSIMDRegs());
+ info->addInternalCandidates(this, allSIMDRegs());
}
#endif // defined(FEATURE_SIMD)
@@ -1804,9 +1674,8 @@ void Lowering::TreeNodeInfoInitPutArgStk(GenTreePutArgStk* putArgStk)
if (putArgStk->TypeGet() == TYP_SIMD12)
{
info->srcCount = putArgStk->gtOp1->gtLsraInfo.dstCount;
- info->dstCount = 0;
info->internalFloatCount = 1;
- info->setInternalCandidates(l, l->allSIMDRegs());
+ info->setInternalCandidates(this, allSIMDRegs());
return;
}
#endif // defined(FEATURE_SIMD) && defined(_TARGET_X86_)
@@ -1821,19 +1690,7 @@ void Lowering::TreeNodeInfoInitPutArgStk(GenTreePutArgStk* putArgStk)
GenTreePtr src = putArgStk->gtOp1;
GenTreePtr srcAddr = nullptr;
- bool haveLocalAddr = false;
- if ((src->OperGet() == GT_OBJ) || (src->OperGet() == GT_IND))
- {
- srcAddr = src->gtOp.gtOp1;
- assert(srcAddr != nullptr);
- haveLocalAddr = srcAddr->OperIsLocalAddr();
- }
- else
- {
- assert(varTypeIsSIMD(putArgStk));
- }
-
- info->dstCount = 0;
+ info->srcCount = GetOperandSourceCount(src);
// If we have a buffer between XMM_REGSIZE_BYTES and CPBLK_UNROLL_LIMIT bytes, we'll use SSE2.
// Structs and buffer with sizes <= CPBLK_UNROLL_LIMIT bytes are occurring in more than 95% of
@@ -1852,7 +1709,7 @@ void Lowering::TreeNodeInfoInitPutArgStk(GenTreePutArgStk* putArgStk)
if ((putArgStk->gtNumberReferenceSlots == 0) && (size & (XMM_REGSIZE_BYTES - 1)) != 0)
{
info->internalIntCount++;
- regMaskTP regMask = l->allRegs(TYP_INT);
+ regMaskTP regMask = allRegs(TYP_INT);
#ifdef _TARGET_X86_
if ((size % 2) != 0)
@@ -1860,7 +1717,7 @@ void Lowering::TreeNodeInfoInitPutArgStk(GenTreePutArgStk* putArgStk)
regMask &= ~RBM_NON_BYTE_REGS;
}
#endif
- info->setInternalCandidates(l, regMask);
+ info->setInternalCandidates(this, regMask);
}
#ifdef _TARGET_X86_
@@ -1873,30 +1730,19 @@ void Lowering::TreeNodeInfoInitPutArgStk(GenTreePutArgStk* putArgStk)
// or larger than or equal to 8 bytes on x86, reserve an XMM register to use it for a
// series of 16-byte loads and stores.
info->internalFloatCount = 1;
- info->addInternalCandidates(l, l->internalFloatRegCandidates());
+ info->addInternalCandidates(this, internalFloatRegCandidates());
SetContainsAVXFlags();
}
break;
case GenTreePutArgStk::Kind::RepInstr:
info->internalIntCount += 3;
- info->setInternalCandidates(l, (RBM_RDI | RBM_RCX | RBM_RSI));
+ info->setInternalCandidates(this, (RBM_RDI | RBM_RCX | RBM_RSI));
break;
default:
unreached();
}
-
- // Always mark the OBJ and ADDR as contained trees by the putarg_stk. The codegen will deal with this tree.
- MakeSrcContained(putArgStk, src);
-
- if (haveLocalAddr)
- {
- // If the source address is the address of a lclVar, make the source address contained to avoid unnecessary
- // copies.
- MakeSrcContained(putArgStk, srcAddr);
- }
- info->srcCount = GetOperandSourceCount(src);
}
#endif // FEATURE_PUT_STRUCT_ARG_STK
@@ -1909,15 +1755,12 @@ void Lowering::TreeNodeInfoInitPutArgStk(GenTreePutArgStk* putArgStk)
// Return Value:
// None.
//
-void Lowering::TreeNodeInfoInitLclHeap(GenTree* tree)
+void LinearScan::TreeNodeInfoInitLclHeap(GenTree* tree)
{
- ContainCheckLclHeap(tree->AsOp());
- TreeNodeInfo* info = &(tree->gtLsraInfo);
- LinearScan* l = m_lsra;
- Compiler* compiler = comp;
+ TreeNodeInfo* info = &(tree->gtLsraInfo);
info->srcCount = 1;
- info->dstCount = 1;
+ assert(info->dstCount == 1);
// Need a variable number of temp regs (see genLclHeap() in codegenamd64.cpp):
// Here '-' means don't care.
@@ -2007,18 +1850,16 @@ void Lowering::TreeNodeInfoInitLclHeap(GenTree* tree)
// Return Value:
// None.
//
-void Lowering::TreeNodeInfoInitModDiv(GenTree* tree)
+void LinearScan::TreeNodeInfoInitModDiv(GenTree* tree)
{
- ContainCheckDivOrMod(tree->AsOp());
TreeNodeInfo* info = &(tree->gtLsraInfo);
- LinearScan* l = m_lsra;
GenTree* op1 = tree->gtGetOp1();
GenTree* op2 = tree->gtGetOp2();
info->srcCount = GetOperandSourceCount(op1);
info->srcCount += GetOperandSourceCount(op2);
- info->dstCount = 1;
+ assert(info->dstCount == 1);
if (varTypeIsFloating(tree->TypeGet()))
{
@@ -2033,13 +1874,13 @@ void Lowering::TreeNodeInfoInitModDiv(GenTree* tree)
{
// We are interested in just the remainder.
// RAX is used as a trashable register during computation of remainder.
- info->setDstCandidates(l, RBM_RDX);
+ info->setDstCandidates(this, RBM_RDX);
}
else
{
// We are interested in just the quotient.
// RDX gets used as trashable register during computation of quotient
- info->setDstCandidates(l, RBM_RAX);
+ info->setDstCandidates(this, RBM_RAX);
}
#ifdef _TARGET_X86_
@@ -2056,21 +1897,21 @@ void Lowering::TreeNodeInfoInitModDiv(GenTree* tree)
// This situation also requires an internal register.
info->internalIntCount = 1;
- info->setInternalCandidates(l, l->allRegs(TYP_INT));
+ info->setInternalCandidates(this, allRegs(TYP_INT));
- loVal->gtLsraInfo.setSrcCandidates(l, RBM_EAX);
- hiVal->gtLsraInfo.setSrcCandidates(l, RBM_EDX);
+ loVal->gtLsraInfo.setSrcCandidates(this, RBM_EAX);
+ hiVal->gtLsraInfo.setSrcCandidates(this, RBM_EDX);
}
else
#endif
{
// If possible would like to have op1 in RAX to avoid a register move
- op1->gtLsraInfo.setSrcCandidates(l, RBM_RAX);
+ op1->gtLsraInfo.setSrcCandidates(this, RBM_RAX);
}
- if (op2->IsRegOptional())
+ if (!op2->isContained())
{
- op2->gtLsraInfo.setSrcCandidates(l, l->allRegs(TYP_INT) & ~(RBM_RAX | RBM_RDX));
+ op2->gtLsraInfo.setSrcCandidates(this, allRegs(TYP_INT) & ~(RBM_RAX | RBM_RDX));
}
}
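TreeNodeInfoInitModDiv encodes the x86 idiv contract: the dividend is consumed from RDX:RAX, the quotient comes back in RAX and the remainder in RDX, so the divisor must dodge both. A toy restatement of the candidate choices above:

    using regMaskTP = unsigned;
    constexpr regMaskTP RBM_RAX = 0x1, RBM_RDX = 0x2;

    regMaskTP divModDstCandidates(bool wantRemainder)
    {
        return wantRemainder ? RBM_RDX : RBM_RAX; // GT_MOD/GT_UMOD vs GT_DIV/GT_UDIV
    }

    regMaskTP divisorCandidates(regMaskTP allInt)
    {
        return allInt & ~(RBM_RAX | RBM_RDX); // for a non-contained op2
    }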
@@ -2083,12 +1924,9 @@ void Lowering::TreeNodeInfoInitModDiv(GenTree* tree)
// Return Value:
// None.
//
-void Lowering::TreeNodeInfoInitIntrinsic(GenTree* tree)
+void LinearScan::TreeNodeInfoInitIntrinsic(GenTree* tree)
{
- ContainCheckIntrinsic(tree->AsOp());
-
TreeNodeInfo* info = &(tree->gtLsraInfo);
- LinearScan* l = m_lsra;
// Both operand and its result must be of floating point type.
GenTree* op1 = tree->gtGetOp1();
@@ -2096,7 +1934,7 @@ void Lowering::TreeNodeInfoInitIntrinsic(GenTree* tree)
assert(op1->TypeGet() == tree->TypeGet());
info->srcCount = GetOperandSourceCount(op1);
- info->dstCount = 1;
+ assert(info->dstCount == 1);
switch (tree->gtIntrinsic.gtIntrinsicId)
{
@@ -2119,7 +1957,7 @@ void Lowering::TreeNodeInfoInitIntrinsic(GenTree* tree)
if (tree->gtIntrinsic.gtIntrinsicId == CORINFO_INTRINSIC_Abs)
{
info->internalFloatCount = 1;
- info->setInternalCandidates(l, l->internalFloatRegCandidates());
+ info->setInternalCandidates(this, internalFloatRegCandidates());
}
break;
@@ -2149,14 +1987,21 @@ void Lowering::TreeNodeInfoInitIntrinsic(GenTree* tree)
// Return Value:
// None.
-void Lowering::TreeNodeInfoInitSIMD(GenTree* tree)
+void LinearScan::TreeNodeInfoInitSIMD(GenTreeSIMD* simdTree)
{
-    GenTreeSIMD* simdTree = tree->AsSIMD();
-    ContainCheckSIMD(simdTree);
-    TreeNodeInfo* info = &(tree->gtLsraInfo);
-    LinearScan*   lsra = m_lsra;
-    info->dstCount = 1;
+    TreeNodeInfo* info = &(simdTree->gtLsraInfo);
+ // Only SIMDIntrinsicInit can be contained. Among the non-contained cases,
+ // only SIMDIntrinsicOpEquality and SIMDIntrinsicOpInEquality can have a dstCount of 0.
+ if (simdTree->isContained())
+ {
+ assert(simdTree->gtSIMDIntrinsicID == SIMDIntrinsicInit);
+ }
+ else if (info->dstCount != 1)
+ {
+ assert((simdTree->gtSIMDIntrinsicID == SIMDIntrinsicOpEquality) ||
+ (simdTree->gtSIMDIntrinsicID == SIMDIntrinsicOpInEquality));
+ }
SetContainsAVXFlags(true, simdTree->gtSIMDSize);
switch (simdTree->gtSIMDIntrinsicID)
{
@@ -2165,7 +2010,7 @@ void Lowering::TreeNodeInfoInitSIMD(GenTree* tree)
case SIMDIntrinsicInit:
{
- op1 = tree->gtOp.gtOp1;
+ op1 = simdTree->gtOp.gtOp1;
#if !defined(_TARGET_64BIT_)
if (op1->OperGet() == GT_LONG)
@@ -2204,7 +2049,7 @@ void Lowering::TreeNodeInfoInitSIMD(GenTree* tree)
{
// need a temp
info->internalFloatCount = 1;
- info->setInternalCandidates(lsra, lsra->allSIMDRegs());
+ info->setInternalCandidates(this, allSIMDRegs());
info->isInternalRegDelayFree = true;
info->srcCount = 2;
}
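
On 32-bit targets the long operand of SIMDIntrinsicInit arrives as two 32-bit halves, which must be combined in the delay-free SIMD temp before being broadcast. A rough equivalent in intrinsics (illustrative; the JIT emits its own move/unpack sequence):

    #include <emmintrin.h>

    // Illustrative: stitch two 32-bit halves into a 64-bit lane and
    // replicate it across the vector.
    __m128i initVectorLong(int lo, int hi)
    {
        return _mm_set_epi32(hi, lo, hi, lo);
    }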
@@ -2223,7 +2068,7 @@ void Lowering::TreeNodeInfoInitSIMD(GenTree* tree)
// Need an internal register to stitch together all the values into a single vector in a SIMD reg.
info->internalFloatCount = 1;
- info->setInternalCandidates(lsra, lsra->allSIMDRegs());
+ info->setInternalCandidates(this, allSIMDRegs());
}
break;
@@ -2245,7 +2090,7 @@ void Lowering::TreeNodeInfoInitSIMD(GenTree* tree)
            // Must be a Vector<int>, Vector<short>, or Vector<sbyte>
assert(simdTree->gtSIMDBaseType == TYP_INT || simdTree->gtSIMDBaseType == TYP_SHORT ||
simdTree->gtSIMDBaseType == TYP_BYTE);
- assert(comp->getSIMDInstructionSet() >= InstructionSet_SSE3_4);
+ assert(compiler->getSIMDInstructionSet() >= InstructionSet_SSE3_4);
info->srcCount = 1;
break;
@@ -2268,10 +2113,10 @@ void Lowering::TreeNodeInfoInitSIMD(GenTree* tree)
// SSE2 32-bit integer multiplication requires two temp regs
if (simdTree->gtSIMDIntrinsicID == SIMDIntrinsicMul && simdTree->gtSIMDBaseType == TYP_INT &&
- comp->getSIMDInstructionSet() == InstructionSet_SSE2)
+ compiler->getSIMDInstructionSet() == InstructionSet_SSE2)
{
info->internalFloatCount = 2;
- info->setInternalCandidates(lsra, lsra->allSIMDRegs());
+ info->setInternalCandidates(this, allSIMDRegs());
}
break;
@@ -2297,13 +2142,11 @@ void Lowering::TreeNodeInfoInitSIMD(GenTree* tree)
case SIMDIntrinsicOpEquality:
case SIMDIntrinsicOpInEquality:
- info->srcCount = 2;
// On SSE4/AVX, we can generate optimal code for (in)equality
// against zero using ptest. We can safely do this optimization
            // for integral vectors but not for floating-point vectors: ptest
            // compares bit patterns, and +0.0 and -0.0 have different bit
            // patterns even though +0.0 == -0.0.
- op2 = tree->gtGetOp2();
if (simdTree->gtGetOp2()->isContained())
{
info->srcCount = 1;
@@ -2319,7 +2162,20 @@ void Lowering::TreeNodeInfoInitSIMD(GenTree* tree)
// registers reserved are guaranteed to be different from target
// integer register without explicitly specifying.
info->internalFloatCount = 1;
- info->setInternalCandidates(lsra, lsra->allSIMDRegs());
+ info->setInternalCandidates(this, allSIMDRegs());
+ }
+ if (info->isNoRegCompare)
+ {
+ info->dstCount = 0;
+ // Codegen of SIMD (in)Equality uses target integer reg only for setting flags.
+ // A target reg is not needed on AVX when comparing against Vector Zero.
+ // In all other cases we need to reserve an int type internal register if we
+ // don't have a target register on the compare.
+ if (!compiler->canUseAVX() || !simdTree->gtGetOp2()->IsIntegralConstVector(0))
+ {
+ info->internalIntCount = 1;
+ info->addInternalCandidates(this, allRegs(TYP_INT));
+ }
}
break;
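
The flags-only path above works because ptest performs a bitwise test rather than an IEEE comparison, which is also why it is restricted to integral vectors. A minimal sketch of the zero test (assumes SSE4.1; illustrative, not the JIT's actual codegen):

    #include <smmintrin.h>

    // Illustrative: PTEST sets ZF iff (v & v) == 0, i.e. iff every bit of
    // the vector is zero -- an equality-with-zero test that needs no SIMD
    // destination register, only the flags.
    bool isAllZero(__m128i v)
    {
        return _mm_testz_si128(v, v) != 0;
    }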
@@ -2339,24 +2195,25 @@ void Lowering::TreeNodeInfoInitSIMD(GenTree* tree)
// and the need for scratch registers.
if (varTypeIsFloating(simdTree->gtSIMDBaseType))
{
- if ((comp->getSIMDInstructionSet() == InstructionSet_SSE2) ||
+ if ((compiler->getSIMDInstructionSet() == InstructionSet_SSE2) ||
(simdTree->gtOp.gtOp1->TypeGet() == TYP_SIMD32))
{
info->internalFloatCount = 1;
info->isInternalRegDelayFree = true;
- info->setInternalCandidates(lsra, lsra->allSIMDRegs());
+ info->setInternalCandidates(this, allSIMDRegs());
}
// else don't need scratch reg(s).
}
else
{
- assert(simdTree->gtSIMDBaseType == TYP_INT && comp->getSIMDInstructionSet() >= InstructionSet_SSE3_4);
+ assert(simdTree->gtSIMDBaseType == TYP_INT &&
+ compiler->getSIMDInstructionSet() >= InstructionSet_SSE3_4);
            // No need to set isInternalRegDelayFree since targetReg is an
            // int type reg and is guaranteed to be different from xmm/ymm
            // regs.
- info->internalFloatCount = comp->canUseAVX() ? 2 : 1;
- info->setInternalCandidates(lsra, lsra->allSIMDRegs());
+ info->internalFloatCount = compiler->canUseAVX() ? 2 : 1;
+ info->setInternalCandidates(this, allSIMDRegs());
}
info->srcCount = 2;
break;
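
In the floating-point case, the delay-free SIMD temp typically holds intermediate values for a horizontal reduction. A sketch of such a reduction (illustrative, assuming SSE3's haddps; the SSE2 path reserves the temp for an equivalent shuffle-based reduction):

    #include <pmmintrin.h>

    // Illustrative: horizontal reduction of four partial products, the
    // step that motivates the internal SIMD register above.
    float dot4(__m128 a, __m128 b)
    {
        __m128 t = _mm_mul_ps(a, b); // partial products live in a temp
        t = _mm_hadd_ps(t, t);       // pairwise sums
        t = _mm_hadd_ps(t, t);       // final sum in lane 0
        return _mm_cvtss_f32(t);
    }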
@@ -2367,9 +2224,10 @@ void Lowering::TreeNodeInfoInitSIMD(GenTree* tree)
// - the source SIMD struct
// - index (which element to get)
            // The result is the baseType of the SIMD struct.
+ // op1 may be a contained memory op, but if so we will consume its address.
info->srcCount = 0;
- op1 = tree->gtOp.gtOp1;
- op2 = tree->gtOp.gtOp2;
+ op1 = simdTree->gtOp.gtOp1;
+ op2 = simdTree->gtOp.gtOp2;
// op2 may be a contained constant.
if (!op2->isContained())
@@ -2401,13 +2259,13 @@ void Lowering::TreeNodeInfoInitSIMD(GenTree* tree)
info->srcCount++;
if (!op2->IsCnsIntOrI())
{
- (void)comp->getSIMDInitTempVarNum();
+ (void)compiler->getSIMDInitTempVarNum();
}
else if (!varTypeIsFloating(simdTree->gtSIMDBaseType))
{
bool needFloatTemp;
if (varTypeIsSmallInt(simdTree->gtSIMDBaseType) &&
- (comp->getSIMDInstructionSet() == InstructionSet_AVX))
+ (compiler->getSIMDInstructionSet() == InstructionSet_AVX))
{
int byteShiftCnt = (int)op2->AsIntCon()->gtIconVal * genTypeSize(simdTree->gtSIMDBaseType);
needFloatTemp = (byteShiftCnt >= 16);
@@ -2420,7 +2278,7 @@ void Lowering::TreeNodeInfoInitSIMD(GenTree* tree)
if (needFloatTemp)
{
info->internalFloatCount = 1;
- info->setInternalCandidates(lsra, lsra->allSIMDRegs());
+ info->setInternalCandidates(this, allSIMDRegs());
}
}
}
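
A worked instance of the byteShiftCnt test above, assuming a Vector<short> with a constant index on AVX: extracting element 9 gives byteShiftCnt = 9 * 2 = 18 >= 16, so the extraction crosses the 16-byte lane boundary and the internal float register is needed; element 5 gives 5 * 2 = 10, and no temp is required.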
@@ -2434,10 +2292,10 @@ void Lowering::TreeNodeInfoInitSIMD(GenTree* tree)
info->srcCount = 2;
// We need an internal integer register for SSE2 codegen
- if (comp->getSIMDInstructionSet() == InstructionSet_SSE2)
+ if (compiler->getSIMDInstructionSet() == InstructionSet_SSE2)
{
info->internalIntCount = 1;
- info->setInternalCandidates(lsra, lsra->allRegs(TYP_INT));
+ info->setInternalCandidates(this, allRegs(TYP_INT));
}
break;
@@ -2454,7 +2312,7 @@ void Lowering::TreeNodeInfoInitSIMD(GenTree* tree)
info->isInternalRegDelayFree = true;
info->internalIntCount = 1;
info->internalFloatCount = 2;
- info->setInternalCandidates(lsra, lsra->allSIMDRegs() | lsra->allRegs(TYP_INT));
+ info->setInternalCandidates(this, allSIMDRegs() | allRegs(TYP_INT));
}
break;
@@ -2471,7 +2329,7 @@ void Lowering::TreeNodeInfoInitSIMD(GenTree* tree)
// We need an internal register different from targetReg.
info->isInternalRegDelayFree = true;
info->internalFloatCount = 1;
- info->setInternalCandidates(lsra, lsra->allSIMDRegs());
+ info->setInternalCandidates(this, allSIMDRegs());
}
break;
@@ -2481,7 +2339,7 @@ void Lowering::TreeNodeInfoInitSIMD(GenTree* tree)
info->isInternalRegDelayFree = true;
info->srcCount = 1;
info->internalIntCount = 1;
- if (comp->getSIMDInstructionSet() == InstructionSet_AVX)
+ if (compiler->getSIMDInstructionSet() == InstructionSet_AVX)
{
info->internalFloatCount = 2;
}
@@ -2489,7 +2347,7 @@ void Lowering::TreeNodeInfoInitSIMD(GenTree* tree)
{
info->internalFloatCount = 1;
}
- info->setInternalCandidates(lsra, lsra->allSIMDRegs() | lsra->allRegs(TYP_INT));
+ info->setInternalCandidates(this, allSIMDRegs() | allRegs(TYP_INT));
break;
case SIMDIntrinsicConvertToDouble:
@@ -2504,7 +2362,8 @@ void Lowering::TreeNodeInfoInitSIMD(GenTree* tree)
}
else
#endif
- if ((comp->getSIMDInstructionSet() == InstructionSet_AVX) || (simdTree->gtSIMDBaseType == TYP_ULONG))
+ if ((compiler->getSIMDInstructionSet() == InstructionSet_AVX) ||
+ (simdTree->gtSIMDBaseType == TYP_ULONG))
{
info->internalFloatCount = 2;
}
@@ -2512,14 +2371,14 @@ void Lowering::TreeNodeInfoInitSIMD(GenTree* tree)
{
info->internalFloatCount = 1;
}
- info->setInternalCandidates(lsra, lsra->allSIMDRegs() | lsra->allRegs(TYP_INT));
+ info->setInternalCandidates(this, allSIMDRegs() | allRegs(TYP_INT));
break;
case SIMDIntrinsicNarrow:
// We need an internal register different from targetReg.
info->isInternalRegDelayFree = true;
info->srcCount = 2;
- if ((comp->getSIMDInstructionSet() == InstructionSet_AVX) && (simdTree->gtSIMDBaseType != TYP_DOUBLE))
+ if ((compiler->getSIMDInstructionSet() == InstructionSet_AVX) &&
+     (simdTree->gtSIMDBaseType != TYP_DOUBLE))
{
info->internalFloatCount = 2;
}
@@ -2527,7 +2386,7 @@ void Lowering::TreeNodeInfoInitSIMD(GenTree* tree)
{
info->internalFloatCount = 1;
}
- info->setInternalCandidates(lsra, lsra->allSIMDRegs());
+ info->setInternalCandidates(this, allSIMDRegs());
break;
case SIMDIntrinsicShuffleSSE2:
@@ -2563,7 +2422,7 @@ void Lowering::TreeNodeInfoInitSIMD(GenTree* tree)
// Return Value:
// None.
//
-void Lowering::TreeNodeInfoInitCast(GenTree* tree)
+void LinearScan::TreeNodeInfoInitCast(GenTree* tree)
{
TreeNodeInfo* info = &(tree->gtLsraInfo);
@@ -2579,7 +2438,7 @@ void Lowering::TreeNodeInfoInitCast(GenTree* tree)
var_types castOpType = castOp->TypeGet();
info->srcCount = GetOperandSourceCount(castOp);
- info->dstCount = 1;
+ assert(info->dstCount == 1);
if (tree->gtFlags & GTF_UNSIGNED)
{
castOpType = genUnsignedType(castOpType);
@@ -2607,7 +2466,7 @@ void Lowering::TreeNodeInfoInitCast(GenTree* tree)
// Return Value:
// None.
//
-void Lowering::TreeNodeInfoInitGCWriteBarrier(GenTree* tree)
+void LinearScan::TreeNodeInfoInitGCWriteBarrier(GenTree* tree)
{
assert(tree->OperGet() == GT_STOREIND);
@@ -2615,25 +2474,11 @@ void Lowering::TreeNodeInfoInitGCWriteBarrier(GenTree* tree)
GenTreePtr addr = dst->Addr();
GenTreePtr src = dst->Data();
- if (addr->OperGet() == GT_LEA)
- {
- // In the case where we are doing a helper assignment, if the dst
- // is an indir through an lea, we need to actually instantiate the
- // lea in a register
- GenTreeAddrMode* lea = addr->AsAddrMode();
-
- int leaSrcCount = 0;
- if (lea->HasBase())
- {
- leaSrcCount++;
- }
- if (lea->HasIndex())
- {
- leaSrcCount++;
- }
- lea->gtLsraInfo.srcCount = leaSrcCount;
- lea->gtLsraInfo.dstCount = 1;
- }
+ // In the case where we are doing a helper assignment, we need to actually instantiate the
+ // address in a register.
+ assert(!addr->isContained());
+ tree->gtLsraInfo.srcCount = 1 + GetIndirSourceCount(dst);
+ assert(tree->gtLsraInfo.dstCount == 0);
bool useOptimizedWriteBarrierHelper = false; // By default, assume no optimized write barriers.
@@ -2643,7 +2488,7 @@ void Lowering::TreeNodeInfoInitGCWriteBarrier(GenTree* tree)
useOptimizedWriteBarrierHelper = true; // On x86, use the optimized write barriers by default.
#ifdef DEBUG
- GCInfo::WriteBarrierForm wbf = comp->codeGen->gcInfo.gcIsWriteBarrierCandidate(tree, src);
+ GCInfo::WriteBarrierForm wbf = compiler->codeGen->gcInfo.gcIsWriteBarrierCandidate(tree, src);
if (wbf == GCInfo::WBF_NoBarrier_CheckNotHeapInDebug) // This one is always a call to a C++ method.
{
useOptimizedWriteBarrierHelper = false;
@@ -2655,8 +2500,8 @@ void Lowering::TreeNodeInfoInitGCWriteBarrier(GenTree* tree)
// Special write barrier:
// op1 (addr) goes into REG_WRITE_BARRIER (rdx) and
// op2 (src) goes into any int register.
- addr->gtLsraInfo.setSrcCandidates(m_lsra, RBM_WRITE_BARRIER);
- src->gtLsraInfo.setSrcCandidates(m_lsra, RBM_WRITE_BARRIER_SRC);
+ addr->gtLsraInfo.setSrcCandidates(this, RBM_WRITE_BARRIER);
+ src->gtLsraInfo.setSrcCandidates(this, RBM_WRITE_BARRIER_SRC);
}
#else // !defined(_TARGET_X86_)
@@ -2670,8 +2515,8 @@ void Lowering::TreeNodeInfoInitGCWriteBarrier(GenTree* tree)
// For the standard JIT Helper calls:
// op1 (addr) goes into REG_ARG_0 and
// op2 (src) goes into REG_ARG_1
- addr->gtLsraInfo.setSrcCandidates(m_lsra, RBM_ARG_0);
- src->gtLsraInfo.setSrcCandidates(m_lsra, RBM_ARG_1);
+ addr->gtLsraInfo.setSrcCandidates(this, RBM_ARG_0);
+ src->gtLsraInfo.setSrcCandidates(this, RBM_ARG_1);
}
// Both src and dst must reside in a register, which they should since we haven't set
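
For orientation, the register conventions those candidates encode, as a sketch (the optimized-helper name is a placeholder, not the actual JIT helper symbol):

    // x86 optimized write barrier:
    //   addr -> REG_WRITE_BARRIER (edx), src -> a RBM_WRITE_BARRIER_SRC reg
    //   call WriteBarrierHelper            ; placeholder name
    // Standard helper call (non-x86, or unoptimized forms):
    //   addr -> REG_ARG_0, src -> REG_ARG_1
    //   call CORINFO_HELP_ASSIGN_REF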
@@ -2686,7 +2531,7 @@ void Lowering::TreeNodeInfoInitGCWriteBarrier(GenTree* tree)
// Arguments:
// indirTree - GT_IND or GT_STOREIND gentree node
//
-void Lowering::TreeNodeInfoInitIndir(GenTreeIndir* indirTree)
+void LinearScan::TreeNodeInfoInitIndir(GenTreeIndir* indirTree)
{
// If this is the rhs of a block copy (i.e. non-enregisterable struct),
// it has no register requirements.
@@ -2695,32 +2540,48 @@ void Lowering::TreeNodeInfoInitIndir(GenTreeIndir* indirTree)
return;
}
- ContainCheckIndir(indirTree);
-
- GenTree* addr = indirTree->gtGetOp1();
TreeNodeInfo* info = &(indirTree->gtLsraInfo);
- GenTreePtr base = nullptr;
- GenTreePtr index = nullptr;
- unsigned mul, cns;
- bool rev;
-
info->srcCount = GetIndirSourceCount(indirTree);
if (indirTree->gtOper == GT_STOREIND)
{
GenTree* source = indirTree->gtOp.gtOp2;
if (indirTree->AsStoreInd()->IsRMWMemoryOp())
{
+ // Because 'source' is contained, we haven't yet determined its special register requirements, if any.
+ // As it happens, the Shift or Rotate cases are the only ones with special requirements.
+ assert(source->isContained() && source->OperIsRMWMemOp());
+ GenTree* nonMemSource = nullptr;
+
+ if (source->OperIsShiftOrRotate())
+ {
+ TreeNodeInfoInitShiftRotate(source);
+ }
if (indirTree->AsStoreInd()->IsRMWDstOp1())
{
if (source->OperIsBinary())
{
- info->srcCount += GetOperandSourceCount(source->gtOp.gtOp2);
+ nonMemSource = source->gtOp.gtOp2;
}
}
else if (indirTree->AsStoreInd()->IsRMWDstOp2())
{
- info->srcCount += GetOperandSourceCount(source->gtOp.gtOp1);
+ nonMemSource = source->gtOp.gtOp1;
+ }
+ if (nonMemSource != nullptr)
+ {
+ info->srcCount += GetOperandSourceCount(nonMemSource);
+ assert(!nonMemSource->isContained() || (!nonMemSource->isMemoryOp() && !nonMemSource->IsLocal()));
+#ifdef _TARGET_X86_
+ if (varTypeIsByte(indirTree) && !nonMemSource->isContained())
+ {
+ // If storeInd is of TYP_BYTE, set source to byteable registers.
+ regMaskTP regMask = nonMemSource->gtLsraInfo.getSrcCandidates(this);
+ regMask &= ~RBM_NON_BYTE_REGS;
+ assert(regMask != RBM_NONE);
+ nonMemSource->gtLsraInfo.setSrcCandidates(this, regMask);
+ }
+#endif
}
}
else
@@ -2731,10 +2592,10 @@ void Lowering::TreeNodeInfoInitIndir(GenTreeIndir* indirTree)
if (varTypeIsByte(indirTree) && !source->isContained())
{
// If storeInd is of TYP_BYTE, set source to byteable registers.
- regMaskTP regMask = source->gtLsraInfo.getSrcCandidates(m_lsra);
+ regMaskTP regMask = source->gtLsraInfo.getSrcCandidates(this);
regMask &= ~RBM_NON_BYTE_REGS;
assert(regMask != RBM_NONE);
- source->gtLsraInfo.setSrcCandidates(m_lsra, regMask);
+ source->gtLsraInfo.setSrcCandidates(this, regMask);
}
#endif
}
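
The byte-register masking that now appears in both the RMW and non-RMW store paths above exists because on 32-bit x86 only EAX, EBX, ECX, and EDX have 8-bit addressable forms. A sketch of the restriction factored into a hypothetical shared helper:

    // Hypothetical helper (x86 only): restrict a byte-sized source to the
    // registers that have 8-bit forms (al/bl/cl/dl).
    void restrictToByteRegs(LinearScan* lsra, GenTree* source)
    {
        regMaskTP regMask = source->gtLsraInfo.getSrcCandidates(lsra);
        regMask &= ~RBM_NON_BYTE_REGS;
        assert(regMask != RBM_NONE); // at least one byteable register must remain
        source->gtLsraInfo.setSrcCandidates(lsra, regMask);
    }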
@@ -2757,7 +2618,7 @@ void Lowering::TreeNodeInfoInitIndir(GenTreeIndir* indirTree)
info->isInternalRegDelayFree = true;
}
- info->setInternalCandidates(m_lsra, m_lsra->allSIMDRegs());
+ info->setInternalCandidates(this, allSIMDRegs());
return;
}
@@ -2775,14 +2636,20 @@ void Lowering::TreeNodeInfoInitIndir(GenTreeIndir* indirTree)
// Return Value:
// None.
//
-void Lowering::TreeNodeInfoInitCmp(GenTreePtr tree)
+void LinearScan::TreeNodeInfoInitCmp(GenTreePtr tree)
{
assert(tree->OperIsCompare() || tree->OperIs(GT_CMP));
- ContainCheckCompare(tree->AsOp());
TreeNodeInfo* info = &(tree->gtLsraInfo);
info->srcCount = 0;
- info->dstCount = tree->OperIs(GT_CMP) ? 0 : 1;
+ if (info->isNoRegCompare)
+ {
+ info->dstCount = 0;
+ }
+ else
+ {
+ assert((info->dstCount == 1) || tree->OperIs(GT_CMP));
+ }
#ifdef _TARGET_X86_
// If the compare is used by a jump, we just need to set the condition codes. If not, then we need
@@ -2790,7 +2657,7 @@ void Lowering::TreeNodeInfoInitCmp(GenTreePtr tree)
    // We always set the dst candidates, though, because if this compare is consumed by a jump, they
// won't be used. We might be able to use GTF_RELOP_JMP_USED to determine this case, but it's not clear
// that flag is maintained until this location (especially for decomposed long compares).
- info->setDstCandidates(m_lsra, RBM_BYTE_REGS);
+ info->setDstCandidates(this, RBM_BYTE_REGS);
#endif // _TARGET_X86_
GenTreePtr op1 = tree->gtOp.gtOp1;
@@ -2798,7 +2665,10 @@ void Lowering::TreeNodeInfoInitCmp(GenTreePtr tree)
var_types op1Type = op1->TypeGet();
var_types op2Type = op2->TypeGet();
- info->srcCount += GetOperandSourceCount(op1);
+ if (!op1->gtLsraInfo.isNoRegCompare)
+ {
+ info->srcCount += GetOperandSourceCount(op1);
+ }
info->srcCount += GetOperandSourceCount(op2);
#if !defined(_TARGET_64BIT_)
@@ -2816,166 +2686,6 @@ void Lowering::TreeNodeInfoInitCmp(GenTreePtr tree)
#endif // !defined(_TARGET_64BIT_)
}
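
The isNoRegCompare handling corresponds to the two machine-level shapes a compare can take; a sketch (illustrative x86):

    // Compare consumed by a jump: flags only, no destination register.
    //   cmp   eax, edx
    //   jl    L_taken
    // Compare materialized into a value: needs a byteable destination on
    // x86, hence the RBM_BYTE_REGS dst candidates above.
    //   cmp   eax, edx
    //   setl  al
    //   movzx eax, al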
-//--------------------------------------------------------------------------------------------
-// TreeNodeInfoInitIfRMWMemOp: Checks to see if there is a RMW memory operation rooted at
-// GT_STOREIND node and if so will mark register requirements for nodes under storeInd so
-// that CodeGen will generate a single instruction of the form:
-//
-// binOp [addressing mode], reg
-//
-// Parameters
-// storeInd - GT_STOREIND node
-//
-// Return value
-// True, if RMW memory op tree pattern is recognized and op counts are set.
-// False otherwise.
-//
-bool Lowering::TreeNodeInfoInitIfRMWMemOp(GenTreePtr storeInd)
-{
- assert(storeInd->OperGet() == GT_STOREIND);
-
- // SSE2 doesn't support RMW on float values
- assert(!varTypeIsFloating(storeInd));
-
- // Terminology:
- // indirDst = memory write of an addr mode (i.e. storeind destination)
- // indirSrc = value being written to memory (i.e. storeind source which could a binary/unary op)
- // indirCandidate = memory read i.e. a gtInd of an addr mode
- // indirOpSource = source operand used in binary/unary op (i.e. source operand of indirSrc node)
-
- GenTreePtr indirCandidate = nullptr;
- GenTreePtr indirOpSource = nullptr;
-
- if (!IsRMWMemOpRootedAtStoreInd(storeInd, &indirCandidate, &indirOpSource))
- {
- JITDUMP("Lower of StoreInd didn't mark the node as self contained for reason: %d\n",
- storeInd->AsStoreInd()->GetRMWStatus());
- DISPTREERANGE(BlockRange(), storeInd);
- return false;
- }
-
- GenTreePtr indirDst = storeInd->gtGetOp1();
- GenTreePtr indirSrc = storeInd->gtGetOp2();
- genTreeOps oper = indirSrc->OperGet();
-
- // At this point we have successfully detected a RMW memory op of one of the following forms
- // storeInd(indirDst, indirSrc(indirCandidate, indirOpSource)) OR
- // storeInd(indirDst, indirSrc(indirOpSource, indirCandidate) in case of commutative operations OR
- // storeInd(indirDst, indirSrc(indirCandidate) in case of unary operations
- //
- // Here indirSrc = one of the supported binary or unary operation for RMW of memory
- // indirCandidate = a GT_IND node
- // indirCandidateChild = operand of GT_IND indirCandidate
- //
- // The logic below essentially does the following
- // Make indirOpSource contained.
- // Make indirSrc contained.
- // Make indirCandidate contained.
- // Make indirCandidateChild contained.
- // Make indirDst contained except when it is a GT_LCL_VAR or GT_CNS_INT that doesn't fit within addr
- // base.
- // Note that due to the way containment is supported, we accomplish some of the above by clearing operand counts
- // and directly propagating them upward.
- //
-
- TreeNodeInfo* info = &(storeInd->gtLsraInfo);
- info->dstCount = 0;
-
- if (GenTree::OperIsBinary(oper))
- {
- // On Xarch RMW operations require that the non-rmw operand be an immediate or in a register.
- // Therefore, if we have previously marked the indirOpSource as a contained memory op while lowering
- // the binary node, we need to reset that now.
- if (IsContainableMemoryOp(indirOpSource, true))
- {
- indirOpSource->ClearContained();
- }
- assert(!indirOpSource->isContained() || indirOpSource->OperIsConst());
- JITDUMP("Lower succesfully detected an assignment of the form: *addrMode BinOp= source\n");
- info->srcCount = indirOpSource->gtLsraInfo.dstCount;
- }
- else
- {
- assert(GenTree::OperIsUnary(oper));
- JITDUMP("Lower succesfully detected an assignment of the form: *addrMode = UnaryOp(*addrMode)\n");
- info->srcCount = 0;
- }
- DISPTREERANGE(BlockRange(), storeInd);
-
- m_lsra->clearOperandCounts(indirSrc);
- indirSrc->SetContained();
- m_lsra->clearOperandCounts(indirCandidate);
- indirCandidate->SetContained();
-
- GenTreePtr indirCandidateChild = indirCandidate->gtGetOp1();
- if (indirCandidateChild->OperGet() == GT_LEA)
- {
- GenTreeAddrMode* addrMode = indirCandidateChild->AsAddrMode();
-
- if (addrMode->HasBase())
- {
- assert(addrMode->Base()->OperIsLeaf());
- m_lsra->clearOperandCounts(addrMode->Base());
- addrMode->Base()->SetContained();
- info->srcCount++;
- }
-
- if (addrMode->HasIndex())
- {
- assert(addrMode->Index()->OperIsLeaf());
- m_lsra->clearOperandCounts(addrMode->Index());
- addrMode->Index()->SetContained();
- info->srcCount++;
- }
-
- m_lsra->clearOperandCounts(indirDst);
- indirDst->SetContained();
- }
- else
- {
- assert(indirCandidateChild->OperGet() == GT_LCL_VAR || indirCandidateChild->OperGet() == GT_LCL_VAR_ADDR ||
- indirCandidateChild->OperGet() == GT_CLS_VAR_ADDR || indirCandidateChild->OperGet() == GT_CNS_INT);
-
- // If it is a GT_LCL_VAR, it still needs the reg to hold the address.
- // We would still need a reg for GT_CNS_INT if it doesn't fit within addressing mode base.
- // For GT_CLS_VAR_ADDR, we don't need a reg to hold the address, because field address value is known at jit
- // time. Also, we don't need a reg for GT_CLS_VAR_ADDR.
- if (indirCandidateChild->OperGet() == GT_LCL_VAR_ADDR || indirCandidateChild->OperGet() == GT_CLS_VAR_ADDR)
- {
- m_lsra->clearOperandCounts(indirDst);
- indirDst->SetContained();
- }
- else if (indirCandidateChild->IsCnsIntOrI() && indirCandidateChild->AsIntConCommon()->FitsInAddrBase(comp))
- {
- m_lsra->clearOperandCounts(indirDst);
- indirDst->SetContained();
- }
- else
- {
- // Need a reg and hence increment src count of storeind
- info->srcCount += indirCandidateChild->gtLsraInfo.dstCount;
- }
- }
- m_lsra->clearOperandCounts(indirCandidateChild);
- indirCandidateChild->SetContained();
-
-#ifdef _TARGET_X86_
- if (varTypeIsByte(storeInd))
- {
- // If storeInd is of TYP_BYTE, set indirOpSources to byteable registers.
- bool containedNode = indirOpSource->gtLsraInfo.dstCount == 0;
- if (!containedNode)
- {
- regMaskTP regMask = indirOpSource->gtLsraInfo.getSrcCandidates(m_lsra);
- assert(regMask != RBM_NONE);
- indirOpSource->gtLsraInfo.setSrcCandidates(m_lsra, regMask & ~RBM_NON_BYTE_REGS);
- }
- }
-#endif
-
- return true;
-}
-
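
For context, the pattern the deleted routine recognized (this recognition now happens during containment analysis, with TreeNodeInfoInitIndir counting the sources as shown earlier):

    // Read-modify-write recognition lets codegen fold a load/op/store tree
    //   STOREIND(addr, ADD(IND(addr), value))
    // into a single memory-operand instruction:
    //   add dword ptr [rax], edx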
//------------------------------------------------------------------------
// TreeNodeInfoInitMul: Set the NodeInfo for a multiply.
//
@@ -2985,22 +2695,19 @@ bool Lowering::TreeNodeInfoInitIfRMWMemOp(GenTreePtr storeInd)
// Return Value:
// None.
//
-void Lowering::TreeNodeInfoInitMul(GenTreePtr tree)
+void LinearScan::TreeNodeInfoInitMul(GenTreePtr tree)
{
- ContainCheckMul(tree->AsOp());
-
#if defined(_TARGET_X86_)
assert(tree->OperIs(GT_MUL, GT_MULHI, GT_MUL_LONG));
#else
assert(tree->OperIs(GT_MUL, GT_MULHI));
#endif
TreeNodeInfo* info = &(tree->gtLsraInfo);
-
- GenTree* op1 = tree->gtOp.gtOp1;
- GenTree* op2 = tree->gtOp.gtOp2;
- info->srcCount = GetOperandSourceCount(op1);
+ GenTree* op1 = tree->gtOp.gtOp1;
+ GenTree* op2 = tree->gtOp.gtOp2;
+ info->srcCount = GetOperandSourceCount(op1);
info->srcCount += GetOperandSourceCount(op2);
- info->dstCount = 1;
+ assert(info->dstCount == 1);
// Case of float/double mul.
if (varTypeIsFloating(tree->TypeGet()))
@@ -3035,19 +2742,19 @@ void Lowering::TreeNodeInfoInitMul(GenTreePtr tree)
// Here we set RAX as the only destination candidate
// In LSRA we set the kill set for this operation to RBM_RAX|RBM_RDX
//
- info->setDstCandidates(m_lsra, RBM_RAX);
+ info->setDstCandidates(this, RBM_RAX);
}
else if (tree->OperGet() == GT_MULHI)
{
// Have to use the encoding:RDX:RAX = RAX * rm. Since we only care about the
// upper 32 bits of the result set the destination candidate to REG_RDX.
- info->setDstCandidates(m_lsra, RBM_RDX);
+ info->setDstCandidates(this, RBM_RDX);
}
#if defined(_TARGET_X86_)
else if (tree->OperGet() == GT_MUL_LONG)
{
// have to use the encoding:RDX:RAX = RAX * rm
- info->setDstCandidates(m_lsra, RBM_RAX);
+ info->setDstCandidates(this, RBM_RAX);
}
#endif
GenTree* containedMemOp = nullptr;
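
The RAX/RDX candidates above reflect the one-operand mul/imul encoding, which always writes the double-width product to the RDX:RAX pair. A minimal sketch of the value GT_MULHI produces (illustrative; assumes a compiler with __int128):

    // Illustrative: GT_MULHI yields the high half of the widening multiply,
    // i.e. the part the one-operand imul leaves in RDX.
    long long mulhi(long long a, long long b)
    {
        return (long long)(((__int128)a * (__int128)b) >> 64);
    }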
@@ -3075,18 +2782,18 @@ void Lowering::TreeNodeInfoInitMul(GenTreePtr tree)
// isFloatingPointType - true if it is floating point type
// sizeOfSIMDVector - SIMD Vector size
//
-void Lowering::SetContainsAVXFlags(bool isFloatingPointType /* = true */, unsigned sizeOfSIMDVector /* = 0*/)
+void LinearScan::SetContainsAVXFlags(bool isFloatingPointType /* = true */, unsigned sizeOfSIMDVector /* = 0*/)
{
#ifdef FEATURE_AVX_SUPPORT
if (isFloatingPointType)
{
- if (comp->getFloatingPointInstructionSet() == InstructionSet_AVX)
+ if (compiler->getFloatingPointInstructionSet() == InstructionSet_AVX)
{
- comp->getEmitter()->SetContainsAVX(true);
+ compiler->getEmitter()->SetContainsAVX(true);
}
- if (sizeOfSIMDVector == 32 && comp->getSIMDInstructionSet() == InstructionSet_AVX)
+ if (sizeOfSIMDVector == 32 && compiler->getSIMDInstructionSet() == InstructionSet_AVX)
{
- comp->getEmitter()->SetContains256bitAVX(true);
+ compiler->getEmitter()->SetContains256bitAVX(true);
}
}
#endif
@@ -3103,7 +2810,7 @@ void Lowering::SetContainsAVXFlags(bool isFloatingPointType /* = true */, unsign
// Return Value:
// If we need to exclude non-byteable registers
//
-bool Lowering::ExcludeNonByteableRegisters(GenTree* tree)
+bool LinearScan::ExcludeNonByteableRegisters(GenTree* tree)
{
// Example1: GT_STOREIND(byte, addr, op2) - storeind of byte sized value from op2 into mem 'addr'
// Storeind itself will not produce any value and hence dstCount=0. But op2 could be TYP_INT
@@ -3171,7 +2878,7 @@ bool Lowering::ExcludeNonByteableRegisters(GenTree* tree)
GenTree* op1 = simdNode->gtGetOp1();
GenTree* op2 = simdNode->gtGetOp2();
var_types baseType = simdNode->gtSIMDBaseType;
- if (!IsContainableMemoryOp(op1, true) && op2->IsCnsIntOrI() && varTypeIsSmallInt(baseType))
+ if (!isContainableMemoryOp(op1) && op2->IsCnsIntOrI() && varTypeIsSmallInt(baseType))
{
bool ZeroOrSignExtnReqd = true;
unsigned baseSize = genTypeSize(baseType);
@@ -3214,7 +2921,7 @@ bool Lowering::ExcludeNonByteableRegisters(GenTree* tree)
// Return Value:
// The number of source registers used by the *parent* of this node.
//
-int Lowering::GetOperandSourceCount(GenTree* node)
+int LinearScan::GetOperandSourceCount(GenTree* node)
{
if (!node->isContained())
{
@@ -3230,9 +2937,6 @@ int Lowering::GetOperandSourceCount(GenTree* node)
if (node->OperIsIndir())
{
const unsigned srcCount = GetIndirSourceCount(node->AsIndir());
- // TODO-Cleanup: Once we are doing containment analysis during Lowering, this
- // can be removed, or changed to an assert.
- node->gtLsraInfo.srcCount = 0;
return srcCount;
}
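
A contained operand contributes its own sources to the parent rather than producing a value in a register; for a contained indirection this means the non-contained components of its address mode. For example (illustrative):

    // Parent consuming a contained [base + index*scale + disp] operand:
    //   add eax, dword ptr [rbx + rcx*4 + 8]
    // GetOperandSourceCount returns 2 (rbx and rcx); the constant
    // displacement contributes no source.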