author    sivarv <sivarv@microsoft.com>  2016-07-18 16:56:59 -0700
committer sivarv <sivarv@microsoft.com>  2016-07-20 12:22:12 -0700
commit    0a817e0195e76a94e88ccd6a8779ed6451e8640a (patch)
tree      895ab29c2e903aa005954bcd8d484f7fa53034b4 /src
parent    1383b557d7be7268fa3e80dac8c908eae335a0e5 (diff)
Support for reg optional tree temps.
Diffstat (limited to 'src')
-rwxr-xr-x  src/jit/codegencommon.cpp   |  23
-rw-r--r--  src/jit/codegeninterface.h  |   3
-rwxr-xr-x  src/jit/codegenxarch.cpp    |  12
-rw-r--r--  src/jit/emitxarch.cpp       |  43
-rw-r--r--  src/jit/gentree.cpp         |   9
-rw-r--r--  src/jit/gentree.h           |  27
-rw-r--r--  src/jit/lower.h             |  58
-rw-r--r--  src/jit/lowerxarch.cpp      | 258
-rw-r--r--  src/jit/lsra.cpp            |  84
-rw-r--r--  src/jit/lsra.h              |  21
10 files changed, 418 insertions(+), 120 deletions(-)
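
For context: "reg optional" lets Lowering tell LSRA that a use position may go without a register, in which case codegen consumes the node as a contained memory operand (xarch instructions accept one r/m operand). A minimal sketch of the resulting codegen-side choice, using hypothetical emit helpers purely for illustration:

    // Sketch only; emitWithMemoryOperand/emitWithRegisterOperand are
    // hypothetical stand-ins for the emitInsBinary paths changed below.
    if (operand->isContainedSpillTemp())
    {
        // LSRA declined to reload the spilled value: the instruction
        // reads it straight from the spill temp's stack slot.
        emitWithMemoryOperand(operand);
    }
    else
    {
        // The operand was given a register (operand->gtRegNum).
        emitWithRegisterOperand(operand);
    }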
diff --git a/src/jit/codegencommon.cpp b/src/jit/codegencommon.cpp
index 1196e610ac..ffd5b70c8f 100755
--- a/src/jit/codegencommon.cpp
+++ b/src/jit/codegencommon.cpp
@@ -1398,6 +1398,29 @@ regNumber CodeGenInterface::genGetThisArgReg(GenTreePtr call)
return REG_ARG_0;
}
+//----------------------------------------------------------------------
+// getSpillTempDsc: get the TempDsc corresponding to a spilled tree.
+//
+// Arguments:
+// tree - spilled GenTree node
+//
+// Return Value:
+// TempDsc corresponding to tree
+TempDsc* CodeGenInterface::getSpillTempDsc(GenTree* tree)
+{
+ // tree must be in spilled state.
+ assert((tree->gtFlags & GTF_SPILLED) != 0);
+
+ // Get the tree's SpillDsc.
+ RegSet::SpillDsc* prevDsc;
+ RegSet::SpillDsc* spillDsc = regSet.rsGetSpillInfo(tree, tree->gtRegNum, &prevDsc);
+ assert(spillDsc != nullptr);
+
+ // Get the temp desc.
+ TempDsc* temp = regSet.rsGetSpillTempWord(tree->gtRegNum, spillDsc, prevDsc);
+ return temp;
+}
+
#ifdef _TARGET_XARCH_
#ifdef _TARGET_AMD64_
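
How the xarch emitter is expected to consume getSpillTempDsc, abbreviated from the emitxarch.cpp hunk later in this diff (not standalone code):

    TempDsc* tmpDsc = codeGen->getSpillTempDsc(src);  // asserts GTF_SPILLED on 'src'
    unsigned varNum = tmpDsc->tdTempNum();            // temp number used in place of a lcl num
    unsigned offset = 0;                              // spill temps are addressed at offset 0
    // ... emit the instruction with a frame-based memory operand ...
    emitComp->tmpRlsTemp(tmpDsc);                     // release the temp once consumed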
diff --git a/src/jit/codegeninterface.h b/src/jit/codegeninterface.h
index cb4774d240..d321b5719a 100644
--- a/src/jit/codegeninterface.h
+++ b/src/jit/codegeninterface.h
@@ -313,6 +313,9 @@ public:
void SpillFloat (regNumber reg, bool bIsCall = false);
#endif // LEGACY_BACKEND
+ // The following method is used by the xarch emitter to handle contained tree temps.
+ TempDsc* getSpillTempDsc(GenTree* tree);
+
public:
emitter* getEmitter() { return m_cgEmitter; }
protected:
diff --git a/src/jit/codegenxarch.cpp b/src/jit/codegenxarch.cpp
index c7fc1fda73..fedee032e4 100755
--- a/src/jit/codegenxarch.cpp
+++ b/src/jit/codegenxarch.cpp
@@ -1341,7 +1341,7 @@ void CodeGen::genCodeForDivMod(GenTreeOp* treeNode)
{
emit->emitInsBinary(genGetInsForOper(treeNode->gtOper, targetType), size, treeNode, divisor);
}
- else if (divisor->gtRegNum == targetReg)
+ else if (!divisor->isContained() && divisor->gtRegNum == targetReg)
{
// It is not possible to generate 2-operand divss or divsd where reg2 = reg1 / reg2
// because divss/divsd reg1, reg2 will over-write reg1. Therefore, in case of AMD64
@@ -1466,8 +1466,8 @@ void CodeGen::genCodeForBinary(GenTree* treeNode)
// The arithmetic node must be sitting in a register (since it's not contained)
noway_assert(targetReg != REG_NA);
- regNumber op1reg = op1->gtRegNum;
- regNumber op2reg = op2->gtRegNum;
+ regNumber op1reg = op1->isContained() ? REG_NA: op1->gtRegNum;
+ regNumber op2reg = op2->isContained() ? REG_NA: op2->gtRegNum;
GenTreePtr dst;
GenTreePtr src;
@@ -5148,7 +5148,11 @@ void CodeGen::genConsumeRegs(GenTree* tree)
if (tree->isContained())
{
- if (tree->isIndir())
+ if (tree->isContainedSpillTemp())
+ {
+ // Spill temps are untracked and hence there is no need to update life here.
+ }
+ else if (tree->isIndir())
{
genConsumeAddress(tree->AsIndir()->Addr());
}
diff --git a/src/jit/emitxarch.cpp b/src/jit/emitxarch.cpp
index 388a51d82d..632cc023e5 100644
--- a/src/jit/emitxarch.cpp
+++ b/src/jit/emitxarch.cpp
@@ -2893,7 +2893,7 @@ regNumber emitter::emitInsBinary(instruction ins, emitAttr attr, GenTree* dst, G
{
dblConst = src->AsDblCon();
}
-
+
// find local field if any
GenTreeLclFld* lclField = nullptr;
if (src->isContainedLclField())
@@ -2918,9 +2918,25 @@ regNumber emitter::emitInsBinary(instruction ins, emitAttr attr, GenTree* dst, G
lclVar = dst->AsLclVar();
}
+ // find contained spill tmp if any
+ TempDsc* tmpDsc = nullptr;
+ if (src->isContainedSpillTemp())
+ {
+ assert(src->IsRegOptional());
+ tmpDsc = codeGen->getSpillTempDsc(src);
+ }
+ else if (dst->isContainedSpillTemp())
+ {
+ assert(dst->IsRegOptional());
+ tmpDsc = codeGen->getSpillTempDsc(dst);
+ }
+
// First handle the simple non-memory cases
//
- if ((mem == nullptr) && (lclField == nullptr) && (lclVar == nullptr))
+ if ((mem == nullptr) &&
+ (lclField == nullptr) &&
+ (lclVar == nullptr) &&
+ (tmpDsc == nullptr))
{
if (intConst != nullptr)
{
@@ -2959,7 +2975,7 @@ regNumber emitter::emitInsBinary(instruction ins, emitAttr attr, GenTree* dst, G
// Next handle the cases where we have a stack based local memory operand.
//
unsigned varNum = BAD_VAR_NUM;
- unsigned offset = (unsigned) -1;
+ unsigned offset = (unsigned)-1;
if (lclField != nullptr)
{
@@ -2971,12 +2987,22 @@ regNumber emitter::emitInsBinary(instruction ins, emitAttr attr, GenTree* dst, G
varNum = lclVar->AsLclVarCommon()->GetLclNum();
offset = 0;
}
+ else if (tmpDsc != nullptr)
+ {
+ varNum = tmpDsc->tdTempNum();
+ offset = 0;
+ }
- if (varNum != BAD_VAR_NUM)
+ // Spill temp numbers are negative and start at -1,
+ // which, cast to unsigned, is BAD_VAR_NUM. For this reason
+ // we also need to check 'tmpDsc != nullptr' here.
+ if (varNum != BAD_VAR_NUM ||
+ tmpDsc != nullptr)
{
// Is the memory op in the source position?
if (src->isContainedLclField() ||
- src->isContainedLclVar())
+ src->isContainedLclVar() ||
+ src->isContainedSpillTemp())
{
if (instrHasImplicitRegPairDest(ins))
{
@@ -2993,7 +3019,7 @@ regNumber emitter::emitInsBinary(instruction ins, emitAttr attr, GenTree* dst, G
}
else // The memory op is in the dest position.
{
- assert(dst->gtRegNum == REG_NA);
+ assert(dst->gtRegNum == REG_NA || dst->IsRegOptional());
// src could be int or reg
if (src->isContainedIntOrIImmed())
@@ -3011,6 +3037,11 @@ regNumber emitter::emitInsBinary(instruction ins, emitAttr attr, GenTree* dst, G
}
}
+ if (tmpDsc != nullptr)
+ {
+ emitComp->tmpRlsTemp(tmpDsc);
+ }
+
return dst->gtRegNum;
}
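
The BAD_VAR_NUM caveat above can be verified in isolation. A standalone snippet, assuming BAD_VAR_NUM is (unsigned)-1 as its name suggests (the real definition lives in the JIT headers):

    #include <cassert>
    int main()
    {
        const unsigned BAD_VAR_NUM = (unsigned)-1;  // assumed value
        int      tdTempNum = -1;                    // first spill temp number
        unsigned varNum    = (unsigned)tdTempNum;   // what emitInsBinary stores
        assert(varNum == BAD_VAR_NUM);              // collision: must also test tmpDsc != nullptr
        return 0;
    }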
diff --git a/src/jit/gentree.cpp b/src/jit/gentree.cpp
index 54e294847b..d5b4cba90b 100644
--- a/src/jit/gentree.cpp
+++ b/src/jit/gentree.cpp
@@ -13317,13 +13317,22 @@ GenTree::IsLclVarUpdateTree(GenTree** pOtherTree, genTreeOps *pOper)
// until after the LSRA phase has allocated physical registers to the treenodes.
bool GenTree::isContained() const
{
+ if (isContainedSpillTemp())
+ {
+ return true;
+ }
+
if (gtHasReg())
+ {
return false;
+ }
// these actually produce a register (the flags reg, we just don't model it)
// and are a separate instruction from the branch that consumes the result
if (OperKind() & GTK_RELOP)
+ {
return false;
+ }
// TODO-Cleanup : this is not clean, would be nice to have some way of marking this.
switch (OperGet())
diff --git a/src/jit/gentree.h b/src/jit/gentree.h
index 85202ec533..ef2cf7ee05 100644
--- a/src/jit/gentree.h
+++ b/src/jit/gentree.h
@@ -495,7 +495,9 @@ public:
bool isContainedLclField() const { return isContained() && isLclField(); }
- bool isContainedLclVar() const { return isContained() && (OperGet() == GT_LCL_VAR); }
+ bool isContainedLclVar() const { return isContained() && (OperGet() == GT_LCL_VAR); }
+
+ bool isContainedSpillTemp() const;
// Indicates whether it is a memory op.
// Right now it includes Indir and LclField ops.
@@ -503,7 +505,7 @@ public:
bool isContainedMemoryOp() const
{
- return (isContained() && isMemoryOp()) || isContainedLclVar();
+ return (isContained() && isMemoryOp()) || isContainedLclVar() || isContainedSpillTemp();
}
regNumber GetRegNum() const
@@ -665,11 +667,13 @@ public:
#define GTF_REVERSE_OPS 0x00000020 // operand op2 should be evaluated before op1 (normally, op1 is evaluated first and op2 is evaluated second)
#define GTF_REG_VAL 0x00000040 // operand is sitting in a register (or part of a TYP_LONG operand is sitting in a register)
- #define GTF_SPILLED 0x00000080 // the value has been spilled
- #define GTF_SPILLED_OPER 0x00000100 // op1 has been spilled
+ #define GTF_SPILLED 0x00000080 // the value has been spilled
#ifdef LEGACY_BACKEND
- #define GTF_SPILLED_OP2 0x00000200 // op2 has been spilled
+ #define GTF_SPILLED_OPER 0x00000100 // op1 has been spilled
+ #define GTF_SPILLED_OP2 0x00000200 // op2 has been spilled
+#else
+ #define GTF_NOREG_AT_USE 0x00000100 // tree node is in memory at the point of use
#endif // LEGACY_BACKEND
#define GTF_REDINDEX_CHECK 0x00000100 // Used for redundant range checks. Disjoint from GTF_SPILLED_OPER
@@ -4498,6 +4502,19 @@ GenTreeBlkOp::HasGCPtr()
return false;
}
+inline bool GenTree::isContainedSpillTemp() const
+{
+#if !defined(LEGACY_BACKEND)
+ // If spilled and no reg at use, then it is treated as contained.
+ if (((gtFlags & GTF_SPILLED) != 0) &&
+ ((gtFlags & GTF_NOREG_AT_USE) != 0))
+ {
+ return true;
+ }
+#endif //!LEGACY_BACKEND
+
+ return false;
+}
/*****************************************************************************/
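
In sketch form, the two-flag encoding works as follows (paraphrasing the resolveRegisters() change in the lsra.cpp hunk below, not a new code path):

    // Def position was spilled, and the use position got no register:
    treeNode->gtFlags |= GTF_SPILLED;         // set by the existing spill machinery
    treeNode->gtFlags |= GTF_NOREG_AT_USE;    // set when the use's assignedReg() == REG_NA
    assert(treeNode->isContainedSpillTemp()); // both bits set => node is contained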
diff --git a/src/jit/lower.h b/src/jit/lower.h
index 4baeb7e1fc..9f62978a62 100644
--- a/src/jit/lower.h
+++ b/src/jit/lower.h
@@ -143,8 +143,62 @@ private:
void TreeNodeInfoInit(GenTreePtr* tree, GenTree* parent);
#if defined(_TARGET_XARCH_)
void TreeNodeInfoInitSimple(GenTree* tree);
- void SetRegOptionalForBinOp(GenTree* tree);
- void TryToSetRegOptional(GenTree* operand);
+
+ //----------------------------------------------------------------------
+ // SetRegOptional - sets a bit to indicate to LSRA that the register
+ // for a given tree node is optional for codegen purposes. If no
+ // register is allocated to such a tree node, its parent node treats
+ // it as a contained memory operand during codegen.
+ //
+ // Arguments:
+ // tree - GenTree node
+ //
+ // Returns
+ // None
+ void SetRegOptional(GenTree* tree)
+ {
+ tree->gtLsraInfo.regOptional = true;
+ }
+
+ GenTree* PreferredRegOptionalOperand(GenTree* tree);
+
+ // ------------------------------------------------------------------
+ // SetRegOptionalForBinOp - Indicates which operand of a bin-op has
+ // an optional register requirement. The xarch instruction set allows
+ // either op1 or op2 of a binary operation (e.g. add, mul etc.) to be
+ // a memory operand. This routine tells the register allocator which
+ // of the operands only optionally requires a register. LSRA might
+ // not allocate a register to RefTypeUse positions of such operands
+ // if that is beneficial. In that case codegen will treat them as
+ // memory operands.
+ //
+ // Arguments:
+ // tree - GenTree of a binary operation.
+ //
+ // Returns
+ // None.
+ //
+ // Note: On xarch at most one of the operands will be marked as
+ // reg optional, even when both operands could be considered register
+ // optional.
+ void SetRegOptionalForBinOp(GenTree* tree)
+ {
+ assert(GenTree::OperIsBinary(tree->OperGet()));
+
+ GenTree* op1 = tree->gtGetOp1();
+ GenTree* op2 = tree->gtGetOp2();
+
+ if (tree->OperIsCommutative() &&
+ tree->TypeGet() == op1->TypeGet())
+ {
+ GenTree* preferredOp = PreferredRegOptionalOperand(tree);
+ SetRegOptional(preferredOp);
+ }
+ else if (tree->TypeGet() == op2->TypeGet())
+ {
+ SetRegOptional(op2);
+ }
+ }
#endif // defined(_TARGET_XARCH_)
void TreeNodeInfoInitReturn(GenTree* tree);
void TreeNodeInfoInitShiftRotate(GenTree* tree);
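
A composite sketch of how these helpers are used by the lowerxarch.cpp call sites that follow (conditions simplified):

    if (op2->isMemoryOp() && IsSafeToContainMem(tree, op2))
    {
        MakeSrcContained(tree, op2);    // a truly contained memory operand is still preferred
    }
    else if (tree->OperIsCommutative())
    {
        SetRegOptionalForBinOp(tree);   // defers to PreferredRegOptionalOperand
    }
    else
    {
        SetRegOptional(op2);            // only op2 may be a memory operand
    }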
diff --git a/src/jit/lowerxarch.cpp b/src/jit/lowerxarch.cpp
index d41239cc40..8353f2c81c 100644
--- a/src/jit/lowerxarch.cpp
+++ b/src/jit/lowerxarch.cpp
@@ -561,7 +561,7 @@ void Lowering::TreeNodeInfoInit(GenTree* stmt)
info->srcCount = 2;
info->dstCount = 0;
- GenTreePtr other = nullptr;
+ GenTreePtr other;
if (CheckImmedAndMakeContained(tree, node->gtIndex))
{
other = node->gtArrLen;
@@ -579,17 +579,17 @@ void Lowering::TreeNodeInfoInit(GenTree* stmt)
other = node->gtArrLen;
}
- if (other->isMemoryOp())
+ if (node->gtIndex->TypeGet() == node->gtArrLen->TypeGet())
{
- if (node->gtIndex->TypeGet() == node->gtArrLen->TypeGet())
+ if (other->isMemoryOp())
{
MakeSrcContained(tree, other);
}
- }
- else
- {
- // since 'other' operand is not contained, we can mark it as reg optional
- TryToSetRegOptional(other);
+ else
+ {
+ // We can mark 'other' as reg optional, since it is not contained.
+ SetRegOptional(other);
+ }
}
}
break;
@@ -2087,7 +2087,8 @@ Lowering::TreeNodeInfoInitModDiv(GenTree* tree)
else
{
// If there are no containable operands, we can make an operand reg optional.
- SetRegOptionalForBinOp(tree);
+ // SSE2 allows only op2 to be a memory-op.
+ SetRegOptional(op2);
}
return;
@@ -2128,7 +2129,8 @@ Lowering::TreeNodeInfoInitModDiv(GenTree* tree)
op2->gtLsraInfo.setSrcCandidates(l, l->allRegs(TYP_INT) & ~(RBM_RAX | RBM_RDX));
// If there are no containable operands, we can make an operand reg optional.
- SetRegOptionalForBinOp(tree);
+ // Div instruction allows only op2 to be a memory op.
+ SetRegOptional(op2);
}
}
@@ -2167,7 +2169,7 @@ Lowering::TreeNodeInfoInitIntrinsic(GenTree* tree)
{
// Mark the operand as reg optional since codegen can still
// generate code if op1 is on stack.
- TryToSetRegOptional(op1);
+ SetRegOptional(op1);
}
break;
@@ -2503,7 +2505,7 @@ Lowering::TreeNodeInfoInitCast(GenTree* tree)
{
// Mark castOp as reg optional to indicate codegen
// can still generate code if it is on stack.
- TryToSetRegOptional(castOp);
+ SetRegOptional(castOp);
}
}
}
@@ -2861,18 +2863,16 @@ void Lowering::LowerCmp(GenTreePtr tree)
{
MakeSrcContained(tree, otherOp);
}
- else if (otherOp->isMemoryOp())
+ else if (otherOp->isMemoryOp() &&
+ ((otherOp == op2) || IsSafeToContainMem(tree, otherOp)))
{
- if ((otherOp == op2) || IsSafeToContainMem(tree, otherOp))
- {
- MakeSrcContained(tree, otherOp);
- }
+ MakeSrcContained(tree, otherOp);
}
else
{
- // Mark otherOp as reg optional to indicate codgen can still generate
- // code even if otherOp is on stack.
- TryToSetRegOptional(otherOp);
+ // SSE2 allows only otherOp to be a memory-op. Since otherOp is not
+ // contained, we can mark it reg-optional.
+ SetRegOptional(otherOp);
}
return;
@@ -2950,6 +2950,7 @@ void Lowering::LowerCmp(GenTreePtr tree)
}
}
}
+
if (op1CanBeContained)
{
if (op1->isMemoryOp())
@@ -2957,7 +2958,9 @@ void Lowering::LowerCmp(GenTreePtr tree)
MakeSrcContained(tree, op1);
}
else
- {
+ {
+ bool op1IsMadeContained = false;
+
// When op1 is a GT_AND we can often generate a single "test" instruction
// instead of two instructions (an "and" instruction followed by a "cmp"/"test")
//
@@ -3083,6 +3086,7 @@ void Lowering::LowerCmp(GenTreePtr tree)
}
// Mark the 'op1' (the GT_AND) operand as contained
MakeSrcContained(tree, op1);
+ op1IsMadeContained = true;
// During Codegen we will now generate "test andOp1, andOp2CnsVal"
}
@@ -3127,8 +3131,8 @@ void Lowering::LowerCmp(GenTreePtr tree)
assert(!castOp1->gtOverflowEx()); // Must not be an overflow checking operation
GenTreePtr removeTreeNode = op1;
- GenTreePtr removeTreeNodeChild = castOp1;
tree->gtOp.gtOp1 = castOp1;
+ op1 = castOp1;
castOp1->gtType = TYP_UBYTE;
// trim down the value if castOp1 is an int constant since its type changed to UBYTE.
@@ -3150,6 +3154,7 @@ void Lowering::LowerCmp(GenTreePtr tree)
if (castOp1->isMemoryOp())
{
MakeSrcContained(tree, op1);
+ op1IsMadeContained = true;
}
}
}
@@ -3163,6 +3168,12 @@ void Lowering::LowerCmp(GenTreePtr tree)
#endif
}
}
+
+ // If not made contained, op1 can be marked as reg-optional.
+ if (!op1IsMadeContained)
+ {
+ SetRegOptional(op1);
+ }
}
}
}
@@ -3181,12 +3192,7 @@ void Lowering::LowerCmp(GenTreePtr tree)
// One of op1 or op2 could be marked as reg optional
// to indicate that codegen can still generate code
// if one of them is on stack.
- TryToSetRegOptional(op2);
-
- if (!op2->IsRegOptional())
- {
- TryToSetRegOptional(op1);
- }
+ SetRegOptional(PreferredRegOptionalOperand(tree));
}
if (varTypeIsSmall(op1Type) && varTypeIsUnsigned(op1Type))
@@ -3717,6 +3723,10 @@ void Lowering::SetMulOpCounts(GenTreePtr tree)
bool requiresOverflowCheck = tree->gtOverflowEx();
bool useLeaEncoding = false;
GenTreePtr memOp = nullptr;
+
+ bool hasImpliedFirstOperand = false;
+ GenTreeIntConCommon* imm = nullptr;
+ GenTreePtr other = nullptr;
// There are three forms of x86 multiply:
// one-op form: RDX:RAX = RAX * r/m
@@ -3740,26 +3750,25 @@ void Lowering::SetMulOpCounts(GenTreePtr tree)
// In LSRA we set the kill set for this operation to RBM_RAX|RBM_RDX
//
info->setDstCandidates(m_lsra,RBM_RAX);
+ hasImpliedFirstOperand = true;
}
else if (tree->gtOper == GT_MULHI)
{
// have to use the encoding:RDX:RAX = RAX * rm
info->setDstCandidates(m_lsra, RBM_RAX);
+ hasImpliedFirstOperand = true;
}
else if (IsContainableImmed(tree, op2) || IsContainableImmed(tree, op1))
{
- GenTreeIntConCommon* imm;
- GenTreePtr other;
-
if (IsContainableImmed(tree, op2))
{
imm = op2->AsIntConCommon();
- other = op1;
+ other = op1;
}
else
{
imm = op1->AsIntConCommon();
- other = op2;
+ other = op2;
}
// CQ: We want to rewrite this into a LEA
@@ -3770,11 +3779,12 @@ void Lowering::SetMulOpCounts(GenTreePtr tree)
}
MakeSrcContained(tree, imm); // The imm is always contained
- if (other->isIndir())
+ if (other->isMemoryOp())
{
memOp = other; // memOp may be contained below
}
}
+
// We allow one operand to be a contained memory operand.
// The memory op type must match with the 'tree' type.
// This is because during codegen we use 'tree' type to derive EmitTypeSize.
@@ -3790,17 +3800,28 @@ void Lowering::SetMulOpCounts(GenTreePtr tree)
//
if (!useLeaEncoding)
{
- if (memOp != nullptr)
+ if ((memOp != nullptr) &&
+ (memOp->TypeGet() == tree->TypeGet()) &&
+ IsSafeToContainMem(tree, memOp))
{
- if ((memOp->TypeGet() == tree->TypeGet()) &&
- IsSafeToContainMem(tree, memOp))
- {
- MakeSrcContained(tree, memOp);
- }
+ MakeSrcContained(tree, memOp);
+ }
+ else if (imm != nullptr)
+ {
+ // Has a contained immediate operand.
+ // Only 'other' operand can be marked as reg optional.
+ assert(other != nullptr);
+ SetRegOptional(other);
+ }
+ else if (hasImpliedFirstOperand)
+ {
+ // Only op2 can be marked as reg optional.
+ SetRegOptional(op2);
}
else
{
- // If there are no containable operands, we can make an operand reg optional.
+ // If there are no containable operands, we can make either op1 or op2
+ // reg optional.
SetRegOptionalForBinOp(tree);
}
}
@@ -3869,67 +3890,128 @@ bool Lowering:: IsContainableImmed(GenTree* parentNode, GenTree* childNode)
return true;
}
-//----------------------------------------------------------------------
-// TryToSetRegOptional - sets a bit to indicate to LSRA that register
-// for a given tree node is optional for codegen purpose. If no
-// register is allocated to such a tree node, its parent node treats
-// it as a contained memory operand during codegen.
-//
-// Arguments:
-// tree - GenTree node
+//-----------------------------------------------------------------------
+// PreferredRegOptionalOperand: returns the operand of the given
+// binary oper that is preferred for marking as reg optional.
//
-// Returns
-// None
-//
-// Note: Right now a tree node is marked as reg optional only
-// if is it a GT_LCL_VAR. This routine needs to be modified if
-// in future if lower/codegen needs to support other tree node
-// types.
-void Lowering::TryToSetRegOptional(GenTree* tree)
-{
- if (tree->OperGet() == GT_LCL_VAR)
- {
- tree->gtLsraInfo.regOptional = true;
- }
-}
-
-// ------------------------------------------------------------------
-// SetRegOptionalBinOp - Indicates which of the operands of a bin-op
-// register requirement is optional. Xarch instruction set allows
-// either of op1 or op2 of binary operation (e.g. add, mul etc) to be
-// a memory operand. This routine provides info to register allocator
-// which of its operands optionally require a register. Lsra might not
-// allocate a register to RefTypeUse positions of such operands if it
-// is beneficial. In such a case codegen will treat them as memory
-// operands.
+// Since only one of op1 or op2 can be a memory operand on xarch, only
+// one of them has to be marked as reg optional. Since Lower doesn't
+// know a priori which of op1 or op2 is unlikely to get a register, it
+// has to make a guess. This routine encapsulates heuristics that
+// guess whether it is likely to be beneficial to mark op1 or op2 as
+// reg optional.
+//
//
// Arguments:
-// tree - Gentree of a bininary operation.
+// tree - a binary-op tree node that is either commutative
+// or a compare oper.
//
-// Returns
-// None.
-//
-// Note: On xarch at most only one of the operands will be marked as
-// reg optional, even when both operands could be considered register
-// optional.
-void Lowering::SetRegOptionalForBinOp(GenTree* tree)
+// Returns:
+// Returns op1 or op2 of tree node that is preferred for
+// marking as reg optional.
+//
+// Note: if the tree oper is neither commutative nor a compare oper
+// then only op2 can be reg optional on xarch, and hence there is no
+// need to call this routine.
+GenTree* Lowering::PreferredRegOptionalOperand(GenTree* tree)
{
assert(GenTree::OperIsBinary(tree->OperGet()));
+ assert(tree->OperIsCommutative() || tree->OperIsCompare());
GenTree* op1 = tree->gtGetOp1();
GenTree* op2 = tree->gtGetOp2();
+ GenTree* preferredOp = nullptr;
- if (tree->TypeGet() == op2->TypeGet())
+ // This routine uses the following heuristics:
+ //
+ // a) If both are tracked locals, marking the one with lower weighted
+ // ref count as reg-optional would likely be beneficial as it has
+ // higher probability of not getting a register.
+ //
+ // b) op1 = tracked local and op2 = untracked local: LSRA creates two
+ // ref positions for op2: a def and use position. op2's def position
+ // requires a reg and it is allocated a reg by spilling another
+ // interval (if required) and that could be even op1. For this reason
+ // it is beneficial to mark op1 as reg optional.
+ //
+ // TODO: It is not always mandatory for a def position of an untracked
+ // local to be allocated a register if it is on rhs of an assignment
+ // and its use position is reg-optional and has not been assigned a
+ // register. Reg optional def positions are not yet supported.
+ //
+ // c) op1 = untracked local and op2 = tracked local: marking op1 as
+ // reg optional is beneficial, since its use position is less likely
+ // to get a register.
+ //
+ // d) If both are untracked locals (i.e. treated like tree temps by
+ // LSRA): though either of them could be marked as reg optional,
+ // marking op1 as reg optional is likely to be beneficial because
+ // while allocating op2's def position, there is a possibility of
+ // spilling op1's def, in which case op1 is treated as a contained
+ // memory operand rather than requiring a reload.
+ //
+ // e) If only one of them is a local var, prefer to mark it as
+ // reg-optional. This heuristic is based on the results
+ // obtained against CQ perf benchmarks.
+ //
+ // f) If neither of them are local vars (i.e. tree temps), prefer to
+ // mark op1 as reg optional for the same reason as mentioned in (d) above.
+ if (op1->OperGet() == GT_LCL_VAR &&
+ op2->OperGet() == GT_LCL_VAR)
{
- TryToSetRegOptional(op2);
- }
+ LclVarDsc* v1 = comp->lvaTable + op1->AsLclVarCommon()->GetLclNum();
+ LclVarDsc* v2 = comp->lvaTable + op2->AsLclVarCommon()->GetLclNum();
- if (!op2->IsRegOptional() &&
- tree->OperIsCommutative() &&
- tree->TypeGet() == op1->TypeGet())
+ if (v1->lvTracked && v2->lvTracked)
+ {
+ // Both are tracked locals. The one with lower weight is less likely
+ // to get a register, and hence it is beneficial to mark the one with lower
+ // weight as reg optional.
+ if (v1->lvRefCntWtd < v2->lvRefCntWtd)
+ {
+ preferredOp = op1;
+ }
+ else
+ {
+ preferredOp = op2;
+ }
+ }
+ else if (v2->lvTracked)
+ {
+ // v1 is an untracked lcl and its use position is less likely to
+ // get a register.
+ preferredOp = op1;
+ }
+ else if (v1->lvTracked)
+ {
+ // v2 is an untracked lcl and its def position always
+ // needs a reg. Hence it is better to mark v1 as
+ // reg optional.
+ preferredOp = op1;
+ }
+ else
+ {
+ preferredOp = op1;
+ }
+ }
+ else if (op1->OperGet() == GT_LCL_VAR)
+ {
+ preferredOp = op1;
+ }
+ else if (op2->OperGet() == GT_LCL_VAR)
{
- TryToSetRegOptional(op1);
+ preferredOp = op2;
}
+ else
+ {
+ // Neither of the operands is a local; prefer marking the
+ // operand that is evaluated first as reg optional,
+ // since its use position is less likely to get a register.
+ bool reverseOps = ((tree->gtFlags & GTF_REVERSE_OPS) != 0);
+ preferredOp = reverseOps ? op2 : op1;
+ }
+
+ return preferredOp;
}
#endif // _TARGET_XARCH_
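
A worked instance of heuristic (a): if op1 and op2 are tracked locals with weighted ref counts 3 and 7, op1 is returned, since the lighter interval is less likely to be enregistered. The tracked-vs-tracked case can be modeled standalone (LclVarDsc here is a pared-down stand-in, not the JIT's real type):

    #include <cassert>

    struct LclVarDsc { bool lvTracked; unsigned lvRefCntWtd; };

    // Mirrors the v1->lvRefCntWtd < v2->lvRefCntWtd comparison above.
    bool preferOp1(const LclVarDsc& v1, const LclVarDsc& v2)
    {
        assert(v1.lvTracked && v2.lvTracked);
        return v1.lvRefCntWtd < v2.lvRefCntWtd;
    }

    int main()
    {
        LclVarDsc v1{true, 3}, v2{true, 7};
        assert(preferOp1(v1, v2));   // op1 preferred, per case (a)
        return 0;
    }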
diff --git a/src/jit/lsra.cpp b/src/jit/lsra.cpp
index 9be61e4523..266d68ec60 100644
--- a/src/jit/lsra.cpp
+++ b/src/jit/lsra.cpp
@@ -752,6 +752,7 @@ LinearScan::newRefPosition(regNumber reg,
newRP->registerAssignment = mask;
newRP->setMultiRegIdx(0);
+ newRP->setAllocateIfProfitable(0);
associateRefPosWithInterval(newRP);
@@ -835,6 +836,7 @@ LinearScan::newRefPosition(Interval* theInterval,
newRP->registerAssignment = mask;
newRP->setMultiRegIdx(multiRegIdx);
+ newRP->setAllocateIfProfitable(0);
associateRefPosWithInterval(newRP);
@@ -3023,6 +3025,7 @@ LinearScan::buildRefPositionsForNode(GenTree *tree,
pos->isLocalDefUse = true;
bool isLastUse = ((tree->gtFlags & GTF_VAR_DEATH) != 0);
pos->lastUse = isLastUse;
+ pos->setAllocateIfProfitable(tree->IsRegOptional());
DBEXEC(VERBOSE, pos->dump());
return;
}
@@ -3216,6 +3219,7 @@ LinearScan::buildRefPositionsForNode(GenTree *tree,
prefSrcInterval = i;
}
+ bool regOptionalAtUse = useNode->IsRegOptional();
bool isLastUse = true;
if (isCandidateLocalRef(useNode))
{
@@ -3224,7 +3228,7 @@ LinearScan::buildRefPositionsForNode(GenTree *tree,
else
{
// For non-localVar uses we record nothing,
- // as nothing needs to be written back to the tree)
+ // as nothing needs to be written back to the tree.
useNode = nullptr;
}
@@ -3260,7 +3264,15 @@ LinearScan::buildRefPositionsForNode(GenTree *tree,
pos->delayRegFree = true;
}
- if (isLastUse) pos->lastUse = true;
+ if (isLastUse)
+ {
+ pos->lastUse = true;
+ }
+
+ if (regOptionalAtUse)
+ {
+ pos->setAllocateIfProfitable(1);
+ }
}
JITDUMP("\n");
@@ -5145,6 +5157,26 @@ LinearScan::allocateBusyReg(Interval* current,
else
{
- isBetterLocation = (nextLocation > farthestLocation);
+
+ if (nextLocation > farthestLocation)
+ {
+ isBetterLocation = true;
+ }
+ else if (nextLocation == farthestLocation)
+ {
+ // Both weight and distance are equal.
+ // Prefer that ref position which is marked both reload and
+ // allocate if profitable. These ref positions don't need
+ // need to be spilled as they are already in memory and
+ // codegen considers them as contained memory operands.
+ isBetterLocation = (recentAssignedRef != nullptr) &&
+ recentAssignedRef->reload &&
+ recentAssignedRef->AllocateIfProfitable();
+ }
+ else
+ {
+ isBetterLocation = false;
+ }
}
}
@@ -7395,7 +7427,11 @@ LinearScan::recordMaxSpill()
void
LinearScan::updateMaxSpill(RefPosition* refPosition)
{
- if (refPosition->spillAfter || refPosition->reload)
+ RefType refType = refPosition->refType;
+
+ if (refPosition->spillAfter ||
+ refPosition->reload ||
+ (refPosition->AllocateIfProfitable() && refPosition->assignedReg() == REG_NA))
{
Interval* interval = refPosition->getInterval();
if (!interval->isLocalVar)
@@ -7406,8 +7442,8 @@ LinearScan::updateMaxSpill(RefPosition* refPosition)
// 8-byte non-GC items, and 16-byte or 32-byte SIMD vectors.
// LSRA is agnostic to those choices but needs
// to know what they are here.
- RefType refType = refPosition->refType;
var_types typ;
+
#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
if ((refType == RefTypeUpperVectorSaveDef) || (refType == RefTypeUpperVectorSaveUse))
{
@@ -7419,7 +7455,7 @@ LinearScan::updateMaxSpill(RefPosition* refPosition)
GenTreePtr treeNode = refPosition->treeNode;
if (treeNode == nullptr)
{
- assert(RefTypeIsUse(refPosition->refType));
+ assert(RefTypeIsUse(refType));
treeNode = interval->firstRefPosition->treeNode;
}
assert(treeNode != nullptr);
@@ -7451,6 +7487,17 @@ LinearScan::updateMaxSpill(RefPosition* refPosition)
assert(currentSpill[typ] > 0);
currentSpill[typ]--;
}
+ else if (refPosition->AllocateIfProfitable() &&
+ refPosition->assignedReg() == REG_NA)
+ {
+ // A spill temp is not reloaded into a reg here because it is
+ // marked 'allocate if profitable' and is used directly from its
+ // memory location. To properly account for the max spill of typ,
+ // we decrement the spill count.
+ assert(RefTypeIsUse(refType));
+ assert(currentSpill[typ] > 0);
+ currentSpill[typ]--;
+ }
JITDUMP(" Max spill for %s is %d\n", varTypeName(typ), maxSpill[typ]);
}
}
@@ -7669,18 +7716,20 @@ LinearScan::resolveRegisters()
if (treeNode == nullptr)
{
// This is either a use, a dead def, or a field of a struct
- Interval * interval = currentRefPosition->getInterval();
+ Interval* interval = currentRefPosition->getInterval();
assert(currentRefPosition->refType == RefTypeUse ||
currentRefPosition->registerAssignment == RBM_NONE ||
interval->isStructField);
+
// TODO-Review: Need to handle the case where any of the struct fields
// are reloaded/spilled at this use
assert(!interval->isStructField ||
(currentRefPosition->reload == false &&
currentRefPosition->spillAfter == false));
+
if (interval->isLocalVar && !interval->isStructField)
{
- LclVarDsc * varDsc = interval->getLocalVar(compiler);
+ LclVarDsc* varDsc = interval->getLocalVar(compiler);
// This must be a dead definition. We need to mark the lclVar
// so that it's not considered a candidate for lvRegister, as
// this dead def will have to go to the stack.
@@ -7688,6 +7737,7 @@ LinearScan::resolveRegisters()
assert(currentRefPosition->refType == RefTypeDef);
varDsc->lvRegNum = REG_STK;
}
+
JITDUMP("No tree node to write back to\n");
continue;
}
@@ -7784,7 +7834,25 @@ LinearScan::resolveRegisters()
if (INDEBUG(alwaysInsertReload() ||)
nextRefPosition->assignedReg() != currentRefPosition->assignedReg())
{
- insertCopyOrReload(treeNode, currentRefPosition->getMultiRegIdx(), nextRefPosition);
+ if (nextRefPosition->assignedReg() != REG_NA)
+ {
+ insertCopyOrReload(treeNode, currentRefPosition->getMultiRegIdx(), nextRefPosition);
+ }
+ else
+ {
+ assert(nextRefPosition->AllocateIfProfitable());
+
+ // In case of tree temps, if def is spilled and use didn't
+ // get a register, set a flag on tree node to be treated as
+ // contained at the point of its use.
+ if (currentRefPosition->spillAfter &&
+ currentRefPosition->refType == RefTypeDef &&
+ nextRefPosition->refType == RefTypeUse)
+ {
+ assert(nextRefPosition->treeNode == nullptr);
+ treeNode->gtFlags |= GTF_NOREG_AT_USE;
+ }
+ }
}
}
diff --git a/src/jit/lsra.h b/src/jit/lsra.h
index 11af9be3aa..9ce2bd79c7 100644
--- a/src/jit/lsra.h
+++ b/src/jit/lsra.h
@@ -1372,22 +1372,29 @@ public:
) && !AllocateIfProfitable();
}
- // Returns true whether this ref position is to be allocated
- // a reg only if it is profitable. Currently these are the
+ // Indicates whether this ref position is to be allocated
+ // a reg only if profitable. Currently these are the
// ref positions that lower/codegen has indicated as reg
// optional and is considered a contained memory operand if
// no reg is allocated.
+ unsigned allocRegIfProfitable : 1;
+
+ void setAllocateIfProfitable(unsigned val)
+ {
+ allocRegIfProfitable = val;
+ }
+
+ // Returns true when this ref position is to be allocated
+ // a reg only if it is profitable.
bool AllocateIfProfitable()
{
// TODO-CQ: Right now if a ref position is marked as
// copyreg or movereg, then it is not treated as
// 'allocate if profitable'. This is an implementation
// limitation that needs to be addressed.
- return (refType == RefTypeUse) &&
- !copyReg &&
- !moveReg &&
- (treeNode != nullptr) &&
- treeNode->IsRegOptional();
+ return allocRegIfProfitable &&
+ !copyReg &&
+ !moveReg;
}
// Used by RefTypeDef/Use positions of a multi-reg call node.
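
End to end, the new bit travels as follows (assembled from the hunks above; a recap, not additional code):

    // 1. Lower marks the node (lower.h):
    //        tree->gtLsraInfo.regOptional = true;
    // 2. Ref position creation (lsra.cpp):
    //        pos->setAllocateIfProfitable(tree->IsRegOptional());
    // 3. If allocation skips the use, resolveRegisters() tags the def'd tree
    //    instead of inserting a reload:
    if (nextRefPosition->assignedReg() == REG_NA)
    {
        assert(nextRefPosition->AllocateIfProfitable());
        treeNode->gtFlags |= GTF_NOREG_AT_USE;   // consumed from memory at the use
    }
    // 4. Codegen/emit then treat the node as a contained memory operand
    //    (isContainedSpillTemp() in gentree.h, emitInsBinary in emitxarch.cpp).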