-rw-r--r--  src/jit/CMakeLists.txt        |    1
-rw-r--r--  src/jit/compiler.h            |    6
-rw-r--r--  src/jit/gentree.cpp           |   15
-rw-r--r--  src/jit/gentree.h             |   15
-rw-r--r--  src/jit/hwintrinsicArm64.cpp  |   51
-rw-r--r--  src/jit/hwintrinsicxarch.cpp  |   40
-rw-r--r--  src/jit/lsra.cpp              |  258
-rw-r--r--  src/jit/lsra.h                |  441
-rw-r--r--  src/jit/lsraarm.cpp           |  526
-rw-r--r--  src/jit/lsraarm64.cpp         |  749
-rw-r--r--  src/jit/lsraarmarch.cpp       |  348
-rw-r--r--  src/jit/lsrabuild.cpp         | 2095
-rw-r--r--  src/jit/lsraxarch.cpp         | 2066
-rw-r--r--  src/jit/nodeinfo.h            |  153
14 files changed, 3310 insertions(+), 3454 deletions(-)
diff --git a/src/jit/CMakeLists.txt b/src/jit/CMakeLists.txt
index 1eb1c430b6..c9c280a79a 100644
--- a/src/jit/CMakeLists.txt
+++ b/src/jit/CMakeLists.txt
@@ -152,7 +152,6 @@ if (WIN32)
lsra_reftypes.h
lsra.h
namedintrinsiclist.h
- nodeinfo.h
objectalloc.h
opcode.h
phase.h
diff --git a/src/jit/compiler.h b/src/jit/compiler.h
index c3634a365f..8dd365428d 100644
--- a/src/jit/compiler.h
+++ b/src/jit/compiler.h
@@ -2957,6 +2957,11 @@ protected:
CORINFO_METHOD_HANDLE method,
CORINFO_SIG_INFO* sig,
bool mustExpand);
+
+public:
+ static int numArgsOfHWIntrinsic(GenTreeHWIntrinsic* node);
+
+protected:
#ifdef _TARGET_XARCH_
static InstructionSet lookupHWIntrinsicISA(const char* className);
static NamedIntrinsic lookupHWIntrinsic(const char* methodName, InstructionSet isa);
@@ -3016,7 +3021,6 @@ protected:
public:
static HWIntrinsicCategory categoryOfHWIntrinsic(NamedIntrinsic intrinsic);
- static int numArgsOfHWIntrinsic(GenTreeHWIntrinsic* node);
protected:
static HWIntrinsicFlag flagsOfHWIntrinsic(NamedIntrinsic intrinsic);
diff --git a/src/jit/gentree.cpp b/src/jit/gentree.cpp
index 8cc44645c3..70477f88d6 100644
--- a/src/jit/gentree.cpp
+++ b/src/jit/gentree.cpp
@@ -737,16 +737,9 @@ int GenTree::GetRegisterDstCount() const
GenTree* temp = const_cast<GenTree*>(this);
return temp->AsCall()->GetReturnTypeDesc()->GetReturnRegCount();
}
- else if (IsCopyOrReloadOfMultiRegCall())
+ else if (IsCopyOrReload())
{
- // A multi-reg copy or reload, will have valid regs for only those
- // positions that need to be copied or reloaded. Hence we need
- // to consider only those registers for computing reg mask.
-
- GenTree* tree = const_cast<GenTree*>(this);
- GenTreeCopyOrReload* copyOrReload = tree->AsCopyOrReload();
- GenTreeCall* call = copyOrReload->gtGetOp1()->AsCall();
- return call->GetReturnTypeDesc()->GetReturnRegCount();
+ return gtGetOp1()->GetRegisterDstCount();
}
#if defined(_TARGET_ARM_)
else if (OperIsPutArgSplit())
@@ -7019,6 +7012,10 @@ GenTree* Compiler::gtNewPutArgReg(var_types type, GenTree* arg, regNumber argReg
#if defined(_TARGET_ARM_)
// A PUTARG_REG could be a MultiRegOp on arm since we could move a double register to two int registers.
node = new (this, GT_PUTARG_REG) GenTreeMultiRegOp(GT_PUTARG_REG, type, arg, nullptr);
+ if (type == TYP_LONG)
+ {
+ node->AsMultiRegOp()->gtOtherReg = REG_NEXT(argReg);
+ }
#else
node = gtNewOperNode(GT_PUTARG_REG, type, arg);
#endif
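
The TYP_LONG case above relies on the ARM convention that a long argument in registers occupies a consecutive pair, so the second half lands in the register following the first. A minimal sketch of that pairing, with mock names standing in for regNumber and REG_NEXT:

    enum Reg { R0, R1, R2, R3 }; // illustrative subset

    inline Reg NextReg(Reg r) // analogue of REG_NEXT
    {
        return static_cast<Reg>(r + 1);
    }

    // A TYP_LONG starting in R0 keeps its low 32 bits in R0 and its high
    // 32 bits in NextReg(R0) == R1, which is what gtOtherReg records.
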
diff --git a/src/jit/gentree.h b/src/jit/gentree.h
index a66f6c6d2b..5723b060f1 100644
--- a/src/jit/gentree.h
+++ b/src/jit/gentree.h
@@ -26,7 +26,6 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
#include "valuenumtype.h"
#include "jitstd.h"
#include "jithashtable.h"
-#include "nodeinfo.h"
#include "simd.h"
#include "namedintrinsiclist.h"
@@ -1302,6 +1301,20 @@ public:
return OperIsShiftOrRotate(OperGet());
}
+ static bool OperIsMul(genTreeOps gtOper)
+ {
+ return (gtOper == GT_MUL) || (gtOper == GT_MULHI)
+#if !defined(_TARGET_64BIT_)
+ || (gtOper == GT_MUL_LONG)
+#endif
+ ;
+ }
+
+ bool OperIsMul() const
+ {
+ return OperIsMul(gtOper);
+ }
+
bool OperIsArithmetic() const
{
genTreeOps op = OperGet();
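
A sketch of the kind of call site the new OperIsMul helper enables; getKillSetForMul is declared in the lsra.h changes below, while the surrounding fragment is hypothetical:

    // Select a kill-mask helper based on the operator kind.
    regMaskTP killMask = RBM_NONE;
    if (node->OperIsMul())
    {
        killMask = getKillSetForMul(node->AsOp());
    }
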
diff --git a/src/jit/hwintrinsicArm64.cpp b/src/jit/hwintrinsicArm64.cpp
index 7f5f53220c..b9038f10ec 100644
--- a/src/jit/hwintrinsicArm64.cpp
+++ b/src/jit/hwintrinsicArm64.cpp
@@ -134,6 +134,57 @@ bool Compiler::impCheckImmediate(GenTree* immediateOp, unsigned int max)
}
//------------------------------------------------------------------------
+// numArgsOfHWIntrinsic: gets the number of arguments for the hardware intrinsic.
+// This attempts to do a table-based lookup but will fall back to the number
+// of operands in 'node' if the table entry is -1.
+//
+// Arguments:
+// node -- the GenTreeHWIntrinsic node whose arguments are to be counted
+//
+// Return Value:
+// number of arguments
+//
+int Compiler::numArgsOfHWIntrinsic(GenTreeHWIntrinsic* node)
+{
+ NamedIntrinsic intrinsic = node->gtHWIntrinsicId;
+
+ assert(intrinsic != NI_Illegal);
+ assert(intrinsic > NI_HW_INTRINSIC_START && intrinsic < NI_HW_INTRINSIC_END);
+
+ GenTree* op1 = node->gtGetOp1();
+ GenTree* op2 = node->gtGetOp2();
+ int numArgs = 0;
+
+ if (op1 == nullptr)
+ {
+ return 0;
+ }
+
+ if (op1->OperIsList())
+ {
+ numArgs = 0;
+ GenTreeArgList* list = op1->AsArgList();
+
+ while (list != nullptr)
+ {
+ numArgs++;
+ list = list->Rest();
+ }
+
+ // We should only use a list if we have at least 3 operands.
+ assert(numArgs >= 3);
+ return numArgs;
+ }
+
+ if (op2 == nullptr)
+ {
+ return 1;
+ }
+
+ return 2;
+}
+
+//------------------------------------------------------------------------
// impHWIntrinsic: dispatch hardware intrinsics to their own implementation
// function
//
diff --git a/src/jit/hwintrinsicxarch.cpp b/src/jit/hwintrinsicxarch.cpp
index 002a22026c..59274865c8 100644
--- a/src/jit/hwintrinsicxarch.cpp
+++ b/src/jit/hwintrinsicxarch.cpp
@@ -221,9 +221,6 @@ unsigned Compiler::simdSizeOfHWIntrinsic(NamedIntrinsic intrinsic, CORINFO_SIG_I
return simdSize;
}
-// TODO_XARCH-CQ - refactoring of numArgsOfHWIntrinsic fast path into inlinable
-// function and slow local static function may increase performance significantly
-
//------------------------------------------------------------------------
// numArgsOfHWIntrinsic: gets the number of arguments for the hardware intrinsic.
// This attempts to do a table based lookup but will fallback to the number
@@ -255,36 +252,33 @@ int Compiler::numArgsOfHWIntrinsic(GenTreeHWIntrinsic* node)
GenTree* op1 = node->gtGetOp1();
GenTree* op2 = node->gtGetOp2();
- if (op2 != nullptr)
+ if (op1 == nullptr)
{
- return 2;
+ return 0;
}
- if (op1 != nullptr)
+ if (op1->OperIsList())
{
- if (op1->OperIsList())
- {
- numArgs = 0;
- GenTreeArgList* list = op1->AsArgList();
+ numArgs = 0;
+ GenTreeArgList* list = op1->AsArgList();
- while (list != nullptr)
- {
- numArgs++;
- list = list->Rest();
- }
-
- assert(numArgs > 0);
- return numArgs;
- }
- else
+ while (list != nullptr)
{
- return 1;
+ numArgs++;
+ list = list->Rest();
}
+
+ // We should only use a list if we have at least 3 operands.
+ assert(numArgs >= 3);
+ return numArgs;
}
- else
+
+ if (op2 == nullptr)
{
- return 0;
+ return 1;
}
+
+ return 2;
}
//------------------------------------------------------------------------
diff --git a/src/jit/lsra.cpp b/src/jit/lsra.cpp
index bb2711a97d..befaac2241 100644
--- a/src/jit/lsra.cpp
+++ b/src/jit/lsra.cpp
@@ -13,11 +13,11 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
Preconditions
- All register requirements are expressed in the code stream, either as destination
registers of tree nodes, or as internal registers. These requirements are
- expressed in the TreeNodeInfo computed for each node, which includes:
- - The number of register sources and destinations.
+ expressed in the RefPositions built for each node by BuildNode(), which include:
+ - The register uses and definitions.
- The register restrictions (candidates) of the target register, both from itself,
as producer of the value (dstCandidates), and from its consuming node (srcCandidates).
- Note that the srcCandidates field of TreeNodeInfo refers to the destination register
+ Note that when we talk about srcCandidates we are referring to the destination register
(not any of its sources).
- The number (internalCount) of registers required, and their register restrictions (internalCandidates).
These are neither inputs nor outputs of the node, but used in the sequence of code generated for the tree.
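
As a mental model of what replaces TreeNodeInfo here: each register use or definition becomes a RefPosition carrying its own location, type, and candidates. A mock of the relevant fields (names follow those that appear later in this diff; the real layout differs):

    struct MockRefPosition
    {
        unsigned  nodeLocation;       // LsraLocation of the defining/using node
        bool      isUse;              // RefTypeUse vs. RefTypeDef
        regMaskTP registerAssignment; // candidate registers for this reference
        bool      delayRegFree;       // a use that interferes with the next def
    };
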
@@ -53,7 +53,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
tree node (an "implicit" definition), this is the register to put the result.
For an expression use, this is the place to find the value that has previously
been computed.
- - In most cases, this register must satisfy the constraints specified by the TreeNodeInfo.
+ - In most cases, this register must satisfy the constraints specified for the RefPosition.
- In some cases, this is difficult:
- If a lclVar node currently lives in some register, it may not be desirable to move it
(i.e. its current location may be desirable for future uses, e.g. if it's a callee save register,
@@ -236,66 +236,13 @@ regMaskTP LinearScan::allRegs(RegisterType rt)
}
}
-//--------------------------------------------------------------------------
-// allMultiRegCallNodeRegs: represents a set of registers that can be used
-// to allocate a multi-reg call node.
-//
-// Arguments:
-// call - Multi-reg call node
-//
-// Return Value:
-// Mask representing the set of available registers for multi-reg call
-// node.
-//
-// Note:
-// Multi-reg call node available regs = Bitwise-OR(allregs(GetReturnRegType(i)))
-// for all i=0..RetRegCount-1.
-regMaskTP LinearScan::allMultiRegCallNodeRegs(GenTreeCall* call)
-{
- assert(call->HasMultiRegRetVal());
-
- ReturnTypeDesc* retTypeDesc = call->GetReturnTypeDesc();
- regMaskTP resultMask = allRegs(retTypeDesc->GetReturnRegType(0));
-
- unsigned count = retTypeDesc->GetReturnRegCount();
- for (unsigned i = 1; i < count; ++i)
- {
- resultMask |= allRegs(retTypeDesc->GetReturnRegType(i));
- }
-
- return resultMask;
-}
-
-//--------------------------------------------------------------------------
-// allRegs: returns the set of registers that can accomodate the type of
-// given node.
-//
-// Arguments:
-// tree - GenTree node
-//
-// Return Value:
-// Mask representing the set of available registers for given tree
-//
-// Note: In case of multi-reg call node, the full set of registers must be
-// determined by looking at types of individual return register types.
-// In this case, the registers may include registers from different register
-// sets and will not be limited to the actual ABI return registers.
-regMaskTP LinearScan::allRegs(GenTree* tree)
+regMaskTP LinearScan::allByteRegs()
{
- regMaskTP resultMask;
-
- // In case of multi-reg calls, allRegs is defined as
- // Bitwise-Or(allRegs(GetReturnRegType(i)) for i=0..ReturnRegCount-1
- if (tree->IsMultiRegCall())
- {
- resultMask = allMultiRegCallNodeRegs(tree->AsCall());
- }
- else
- {
- resultMask = allRegs(tree->TypeGet());
- }
-
- return resultMask;
+#ifdef _TARGET_X86_
+ return availableIntRegs & RBM_BYTE_REGS;
+#else
+ return availableIntRegs;
+#endif
}
regMaskTP LinearScan::allSIMDRegs()
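
The new allByteRegs helper captures the x86-only constraint that just EAX, EBX, ECX, and EDX have byte-addressable forms. A sketch of the intersection with illustrative mask values (the real RBM_* constants are target-defined):

    const unsigned RBM_BYTE_REGS_SKETCH   = 0x0F; // EAX..EDX, say
    unsigned       availableIntRegsSketch = 0xFF; // whatever LSRA may allocate
    unsigned       byteRegs = availableIntRegsSketch & RBM_BYTE_REGS_SKETCH;
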
@@ -664,9 +611,8 @@ LinearScan::LinearScan(Compiler* theCompiler)
, listNodePool(theCompiler)
{
#ifdef DEBUG
- maxNodeLocation = 0;
- activeRefPosition = nullptr;
- specialPutArgCount = 0;
+ maxNodeLocation = 0;
+ activeRefPosition = nullptr;
// Get the value of the environment variable that controls stress for register allocation
lsraStressMask = JitConfig.JitStressRegs();
@@ -698,7 +644,7 @@ LinearScan::LinearScan(Compiler* theCompiler)
else if (dump == true)
{
printf("JitStressRegs = %x for method %s, hash = 0x%x.\n",
- lsraStressMask, compiler->info.compFullName, compiler->info.compMethodHash());
+ lsraStressMask, compiler->info.compFullName, compiler->info.compMethodHash());
printf(""); // in our logic this causes a flush
}
}
@@ -743,8 +689,7 @@ LinearScan::LinearScan(Compiler* theCompiler)
// Block sequencing (the order in which we schedule).
// Note that we don't initialize the bbVisitedSet until we do the first traversal
- // (currently during Lowering's second phase, where it sets the TreeNodeInfo).
- // This is so that any blocks that are added during the first phase of Lowering
+ // This is so that any blocks that are added during the first traversal
// are accounted for (and we don't have BasicBlockEpoch issues).
blockSequencingDone = false;
blockSequence = nullptr;
@@ -755,106 +700,10 @@ LinearScan::LinearScan(Compiler* theCompiler)
// Information about each block, including predecessor blocks used for variable locations at block entry.
blockInfo = nullptr;
- // Populate the register mask table.
- // The first two masks in the table are allint/allfloat
- // The next N are the masks for each single register.
- // After that are the dynamically added ones.
- regMaskTable = new (compiler, CMK_LSRA) regMaskTP[numMasks];
- regMaskTable[ALLINT_IDX] = allRegs(TYP_INT);
- regMaskTable[ALLFLOAT_IDX] = allRegs(TYP_DOUBLE);
-
- regNumber reg;
- for (reg = REG_FIRST; reg < REG_COUNT; reg = REG_NEXT(reg))
- {
- regMaskTable[FIRST_SINGLE_REG_IDX + reg - REG_FIRST] = (reg == REG_STK) ? RBM_NONE : genRegMask(reg);
- }
- nextFreeMask = FIRST_SINGLE_REG_IDX + REG_COUNT;
- noway_assert(nextFreeMask <= numMasks);
-}
-
-// Return the reg mask corresponding to the given index.
-regMaskTP LinearScan::GetRegMaskForIndex(RegMaskIndex index)
-{
- assert(index < numMasks);
- assert(index < nextFreeMask);
- return regMaskTable[index];
+ pendingDelayFree = false;
+ tgtPrefUse = nullptr;
}
-// Given a reg mask, return the index it corresponds to. If it is not a 'well known' reg mask,
-// add it at the end. This method has linear behavior in the worst cases but that is fairly rare.
-// Most methods never use any but the well-known masks, and when they do use more
-// it is only one or two more.
-LinearScan::RegMaskIndex LinearScan::GetIndexForRegMask(regMaskTP mask)
-{
- RegMaskIndex result;
- if (isSingleRegister(mask))
- {
- result = genRegNumFromMask(mask) + FIRST_SINGLE_REG_IDX;
- }
- else if (mask == allRegs(TYP_INT))
- {
- result = ALLINT_IDX;
- }
- else if (mask == allRegs(TYP_DOUBLE))
- {
- result = ALLFLOAT_IDX;
- }
- else
- {
- for (int i = FIRST_SINGLE_REG_IDX + REG_COUNT; i < nextFreeMask; i++)
- {
- if (regMaskTable[i] == mask)
- {
- return i;
- }
- }
-
- // We only allocate a fixed number of masks. Since we don't reallocate, we will throw a
- // noway_assert if we exceed this limit.
- noway_assert(nextFreeMask < numMasks);
-
- regMaskTable[nextFreeMask] = mask;
- result = nextFreeMask;
- nextFreeMask++;
- }
- assert(mask == regMaskTable[result]);
- return result;
-}
-
-// We've decided that we can't use one or more registers during register allocation (probably FPBASE),
-// but we've already added it to the register masks. Go through the masks and remove it.
-void LinearScan::RemoveRegistersFromMasks(regMaskTP removeMask)
-{
- if (VERBOSE)
- {
- JITDUMP("Removing registers from LSRA register masks: ");
- INDEBUG(dumpRegMask(removeMask));
- JITDUMP("\n");
- }
-
- regMaskTP mask = ~removeMask;
- for (int i = 0; i < nextFreeMask; i++)
- {
- regMaskTable[i] &= mask;
- }
-
- JITDUMP("After removing registers:\n");
- DBEXEC(VERBOSE, dspRegisterMaskTable());
-}
-
-#ifdef DEBUG
-void LinearScan::dspRegisterMaskTable()
-{
- printf("LSRA register masks. Total allocated: %d, total used: %d\n", numMasks, nextFreeMask);
- for (int i = 0; i < nextFreeMask; i++)
- {
- printf("%2u: ", i);
- dspRegMask(regMaskTable[i]);
- printf("\n");
- }
-}
-#endif // DEBUG
-
//------------------------------------------------------------------------
// getNextCandidateFromWorkList: Get the next candidate for block sequencing
//
@@ -902,9 +751,6 @@ BasicBlock* LinearScan::getNextCandidateFromWorkList()
// will be allocated.
// This method clears the bbVisitedSet on LinearScan, and when it returns the set
// contains all the bbNums for the block.
-// This requires a traversal of the BasicBlocks, and could potentially be
-// combined with the first traversal (currently the one in Lowering that sets the
-// TreeNodeInfo).
void LinearScan::setBlockSequence()
{
@@ -2490,11 +2336,7 @@ void LinearScan::setFrameType()
}
// If we are using FPBASE as the frame register, we cannot also use it for
- // a local var. Note that we may have already added it to the register masks,
- // which are computed when the LinearScan class constructor is created, and
- // used during lowering. Luckily, the TreeNodeInfo only stores an index to
- // the masks stored in the LinearScan class, so we only need to walk the
- // unique masks and remove FPBASE.
+ // a local var.
regMaskTP removeMask = RBM_NONE;
if (frameType == FT_EBP_FRAME)
{
@@ -2517,7 +2359,6 @@ void LinearScan::setFrameType()
if ((removeMask != RBM_NONE) && ((availableIntRegs & removeMask) != 0))
{
- RemoveRegistersFromMasks(removeMask);
// We know that we're already in "read mode" for availableIntRegs. However,
// we need to remove these registers, so subsequent users (like callers
// to allRegs()) get the right thing. The RemoveRegistersFromMasks() code
@@ -5459,7 +5300,7 @@ void LinearScan::allocateRegisters()
{
assert(!currentInterval->isLocalVar);
Interval* srcInterval = currentInterval->relatedInterval;
- assert(srcInterval->isLocalVar);
+ assert(srcInterval != nullptr && srcInterval->isLocalVar);
if (refType == RefTypeDef)
{
assert(srcInterval->recentRefPosition->nodeLocation == currentLocation - 1);
@@ -8656,11 +8497,6 @@ void RefPosition::dump()
{
printf("<RefPosition #%-3u @%-3u", rpNum, nodeLocation);
- if (nextRefPosition)
- {
- printf(" ->#%-3u", nextRefPosition->rpNum);
- }
-
printf(" %s ", getRefTypeName(refType));
if (this->isPhysRegRef)
@@ -8847,43 +8683,67 @@ void RegRecord::tinyDump()
printf("<Reg:%-3s> ", getRegName(regNum));
}
-void TreeNodeInfo::dump(LinearScan* lsra)
+void LinearScan::dumpNodeInfo(GenTree* node, regMaskTP dstCandidates, int srcCount, int dstCount)
{
- printf("<TreeNodeInfo %d=%d %di %df", dstCount, srcCount, internalIntCount, internalFloatCount);
+ if (!VERBOSE)
+ {
+ return;
+ }
+ // This is formatted like the old dump to make diffs easier. TODO-Cleanup: improve.
+ int internalIntCount = 0;
+ int internalFloatCount = 0;
+ regMaskTP internalCandidates = RBM_NONE;
+ for (int i = 0; i < internalCount; i++)
+ {
+ RefPosition* def = internalDefs[i];
+ if (def->getInterval()->registerType == TYP_INT)
+ {
+ internalIntCount++;
+ }
+ else
+ {
+ internalFloatCount++;
+ }
+ internalCandidates |= def->registerAssignment;
+ }
+ if (dstCandidates == RBM_NONE)
+ {
+ dstCandidates = varTypeIsFloating(node) ? allRegs(TYP_FLOAT) : allRegs(TYP_INT);
+ }
+ if (internalCandidates == RBM_NONE)
+ {
+ internalCandidates = allRegs(TYP_INT);
+ }
+ printf(" +<TreeNodeInfo %d=%d %di %df", dstCount, srcCount, internalIntCount, internalFloatCount);
printf(" src=");
- dumpRegMask(getSrcCandidates(lsra));
+ dumpRegMask(varTypeIsFloating(node) ? allRegs(TYP_FLOAT) : allRegs(TYP_INT));
printf(" int=");
- dumpRegMask(getInternalCandidates(lsra));
+ dumpRegMask(internalCandidates);
printf(" dst=");
- dumpRegMask(getDstCandidates(lsra));
- if (isLocalDefUse)
+ dumpRegMask(dstCandidates);
+ if (node->IsUnusedValue())
{
printf(" L");
}
- if (isInitialized)
- {
- printf(" I");
- }
- if (isDelayFree)
+ printf(" I");
+ if (pendingDelayFree)
{
printf(" D");
}
- if (isTgtPref)
- {
- printf(" P");
- }
- if (isInternalRegDelayFree)
+ if (setInternalRegsDelayFree)
{
printf(" ID");
}
printf(">");
+ node->dumpLIRFlags();
+ printf("\n consume= %d produce=%d\n", srcCount, dstCount);
}
void LinearScan::dumpDefList()
{
JITDUMP("DefList: { ");
bool first = true;
- for (LocationInfoListNode *listNode = defList.Begin(), *end = defList.End(); listNode != end;
+ for (RefInfoListNode *listNode = defList.Begin(), *end = defList.End(); listNode != end;
listNode = listNode->Next())
{
GenTree* node = listNode->treeNode;
diff --git a/src/jit/lsra.h b/src/jit/lsra.h
index 40edb2dee7..3e70332e9c 100644
--- a/src/jit/lsra.h
+++ b/src/jit/lsra.h
@@ -8,7 +8,6 @@
#include "arraylist.h"
#include "smallhash.h"
-#include "nodeinfo.h"
// Minor and forward-reference types
class Interval;
@@ -80,79 +79,78 @@ inline regMaskTP calleeSaveRegs(RegisterType rt)
}
//------------------------------------------------------------------------
-// LocationInfo: Captures the necessary information for a node that is "in-flight"
-// during `buildIntervals` (i.e. its definition has been encountered,
-// but not its use).
+// RefInfo: Captures the necessary information for a definition that is "in-flight"
+// during `buildIntervals` (i.e. a tree-node definition has been encountered,
+// but not its use). This includes the RefPosition and its associated
+// GenTree node.
//
-struct LocationInfo
+struct RefInfo
{
- Interval* interval;
+ RefPosition* ref;
GenTree* treeNode;
- LsraLocation loc;
- TreeNodeInfo info;
- LocationInfo(LsraLocation l, Interval* i, GenTree* t, unsigned regIdx = 0) : interval(i), treeNode(t), loc(l)
+ RefInfo(RefPosition* r, GenTree* t) : ref(r), treeNode(t)
{
}
// default constructor for data structures
- LocationInfo()
+ RefInfo()
{
}
};
//------------------------------------------------------------------------
-// LocationInfoListNode: used to store a single `LocationInfo` value for a
-// node during `buildIntervals`.
+// RefInfoListNode: used to store a single `RefInfo` value for a
+// node during `buildIntervals`.
//
-// This is the node type for `LocationInfoList` below.
+// This is the node type for `RefInfoList` below.
//
-class LocationInfoListNode final : public LocationInfo
+class RefInfoListNode final : public RefInfo
{
- friend class LocationInfoList;
- friend class LocationInfoListNodePool;
+ friend class RefInfoList;
+ friend class RefInfoListNodePool;
- LocationInfoListNode* m_next; // The next node in the list
+ RefInfoListNode* m_next; // The next node in the list
public:
- LocationInfoListNode(LsraLocation l, Interval* i, GenTree* t, unsigned regIdx = 0) : LocationInfo(l, i, t, regIdx)
+ RefInfoListNode(RefPosition* r, GenTree* t) : RefInfo(r, t)
{
}
//------------------------------------------------------------------------
- // LocationInfoListNode::Next: Returns the next node in the list.
- LocationInfoListNode* Next() const
+ // RefInfoListNode::Next: Returns the next node in the list.
+ RefInfoListNode* Next() const
{
return m_next;
}
};
//------------------------------------------------------------------------
-// LocationInfoList: used to store a list of `LocationInfo` values for a
+// RefInfoList: used to store a list of `RefInfo` values for a
// node during `buildIntervals`.
//
-// This list of 'LocationInfoListNode's contains the source nodes consumed by
+// This list of 'RefInfoListNode's contains the source nodes consumed by
// a node, and is created by 'BuildNode'.
//
-class LocationInfoList final
+class RefInfoList final
{
- friend class LocationInfoListNodePool;
+ friend class RefInfoListNodePool;
- LocationInfoListNode* m_head; // The head of the list
- LocationInfoListNode* m_tail; // The tail of the list
+ RefInfoListNode* m_head; // The head of the list
+ RefInfoListNode* m_tail; // The tail of the list
public:
- LocationInfoList() : m_head(nullptr), m_tail(nullptr)
+ RefInfoList() : m_head(nullptr), m_tail(nullptr)
{
}
- LocationInfoList(LocationInfoListNode* node) : m_head(node), m_tail(node)
+ RefInfoList(RefInfoListNode* node) : m_head(node), m_tail(node)
{
assert(m_head->m_next == nullptr);
}
//------------------------------------------------------------------------
- // LocationInfoList::IsEmpty: Returns true if the list is empty.
+ // RefInfoList::IsEmpty: Returns true if the list is empty.
//
bool IsEmpty() const
{
@@ -160,40 +158,40 @@ public:
}
//------------------------------------------------------------------------
- // LocationInfoList::Begin: Returns the first node in the list.
+ // RefInfoList::Begin: Returns the first node in the list.
//
- LocationInfoListNode* Begin() const
+ RefInfoListNode* Begin() const
{
return m_head;
}
//------------------------------------------------------------------------
- // LocationInfoList::End: Returns the position after the last node in the
+ // RefInfoList::End: Returns the position after the last node in the
// list. The returned value is suitable for use as
// a sentinel for iteration.
//
- LocationInfoListNode* End() const
+ RefInfoListNode* End() const
{
return nullptr;
}
//------------------------------------------------------------------------
- // LocationInfoList::End: Returns the position after the last node in the
+ // RefInfoList::End: Returns the position after the last node in the
// list. The returned value is suitable for use as
// a sentinel for iteration.
//
- LocationInfoListNode* Last() const
+ RefInfoListNode* Last() const
{
return m_tail;
}
//------------------------------------------------------------------------
- // LocationInfoList::Append: Appends a node to the list.
+ // RefInfoList::Append: Appends a node to the list.
//
// Arguments:
// node - The node to append. Must not be part of an existing list.
//
- void Append(LocationInfoListNode* node)
+ void Append(RefInfoListNode* node)
{
assert(node->m_next == nullptr);
@@ -210,12 +208,12 @@ public:
m_tail = node;
}
//------------------------------------------------------------------------
- // LocationInfoList::Append: Appends another list to this list.
+ // RefInfoList::Append: Appends another list to this list.
//
// Arguments:
// other - The list to append.
//
- void Append(LocationInfoList other)
+ void Append(RefInfoList other)
{
if (m_tail == nullptr)
{
@@ -231,12 +229,12 @@ public:
}
//------------------------------------------------------------------------
- // LocationInfoList::Prepend: Prepends a node to the list.
+ // RefInfoList::Prepend: Prepends a node to the list.
//
// Arguments:
// node - The node to prepend. Must not be part of an existing list.
//
- void Prepend(LocationInfoListNode* node)
+ void Prepend(RefInfoListNode* node)
{
assert(node->m_next == nullptr);
@@ -254,13 +252,13 @@ public:
}
//------------------------------------------------------------------------
- // LocationInfoList::Add: Adds a node to the list.
+ // RefInfoList::Add: Adds a node to the list.
//
// Arguments:
// node - The node to add. Must not be part of an existing list.
// prepend - True if it should be prepended (otherwise is appended)
//
- void Add(LocationInfoListNode* node, bool prepend)
+ void Add(RefInfoListNode* node, bool prepend)
{
if (prepend)
{
@@ -273,105 +271,108 @@ public:
}
//------------------------------------------------------------------------
- // removeListNode - retrieve the TreeNodeInfo for the given node
+ // removeListNode - retrieve the RefInfo for the given node
//
// Notes:
- // The BuildNode methods use this helper to retrieve the TreeNodeInfo for child nodes
- // from the useList being constructed. Note that, if the user knows the order of the operands,
- // it is expected that they should just retrieve them directly.
-
- LocationInfoListNode* removeListNode(GenTree* node)
+ // The BuildNode methods use this helper to retrieve the RefInfo for child nodes
+ // from the useList being constructed.
+ //
+ RefInfoListNode* removeListNode(RefInfoListNode* listNode, RefInfoListNode* prevListNode)
{
- LocationInfoListNode* prevListNode = nullptr;
- for (LocationInfoListNode *listNode = Begin(), *end = End(); listNode != end; listNode = listNode->Next())
+ RefInfoListNode* nextNode = listNode->Next();
+ if (prevListNode == nullptr)
{
- if (listNode->treeNode == node)
- {
- LocationInfoListNode* nextNode = listNode->Next();
- if (prevListNode == nullptr)
- {
- m_head = nextNode;
- }
- else
- {
- prevListNode->m_next = nextNode;
- }
- if (nextNode == nullptr)
- {
- m_tail = prevListNode;
- }
- listNode->m_next = nullptr;
- return listNode;
- }
- prevListNode = listNode;
+ m_head = nextNode;
}
- assert(!"removeListNode didn't find the node");
- unreached();
+ else
+ {
+ prevListNode->m_next = nextNode;
+ }
+ if (nextNode == nullptr)
+ {
+ m_tail = prevListNode;
+ }
+ listNode->m_next = nullptr;
+ return listNode;
}
+ // removeListNode - remove the RefInfoListNode for the given GenTree node from the defList
+ RefInfoListNode* removeListNode(GenTree* node);
+ // Same as above but takes a multiRegIdx to support multi-reg nodes.
+ RefInfoListNode* removeListNode(GenTree* node, unsigned multiRegIdx);
+
//------------------------------------------------------------------------
- // GetTreeNodeInfo - retrieve the TreeNodeInfo for the given node
+ // GetRefPosition - retrieve the RefPosition for the given node
//
// Notes:
- // The Build methods use this helper to retrieve the TreeNodeInfo for child nodes
+ // The Build methods use this helper to retrieve the RefPosition for child nodes
// from the useList being constructed. Note that, if the user knows the order of the operands,
// it is expected that they should just retrieve them directly.
- TreeNodeInfo& GetTreeNodeInfo(GenTree* node)
+ RefPosition* GetRefPosition(GenTree* node)
{
- for (LocationInfoListNode *listNode = Begin(), *end = End(); listNode != end; listNode = listNode->Next())
+ for (RefInfoListNode *listNode = Begin(), *end = End(); listNode != end; listNode = listNode->Next())
{
if (listNode->treeNode == node)
{
- return listNode->info;
+ return listNode->ref;
}
}
- assert(!"GetTreeNodeInfo didn't find the node");
+ assert(!"GetRefPosition didn't find the node");
unreached();
}
//------------------------------------------------------------------------
- // LocationInfoList::GetSecond: Gets the second node in the list.
+ // RefInfoList::GetSecond: Gets the second node in the list.
//
// Arguments:
// (DEBUG ONLY) treeNode - The GenTree* we expect to be in the second node.
//
- LocationInfoListNode* GetSecond(INDEBUG(GenTree* treeNode))
+ RefInfoListNode* GetSecond(INDEBUG(GenTree* treeNode))
{
noway_assert((Begin() != nullptr) && (Begin()->Next() != nullptr));
- LocationInfoListNode* second = Begin()->Next();
+ RefInfoListNode* second = Begin()->Next();
assert(second->treeNode == treeNode);
return second;
}
+
+#ifdef DEBUG
+ // Count - return the number of nodes in the list (DEBUG only)
+ int Count()
+ {
+ int count = 0;
+ for (RefInfoListNode *listNode = Begin(), *end = End(); listNode != end; listNode = listNode->Next())
+ {
+ count++;
+ }
+ return count;
+ }
+#endif // DEBUG
};
//------------------------------------------------------------------------
-// LocationInfoListNodePool: manages a pool of `LocationInfoListNode`
-// values to decrease overall memory usage
-// during `buildIntervals`.
+// RefInfoListNodePool: manages a pool of `RefInfoListNode`
+// values to decrease overall memory usage
+// during `buildIntervals`.
//
-// `buildIntervals` involves creating a list of location info values per
+// `buildIntervals` involves creating a list of RefInfo items per
// node that either directly produces a set of registers or that is a
// contained node with register-producing sources. However, these lists
// are short-lived: they are destroyed once the use of the corresponding
// node is processed. As such, there is typically only a small number of
-// `LocationInfoListNode` values in use at any given time. Pooling these
+// `RefInfoListNode` values in use at any given time. Pooling these
// values avoids otherwise frequent allocations.
-class LocationInfoListNodePool final
+class RefInfoListNodePool final
{
- LocationInfoListNode* m_freeList;
+ RefInfoListNode* m_freeList;
Compiler* m_compiler;
static const unsigned defaultPreallocation = 8;
public:
- // Creates a pool of `LocationInfoListNode` values.
- LocationInfoListNodePool(Compiler* compiler, unsigned preallocate = defaultPreallocation);
-
- // Fetches an unused node from the pool.
- LocationInfoListNode* GetNode(LsraLocation l, Interval* i, GenTree* t, unsigned regIdx = 0);
-
- // Returns a list of nodes to the pool.
- void ReturnNodes(LocationInfoList& list);
+ RefInfoListNodePool(Compiler* compiler, unsigned preallocate = defaultPreallocation);
+ RefInfoListNode* GetNode(RefPosition* r, GenTree* t, unsigned regIdx = 0);
+ void ReturnNodes(RefInfoList& list);
+ void ReturnNode(RefInfoListNode* listNode);
};
struct LsraBlockInfo
@@ -614,7 +615,7 @@ inline bool leafAddInRange(GenTree* leaf, int lower, int upper, int multiple = 1
{
return false;
}
- return leafInRange(leaf->gtOp.gtOp2, lower, upper, multiple);
+ return leafInRange(leaf->gtGetOp2(), lower, upper, multiple);
}
inline bool isCandidateVar(LclVarDsc* varDsc)
@@ -643,15 +644,11 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
// to the next RefPosition in code order
// THIS IS THE OPTION CURRENTLY BEING PURSUED
-class LocationInfoList;
-class LocationInfoListNodePool;
-
class LinearScan : public LinearScanInterface
{
friend class RefPosition;
friend class Interval;
friend class Lowering;
- friend class TreeNodeInfo;
public:
// This could use further abstraction. From Compiler we need the tree,
@@ -661,51 +658,11 @@ public:
// This is the main driver
virtual void doLinearScan();
- // TreeNodeInfo contains three register masks: src candidates, dst candidates, and internal condidates.
- // Instead of storing actual register masks, however, which are large, we store a small index into a table
- // of register masks, stored in this class. We create only as many distinct register masks as are needed.
- // All identical register masks get the same index. The register mask table contains:
- // 1. A mask containing all eligible integer registers.
- // 2. A mask containing all elibible floating-point registers.
- // 3. A mask for each of single register.
- // 4. A mask for each combination of registers, created dynamically as required.
- //
- // Currently, the maximum number of masks allowed is a constant defined by 'numMasks'. The register mask
- // table is never resized. It is also limited by the size of the index, currently an unsigned char.
- CLANG_FORMAT_COMMENT_ANCHOR;
-
-#if defined(_TARGET_ARM64_)
- static const int numMasks = 128;
-#else
- static const int numMasks = 64;
-#endif
-
- regMaskTP* regMaskTable;
- int nextFreeMask;
-
- typedef int RegMaskIndex;
-
- // allint is 0, allfloat is 1, all the single-bit masks start at 2
- enum KnownRegIndex
- {
- ALLINT_IDX = 0,
- ALLFLOAT_IDX = 1,
- FIRST_SINGLE_REG_IDX = 2
- };
-
- RegMaskIndex GetIndexForRegMask(regMaskTP mask);
- regMaskTP GetRegMaskForIndex(RegMaskIndex index);
- void RemoveRegistersFromMasks(regMaskTP removeMask);
-
static bool isSingleRegister(regMaskTP regMask)
{
return (genExactlyOneBit(regMask));
}
-#ifdef DEBUG
- void dspRegisterMaskTable();
-#endif // DEBUG
-
// Initialize the block traversal for LSRA.
// This resets the bbVisitedSet, and on the first invocation sets the blockSequence array,
// which determines the order in which blocks will be allocated (currently called during Lowering).
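
For context on the machinery deleted above: TreeNodeInfo was kept small by storing an index into a shared table of register masks rather than the masks themselves. Condensed from the removed code:

    typedef int RegMaskIndex;           // small index in place of a full mask
    regMaskTP   regMaskTable[numMasks]; // allint, allfloat, singles, then dynamic

    regMaskTP GetRegMaskForIndex(RegMaskIndex index)
    {
        return regMaskTable[index];
    }

    // RefPositions now hold a full regMaskTP directly, so the table,
    // GetIndexForRegMask(), and RemoveRegistersFromMasks() all go away.
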
@@ -811,11 +768,6 @@ private:
enum LsraStressLimitRegs{LSRA_LIMIT_NONE = 0, LSRA_LIMIT_CALLEE = 0x1, LSRA_LIMIT_CALLER = 0x2,
LSRA_LIMIT_SMALL_SET = 0x3, LSRA_LIMIT_MASK = 0x3};
- // When we limit the number of candidate registers, we have to take into account any
- // "specialPutArg" references that are in flight, as that increases the number of live
- // registers between it and the next call.
- int specialPutArgCount;
-
// When LSRA_LIMIT_SMALL_SET is specified, it is desirable to select a "mixed" set of caller- and callee-save
// registers, so as to get different coverage than limiting to callee or caller.
// At least for x86 and AMD64, and potentially other architecture that will support SIMD,
@@ -964,6 +916,7 @@ private:
}
// Dump support
+ void dumpNodeInfo(GenTree* node, regMaskTP dstCandidates, int srcCount, int dstCount);
void dumpDefList();
void lsraDumpIntervals(const char* msg);
void dumpRefPositions(const char* msg);
@@ -984,7 +937,7 @@ private:
void verifyFinalAllocation();
void verifyResolutionMove(GenTree* resolutionNode, LsraLocation currentLocation);
#else // !DEBUG
- bool doSelectNearest()
+ bool doSelectNearest()
{
return false;
}
@@ -1080,7 +1033,9 @@ private:
void buildRefPositionsForNode(GenTree* tree, BasicBlock* block, LsraLocation loc);
#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
- VARSET_VALRET_TP buildUpperVectorSaveRefPositions(GenTree* tree, LsraLocation currentLoc);
+ VARSET_VALRET_TP buildUpperVectorSaveRefPositions(GenTree* tree,
+ LsraLocation currentLoc,
+ regMaskTP fpCalleeKillSet);
void buildUpperVectorRestoreRefPositions(GenTree* tree, LsraLocation currentLoc, VARSET_VALARG_TP liveLargeVectors);
#endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE
@@ -1112,19 +1067,28 @@ private:
// Helpers for getKillSetForNode().
regMaskTP getKillSetForStoreInd(GenTreeStoreInd* tree);
+ regMaskTP getKillSetForMul(GenTreeOp* tree);
+ regMaskTP getKillSetForCall(GenTreeCall* call);
+ regMaskTP getKillSetForModDiv(GenTreeOp* tree);
+ regMaskTP getKillSetForBlockStore(GenTreeBlk* blkNode);
+ regMaskTP getKillSetForReturn();
+ regMaskTP getKillSetForProfilerHook();
#ifdef FEATURE_HW_INTRINSICS
regMaskTP getKillSetForHWIntrinsic(GenTreeHWIntrinsic* node);
#endif // FEATURE_HW_INTRINSICS
- // Return the registers killed by the given tree node.
+// Return the registers killed by the given tree node.
+// This is used only for an assert, and for stress, so it is only defined under DEBUG.
+// Otherwise, the Build methods should obtain the killMask from the appropriate method above.
+#ifdef DEBUG
regMaskTP getKillSetForNode(GenTree* tree);
+#endif
// Given some tree node add refpositions for all the registers this node kills
- bool buildKillPositionsForNode(GenTree* tree, LsraLocation currentLoc);
+ bool buildKillPositionsForNode(GenTree* tree, LsraLocation currentLoc, regMaskTP killMask);
regMaskTP allRegs(RegisterType rt);
- regMaskTP allRegs(GenTree* tree);
- regMaskTP allMultiRegCallNodeRegs(GenTreeCall* tree);
+ regMaskTP allByteRegs();
regMaskTP allSIMDRegs();
regMaskTP internalFloatRegCandidates();
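
The per-operator kill-set helpers declared above feed BuildDefsWithKills; the GT_RETURNTRAP case in the ARM BuildNode below shows the pattern. A condensed sketch (getKillSetForReturn and BuildDefsWithKills are as declared in this header):

    regMaskTP killMask = getKillSetForReturn(); // e.g. when building GT_RETURN
    BuildDefsWithKills(tree, 0, RBM_NONE, killMask);
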
@@ -1143,11 +1107,11 @@ private:
return tree->TypeGet();
}
- RefPosition* defineNewInternalTemp(GenTree* tree, RegisterType regType, regMaskTP regMask);
-
- int buildInternalRegisterDefsForNode(GenTree* tree, TreeNodeInfo* info, RefPosition* defs[]);
-
- void buildInternalRegisterUsesForNode(GenTree* tree, TreeNodeInfo* info, RefPosition* defs[], int total);
+ // Managing internal registers during the BuildNode process.
+ RefPosition* defineNewInternalTemp(GenTree* tree, RegisterType regType, regMaskTP candidates);
+ RefPosition* buildInternalIntRegisterDefForNode(GenTree* tree, regMaskTP internalCands = RBM_NONE);
+ RefPosition* buildInternalFloatRegisterDefForNode(GenTree* tree, regMaskTP internalCands = RBM_NONE);
+ void buildInternalRegisterUses();
void resolveLocalRef(BasicBlock* block, GenTree* treeNode, RefPosition* currentRefPosition);
@@ -1206,6 +1170,13 @@ private:
regMaskTP mask,
unsigned multiRegIdx = 0);
+ // This creates a RefTypeUse at currentLoc. It sets the treeNode to nullptr if it is not a
+ // lclVar interval.
+ RefPosition* newUseRefPosition(Interval* theInterval,
+ GenTree* theTreeNode,
+ regMaskTP mask,
+ unsigned multiRegIdx = 0);
+
RefPosition* newRefPosition(
regNumber reg, LsraLocation theLocation, RefType theRefType, GenTree* theTreeNode, regMaskTP mask);
@@ -1570,119 +1541,97 @@ private:
// Build methods
//-----------------------------------------------------------------------
- // The listNodePool is used to maintain the TreeNodeInfo for nodes that are "in flight"
+ // The listNodePool is used to maintain the RefInfo for nodes that are "in flight"
// i.e. whose consuming node has not yet been handled.
- LocationInfoListNodePool listNodePool;
+ RefInfoListNodePool listNodePool;
- // The defList is used for the transient TreeNodeInfo that is computed by
+ // The defList is used for the transient RefInfo that is computed by
// the Build methods, and used in building RefPositions.
// When Def RefPositions are built for a node, their NodeInfo is placed
// in the defList. As the consuming node is handled, it moves the NodeInfo
// into an ordered useList corresponding to the uses for that node.
- LocationInfoList defList;
+ RefInfoList defList;
- // The useList is constructed for each node by the Build methods.
- // It contains the TreeNodeInfo for its operands, in their order of use.
- LocationInfoList useList;
+ // As we build uses, we may want to preference the next definition (i.e. the register produced
+ // by the current node) to the same register as one of its uses. This is done by setting
+ // 'tgtPrefUse' to that RefPosition.
+ RefPosition* tgtPrefUse = nullptr;
- // During the build phase, this is the NodeInfo for the current node.
- TreeNodeInfo* currentNodeInfo;
+ // The following keep track of information about internal (temporary register) intervals
+ // during the building of a single node.
+ static const int MaxInternalCount = 4;
+ RefPosition* internalDefs[MaxInternalCount];
+ int internalCount = 0;
+ bool setInternalRegsDelayFree;
- // Remove the LocationInfoListNode for the given node from the defList, and put it into the useList.
- // The node must not be contained, and must have been processed by buildRefPositionsForNode().
- void appendLocationInfoToList(GenTree* node)
- {
- LocationInfoListNode* locationInfo = defList.removeListNode(node);
- useList.Append(locationInfo);
- }
- // Get the LocationInfoListNodes for the given node, and return it, but don't put it into the useList.
- // The node must not be contained, and must have been processed by buildRefPositionsForNode().
- LocationInfoListNode* getLocationInfo(GenTree* node)
+ // When a RefTypeUse is marked as 'delayRegFree', we also want to mark the RefTypeDef
+ // in the next Location as 'hasInterferingUses'. This is accomplished by setting
+ // 'pendingDelayFree' to true as such uses are created, and clearing it when a new
+ // node is handled in 'BuildNode'.
+ bool pendingDelayFree;
+
+ // This method clears the "build state" before starting to handle a new node.
+ void clearBuildState()
{
- LocationInfoListNode* locationInfo = defList.removeListNode(node);
- return locationInfo;
- }
- //------------------------------------------------------------------------
- // appendBinaryLocationInfoToList: Get the LocationInfoListNodes for the operands of the
- // given node, and put them into the useList.
- //
- // Arguments:
- // node - a GenTreeOp
- //
- // Return Value:
- // The number of actual register operands.
- //
- // Notes:
- // The operands must already have been processed by buildRefPositionsForNode, and their
- // LocationInfoListNodes placed in the defList.
- //
- int appendBinaryLocationInfoToList(GenTreeOp* node)
- {
- bool found;
- LocationInfoListNode* op1LocationInfo = nullptr;
- LocationInfoListNode* op2LocationInfo = nullptr;
- int srcCount = 0;
- GenTree* op1 = node->gtOp1;
- GenTree* op2 = node->gtGetOp2IfPresent();
- if (node->IsReverseOp() && op2 != nullptr)
- {
- srcCount += GetOperandInfo(op2);
- op2 = nullptr;
- }
- if (op1 != nullptr)
- {
- srcCount += GetOperandInfo(op1);
- }
- if (op2 != nullptr)
- {
- srcCount += GetOperandInfo(op2);
- }
- return srcCount;
+ tgtPrefUse = nullptr;
+ internalCount = 0;
+ setInternalRegsDelayFree = false;
+ pendingDelayFree = false;
}
- // This is the main entry point for computing the TreeNodeInfo for a node.
- void BuildNode(GenTree* stmt);
+ RefInfoListNode* getRefInfo(GenTree* node);
+ RefInfoListNode* getRefInfo(GenTree* node, int multiRegIdx);
- void BuildCheckByteable(GenTree* tree);
+ RefPosition* BuildUse(GenTree* operand, regMaskTP candidates = RBM_NONE, int multiRegIdx = 0);
+ void setDelayFree(RefPosition* use);
+ int BuildBinaryUses(GenTreeOp* node, regMaskTP candidates = RBM_NONE);
+#ifdef _TARGET_XARCH_
+ int BuildRMWUses(GenTreeOp* node, regMaskTP candidates = RBM_NONE);
+#endif // _TARGET_XARCH_
+ // This is the main entry point for building the RefPositions for a node.
+ // These methods return the number of sources.
+ int BuildNode(GenTree* stmt);
+
+ void BuildCheckByteable(GenTree* tree);
+ GenTree* getTgtPrefOperand(GenTreeOp* tree);
bool CheckAndSetDelayFree(GenTree* delayUseSrc);
+ bool supportsSpecialPutArg();
- void BuildSimple(GenTree* tree);
- int GetOperandInfo(GenTree* node);
- int GetOperandInfo(GenTree* node, LocationInfoListNode** pFirstInfo);
- int GetIndirInfo(GenTreeIndir* indirTree);
+ int BuildSimple(GenTree* tree);
+ int BuildOperandUses(GenTree* node, regMaskTP candidates = RBM_NONE);
+ int BuildDelayFreeUses(GenTree* node, regMaskTP candidates = RBM_NONE);
+ int BuildIndirUses(GenTreeIndir* indirTree, regMaskTP candidates = RBM_NONE);
void HandleFloatVarArgs(GenTreeCall* call, GenTree* argNode, bool* callHasFloatRegArgs);
+ RefPosition* BuildDef(GenTree* tree, regMaskTP dstCandidates = RBM_NONE, int multiRegIdx = 0);
+ void BuildDefs(GenTree* tree, int dstCount, regMaskTP dstCandidates = RBM_NONE);
+ void BuildDefsWithKills(GenTree* tree, int dstCount, regMaskTP dstCandidates, regMaskTP killMask);
- void BuildStoreLoc(GenTree* tree);
- void BuildReturn(GenTree* tree);
+ int BuildStoreLoc(GenTree* tree);
+ int BuildReturn(GenTree* tree);
#ifdef _TARGET_XARCH_
// This method, unlike the others, returns the number of sources, since it may be called when
// 'tree' is contained.
int BuildShiftRotate(GenTree* tree);
#endif // _TARGET_XARCH_
#ifdef _TARGET_ARM_
- void BuildShiftLongCarry(GenTree* tree);
-#endif
- void BuildPutArgReg(GenTreeUnOp* node);
- void BuildCall(GenTreeCall* call);
- void BuildCmp(GenTree* tree);
- void BuildStructArg(GenTree* structArg);
- void BuildBlockStore(GenTreeBlk* blkNode);
- void BuildModDiv(GenTree* tree);
- void BuildIntrinsic(GenTree* tree);
- void BuildStoreLoc(GenTreeLclVarCommon* tree);
- void BuildIndir(GenTreeIndir* indirTree);
- void BuildGCWriteBarrier(GenTree* tree);
- void BuildCast(GenTree* tree);
-
-#ifdef _TARGET_X86_
- bool ExcludeNonByteableRegisters(GenTree* tree);
+ int BuildShiftLongCarry(GenTree* tree);
#endif
+ int BuildPutArgReg(GenTreeUnOp* node);
+ int BuildCall(GenTreeCall* call);
+ int BuildCmp(GenTree* tree);
+ int BuildBlockStore(GenTreeBlk* blkNode);
+ int BuildModDiv(GenTree* tree);
+ int BuildIntrinsic(GenTree* tree);
+ int BuildStoreLoc(GenTreeLclVarCommon* tree);
+ int BuildIndir(GenTreeIndir* indirTree);
+ int BuildGCWriteBarrier(GenTree* tree);
+ int BuildCast(GenTree* tree);
#if defined(_TARGET_XARCH_)
// returns true if the tree can use the read-modify-write memory instruction form
bool isRMWRegOper(GenTree* tree);
- void BuildMul(GenTree* tree);
+ int BuildMul(GenTree* tree);
void SetContainsAVXFlags(bool isFloatingPointType = true, unsigned sizeOfSIMDVector = 0);
// Move the last use bit, if any, from 'fromTree' to 'toTree'; 'fromTree' must be contained.
void CheckAndMoveRMWLastUse(GenTree* fromTree, GenTree* toTree)
@@ -1709,18 +1658,18 @@ private:
#endif // defined(_TARGET_XARCH_)
#ifdef FEATURE_SIMD
- void BuildSIMD(GenTreeSIMD* tree);
+ int BuildSIMD(GenTreeSIMD* tree);
#endif // FEATURE_SIMD
#ifdef FEATURE_HW_INTRINSICS
- void BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree);
+ int BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree);
#endif // FEATURE_HW_INTRINSICS
- void BuildPutArgStk(GenTreePutArgStk* argNode);
+ int BuildPutArgStk(GenTreePutArgStk* argNode);
#ifdef _TARGET_ARM_
- void BuildPutArgSplit(GenTreePutArgSplit* tree);
+ int BuildPutArgSplit(GenTreePutArgSplit* tree);
#endif
- void BuildLclHeap(GenTree* tree);
+ int BuildLclHeap(GenTree* tree);
};
/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
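
Two conventions from this header are worth noting: every Build* method now returns the number of register sources it consumed, and the per-node build state (tgtPrefUse, internalCount, pendingDelayFree, setInternalRegsDelayFree) is reset as each node is handled. A sketch of the resulting shape of a typical BuildNode case (mock class name; the real implementations follow below):

    int LinearScanSketch::BuildNode(GenTree* tree)
    {
        clearBuildState();                            // tgtPrefUse, internalCount, ...
        int srcCount = BuildBinaryUses(tree->AsOp()); // uses for the operands
        BuildDef(tree);                               // def for the node's result
        return srcCount;                              // callers assert exact counts
    }
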
diff --git a/src/jit/lsraarm.cpp b/src/jit/lsraarm.cpp
index db6cff70d1..a9e4c78476 100644
--- a/src/jit/lsraarm.cpp
+++ b/src/jit/lsraarm.cpp
@@ -27,10 +27,9 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
#include "lower.h"
#include "lsra.h"
-void LinearScan::BuildLclHeap(GenTree* tree)
+int LinearScan::BuildLclHeap(GenTree* tree)
{
- TreeNodeInfo* info = currentNodeInfo;
- assert(info->dstCount == 1);
+ int srcCount = 0;
// Need a variable number of temp regs (see genLclHeap() in codegenarm.cpp):
// Here '-' means don't care.
@@ -50,16 +49,17 @@ void LinearScan::BuildLclHeap(GenTree* tree)
hasPspSym = false;
#endif
- GenTree* size = tree->gtOp.gtOp1;
+ GenTree* size = tree->gtGetOp1();
+ int internalIntCount;
if (size->IsCnsIntOrI())
{
assert(size->isContained());
- info->srcCount = 0;
+ srcCount = 0;
size_t sizeVal = size->gtIntCon.gtIconVal;
if (sizeVal == 0)
{
- info->internalIntCount = 0;
+ internalIntCount = 0;
}
else
{
@@ -69,45 +69,52 @@ void LinearScan::BuildLclHeap(GenTree* tree)
// For small allocations up to 4 store instructions
if (cntStackAlignedWidthItems <= 4)
{
- info->internalIntCount = 0;
+ internalIntCount = 0;
}
else if (!compiler->info.compInitMem)
{
// No need to initialize allocated stack space.
if (sizeVal < compiler->eeGetPageSize())
{
- info->internalIntCount = 0;
+ internalIntCount = 0;
}
else
{
- info->internalIntCount = 1;
+ internalIntCount = 1;
}
}
else
{
- info->internalIntCount = 1;
+ internalIntCount = 1;
}
if (hasPspSym)
{
- info->internalIntCount++;
+ internalIntCount++;
}
}
}
else
{
// target (regCnt) + tmp + [psp]
- info->srcCount = 1;
- info->internalIntCount = hasPspSym ? 2 : 1;
- appendLocationInfoToList(size);
+ srcCount = 1;
+ internalIntCount = hasPspSym ? 2 : 1;
+ BuildUse(size);
}
// If we are needed in temporary registers we should be sure that
// it's different from target (regCnt)
- if (info->internalIntCount > 0)
+ if (internalIntCount > 0)
{
- info->isInternalRegDelayFree = true;
+ setInternalRegsDelayFree = true;
+ for (int i = 0; i < internalIntCount; i++)
+ {
+ buildInternalIntRegisterDefForNode(tree);
+ }
}
+ buildInternalRegisterUses();
+ BuildDef(tree);
+ return srcCount;
}
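
A self-contained restatement of the temp-register decision above for a constant-size GT_LCLHEAP, assuming 8-byte stack alignment and no PSP slot:

    #include <cstddef>

    int TempIntRegsForConstLclHeap(size_t sizeVal, bool initMem, size_t pageSize)
    {
        if (sizeVal == 0)
            return 0;
        size_t alignedItems = (sizeVal + 7) / 8; // stack-aligned store count
        if (alignedItems <= 4)
            return 0;                            // small: up to 4 inline stores
        if (!initMem && (sizeVal < pageSize))
            return 0;                            // no init needed, under a page
        return 1;                                // otherwise one temp register
    }
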
//------------------------------------------------------------------------
@@ -116,81 +123,95 @@ void LinearScan::BuildLclHeap(GenTree* tree)
// Arguments:
// tree - The node of interest
//
+// Return Value:
+// The number of sources consumed by this node.
+//
// Note: these operands have uses that interfere with the def and need the special handling.
//
-void LinearScan::BuildShiftLongCarry(GenTree* tree)
+int LinearScan::BuildShiftLongCarry(GenTree* tree)
{
assert(tree->OperGet() == GT_LSH_HI || tree->OperGet() == GT_RSH_LO);
- GenTree* source = tree->gtOp.gtOp1;
+ int srcCount = 2;
+ GenTree* source = tree->gtOp.gtOp1;
assert((source->OperGet() == GT_LONG) && source->isContained());
- TreeNodeInfo* info = currentNodeInfo;
- info->srcCount = 2;
+ GenTree* sourceLo = source->gtGetOp1();
+ GenTree* sourceHi = source->gtGetOp2();
+ GenTree* shiftBy = tree->gtGetOp2();
+ assert(!sourceLo->isContained() && !sourceHi->isContained());
+ RefPosition* sourceLoUse = BuildUse(sourceLo);
+ RefPosition* sourceHiUse = BuildUse(sourceHi);
- LocationInfoListNode* sourceLoInfo = getLocationInfo(source->gtOp.gtOp1);
- LocationInfoListNode* sourceHiInfo = getLocationInfo(source->gtOp.gtOp2);
- if (tree->OperGet() == GT_LSH_HI)
+ if (!tree->isContained())
{
- sourceLoInfo->info.isDelayFree = true;
+ if (tree->OperGet() == GT_LSH_HI)
+ {
+ setDelayFree(sourceLoUse);
+ }
+ else
+ {
+ setDelayFree(sourceHiUse);
+ }
+ if (!shiftBy->isContained())
+ {
+ BuildUse(shiftBy);
+ srcCount++;
+ }
+ BuildDef(tree);
}
else
{
- sourceHiInfo->info.isDelayFree = true;
- }
- useList.Append(sourceLoInfo);
- useList.Append(sourceHiInfo);
- info->hasDelayFreeSrc = true;
-
- GenTree* shiftBy = tree->gtOp.gtOp2;
- if (!shiftBy->isContained())
- {
- appendLocationInfoToList(shiftBy);
- info->srcCount += 1;
+ if (!shiftBy->isContained())
+ {
+ BuildUse(shiftBy);
+ srcCount++;
+ }
}
+ return srcCount;
}
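
Why the low half's use is delay-free for GT_LSH_HI (and the high half's for GT_RSH_LO): the generated sequence writes the destination before it finishes reading that source, so the two must not share a register. The arithmetic being expanded, in plain C++:

    #include <cstdint>

    // High word of a 64-bit left shift by n, with n assumed in [1, 31].
    uint32_t ShiftLongHi(uint32_t lo, uint32_t hi, unsigned n)
    {
        // 'lo' is still read after 'hi << n' is formed; if the result shared
        // lo's register, the second read would see a clobbered value.
        return (hi << n) | (lo >> (32u - n));
    }
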
//------------------------------------------------------------------------
-// BuildNode: Set the register requirements for RA.
+// BuildNode: Build the RefPositions for a node
//
-// Notes:
-// Takes care of annotating the register requirements
-// for every TreeNodeInfo struct that maps to each tree node.
+// Arguments:
+// treeNode - the node of interest
//
+// Return Value:
+// The number of sources consumed by this node.
+//
+// Notes:
// Preconditions:
-// LSRA has been initialized and there is a TreeNodeInfo node
-// already allocated and initialized for every tree in the IR.
+// LSRA has been initialized.
//
// Postconditions:
-// Every TreeNodeInfo instance has the right annotations on register
-// requirements needed by LSRA to build the Interval Table (source,
-// destination and internal [temp] register counts).
+// RefPositions have been built for all the register defs and uses required
+// for this node.
//
-void LinearScan::BuildNode(GenTree* tree)
+int LinearScan::BuildNode(GenTree* tree)
{
- TreeNodeInfo* info = currentNodeInfo;
- unsigned kind = tree->OperKind();
- RegisterType registerType = TypeGet(tree);
+ assert(!tree->isContained());
+ int srcCount;
+ int dstCount = 0;
+ regMaskTP dstCandidates = RBM_NONE;
+ regMaskTP killMask = RBM_NONE;
+ bool isLocalDefUse = false;
- if (tree->isContained())
- {
- info->dstCount = 0;
- assert(info->srcCount == 0);
- return;
- }
+ // Reset the build-related members of LinearScan.
+ clearBuildState();
// Set the default dstCount. This may be modified below.
if (tree->IsValue())
{
- info->dstCount = 1;
+ dstCount = 1;
if (tree->IsUnusedValue())
{
- info->isLocalDefUse = true;
+ isLocalDefUse = true;
}
}
else
{
- info->dstCount = 0;
+ dstCount = 0;
}
switch (tree->OperGet())
@@ -198,23 +219,49 @@ void LinearScan::BuildNode(GenTree* tree)
GenTree* op1;
GenTree* op2;
+ case GT_LCL_VAR:
+ case GT_LCL_FLD:
+ {
+ // We handle tracked variables differently from non-tracked ones. If it is tracked,
+ // we will simply add a use of the tracked variable at its parent/consumer.
+ // Otherwise, for a use we need to actually add the appropriate references for loading
+ // or storing the variable.
+ //
+ // A tracked variable won't actually get used until the appropriate ancestor tree node
+ // is processed, unless this is marked "isLocalDefUse" because it is a stack-based argument
+ // to a call or an orphaned dead node.
+ //
+ LclVarDsc* const varDsc = &compiler->lvaTable[tree->AsLclVarCommon()->gtLclNum];
+ if (isCandidateVar(varDsc))
+ {
+ INDEBUG(dumpNodeInfo(tree, dstCandidates, 0, 1));
+ return 0;
+ }
+ srcCount = 0;
+ BuildDef(tree);
+ }
+ break;
+
case GT_STORE_LCL_FLD:
case GT_STORE_LCL_VAR:
- BuildStoreLoc(tree->AsLclVarCommon());
+ srcCount = BuildStoreLoc(tree->AsLclVarCommon());
break;
case GT_NOP:
// A GT_NOP is either a passthrough (if it is void, or if it has
// a child), but must be considered to produce a dummy value if it
// has a type but no child
- info->srcCount = 0;
- if (tree->TypeGet() != TYP_VOID && tree->gtOp.gtOp1 == nullptr)
+ srcCount = 0;
+ assert((tree->gtGetOp1() == nullptr) || tree->isContained());
+ if (tree->TypeGet() != TYP_VOID && tree->gtGetOp1() == nullptr)
{
- assert(info->dstCount == 1);
+ assert(dstCount == 1);
+ // Op1 is null on this path (see the assert above), so there is no use to build.
+ BuildDef(tree);
}
else
{
- assert(info->dstCount == 0);
+ assert(dstCount == 0);
}
break;
@@ -222,17 +269,18 @@ void LinearScan::BuildNode(GenTree* tree)
{
// TODO-ARM: Implement other type of intrinsics (round, sqrt and etc.)
// Both operand and its result must be of the same floating point type.
- op1 = tree->gtOp.gtOp1;
+ op1 = tree->gtGetOp1();
assert(varTypeIsFloating(op1));
assert(op1->TypeGet() == tree->TypeGet());
- appendLocationInfoToList(op1);
+ BuildUse(op1);
+ srcCount = 1;
switch (tree->gtIntrinsic.gtIntrinsicId)
{
case CORINFO_INTRINSIC_Abs:
case CORINFO_INTRINSIC_Sqrt:
- info->srcCount = 1;
- assert(info->dstCount == 1);
+ assert(dstCount == 1);
+ BuildDef(tree);
break;
default:
unreached();
@@ -243,7 +291,7 @@ void LinearScan::BuildNode(GenTree* tree)
case GT_CAST:
{
- assert(info->dstCount == 1);
+ assert(dstCount == 1);
// Non-overflow casts to/from float/double are done using SSE2 instructions
// and that allow the source operand to be either a reg or memop. Given the
@@ -252,7 +300,6 @@ void LinearScan::BuildNode(GenTree* tree)
var_types castToType = tree->CastToType();
GenTree* castOp = tree->gtCast.CastOp();
var_types castOpType = castOp->TypeGet();
- info->srcCount = GetOperandInfo(castOp);
if (tree->gtFlags & GTF_UNSIGNED)
{
castOpType = genUnsignedType(castOpType);
@@ -261,15 +308,13 @@ void LinearScan::BuildNode(GenTree* tree)
if (varTypeIsLong(castOpType))
{
assert((castOp->OperGet() == GT_LONG) && castOp->isContained());
- info->srcCount = 2;
}
// FloatToIntCast needs a temporary register
if (varTypeIsFloating(castOpType) && varTypeIsIntOrI(tree))
{
- info->setInternalCandidates(this, RBM_ALLFLOAT);
- info->internalFloatCount = 1;
- info->isInternalRegDelayFree = true;
+ buildInternalFloatRegisterDefForNode(tree, RBM_ALLFLOAT);
+ setInternalRegsDelayFree = true;
}
Lowering::CastInfo castInfo;
@@ -293,7 +338,7 @@ void LinearScan::BuildNode(GenTree* tree)
bool canStoreTypeMask = emitter::emitIns_valid_imm_for_alu(castInfo.typeMask);
if (!canStoreTypeMask)
{
- info->internalIntCount = 1;
+ buildInternalIntRegisterDefForNode(tree);
}
}
else
@@ -306,45 +351,49 @@ void LinearScan::BuildNode(GenTree* tree)
if (!canStoreMaxValue || !canStoreMinValue)
{
- info->internalIntCount = 1;
+ buildInternalIntRegisterDefForNode(tree);
}
}
}
}
+ srcCount = BuildOperandUses(castOp);
+ buildInternalRegisterUses();
+ BuildDef(tree);
}
break;
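
Every converted case settles into the same four-step protocol, and the ordering is load-bearing: RefPositions are sequenced by location, so internal defs come first, then the source uses, then buildInternalRegisterUses() to close the internal lifetimes, and only then the node's own def. A stand-alone sketch of that sequence (types and helper names invented for illustration):

    #include <cstdio>
    #include <vector>

    enum RefKindSketch { InternalDef, Use, InternalUse, Def };

    static std::vector<RefKindSketch> refs;

    static void emitRef(RefKindSketch kind) { refs.push_back(kind); }

    // The shape GT_CAST (and nearly every other case) now follows.
    static int buildCastLike()
    {
        emitRef(InternalDef); // buildInternalFloatRegisterDefForNode(tree, RBM_ALLFLOAT)
        emitRef(Use);         // BuildOperandUses(castOp)
        emitRef(InternalUse); // buildInternalRegisterUses()
        emitRef(Def);         // BuildDef(tree)
        return 1;             // srcCount, as BuildNode now reports it
    }

    int main()
    {
        int srcCount = buildCastLike();
        printf("srcCount=%d, refpositions=%zu\n", srcCount, refs.size());
        return 0;
    }
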
case GT_JTRUE:
- info->srcCount = 0;
- assert(info->dstCount == 0);
+ srcCount = 0;
+ assert(dstCount == 0);
break;
case GT_JMP:
- info->srcCount = 0;
- assert(info->dstCount == 0);
+ srcCount = 0;
+ assert(dstCount == 0);
break;
case GT_SWITCH:
// This should never occur since switch nodes must not be visible at this
// point in the JIT.
- info->srcCount = 0;
+ srcCount = 0;
noway_assert(!"Switch must be lowered at this point");
break;
case GT_JMPTABLE:
- info->srcCount = 0;
- assert(info->dstCount == 1);
+ srcCount = 0;
+ assert(dstCount == 1);
+ BuildDef(tree);
break;
case GT_SWITCH_TABLE:
- assert(info->dstCount == 0);
- info->srcCount = appendBinaryLocationInfoToList(tree->AsOp());
- assert(info->srcCount == 2);
+ assert(dstCount == 0);
+ srcCount = BuildBinaryUses(tree->AsOp());
+ assert(srcCount == 2);
break;
case GT_ASG:
noway_assert(!"We should never hit any assignment operator in lowering");
- info->srcCount = 0;
+ srcCount = 0;
break;
case GT_ADD_LO:
@@ -360,11 +409,12 @@ void LinearScan::BuildNode(GenTree* tree)
// No implicit conversions at this stage as the expectation is that
// everything is made explicit by adding casts.
- assert(tree->gtOp.gtOp1->TypeGet() == tree->gtOp.gtOp2->TypeGet());
+ assert(tree->gtGetOp1()->TypeGet() == tree->gtGetOp2()->TypeGet());
- assert(info->dstCount == 1);
- info->srcCount = appendBinaryLocationInfoToList(tree->AsOp());
- assert(info->srcCount == 2);
+ assert(dstCount == 1);
+ srcCount = BuildBinaryUses(tree->AsOp());
+ assert(srcCount == 2);
+ BuildDef(tree);
break;
}
@@ -377,32 +427,35 @@ void LinearScan::BuildNode(GenTree* tree)
case GT_RSH:
case GT_RSZ:
case GT_ROR:
- assert(info->dstCount == 1);
- info->srcCount = appendBinaryLocationInfoToList(tree->AsOp());
- assert(info->srcCount == (tree->gtOp.gtOp2->isContained() ? 1 : 2));
+ assert(dstCount == 1);
+ srcCount = BuildBinaryUses(tree->AsOp());
+ assert(srcCount == (tree->gtGetOp2()->isContained() ? 1 : 2));
+ BuildDef(tree);
break;
case GT_LSH_HI:
case GT_RSH_LO:
- assert(info->dstCount == 1);
- BuildShiftLongCarry(tree);
- assert(info->srcCount == (tree->gtOp.gtOp2->isContained() ? 2 : 3));
+ assert(dstCount == 1);
+ srcCount = BuildShiftLongCarry(tree);
+ assert(srcCount == (tree->gtGetOp2()->isContained() ? 2 : 3));
break;
case GT_RETURNTRAP:
// this just turns into a compare of its child with an int
// + a conditional call
- info->srcCount = 1;
- assert(info->dstCount == 0);
- appendLocationInfoToList(tree->gtOp.gtOp1);
+ srcCount = 1;
+ assert(dstCount == 0);
+ BuildUse(tree->gtGetOp1());
+ killMask = compiler->compHelperCallKillSet(CORINFO_HELP_STOP_FOR_GC);
+ BuildDefsWithKills(tree, 0, RBM_NONE, killMask);
break;
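
BuildDefsWithKills makes the kill handling explicit: a node that may call the stop-for-GC helper trashes that helper's volatile set at its own location. The consequence for allocation, reduced to a sketch (the helper is invented; the mask type name is the real one):

    typedef unsigned long long regMaskTP; // stand-in for the JIT's register mask

    // Any value live across the node must sit outside the killed registers.
    regMaskTP allowedAcrossKill(regMaskTP candidates, regMaskTP killMask)
    {
        return candidates & ~killMask;
    }
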
case GT_MUL:
if (tree->gtOverflow())
{
// Need a register different from target reg to check for overflow.
- info->internalIntCount = 1;
- info->isInternalRegDelayFree = true;
+ setInternalRegsDelayFree = true;
+ buildInternalIntRegisterDefForNode(tree);
}
__fallthrough;
@@ -410,22 +463,26 @@ void LinearScan::BuildNode(GenTree* tree)
case GT_MULHI:
case GT_UDIV:
{
- assert(info->dstCount == 1);
- info->srcCount = appendBinaryLocationInfoToList(tree->AsOp());
- assert(info->srcCount == 2);
+ assert(dstCount == 1);
+ srcCount = BuildBinaryUses(tree->AsOp());
+ assert(srcCount == 2);
+ buildInternalRegisterUses();
+ BuildDef(tree);
}
break;
case GT_MUL_LONG:
- info->dstCount = 2;
- info->srcCount = appendBinaryLocationInfoToList(tree->AsOp());
- assert(info->srcCount == 2);
+ dstCount = 2;
+ srcCount = BuildBinaryUses(tree->AsOp());
+ assert(srcCount == 2);
+ BuildDefs(tree, 2);
break;
case GT_FIELD_LIST:
// These should always be contained. We don't correctly allocate or
// generate code for a non-contained GT_FIELD_LIST.
noway_assert(!"Non-contained GT_FIELD_LIST");
+ srcCount = 0;
break;
case GT_LIST:
@@ -433,8 +490,8 @@ void LinearScan::BuildNode(GenTree* tree)
case GT_NO_OP:
case GT_START_NONGC:
case GT_PROF_HOOK:
- info->srcCount = 0;
- assert(info->dstCount == 0);
+ srcCount = 0;
+ assert(dstCount == 0);
break;
case GT_LONG:
@@ -442,22 +499,20 @@ void LinearScan::BuildNode(GenTree* tree)
// An unused GT_LONG doesn't produce any registers.
tree->gtType = TYP_VOID;
tree->ClearUnusedValue();
- info->isLocalDefUse = false;
+ isLocalDefUse = false;
// An unused GT_LONG node needs to consume its sources, but need not produce a register.
- info->srcCount = 2;
- info->dstCount = 0;
- appendLocationInfoToList(tree->gtGetOp1());
- appendLocationInfoToList(tree->gtGetOp2());
+ srcCount = 2;
+ dstCount = 0;
+ BuildUse(tree->gtGetOp1());
+ BuildUse(tree->gtGetOp2());
break;
case GT_CNS_DBL:
- info->srcCount = 0;
- assert(info->dstCount == 1);
if (tree->TypeGet() == TYP_FLOAT)
{
// An int register for float constant
- info->internalIntCount = 1;
+ buildInternalIntRegisterDefForNode(tree);
}
else
{
@@ -465,29 +520,36 @@ void LinearScan::BuildNode(GenTree* tree)
assert(tree->TypeGet() == TYP_DOUBLE);
// Two int registers for double constant
- info->internalIntCount = 2;
+ buildInternalIntRegisterDefForNode(tree);
+ buildInternalIntRegisterDefForNode(tree);
}
- break;
+ __fallthrough;
+
+ case GT_CNS_INT:
+ {
+ srcCount = 0;
+ assert(dstCount == 1);
+ buildInternalRegisterUses();
+ RefPosition* def = BuildDef(tree);
+ def->getInterval()->isConstant = true;
+ }
+ break;
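
Marking the def's interval with isConstant is what makes folding the constant cases together worthwhile: a constant interval can be rematerialized, so the allocator may re-emit the constant at the next use rather than spill and reload it. The decision, as an illustrative stub (the flag mirrors the diff; the surrounding logic is invented):

    struct IntervalSketch
    {
        bool isConstant; // mirrors def->getInterval()->isConstant above
    };

    // A constant that is live across a call need not be stored to the stack;
    // it can simply be re-materialized afterwards.
    bool needsSpillStore(const IntervalSketch& interval, bool liveAcrossCall)
    {
        return liveAcrossCall && !interval.isConstant;
    }
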
case GT_RETURN:
- BuildReturn(tree);
+ srcCount = BuildReturn(tree);
break;
case GT_RETFILT:
- assert(info->dstCount == 0);
+ assert(dstCount == 0);
if (tree->TypeGet() == TYP_VOID)
{
- info->srcCount = 0;
+ srcCount = 0;
}
else
{
assert(tree->TypeGet() == TYP_INT);
-
- info->srcCount = 1;
- info->setSrcCandidates(this, RBM_INTRET);
- LocationInfoListNode* locationInfo = getLocationInfo(tree->gtOp.gtOp1);
- locationInfo->info.setSrcCandidates(this, RBM_INTRET);
- useList.Append(locationInfo);
+ srcCount = 1;
+ BuildUse(tree->gtGetOp1(), RBM_INTRET);
}
break;
@@ -497,34 +559,34 @@ void LinearScan::BuildNode(GenTree* tree)
#endif // FEATURE_SIMD
{
// Consumes arrLen & index - has no result
- info->srcCount = 2;
- assert(info->dstCount == 0);
- appendLocationInfoToList(tree->AsBoundsChk()->gtIndex);
- appendLocationInfoToList(tree->AsBoundsChk()->gtArrLen);
+ srcCount = 2;
+ assert(dstCount == 0);
+ BuildUse(tree->AsBoundsChk()->gtIndex);
+ BuildUse(tree->AsBoundsChk()->gtArrLen);
}
break;
case GT_ARR_ELEM:
// These must have been lowered to GT_ARR_INDEX
noway_assert(!"We should never see a GT_ARR_ELEM in lowering");
- info->srcCount = 0;
- assert(info->dstCount == 0);
+ srcCount = 0;
+ assert(dstCount == 0);
break;
case GT_ARR_INDEX:
{
- info->srcCount = 2;
- assert(info->dstCount == 1);
- info->internalIntCount = 1;
- info->isInternalRegDelayFree = true;
+ srcCount = 2;
+ assert(dstCount == 1);
+ buildInternalIntRegisterDefForNode(tree);
+ setInternalRegsDelayFree = true;
// For GT_ARR_INDEX, the lifetime of the arrObj must be extended because it is actually used multiple
// times while the result is being computed.
- LocationInfoListNode* arrObjInfo = getLocationInfo(tree->AsArrIndex()->ArrObj());
- arrObjInfo->info.isDelayFree = true;
- useList.Append(arrObjInfo);
- useList.Append(getLocationInfo(tree->AsArrIndex()->IndexExpr()));
- info->hasDelayFreeSrc = true;
+ RefPosition* arrObjUse = BuildUse(tree->AsArrIndex()->ArrObj());
+ setDelayFree(arrObjUse);
+ BuildUse(tree->AsArrIndex()->IndexExpr());
+ buildInternalRegisterUses();
+ BuildDef(tree);
}
break;
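
setDelayFree collapses the old isDelayFree/hasDelayFreeSrc pair into one call per use. The constraint it records: the use stays live through the def (here the arrObj is read again after the result register is written), so the use and the def must land in different registers. A stub of the rule (names invented):

    struct RefPositionSketch
    {
        int  assignedReg;  // register chosen by the allocator
        bool delayRegFree; // set by setDelayFree above
    };

    // A delay-free use may not share a register with the node's def.
    bool assignmentIsValid(const RefPositionSketch& use, const RefPositionSketch& def)
    {
        return !use.delayRegFree || (use.assignedReg != def.assignedReg);
    }
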
@@ -532,22 +594,24 @@ void LinearScan::BuildNode(GenTree* tree)
// This consumes the offset, if any, the arrObj and the effective index,
// and produces the flattened offset for this dimension.
- assert(info->dstCount == 1);
+ assert(dstCount == 1);
if (tree->gtArrOffs.gtOffset->isContained())
{
- info->srcCount = 2;
+ srcCount = 2;
}
else
{
// Here we simply need an internal register, which must be different
// from any of the operand's registers, but may be the same as targetReg.
- info->internalIntCount = 1;
- info->srcCount = 3;
- appendLocationInfoToList(tree->AsArrOffs()->gtOffset);
+ buildInternalIntRegisterDefForNode(tree);
+ srcCount = 3;
+ BuildUse(tree->AsArrOffs()->gtOffset);
}
- appendLocationInfoToList(tree->AsArrOffs()->gtIndex);
- appendLocationInfoToList(tree->AsArrOffs()->gtArrObj);
+ BuildUse(tree->AsArrOffs()->gtIndex);
+ BuildUse(tree->AsArrOffs()->gtArrObj);
+ buildInternalRegisterUses();
+ BuildDef(tree);
break;
case GT_LEA:
@@ -556,17 +620,17 @@ void LinearScan::BuildNode(GenTree* tree)
int offset = lea->Offset();
// This LEA is instantiating an address, so we set up the srcCount and dstCount here.
- info->srcCount = 0;
- assert(info->dstCount == 1);
+ srcCount = 0;
+ assert(dstCount == 1);
if (lea->HasBase())
{
- info->srcCount++;
- appendLocationInfoToList(tree->AsAddrMode()->Base());
+ srcCount++;
+ BuildUse(tree->AsAddrMode()->Base());
}
if (lea->HasIndex())
{
- info->srcCount++;
- appendLocationInfoToList(tree->AsAddrMode()->Index());
+ srcCount++;
+ BuildUse(tree->AsAddrMode()->Index());
}
// An internal register may be needed too; the logic here should be in sync with the
@@ -576,7 +640,7 @@ void LinearScan::BuildNode(GenTree* tree)
if (offset != 0)
{
// We need a register when we have all three: base reg, index reg and a non-zero offset.
- info->internalIntCount = 1;
+ buildInternalIntRegisterDefForNode(tree);
}
}
else if (lea->HasBase())
@@ -584,22 +648,26 @@ void LinearScan::BuildNode(GenTree* tree)
if (!emitter::emitIns_valid_imm_for_add(offset, INS_FLAGS_DONT_CARE))
{
// We need a register when we have an offset that is too large to encode in the add instruction.
- info->internalIntCount = 1;
+ buildInternalIntRegisterDefForNode(tree);
}
}
+ buildInternalRegisterUses();
+ BuildDef(tree);
}
break;
case GT_NEG:
- info->srcCount = 1;
- assert(info->dstCount == 1);
- appendLocationInfoToList(tree->gtOp.gtOp1);
+ srcCount = 1;
+ assert(dstCount == 1);
+ BuildUse(tree->gtGetOp1());
+ BuildDef(tree);
break;
case GT_NOT:
- info->srcCount = 1;
- assert(info->dstCount == 1);
- appendLocationInfoToList(tree->gtOp.gtOp1);
+ srcCount = 1;
+ assert(dstCount == 1);
+ BuildUse(tree->gtGetOp1());
+ BuildDef(tree);
break;
case GT_EQ:
@@ -609,134 +677,144 @@ void LinearScan::BuildNode(GenTree* tree)
case GT_GE:
case GT_GT:
case GT_CMP:
- BuildCmp(tree);
+ srcCount = BuildCmp(tree);
break;
case GT_CKFINITE:
- info->srcCount = 1;
- assert(info->dstCount == 1);
- info->internalIntCount = 1;
- appendLocationInfoToList(tree->gtOp.gtOp1);
+ srcCount = 1;
+ assert(dstCount == 1);
+ buildInternalIntRegisterDefForNode(tree);
+ BuildUse(tree->gtGetOp1());
+ buildInternalRegisterUses();
+ BuildDef(tree);
break;
case GT_CALL:
- BuildCall(tree->AsCall());
+ srcCount = BuildCall(tree->AsCall());
+ if (tree->AsCall()->HasMultiRegRetVal())
+ {
+ dstCount = tree->AsCall()->GetReturnTypeDesc()->GetReturnRegCount();
+ }
break;
case GT_ADDR:
{
// For a GT_ADDR, the child node should not be evaluated into a register
- GenTree* child = tree->gtOp.gtOp1;
+ GenTree* child = tree->gtGetOp1();
assert(!isCandidateLocalRef(child));
assert(child->isContained());
- assert(info->dstCount == 1);
- info->srcCount = 0;
+ assert(dstCount == 1);
+ srcCount = 0;
+ BuildDef(tree);
}
break;
case GT_STORE_BLK:
case GT_STORE_OBJ:
case GT_STORE_DYN_BLK:
- BuildBlockStore(tree->AsBlk());
+ srcCount = BuildBlockStore(tree->AsBlk());
break;
case GT_INIT_VAL:
// Always a passthrough of its child's value.
assert(!"INIT_VAL should always be contained");
+ srcCount = 0;
break;
case GT_LCLHEAP:
- BuildLclHeap(tree);
+ srcCount = BuildLclHeap(tree);
break;
case GT_STOREIND:
{
- assert(info->dstCount == 0);
- GenTree* src = tree->gtOp.gtOp2;
+ assert(dstCount == 0);
+ GenTree* src = tree->gtGetOp2();
if (compiler->codeGen->gcInfo.gcIsWriteBarrierAsgNode(tree))
{
- info->srcCount = 2;
- BuildGCWriteBarrier(tree);
+ srcCount = BuildGCWriteBarrier(tree);
break;
}
- BuildIndir(tree->AsIndir());
+ srcCount = BuildIndir(tree->AsIndir());
// No contained source on ARM.
assert(!src->isContained());
- info->srcCount++;
- appendLocationInfoToList(src);
+ srcCount++;
+ BuildUse(src);
}
break;
case GT_NULLCHECK:
 // It requires an internal register on ARM, as it is implemented as a load
- assert(info->dstCount == 0);
+ assert(dstCount == 0);
assert(!tree->gtGetOp1()->isContained());
- info->srcCount = 1;
- info->internalIntCount = 1;
- appendLocationInfoToList(tree->gtOp.gtOp1);
+ srcCount = 1;
+ buildInternalIntRegisterDefForNode(tree);
+ BuildUse(tree->gtGetOp1());
+ buildInternalRegisterUses();
break;
case GT_IND:
- assert(info->dstCount == 1);
- info->srcCount = 1;
- BuildIndir(tree->AsIndir());
+ assert(dstCount == 1);
+ srcCount = BuildIndir(tree->AsIndir());
break;
case GT_CATCH_ARG:
- info->srcCount = 0;
- assert(info->dstCount == 1);
- info->setDstCandidates(this, RBM_EXCEPTION_OBJECT);
+ srcCount = 0;
+ assert(dstCount == 1);
+ BuildDef(tree, RBM_EXCEPTION_OBJECT);
break;
case GT_CLS_VAR:
- info->srcCount = 0;
+ srcCount = 0;
// GT_CLS_VAR, by the time we reach the backend, must always
// be a pure use.
// It will produce a result of the type of the
// node, and use an internal register for the address.
- assert(info->dstCount == 1);
+ assert(dstCount == 1);
assert((tree->gtFlags & (GTF_VAR_DEF | GTF_VAR_USEASG)) == 0);
- info->internalIntCount = 1;
+ buildInternalIntRegisterDefForNode(tree);
+ buildInternalRegisterUses();
+ BuildDef(tree);
break;
case GT_COPY:
- info->srcCount = 1;
+ srcCount = 1;
#ifdef _TARGET_ARM_
// This case currently only occurs for double types that are passed as TYP_LONG;
// actual long types would have been decomposed by now.
if (tree->TypeGet() == TYP_LONG)
{
- info->dstCount = 2;
+ dstCount = 2;
}
else
#endif
{
- assert(info->dstCount == 1);
+ assert(dstCount == 1);
}
- appendLocationInfoToList(tree->gtOp.gtOp1);
+ BuildUse(tree->gtGetOp1());
+ BuildDefs(tree, dstCount);
break;
case GT_PUTARG_SPLIT:
- BuildPutArgSplit(tree->AsPutArgSplit());
+ srcCount = BuildPutArgSplit(tree->AsPutArgSplit());
+ dstCount = tree->AsPutArgSplit()->gtNumRegs;
break;
case GT_PUTARG_STK:
- BuildPutArgStk(tree->AsPutArgStk());
+ srcCount = BuildPutArgStk(tree->AsPutArgStk());
break;
case GT_PUTARG_REG:
- BuildPutArgReg(tree->AsUnOp());
+ srcCount = BuildPutArgReg(tree->AsUnOp());
+ dstCount = tree->AsMultiRegOp()->GetRegCount();
break;
case GT_BITCAST:
{
- info->srcCount = 1;
- assert(info->dstCount == 1);
- LocationInfoListNode* locationInfo = getLocationInfo(tree->gtOp.gtOp1);
- useList.Append(locationInfo);
+ srcCount = 1;
+ assert(dstCount == 1);
regNumber argReg = tree->gtRegNum;
regMaskTP argMask = genRegMask(argReg);
@@ -744,38 +822,38 @@ void LinearScan::BuildNode(GenTree* tree)
// The actual `long` types must have been transformed as a field list with two fields.
if (tree->TypeGet() == TYP_LONG)
{
- info->dstCount++;
assert(genRegArgNext(argReg) == REG_NEXT(argReg));
argMask |= genRegMask(REG_NEXT(argReg));
+ dstCount = 2;
}
- info->setDstCandidates(this, argMask);
- info->setSrcCandidates(this, argMask);
+ BuildUse(tree->gtGetOp1());
+ BuildDefs(tree, dstCount, argMask);
}
break;
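
On arm a TYP_LONG bitcast is really a double landing in a register pair, which is why the candidate mask grows to cover REG_NEXT(argReg) and two defs are built. The mask construction, sketched with an invented helper (pair adjacency is the same assumption the assert above checks):

    typedef unsigned long long regMaskTP;

    // Candidates for a long bitcast span the pair {argReg, argReg + 1}.
    regMaskTP regPairMaskSketch(unsigned argRegNum)
    {
        return (1ULL << argRegNum) | (1ULL << (argRegNum + 1));
    }
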
- case GT_LCL_FLD:
case GT_LCL_FLD_ADDR:
- case GT_LCL_VAR:
case GT_LCL_VAR_ADDR:
case GT_PHYSREG:
case GT_CLS_VAR_ADDR:
case GT_IL_OFFSET:
- case GT_CNS_INT:
case GT_LABEL:
case GT_PINVOKE_PROLOG:
case GT_JCC:
case GT_SETCC:
case GT_MEMORYBARRIER:
case GT_OBJ:
- BuildSimple(tree);
+ srcCount = BuildSimple(tree);
break;
case GT_INDEX_ADDR:
- info->dstCount = 1;
- info->internalIntCount = 1;
- info->srcCount = appendBinaryLocationInfoToList(tree->AsOp());
- assert(info->srcCount == 2);
+ dstCount = 1;
+ buildInternalIntRegisterDefForNode(tree);
+ srcCount = BuildBinaryUses(tree->AsOp());
+ assert(srcCount == 2);
+ buildInternalRegisterUses();
+ BuildDef(tree);
break;
default:
@@ -788,15 +866,13 @@ void LinearScan::BuildNode(GenTree* tree)
unreached();
} // end switch (tree->OperGet())
- if (tree->IsUnusedValue() && (info->dstCount != 0))
- {
- info->isLocalDefUse = true;
- }
- // We need to be sure that we've set info->srcCount and info->dstCount appropriately
- assert((info->dstCount < 2) || tree->IsMultiRegNode());
- assert(info->isLocalDefUse == (tree->IsValue() && tree->IsUnusedValue()));
- assert(!tree->IsUnusedValue() || (info->dstCount != 0));
- assert(info->dstCount == tree->GetRegisterDstCount());
+ // We need to be sure that we've set srcCount and dstCount appropriately
+ assert((dstCount < 2) || tree->IsMultiRegNode());
+ assert(isLocalDefUse == (tree->IsValue() && tree->IsUnusedValue()));
+ assert(!tree->IsUnusedValue() || (dstCount != 0));
+ assert(dstCount == tree->GetRegisterDstCount());
+ INDEBUG(dumpNodeInfo(tree, dstCandidates, srcCount, dstCount));
+ return srcCount;
}
#endif // _TARGET_ARM_
diff --git a/src/jit/lsraarm64.cpp b/src/jit/lsraarm64.cpp
index 97547d589e..2025b3f5f0 100644
--- a/src/jit/lsraarm64.cpp
+++ b/src/jit/lsraarm64.cpp
@@ -27,46 +27,49 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
#include "lower.h"
//------------------------------------------------------------------------
-// BuildNode: Set the register requirements for RA.
+// BuildNode: Build the RefPositions for a node
//
-// Notes:
-// Takes care of annotating the register requirements
-// for every TreeNodeInfo struct that maps to each tree node.
+// Arguments:
+// tree - the node of interest
+//
+// Return Value:
+// The number of sources consumed by this node.
//
+// Notes:
// Preconditions:
-// LSRA has been initialized and there is a TreeNodeInfo node
-// already allocated and initialized for every tree in the IR.
+// LSRA has been initialized.
//
// Postconditions:
-// Every TreeNodeInfo instance has the right annotations on register
-// requirements needed by LSRA to build the Interval Table (source,
-// destination and internal [temp] register counts).
+// RefPositions have been built for all the register defs and uses required
+// for this node.
//
-void LinearScan::BuildNode(GenTree* tree)
+int LinearScan::BuildNode(GenTree* tree)
{
- TreeNodeInfo* info = currentNodeInfo;
- unsigned kind = tree->OperKind();
- RegisterType registerType = TypeGet(tree);
+ assert(!tree->isContained());
+ Interval* prefSrcInterval = nullptr;
+ int srcCount;
+ int dstCount = 0;
+ regMaskTP dstCandidates = RBM_NONE;
+ regMaskTP killMask = RBM_NONE;
+ bool isLocalDefUse = false;
- if (tree->isContained())
- {
- info->dstCount = 0;
- assert(info->srcCount == 0);
- return;
- }
+ // Reset the build-related members of LinearScan.
+ clearBuildState();
+
+ RegisterType registerType = TypeGet(tree);
// Set the default dstCount. This may be modified below.
if (tree->IsValue())
{
- info->dstCount = 1;
+ dstCount = 1;
if (tree->IsUnusedValue())
{
- info->isLocalDefUse = true;
+ isLocalDefUse = true;
}
}
else
{
- info->dstCount = 0;
+ dstCount = 0;
}
switch (tree->OperGet())
@@ -75,20 +78,55 @@ void LinearScan::BuildNode(GenTree* tree)
GenTree* op2;
default:
- BuildSimple(tree);
+ srcCount = BuildSimple(tree);
break;
+ case GT_LCL_VAR:
+ case GT_LCL_FLD:
+ {
+ // We handle tracked variables differently from non-tracked ones. If it is tracked,
+ // we will simply add a use of the tracked variable at its parent/consumer.
+ // Otherwise, for a use we need to actually add the appropriate references for loading
+ // or storing the variable.
+ //
+ // A tracked variable won't actually get used until the appropriate ancestor tree node
+ // is processed, unless this is marked "isLocalDefUse" because it is a stack-based argument
+ // to a call or an orphaned dead node.
+ //
+ LclVarDsc* const varDsc = &compiler->lvaTable[tree->AsLclVarCommon()->gtLclNum];
+ if (isCandidateVar(varDsc))
+ {
+ INDEBUG(dumpNodeInfo(tree, dstCandidates, 0, 1));
+ return 0;
+ }
+ srcCount = 0;
+#ifdef FEATURE_SIMD
+ // Need an additional register to read upper 4 bytes of Vector3.
+ if (tree->TypeGet() == TYP_SIMD12)
+ {
+ // We need an internal register different from targetReg in which 'tree' produces its result
+ // because both targetReg and internal reg will be in use at the same time.
+ buildInternalFloatRegisterDefForNode(tree, allSIMDRegs());
+ setInternalRegsDelayFree = true;
+ buildInternalRegisterUses();
+ }
+#endif
+ BuildDef(tree);
+ }
+ break;
+
case GT_STORE_LCL_FLD:
case GT_STORE_LCL_VAR:
- info->srcCount = 1;
- assert(info->dstCount == 0);
- BuildStoreLoc(tree->AsLclVarCommon());
+ srcCount = 1;
+ assert(dstCount == 0);
+ srcCount = BuildStoreLoc(tree->AsLclVarCommon());
break;
case GT_FIELD_LIST:
// These should always be contained. We don't correctly allocate or
// generate code for a non-contained GT_FIELD_LIST.
noway_assert(!"Non-contained GT_FIELD_LIST");
+ srcCount = 0;
break;
case GT_LIST:
@@ -96,108 +134,112 @@ void LinearScan::BuildNode(GenTree* tree)
case GT_NO_OP:
case GT_START_NONGC:
case GT_PROF_HOOK:
- info->srcCount = 0;
- assert(info->dstCount == 0);
+ srcCount = 0;
+ assert(dstCount == 0);
break;
case GT_CNS_DBL:
- info->srcCount = 0;
- assert(info->dstCount == 1);
- {
- GenTreeDblCon* dblConst = tree->AsDblCon();
- double constValue = dblConst->gtDblCon.gtDconVal;
+ {
+ GenTreeDblCon* dblConst = tree->AsDblCon();
+ double constValue = dblConst->gtDblCon.gtDconVal;
- if (emitter::emitIns_valid_imm_for_fmov(constValue))
- {
- // Directly encode constant to instructions.
- }
- else
- {
- // Reserve int to load constant from memory (IF_LARGELDC)
- info->internalIntCount = 1;
- }
+ if (emitter::emitIns_valid_imm_for_fmov(constValue))
+ {
+ // Directly encode constant to instructions.
}
- break;
+ else
+ {
+ // Reserve int to load constant from memory (IF_LARGELDC)
+ buildInternalIntRegisterDefForNode(tree);
+ buildInternalRegisterUses();
+ }
+ }
+ __fallthrough;
+
+ case GT_CNS_INT:
+ {
+ srcCount = 0;
+ assert(dstCount == 1);
+ RefPosition* def = BuildDef(tree);
+ def->getInterval()->isConstant = true;
+ }
+ break;
case GT_BOX:
case GT_COMMA:
case GT_QMARK:
case GT_COLON:
- info->srcCount = 0;
- assert(info->dstCount == 0);
+ srcCount = 0;
+ assert(dstCount == 0);
unreached();
break;
case GT_RETURN:
- BuildReturn(tree);
+ srcCount = BuildReturn(tree);
break;
case GT_RETFILT:
+ assert(dstCount == 0);
if (tree->TypeGet() == TYP_VOID)
{
- info->srcCount = 0;
- assert(info->dstCount == 0);
+ srcCount = 0;
}
else
{
assert(tree->TypeGet() == TYP_INT);
-
- info->srcCount = 1;
- assert(info->dstCount == 0);
-
- info->setSrcCandidates(this, RBM_INTRET);
- LocationInfoListNode* locationInfo = getLocationInfo(tree->gtOp.gtOp1);
- locationInfo->info.setSrcCandidates(this, RBM_INTRET);
- useList.Append(locationInfo);
+ srcCount = 1;
+ BuildUse(tree->gtGetOp1(), RBM_INTRET);
}
break;
case GT_NOP:
// A GT_NOP is either a passthrough (if it is void, or if it has
// a child), but must be considered to produce a dummy value if it
- // has a type but no child
- info->srcCount = 0;
- if (tree->TypeGet() != TYP_VOID && tree->gtOp.gtOp1 == nullptr)
+ // has a type but no child.
+ srcCount = 0;
+ if (tree->TypeGet() != TYP_VOID && tree->gtGetOp1() == nullptr)
{
- assert(info->dstCount == 1);
+ assert(dstCount == 1);
+ BuildDef(tree);
}
else
{
- assert(info->dstCount == 0);
+ assert(dstCount == 0);
}
break;
case GT_JTRUE:
- info->srcCount = 0;
- assert(info->dstCount == 0);
+ srcCount = 0;
+ assert(dstCount == 0);
break;
case GT_JMP:
- info->srcCount = 0;
- assert(info->dstCount == 0);
+ srcCount = 0;
+ assert(dstCount == 0);
break;
case GT_SWITCH:
// This should never occur since switch nodes must not be visible at this
// point in the JIT.
- info->srcCount = 0;
+ srcCount = 0;
noway_assert(!"Switch must be lowered at this point");
break;
case GT_JMPTABLE:
- info->srcCount = 0;
- assert(info->dstCount == 1);
+ srcCount = 0;
+ assert(dstCount == 1);
+ BuildDef(tree);
break;
case GT_SWITCH_TABLE:
- info->srcCount = appendBinaryLocationInfoToList(tree->AsOp());
- info->internalIntCount = 1;
- assert(info->dstCount == 0);
+ buildInternalIntRegisterDefForNode(tree);
+ srcCount = BuildBinaryUses(tree->AsOp());
+ assert(dstCount == 0);
break;
case GT_ASG:
noway_assert(!"We should never hit any assignment operator in lowering");
- info->srcCount = 0;
+ srcCount = 0;
break;
case GT_ADD:
@@ -209,7 +251,7 @@ void LinearScan::BuildNode(GenTree* tree)
// No implicit conversions at this stage as the expectation is that
// everything is made explicit by adding casts.
- assert(tree->gtOp.gtOp1->TypeGet() == tree->gtOp.gtOp2->TypeGet());
+ assert(tree->gtGetOp1()->TypeGet() == tree->gtGetOp2()->TypeGet());
}
__fallthrough;
@@ -221,30 +263,34 @@ void LinearScan::BuildNode(GenTree* tree)
case GT_RSH:
case GT_RSZ:
case GT_ROR:
- info->srcCount = appendBinaryLocationInfoToList(tree->AsOp());
- assert(info->dstCount == 1);
+ srcCount = BuildBinaryUses(tree->AsOp());
+ assert(dstCount == 1);
+ BuildDef(tree);
break;
case GT_RETURNTRAP:
// this just turns into a compare of its child with an int
// + a conditional call
- appendLocationInfoToList(tree->gtGetOp1());
- info->srcCount = 1;
- assert(info->dstCount == 0);
+ BuildUse(tree->gtGetOp1());
+ srcCount = 1;
+ assert(dstCount == 0);
+ killMask = compiler->compHelperCallKillSet(CORINFO_HELP_STOP_FOR_GC);
+ BuildDefsWithKills(tree, 0, RBM_NONE, killMask);
break;
case GT_MOD:
case GT_UMOD:
NYI_IF(varTypeIsFloating(tree->TypeGet()), "FP Remainder in ARM64");
assert(!"Shouldn't see an integer typed GT_MOD node in ARM64");
+ srcCount = 0;
break;
case GT_MUL:
if (tree->gtOverflow())
{
// Need a register different from target reg to check for overflow.
- info->internalIntCount = 1;
- info->isInternalRegDelayFree = true;
+ buildInternalIntRegisterDefForNode(tree);
+ setInternalRegsDelayFree = true;
}
__fallthrough;
@@ -252,8 +298,10 @@ void LinearScan::BuildNode(GenTree* tree)
case GT_MULHI:
case GT_UDIV:
{
- info->srcCount = appendBinaryLocationInfoToList(tree->AsOp());
- assert(info->dstCount == 1);
+ srcCount = BuildBinaryUses(tree->AsOp());
+ buildInternalRegisterUses();
+ assert(dstCount == 1);
+ BuildDef(tree);
}
break;
@@ -266,25 +314,26 @@ void LinearScan::BuildNode(GenTree* tree)
(tree->gtIntrinsic.gtIntrinsicId == CORINFO_INTRINSIC_Sqrt));
// Both operand and its result must be of the same floating point type.
- op1 = tree->gtOp.gtOp1;
+ op1 = tree->gtGetOp1();
assert(varTypeIsFloating(op1));
assert(op1->TypeGet() == tree->TypeGet());
- appendLocationInfoToList(op1);
- info->srcCount = 1;
- assert(info->dstCount == 1);
+ BuildUse(op1);
+ srcCount = 1;
+ assert(dstCount == 1);
+ BuildDef(tree);
}
break;
#ifdef FEATURE_SIMD
case GT_SIMD:
- BuildSIMD(tree->AsSIMD());
+ srcCount = BuildSIMD(tree->AsSIMD());
break;
#endif // FEATURE_SIMD
#ifdef FEATURE_HW_INTRINSICS
case GT_HWIntrinsic:
- BuildHWIntrinsic(tree->AsHWIntrinsic());
+ srcCount = BuildHWIntrinsic(tree->AsHWIntrinsic());
break;
#endif // FEATURE_HW_INTRINSICS
@@ -294,10 +343,6 @@ void LinearScan::BuildNode(GenTree* tree)
// register.
// see CodeGen::genIntToIntCast()
- appendLocationInfoToList(tree->gtGetOp1());
- info->srcCount = 1;
- assert(info->dstCount == 1);
-
// Non-overflow casts to/from float/double are done using SSE2 instructions
// and that allow the source operand to be either a reg or memop. Given the
// fact that casts from small int to float/double are done as two-level casts,
@@ -330,17 +375,23 @@ void LinearScan::BuildNode(GenTree* tree)
if (!canStoreMaxValue || !canStoreMinValue)
{
- info->internalIntCount = 1;
+ buildInternalIntRegisterDefForNode(tree);
}
}
+ BuildUse(tree->gtGetOp1());
+ srcCount = 1;
+ buildInternalRegisterUses();
+ assert(dstCount == 1);
+ BuildDef(tree);
}
break;
case GT_NEG:
case GT_NOT:
- appendLocationInfoToList(tree->gtGetOp1());
- info->srcCount = 1;
- assert(info->dstCount == 1);
+ BuildUse(tree->gtGetOp1());
+ srcCount = 1;
+ assert(dstCount == 1);
+ BuildDef(tree);
break;
case GT_EQ:
@@ -352,42 +403,42 @@ void LinearScan::BuildNode(GenTree* tree)
case GT_TEST_EQ:
case GT_TEST_NE:
case GT_JCMP:
- BuildCmp(tree);
+ srcCount = BuildCmp(tree);
break;
case GT_CKFINITE:
- appendLocationInfoToList(tree->gtOp.gtOp1);
- info->srcCount = 1;
- assert(info->dstCount == 1);
- info->internalIntCount = 1;
+ srcCount = 1;
+ assert(dstCount == 1);
+ buildInternalIntRegisterDefForNode(tree);
+ BuildUse(tree->gtGetOp1());
+ BuildDef(tree);
+ buildInternalRegisterUses();
break;
case GT_CMPXCHG:
{
GenTreeCmpXchg* cmpXchgNode = tree->AsCmpXchg();
- info->srcCount = cmpXchgNode->gtOpComparand->isContained() ? 2 : 3;
- assert(info->dstCount == 1);
+ srcCount = cmpXchgNode->gtOpComparand->isContained() ? 2 : 3;
+ assert(dstCount == 1);
- info->internalIntCount = 1;
+ buildInternalIntRegisterDefForNode(tree);
// For ARMv8 exclusives the lifetime of the addr and data must be extended because
 // it may be used multiple times during retries
- LocationInfoListNode* locationInfo = getLocationInfo(tree->gtCmpXchg.gtOpLocation);
- locationInfo->info.isDelayFree = true;
- useList.Append(locationInfo);
- LocationInfoListNode* valueInfo = getLocationInfo(tree->gtCmpXchg.gtOpValue);
- valueInfo->info.isDelayFree = true;
- useList.Append(valueInfo);
+ RefPosition* locationUse = BuildUse(tree->gtCmpXchg.gtOpLocation);
+ setDelayFree(locationUse);
+ RefPosition* valueUse = BuildUse(tree->gtCmpXchg.gtOpValue);
+ setDelayFree(valueUse);
if (!cmpXchgNode->gtOpComparand->isContained())
{
- LocationInfoListNode* comparandInfo = getLocationInfo(tree->gtCmpXchg.gtOpComparand);
- comparandInfo->info.isDelayFree = true;
- useList.Append(comparandInfo);
+ RefPosition* comparandUse = BuildUse(tree->gtCmpXchg.gtOpComparand);
+ setDelayFree(comparandUse);
}
- info->hasDelayFreeSrc = true;
// Internals may not collide with target
- info->isInternalRegDelayFree = true;
+ setInternalRegsDelayFree = true;
+ buildInternalRegisterUses();
+ BuildDef(tree);
}
break;
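
The delay-free uses exist because ARMv8 expands GT_CMPXCHG into a load-exclusive/store-exclusive loop that re-reads every operand on retry, so all of them must outlive the def. The same retry structure, written against std::atomic as an analogue (not the emitted code):

    #include <atomic>

    // compare_exchange_weak retries the way the emitted ldaxr/stlxr loop does;
    // location, value and comparand all stay live for the whole loop.
    int compareExchangeSketch(std::atomic<int>& location, int value, int comparand)
    {
        int observed = comparand;
        while (!location.compare_exchange_weak(observed, value))
        {
            if (observed != comparand)
            {
                break; // another thread changed it; return what we saw
            }
            // spurious failure: observed still equals comparand, so retry
        }
        return observed;
    }
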
@@ -395,55 +446,69 @@ void LinearScan::BuildNode(GenTree* tree)
case GT_XADD:
case GT_XCHG:
{
- assert(info->dstCount == (tree->TypeGet() == TYP_VOID) ? 0 : 1);
- info->srcCount = tree->gtOp.gtOp2->isContained() ? 1 : 2;
- info->internalIntCount = (tree->OperGet() == GT_XCHG) ? 1 : 2;
+ assert(dstCount == ((tree->TypeGet() == TYP_VOID) ? 0 : 1));
+ srcCount = tree->gtGetOp2()->isContained() ? 1 : 2;
+
+ // GT_XCHG requires a single internal register; the others require two.
+ buildInternalIntRegisterDefForNode(tree);
+ if (tree->OperGet() != GT_XCHG)
+ {
+ buildInternalIntRegisterDefForNode(tree);
+ }
// For ARMv8 exclusives the lifetime of the addr and data must be extended because
 // it may be used multiple times during retries
- assert(!tree->gtOp.gtOp1->isContained());
- LocationInfoListNode* op1Info = getLocationInfo(tree->gtOp.gtOp1);
- useList.Append(op1Info);
- LocationInfoListNode* op2Info = nullptr;
- if (!tree->gtOp.gtOp2->isContained())
+ assert(!tree->gtGetOp1()->isContained());
+ RefPosition* op1Use = BuildUse(tree->gtGetOp1());
+ RefPosition* op2Use = nullptr;
+ if (!tree->gtGetOp2()->isContained())
{
- op2Info = getLocationInfo(tree->gtOp.gtOp2);
- useList.Append(op2Info);
+ op2Use = BuildUse(tree->gtGetOp2());
}
- if (info->dstCount != 0)
+
+ // Internals may not collide with target
+ if (dstCount == 1)
{
- op1Info->info.isDelayFree = true;
- if (op2Info != nullptr)
+ setDelayFree(op1Use);
+ if (op2Use != nullptr)
{
- op2Info->info.isDelayFree = true;
+ setDelayFree(op2Use);
}
- // Internals may not collide with target
- info->isInternalRegDelayFree = true;
- info->hasDelayFreeSrc = true;
+ setInternalRegsDelayFree = true;
+ }
+ buildInternalRegisterUses();
+ if (dstCount == 1)
+ {
+ BuildDef(tree);
}
}
break;
case GT_PUTARG_STK:
- BuildPutArgStk(tree->AsPutArgStk());
+ srcCount = BuildPutArgStk(tree->AsPutArgStk());
break;
case GT_PUTARG_REG:
- BuildPutArgReg(tree->AsUnOp());
+ srcCount = BuildPutArgReg(tree->AsUnOp());
break;
case GT_CALL:
- BuildCall(tree->AsCall());
+ srcCount = BuildCall(tree->AsCall());
+ if (tree->AsCall()->HasMultiRegRetVal())
+ {
+ dstCount = tree->AsCall()->GetReturnTypeDesc()->GetReturnRegCount();
+ }
break;
case GT_ADDR:
{
// For a GT_ADDR, the child node should not be evaluated into a register
- GenTree* child = tree->gtOp.gtOp1;
+ GenTree* child = tree->gtGetOp1();
assert(!isCandidateLocalRef(child));
assert(child->isContained());
- assert(info->dstCount == 1);
- info->srcCount = 0;
+ assert(dstCount == 1);
+ srcCount = 0;
+ BuildDef(tree);
}
break;
@@ -451,23 +516,24 @@ void LinearScan::BuildNode(GenTree* tree)
case GT_DYN_BLK:
// These should all be eliminated prior to Lowering.
assert(!"Non-store block node in Lowering");
- info->srcCount = 0;
+ srcCount = 0;
break;
case GT_STORE_BLK:
case GT_STORE_OBJ:
case GT_STORE_DYN_BLK:
- BuildBlockStore(tree->AsBlk());
+ srcCount = BuildBlockStore(tree->AsBlk());
break;
case GT_INIT_VAL:
// Always a passthrough of its child's value.
assert(!"INIT_VAL should always be contained");
+ srcCount = 0;
break;
case GT_LCLHEAP:
{
- assert(info->dstCount == 1);
+ assert(dstCount == 1);
// Need a variable number of temp regs (see genLclHeap() in codegenamd64.cpp):
// Here '-' means don't care.
@@ -489,19 +555,15 @@ void LinearScan::BuildNode(GenTree* tree)
hasPspSym = false;
#endif
- GenTree* size = tree->gtOp.gtOp1;
+ GenTree* size = tree->gtGetOp1();
if (size->IsCnsIntOrI())
{
assert(size->isContained());
- info->srcCount = 0;
+ srcCount = 0;
size_t sizeVal = size->gtIntCon.gtIconVal;
- if (sizeVal == 0)
- {
- info->internalIntCount = 0;
- }
- else
+ if (sizeVal != 0)
{
// Compute the amount of memory to properly STACK_ALIGN.
// Note: The Gentree node is not updated here as it is cheap to recompute stack aligned size.
@@ -514,55 +576,64 @@ void LinearScan::BuildNode(GenTree* tree)
//
if (cntStackAlignedWidthItems <= 4)
{
- info->internalIntCount = 0;
+ // Need no internal registers
}
else if (!compiler->info.compInitMem)
{
// No need to initialize allocated stack space.
if (sizeVal < compiler->eeGetPageSize())
{
- info->internalIntCount = 0;
+ // Need no internal registers
}
else
{
// We need two registers: regCnt and RegTmp
- info->internalIntCount = 2;
+ buildInternalIntRegisterDefForNode(tree);
+ buildInternalIntRegisterDefForNode(tree);
}
}
- else
+ else if (hasPspSym)
{
// greater than 4 and need to zero initialize allocated stack space.
// If the method has PSPSym, we need an internal register to hold regCnt
// since targetReg allocated to GT_LCLHEAP node could be the same as one of
 // the internal registers.
- info->internalIntCount = hasPspSym ? 1 : 0;
+ buildInternalIntRegisterDefForNode(tree);
}
}
}
else
{
- appendLocationInfoToList(size);
- info->srcCount = 1;
+ srcCount = 1;
if (!compiler->info.compInitMem)
{
- info->internalIntCount = 2;
+ buildInternalIntRegisterDefForNode(tree);
+ buildInternalIntRegisterDefForNode(tree);
}
- else
+ else if (hasPspSym)
{
// If the method has PSPSym, we need an internal register to hold regCnt
// since targetReg allocated to GT_LCLHEAP node could be the same as one of
 // the internal registers.
- info->internalIntCount = hasPspSym ? 1 : 0;
+ buildInternalIntRegisterDefForNode(tree);
}
}
- // If the method has PSPSym, we would need an addtional register to relocate it on stack.
+ // If the method has PSPSym, we need an additional register to relocate it on stack.
if (hasPspSym)
{
// Exclude const size 0
if (!size->IsCnsIntOrI() || (size->gtIntCon.gtIconVal > 0))
- info->internalIntCount++;
+ {
+ buildInternalIntRegisterDefForNode(tree);
+ }
}
+ if (!size->isContained())
+ {
+ BuildUse(size);
+ }
+ buildInternalRegisterUses();
+ BuildDef(tree);
}
break;
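
The chain of buildInternalIntRegisterDefForNode calls above encodes a small decision table. Condensed into one function for readability (thresholds simplified; an approximation of the logic, not a drop-in replacement):

    #include <cstddef>

    unsigned lclHeapInternalIntRegsSketch(
        bool sizeIsConstant, size_t alignedSize, bool initMem, size_t pageSize, bool hasPspSym)
    {
        unsigned count = 0;
        if (sizeIsConstant && (alignedSize <= 4 * 16))
        {
            // small constant size: the stores are unrolled, no temps needed
        }
        else if (!initMem)
        {
            if (!sizeIsConstant || (alignedSize >= pageSize))
            {
                count = 2; // regCnt and regTmp for the page-probe loop
            }
        }
        else if (hasPspSym)
        {
            count = 1; // regCnt, since targetReg may alias an internal register
        }
        if (hasPspSym && (!sizeIsConstant || (alignedSize != 0)))
        {
            count++; // one more register to relocate the PSPSym on the stack
        }
        return count;
    }
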
@@ -573,52 +644,54 @@ void LinearScan::BuildNode(GenTree* tree)
{
GenTreeBoundsChk* node = tree->AsBoundsChk();
// Consumes arrLen & index - has no result
- assert(info->dstCount == 0);
+ assert(dstCount == 0);
GenTree* intCns = nullptr;
GenTree* other = nullptr;
- info->srcCount = GetOperandInfo(tree->AsBoundsChk()->gtIndex);
- info->srcCount += GetOperandInfo(tree->AsBoundsChk()->gtArrLen);
+ srcCount = BuildOperandUses(tree->AsBoundsChk()->gtIndex);
+ srcCount += BuildOperandUses(tree->AsBoundsChk()->gtArrLen);
}
break;
case GT_ARR_ELEM:
// These must have been lowered to GT_ARR_INDEX
noway_assert(!"We should never see a GT_ARR_ELEM in lowering");
- info->srcCount = 0;
- assert(info->dstCount == 0);
+ srcCount = 0;
+ assert(dstCount == 0);
break;
case GT_ARR_INDEX:
{
- info->srcCount = 2;
- assert(info->dstCount == 1);
- info->internalIntCount = 1;
- info->isInternalRegDelayFree = true;
+ srcCount = 2;
+ assert(dstCount == 1);
+ buildInternalIntRegisterDefForNode(tree);
+ setInternalRegsDelayFree = true;
// For GT_ARR_INDEX, the lifetime of the arrObj must be extended because it is actually used multiple
// times while the result is being computed.
- LocationInfoListNode* arrObjInfo = getLocationInfo(tree->AsArrIndex()->ArrObj());
- arrObjInfo->info.isDelayFree = true;
- useList.Append(arrObjInfo);
- useList.Append(getLocationInfo(tree->AsArrIndex()->IndexExpr()));
- info->hasDelayFreeSrc = true;
+ RefPosition* arrObjUse = BuildUse(tree->AsArrIndex()->ArrObj());
+ setDelayFree(arrObjUse);
+ BuildUse(tree->AsArrIndex()->IndexExpr());
+ buildInternalRegisterUses();
+ BuildDef(tree);
}
break;
case GT_ARR_OFFSET:
// This consumes the offset, if any, the arrObj and the effective index,
// and produces the flattened offset for this dimension.
- info->srcCount = 2;
+ srcCount = 2;
if (!tree->gtArrOffs.gtOffset->isContained())
{
- appendLocationInfoToList(tree->AsArrOffs()->gtOffset);
- info->srcCount++;
+ BuildUse(tree->AsArrOffs()->gtOffset);
+ srcCount++;
}
- appendLocationInfoToList(tree->AsArrOffs()->gtIndex);
- appendLocationInfoToList(tree->AsArrOffs()->gtArrObj);
- assert(info->dstCount == 1);
- info->internalIntCount = 1;
+ BuildUse(tree->AsArrOffs()->gtIndex);
+ BuildUse(tree->AsArrOffs()->gtArrObj);
+ assert(dstCount == 1);
+ buildInternalIntRegisterDefForNode(tree);
+ buildInternalRegisterUses();
+ BuildDef(tree);
break;
case GT_LEA:
@@ -630,50 +703,51 @@ void LinearScan::BuildNode(GenTree* tree)
int cns = lea->Offset();
// This LEA is instantiating an address, so we set up the srcCount here.
- info->srcCount = 0;
+ srcCount = 0;
if (base != nullptr)
{
- info->srcCount++;
- appendLocationInfoToList(base);
+ srcCount++;
+ BuildUse(base);
}
if (index != nullptr)
{
- info->srcCount++;
- appendLocationInfoToList(index);
+ srcCount++;
+ BuildUse(index);
}
- assert(info->dstCount == 1);
+ assert(dstCount == 1);
// On ARM64 we may need a single internal register
// (when both conditions are true then we still only need a single internal register)
if ((index != nullptr) && (cns != 0))
{
// ARM64 does not support both Index and offset so we need an internal register
- info->internalIntCount = 1;
+ buildInternalIntRegisterDefForNode(tree);
}
else if (!emitter::emitIns_valid_imm_for_add(cns, EA_8BYTE))
{
// This offset can't be contained in the add instruction, so we need an internal register
- info->internalIntCount = 1;
+ buildInternalIntRegisterDefForNode(tree);
}
+ buildInternalRegisterUses();
+ BuildDef(tree);
}
break;
case GT_STOREIND:
{
- assert(info->dstCount == 0);
+ assert(dstCount == 0);
if (compiler->codeGen->gcInfo.gcIsWriteBarrierAsgNode(tree))
{
- info->srcCount = 2;
- BuildGCWriteBarrier(tree);
+ srcCount = BuildGCWriteBarrier(tree);
break;
}
- BuildIndir(tree->AsIndir());
+ srcCount = BuildIndir(tree->AsIndir());
if (!tree->gtGetOp2()->isContained())
{
- appendLocationInfoToList(tree->gtGetOp2());
- info->srcCount++;
+ BuildUse(tree->gtGetOp2());
+ srcCount++;
}
}
break;
@@ -681,51 +755,58 @@ void LinearScan::BuildNode(GenTree* tree)
case GT_NULLCHECK:
// Unlike ARM, ARM64 implements NULLCHECK as a load to REG_ZR, so no internal register
// is required, and it is not a localDefUse.
- assert(info->dstCount == 0);
+ assert(dstCount == 0);
assert(!tree->gtGetOp1()->isContained());
- appendLocationInfoToList(tree->gtOp.gtOp1);
- info->srcCount = 1;
+ BuildUse(tree->gtGetOp1());
+ srcCount = 1;
break;
case GT_IND:
- assert(info->dstCount == 1);
- BuildIndir(tree->AsIndir());
+ assert(dstCount == 1);
+ srcCount = BuildIndir(tree->AsIndir());
break;
case GT_CATCH_ARG:
- info->srcCount = 0;
- assert(info->dstCount == 1);
- info->setDstCandidates(this, RBM_EXCEPTION_OBJECT);
+ srcCount = 0;
+ assert(dstCount == 1);
+ BuildDef(tree, RBM_EXCEPTION_OBJECT);
break;
case GT_CLS_VAR:
- info->srcCount = 0;
+ srcCount = 0;
// GT_CLS_VAR, by the time we reach the backend, must always
// be a pure use.
// It will produce a result of the type of the
// node, and use an internal register for the address.
- assert(info->dstCount == 1);
+ assert(dstCount == 1);
assert((tree->gtFlags & (GTF_VAR_DEF | GTF_VAR_USEASG)) == 0);
- info->internalIntCount = 1;
+ buildInternalIntRegisterDefForNode(tree);
+ buildInternalRegisterUses();
+ BuildDef(tree);
break;
case GT_INDEX_ADDR:
- assert(info->dstCount == 1);
- info->srcCount = appendBinaryLocationInfoToList(tree->AsOp());
- info->internalIntCount = 1;
+ assert(dstCount == 1);
+ srcCount = BuildBinaryUses(tree->AsOp());
+ buildInternalIntRegisterDefForNode(tree);
+ buildInternalRegisterUses();
+ BuildDef(tree);
break;
+
} // end switch (tree->OperGet())
- if (tree->IsUnusedValue() && (info->dstCount != 0))
+ if (tree->IsUnusedValue() && (dstCount != 0))
{
- info->isLocalDefUse = true;
+ isLocalDefUse = true;
}
- // We need to be sure that we've set info->srcCount and info->dstCount appropriately
- assert((info->dstCount < 2) || tree->IsMultiRegCall());
- assert(info->isLocalDefUse == (tree->IsValue() && tree->IsUnusedValue()));
- assert(!tree->IsUnusedValue() || (info->dstCount != 0));
- assert(info->dstCount == tree->GetRegisterDstCount());
+ // We need to be sure that we've set srcCount and dstCount appropriately
+ assert((dstCount < 2) || tree->IsMultiRegCall());
+ assert(isLocalDefUse == (tree->IsValue() && tree->IsUnusedValue()));
+ assert(!tree->IsUnusedValue() || (dstCount != 0));
+ assert(dstCount == tree->GetRegisterDstCount());
+ INDEBUG(dumpNodeInfo(tree, dstCandidates, srcCount, dstCount));
+ return srcCount;
}
#ifdef FEATURE_SIMD
@@ -736,35 +817,27 @@ void LinearScan::BuildNode(GenTree* tree)
// tree - The GT_SIMD node of interest
//
// Return Value:
-// None.
-
-void LinearScan::BuildSIMD(GenTreeSIMD* simdTree)
+// The number of sources consumed by this node.
+//
+int LinearScan::BuildSIMD(GenTreeSIMD* simdTree)
{
- TreeNodeInfo* info = currentNodeInfo;
+ int srcCount = 0;
// Only SIMDIntrinsicInit can be contained
if (simdTree->isContained())
{
assert(simdTree->gtSIMDIntrinsicID == SIMDIntrinsicInit);
}
- assert(info->dstCount == 1);
+ int dstCount = simdTree->IsValue() ? 1 : 0;
+ assert(dstCount == 1);
- GenTree* op1 = simdTree->gtOp.gtOp1;
- GenTree* op2 = simdTree->gtOp.gtOp2;
- if (!op1->OperIs(GT_LIST))
- {
- info->srcCount += GetOperandInfo(op1);
- }
- if ((op2 != nullptr) && !op2->isContained())
- {
- info->srcCount += GetOperandInfo(op2);
- }
+ bool buildUses = true;
+
+ GenTree* op1 = simdTree->gtGetOp1();
+ GenTree* op2 = simdTree->gtGetOp2();
switch (simdTree->gtSIMDIntrinsicID)
{
case SIMDIntrinsicInit:
- assert(info->srcCount == (simdTree->gtGetOp1()->isContained() ? 0 : 1));
- break;
-
case SIMDIntrinsicCast:
case SIMDIntrinsicSqrt:
case SIMDIntrinsicAbs:
@@ -774,7 +847,7 @@ void LinearScan::BuildSIMD(GenTreeSIMD* simdTree)
case SIMDIntrinsicConvertToInt64:
case SIMDIntrinsicWidenLo:
case SIMDIntrinsicWidenHi:
- assert(info->srcCount == 1);
+ // No special handling required.
break;
case SIMDIntrinsicGetItem:
@@ -783,17 +856,26 @@ void LinearScan::BuildSIMD(GenTreeSIMD* simdTree)
op2 = simdTree->gtGetOp2();
// We have an object and an index, either of which may be contained.
+ bool setOp2DelayFree = false;
if (!op2->IsCnsIntOrI() && (!op1->isContained() || op1->OperIsLocal()))
{
- // If the index is not a constant and not contained or is a local
+ // If the index is not a constant and the object is not contained or is a local
// we will need a general purpose register to calculate the address
- info->internalIntCount = 1;
-
// internal register must not clobber input index
- LocationInfoListNode* op2Info =
- (op1->isContained()) ? useList.Begin() : useList.GetSecond(INDEBUG(op2));
- op2Info->info.isDelayFree = true;
- info->hasDelayFreeSrc = true;
+ // TODO-Cleanup: An internal register will never clobber a source; this code actually
+ // ensures that the index (op2) doesn't interfere with the target.
+ buildInternalIntRegisterDefForNode(simdTree);
+ setOp2DelayFree = true;
+ }
+ srcCount += BuildOperandUses(op1);
+ if (!op2->isContained())
+ {
+ RefPosition* op2Use = BuildUse(op2);
+ if (setOp2DelayFree)
+ {
+ setDelayFree(op2Use);
+ }
+ srcCount++;
}
if (!op2->IsCnsIntOrI() && (!op1->isContained()))
@@ -802,6 +884,7 @@ void LinearScan::BuildSIMD(GenTreeSIMD* simdTree)
// we will use the SIMD temp location to store the vector.
compiler->getSIMDInitTempVarNum();
}
+ buildUses = false;
}
break;
@@ -820,7 +903,7 @@ void LinearScan::BuildSIMD(GenTreeSIMD* simdTree)
case SIMDIntrinsicGreaterThan:
case SIMDIntrinsicLessThanOrEqual:
case SIMDIntrinsicGreaterThanOrEqual:
- assert(info->srcCount == 2);
+ // No special handling required.
break;
case SIMDIntrinsicSetX:
@@ -828,54 +911,53 @@ void LinearScan::BuildSIMD(GenTreeSIMD* simdTree)
case SIMDIntrinsicSetZ:
case SIMDIntrinsicSetW:
case SIMDIntrinsicNarrow:
- assert(info->srcCount == 2);
-
+ {
// Op1 will write to dst before Op2 is free
- useList.GetSecond(INDEBUG(simdTree->gtGetOp2()))->info.isDelayFree = true;
- info->hasDelayFreeSrc = true;
+ BuildUse(op1);
+ RefPosition* op2Use = BuildUse(op2);
+ setDelayFree(op2Use);
+ srcCount = 2;
+ buildUses = false;
break;
+ }
case SIMDIntrinsicInitN:
{
var_types baseType = simdTree->gtSIMDBaseType;
- info->srcCount = (short)(simdTree->gtSIMDSize / genTypeSize(baseType));
- int initCount = 0;
+ srcCount = (short)(simdTree->gtSIMDSize / genTypeSize(baseType));
+ if (varTypeIsFloating(simdTree->gtSIMDBaseType))
+ {
+ // Need an internal register to stitch together all the values into a single vector in a SIMD reg.
+ buildInternalFloatRegisterDefForNode(simdTree);
+ }
+
+ int initCount = 0;
for (GenTree* list = op1; list != nullptr; list = list->gtGetOp2())
{
assert(list->OperGet() == GT_LIST);
GenTree* listItem = list->gtGetOp1();
assert(listItem->TypeGet() == baseType);
assert(!listItem->isContained());
- appendLocationInfoToList(listItem);
+ BuildUse(listItem);
initCount++;
}
- assert(initCount == info->srcCount);
+ assert(initCount == srcCount);
+ buildUses = false;
- if (varTypeIsFloating(simdTree->gtSIMDBaseType))
- {
- // Need an internal register to stitch together all the values into a single vector in a SIMD reg.
- info->setInternalCandidates(this, RBM_ALLFLOAT);
- info->internalFloatCount = 1;
- }
break;
}
case SIMDIntrinsicInitArray:
// We have an array and an index, which may be contained.
- assert(info->srcCount == (simdTree->gtGetOp2()->isContained() ? 1 : 2));
break;
case SIMDIntrinsicOpEquality:
case SIMDIntrinsicOpInEquality:
- assert(info->srcCount == (simdTree->gtGetOp2()->isContained() ? 1 : 2));
- info->setInternalCandidates(this, RBM_ALLFLOAT);
- info->internalFloatCount = 1;
+ buildInternalFloatRegisterDefForNode(simdTree);
break;
case SIMDIntrinsicDotProduct:
- assert(info->srcCount == 2);
- info->setInternalCandidates(this, RBM_ALLFLOAT);
- info->internalFloatCount = 1;
+ buildInternalFloatRegisterDefForNode(simdTree);
break;
case SIMDIntrinsicSelect:
@@ -883,9 +965,7 @@ void LinearScan::BuildSIMD(GenTreeSIMD* simdTree)
// bsl target register must be VC. Reserve a temp in case we need to shuffle things.
// This will require a different approach, as GenTreeSIMD has only two operands.
assert(!"SIMDIntrinsicSelect not yet supported");
- assert(info->srcCount == 3);
- info->setInternalCandidates(this, RBM_ALLFLOAT);
- info->internalFloatCount = 1;
+ buildInternalFloatRegisterDefForNode(simdTree);
break;
case SIMDIntrinsicInitArrayX:
@@ -912,6 +992,27 @@ void LinearScan::BuildSIMD(GenTreeSIMD* simdTree)
noway_assert(!"Unimplemented SIMD node type.");
unreached();
}
+ if (buildUses)
+ {
+ assert(!op1->OperIs(GT_LIST));
+ assert(srcCount == 0);
+ srcCount = BuildOperandUses(op1);
+ if ((op2 != nullptr) && !op2->isContained())
+ {
+ srcCount += BuildOperandUses(op2);
+ }
+ }
+ assert(internalCount <= MaxInternalCount);
+ buildInternalRegisterUses();
+ if (dstCount == 1)
+ {
+ BuildDef(simdTree);
+ }
+ else
+ {
+ assert(dstCount == 0);
+ }
+ return srcCount;
}
#endif // FEATURE_SIMD
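
A pattern worth noting in BuildSIMD above: intrinsics with ordering or delay-free constraints build their own uses and clear the buildUses flag, and everything else drops into one generic tail. The control-flow shape, as invented stubs:

    #include <cassert>

    struct OpSketch { bool contained; };

    static int buildOperandUsesSketch(const OpSketch& op) { return op.contained ? 0 : 1; }

    // builtOwnUses corresponds to cases such as SIMDIntrinsicSetX or InitN,
    // which build their uses inline and set buildUses = false.
    int buildSimdTailSketch(const OpSketch& op1, const OpSketch* op2, bool builtOwnUses)
    {
        int srcCount = 0;
        if (!builtOwnUses)
        {
            srcCount += buildOperandUsesSketch(op1);
            if ((op2 != nullptr) && !op2->contained)
            {
                srcCount += buildOperandUsesSketch(*op2);
            }
            assert(srcCount <= 2);
        }
        return srcCount;
    }
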
@@ -924,69 +1025,59 @@ void LinearScan::BuildSIMD(GenTreeSIMD* simdTree)
// tree - The GT_HWIntrinsic node of interest
//
// Return Value:
-// None.
-
-void LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree)
+// The number of sources consumed by this node.
+//
+int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree)
{
- TreeNodeInfo* info = currentNodeInfo;
NamedIntrinsic intrinsicID = intrinsicTree->gtHWIntrinsicId;
+ int numArgs = Compiler::numArgsOfHWIntrinsic(intrinsicTree);
- GenTreeArgList* argList = nullptr;
- GenTree* op1 = intrinsicTree->gtOp.gtOp1;
- GenTree* op2 = intrinsicTree->gtOp.gtOp2;
-
- if (op1->OperIs(GT_LIST))
- {
- argList = op1->AsArgList();
- op1 = argList->Current();
- op2 = argList->Rest()->Current();
+ GenTree* op1 = intrinsicTree->gtGetOp1();
+ GenTree* op2 = intrinsicTree->gtGetOp2();
+ GenTree* op3 = nullptr;
+ int srcCount = 0;
- for (GenTreeArgList* list = argList; list != nullptr; list = list->Rest())
- {
- info->srcCount += GetOperandInfo(list->Current());
- }
- }
- else
+ if ((op1 != nullptr) && op1->OperIsList())
{
- info->srcCount += GetOperandInfo(op1);
- if (op2 != nullptr)
- {
- info->srcCount += GetOperandInfo(op2);
- }
+ // op2 must be null, and there must be at least two more arguments.
+ assert(op2 == nullptr);
+ noway_assert(op1->AsArgList()->Rest() != nullptr);
+ noway_assert(op1->AsArgList()->Rest()->Rest() != nullptr);
+ assert(op1->AsArgList()->Rest()->Rest()->Rest() == nullptr);
+ op2 = op1->AsArgList()->Rest()->Current();
+ op3 = op1->AsArgList()->Rest()->Rest()->Current();
+ op1 = op1->AsArgList()->Current();
}
+ int dstCount = intrinsicTree->IsValue() ? 1 : 0;
+ bool op2IsDelayFree = false;
+ bool op3IsDelayFree = false;
+
+ // Create internal temps, and handle any other special requirements.
switch (compiler->getHWIntrinsicInfo(intrinsicID).form)
{
case HWIntrinsicInfo::Sha1HashOp:
- info->setInternalCandidates(this, RBM_ALLFLOAT);
- info->internalFloatCount = 1;
+ assert((numArgs == 3) && (op2 != nullptr) && (op3 != nullptr));
if (!op2->isContained())
{
- LocationInfoListNode* op2Info = useList.Begin()->Next();
- op2Info->info.isDelayFree = true;
- GenTree* op3 = intrinsicTree->gtOp.gtOp1->AsArgList()->Rest()->Rest()->Current();
assert(!op3->isContained());
- LocationInfoListNode* op3Info = op2Info->Next();
- op3Info->info.isDelayFree = true;
- info->hasDelayFreeSrc = true;
- info->isInternalRegDelayFree = true;
+ op2IsDelayFree = true;
+ op3IsDelayFree = true;
+ setInternalRegsDelayFree = true;
}
+ buildInternalFloatRegisterDefForNode(intrinsicTree);
break;
case HWIntrinsicInfo::SimdTernaryRMWOp:
+ assert((numArgs == 3) && (op2 != nullptr) && (op3 != nullptr));
if (!op2->isContained())
{
- LocationInfoListNode* op2Info = useList.Begin()->Next();
- op2Info->info.isDelayFree = true;
- GenTree* op3 = intrinsicTree->gtOp.gtOp1->AsArgList()->Rest()->Rest()->Current();
assert(!op3->isContained());
- LocationInfoListNode* op3Info = op2Info->Next();
- op3Info->info.isDelayFree = true;
- info->hasDelayFreeSrc = true;
+ op2IsDelayFree = true;
+ op3IsDelayFree = true;
}
break;
case HWIntrinsicInfo::Sha1RotateOp:
- info->setInternalCandidates(this, RBM_ALLFLOAT);
- info->internalFloatCount = 1;
+ buildInternalFloatRegisterDefForNode(intrinsicTree);
break;
case HWIntrinsicInfo::SimdExtractOp:
@@ -994,14 +1085,52 @@ void LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree)
if (!op2->isContained())
{
// We need a temp to create a switch table
- info->internalIntCount = 1;
- info->setInternalCandidates(this, allRegs(TYP_INT));
+ buildInternalIntRegisterDefForNode(intrinsicTree);
}
break;
default:
break;
}
+
+ // Next, build uses
+ if (numArgs > 3)
+ {
+ srcCount = 0;
+ assert(!op2IsDelayFree && !op3IsDelayFree);
+ assert(op1->OperIs(GT_LIST));
+ {
+ for (GenTreeArgList* list = op1->AsArgList(); list != nullptr; list = list->Rest())
+ {
+ srcCount += BuildOperandUses(list->Current());
+ }
+ }
+ assert(srcCount == numArgs);
+ }
+ else
+ {
+ if (op1 != nullptr)
+ {
+ srcCount += BuildOperandUses(op1);
+ if (op2 != nullptr)
+ {
+ srcCount += (op2IsDelayFree) ? BuildDelayFreeUses(op2) : BuildOperandUses(op2);
+ if (op3 != nullptr)
+ {
+ srcCount += (op3IsDelayFree) ? BuildDelayFreeUses(op3) : BuildOperandUses(op3);
+ }
+ }
+ }
+ }
+ buildInternalRegisterUses();
+
+ // Now defs
+ if (intrinsicTree->IsValue())
+ {
+ BuildDef(intrinsicTree);
+ }
+
+ return srcCount;
}
#endif
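
For the three-operand intrinsics, BuildHWIntrinsic now unpacks the GT_LIST carried in op1 into op1/op2/op3 up front, so the delay-free decisions can name the operands directly. The unpack on its own, with stub node types:

    #include <cassert>

    struct NodeSketch;
    struct ArgListSketch
    {
        NodeSketch*    current;
        ArgListSketch* rest;
    };

    void unpackThreeArgs(ArgListSketch* list, NodeSketch*& op1, NodeSketch*& op2, NodeSketch*& op3)
    {
        assert((list != nullptr) && (list->rest != nullptr) && (list->rest->rest != nullptr));
        assert(list->rest->rest->rest == nullptr); // exactly three operands
        op1 = list->current;
        op2 = list->rest->current;
        op3 = list->rest->rest->current;
    }
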
diff --git a/src/jit/lsraarmarch.cpp b/src/jit/lsraarmarch.cpp
index 0331edee23..0afe0e2385 100644
--- a/src/jit/lsraarmarch.cpp
+++ b/src/jit/lsraarmarch.cpp
@@ -33,18 +33,20 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
// Arguments:
// indirTree - GT_IND, GT_STOREIND or block gentree node
//
-void LinearScan::BuildIndir(GenTreeIndir* indirTree)
+// Return Value:
+// The number of sources consumed by this node.
+//
+int LinearScan::BuildIndir(GenTreeIndir* indirTree)
{
- TreeNodeInfo* info = currentNodeInfo;
+ int srcCount = 0;
// If this is the rhs of a block copy (i.e. non-enregisterable struct),
// it has no register requirements.
if (indirTree->TypeGet() == TYP_STRUCT)
{
- return;
+ return srcCount;
}
- bool isStore = (indirTree->gtOper == GT_STOREIND);
- info->srcCount = GetIndirInfo(indirTree);
+ bool isStore = (indirTree->gtOper == GT_STOREIND);
GenTree* addr = indirTree->Addr();
GenTree* index = nullptr;
@@ -66,11 +68,12 @@ void LinearScan::BuildIndir(GenTreeIndir* indirTree)
if (type == TYP_FLOAT)
{
- info->internalIntCount = 1;
+ buildInternalIntRegisterDefForNode(indirTree);
}
else if (type == TYP_DOUBLE)
{
- info->internalIntCount = 2;
+ buildInternalIntRegisterDefForNode(indirTree);
+ buildInternalIntRegisterDefForNode(indirTree);
}
}
#endif
@@ -87,12 +90,12 @@ void LinearScan::BuildIndir(GenTreeIndir* indirTree)
if ((index != nullptr) && (cns != 0))
{
// ARM does not support both Index and offset so we need an internal register
- info->internalIntCount++;
+ buildInternalIntRegisterDefForNode(indirTree);
}
else if (!emitter::emitIns_valid_imm_for_ldst_offset(cns, emitTypeSize(indirTree)))
{
// This offset can't be contained in the ldr/str instruction, so we need an internal register
- info->internalIntCount++;
+ buildInternalIntRegisterDefForNode(indirTree);
}
}
@@ -100,13 +103,22 @@ void LinearScan::BuildIndir(GenTreeIndir* indirTree)
if (indirTree->TypeGet() == TYP_SIMD12)
{
// If indirTree is of TYP_SIMD12, addr is not contained. See comment in LowerIndir().
- assert(!indirTree->Addr()->isContained());
+ assert(!addr->isContained());
// Vector3 is read/written as two reads/writes: 8 byte and 4 byte.
// To assemble the vector properly we would need an additional int register
- info->internalIntCount = 1;
+ buildInternalIntRegisterDefForNode(indirTree);
}
#endif // FEATURE_SIMD
+
+ srcCount = BuildIndirUses(indirTree);
+ buildInternalRegisterUses();
+
+ if (indirTree->gtOper != GT_STOREIND)
+ {
+ BuildDef(indirTree);
+ }
+ return srcCount;
}
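
The new BuildIndir contract: it builds the address-mode uses and any internal registers, and produces a def only when the indirection is a load; a GT_STOREIND's data use is built by its caller, as the GT_STOREIND cases above do. Reduced to a stub (names invented):

    struct IndirSketch
    {
        int  addrUseCount; // uses contributed by the address mode
        bool isStore;      // GT_STOREIND: data operand handled by the caller
    };

    // Returns srcCount, mirroring the real function's new signature.
    int buildIndirSketch(const IndirSketch& indir, int& defsBuilt)
    {
        int srcCount = indir.addrUseCount; // BuildIndirUses(indirTree)
        if (!indir.isStore)
        {
            defsBuilt++;                   // BuildDef(indirTree)
        }
        return srcCount;
    }
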
//------------------------------------------------------------------------
@@ -116,36 +128,33 @@ void LinearScan::BuildIndir(GenTreeIndir* indirTree)
// call - The call node of interest
//
// Return Value:
-// None.
+// The number of sources consumed by this node.
//
-void LinearScan::BuildCall(GenTreeCall* call)
+int LinearScan::BuildCall(GenTreeCall* call)
{
- TreeNodeInfo* info = currentNodeInfo;
bool hasMultiRegRetVal = false;
ReturnTypeDesc* retTypeDesc = nullptr;
+ regMaskTP dstCandidates = RBM_NONE;
- info->srcCount = 0;
+ int srcCount = 0;
+ int dstCount = 0;
if (call->TypeGet() != TYP_VOID)
{
hasMultiRegRetVal = call->HasMultiRegRetVal();
if (hasMultiRegRetVal)
{
// dst count = number of registers in which the value is returned by call
- retTypeDesc = call->GetReturnTypeDesc();
- info->dstCount = retTypeDesc->GetReturnRegCount();
+ retTypeDesc = call->GetReturnTypeDesc();
+ dstCount = retTypeDesc->GetReturnRegCount();
}
else
{
- info->dstCount = 1;
+ dstCount = 1;
}
}
- else
- {
- info->dstCount = 0;
- }
- GenTree* ctrlExpr = call->gtControlExpr;
- LocationInfoListNode* ctrlExprInfo = nullptr;
+ GenTree* ctrlExpr = call->gtControlExpr;
+ regMaskTP ctrlExprCandidates = RBM_NONE;
if (call->gtCallType == CT_INDIRECT)
{
// either gtControlExpr != null or gtCallAddr != null.
@@ -158,8 +167,6 @@ void LinearScan::BuildCall(GenTreeCall* call)
// set reg requirements on call target represented as control sequence.
if (ctrlExpr != nullptr)
{
- ctrlExprInfo = getLocationInfo(ctrlExpr);
-
// we should never see a gtControlExpr whose type is void.
assert(ctrlExpr->TypeGet() != TYP_VOID);
@@ -169,14 +176,20 @@ void LinearScan::BuildCall(GenTreeCall* call)
{
// Fast tail call - make sure that call target is always computed in R12(ARM32)/IP0(ARM64)
// so that epilog sequence can generate "br xip0/r12" to achieve fast tail call.
- ctrlExprInfo->info.setSrcCandidates(this, RBM_FASTTAILCALL_TARGET);
+ ctrlExprCandidates = RBM_FASTTAILCALL_TARGET;
}
}
#ifdef _TARGET_ARM_
else
{
- info->internalIntCount = 1;
+ buildInternalIntRegisterDefForNode(call);
+ }
+
+ if (call->NeedsNullCheck())
+ {
+ buildInternalIntRegisterDefForNode(call);
}
+
#endif // _TARGET_ARM_
RegisterType registerType = call->TypeGet();
@@ -188,26 +201,26 @@ void LinearScan::BuildCall(GenTreeCall* call)
{
// The ARM CORINFO_HELP_INIT_PINVOKE_FRAME helper uses a custom calling convention that returns with
// TCB in REG_PINVOKE_TCB. fgMorphCall() sets the correct argument registers.
- info->setDstCandidates(this, RBM_PINVOKE_TCB);
+ dstCandidates = RBM_PINVOKE_TCB;
}
else
#endif // _TARGET_ARM_
if (hasMultiRegRetVal)
{
assert(retTypeDesc != nullptr);
- info->setDstCandidates(this, retTypeDesc->GetABIReturnRegs());
+ dstCandidates = retTypeDesc->GetABIReturnRegs();
}
else if (varTypeIsFloating(registerType))
{
- info->setDstCandidates(this, RBM_FLOATRET);
+ dstCandidates = RBM_FLOATRET;
}
else if (registerType == TYP_LONG)
{
- info->setDstCandidates(this, RBM_LNGRET);
+ dstCandidates = RBM_LNGRET;
}
else
{
- info->setDstCandidates(this, RBM_INTRET);
+ dstCandidates = RBM_INTRET;
}
// First, count reg args
@@ -232,26 +245,8 @@ void LinearScan::BuildCall(GenTreeCall* call)
{
// late arg that is not passed in a register
assert(curArgTabEntry->regNum == REG_STK);
- GenTree* putArgChild = argNode->gtGetOp1();
- if (!varTypeIsStruct(putArgChild) && !putArgChild->OperIs(GT_FIELD_LIST))
- {
- unsigned expectedSlots = 1;
-#ifdef _TARGET_ARM_
- // The `double` types could been transformed to `long` on arm, while the actual longs
- // have been decomposed.
- if (putArgChild->TypeGet() == TYP_LONG)
- {
- useList.GetTreeNodeInfo(argNode).srcCount = 2;
- expectedSlots = 2;
- }
- else if (putArgChild->TypeGet() == TYP_DOUBLE)
- {
- expectedSlots = 2;
- }
-#endif // !_TARGET_ARM_
- // Validate the slot count for this arg.
- assert(curArgTabEntry->numSlots == expectedSlots);
- }
+ // These should never be contained.
+ assert(!argNode->isContained());
continue;
}
@@ -263,8 +258,6 @@ void LinearScan::BuildCall(GenTreeCall* call)
// There could be up to 2-4 PUTARG_REGs in the list (3 or 4 can only occur for HFAs)
for (GenTreeFieldList* entry = argNode->AsFieldList(); entry != nullptr; entry = entry->Rest())
{
- info->srcCount++;
- appendLocationInfoToList(entry->Current());
#ifdef DEBUG
assert(entry->Current()->OperIs(GT_PUTARG_REG));
assert(entry->Current()->gtRegNum == argReg);
@@ -279,6 +272,8 @@ void LinearScan::BuildCall(GenTreeCall* call)
}
#endif // _TARGET_ARM_
#endif
+ BuildUse(entry->Current(), genRegMask(entry->Current()->gtRegNum));
+ srcCount++;
}
}
#ifdef _TARGET_ARM_
@@ -286,8 +281,11 @@ void LinearScan::BuildCall(GenTreeCall* call)
{
unsigned regCount = argNode->AsPutArgSplit()->gtNumRegs;
assert(regCount == curArgTabEntry->numRegs);
- info->srcCount += regCount;
- appendLocationInfoToList(argNode);
+ for (unsigned int i = 0; i < regCount; i++)
+ {
+ BuildUse(argNode, genRegMask(argNode->gtRegNum), i);
+ }
+ srcCount += regCount;
}
#endif
else
@@ -302,14 +300,15 @@ void LinearScan::BuildCall(GenTreeCall* call)
if (argNode->TypeGet() == TYP_LONG)
{
assert(argNode->IsMultiRegNode());
- info->srcCount += 2;
- appendLocationInfoToList(argNode);
+ BuildUse(argNode, genRegMask(argNode->gtRegNum), 0);
+ BuildUse(argNode, genRegMask(genRegArgNext(argNode->gtRegNum)), 1);
+ srcCount += 2;
}
else
#endif // _TARGET_ARM_
{
- appendLocationInfoToList(argNode);
- info->srcCount++;
+ BuildUse(argNode, genRegMask(argNode->gtRegNum));
+ srcCount++;
}
}
}
@@ -324,7 +323,7 @@ void LinearScan::BuildCall(GenTreeCall* call)
GenTree* args = call->gtCallArgs;
while (args)
{
- GenTree* arg = args->gtOp.gtOp1;
+ GenTree* arg = args->gtGetOp1();
// Skip arguments that have been moved to the Late Arg list
if (!(args->gtFlags & GTF_LATE_ARG))
@@ -347,35 +346,33 @@ void LinearScan::BuildCall(GenTreeCall* call)
assert(!arg->IsValue() || arg->IsUnusedValue());
}
}
- args = args->gtOp.gtOp2;
+ args = args->gtGetOp2();
}
// If it is a fast tail call, it is already preferenced to use IP0.
    // Therefore, there is no need to set src candidates on the call target again.
- if (call->IsVarargs() && callHasFloatRegArgs && !call->IsFastTailCall() && (ctrlExprInfo != nullptr))
+ if (call->IsVarargs() && callHasFloatRegArgs && !call->IsFastTailCall() && (ctrlExpr != nullptr))
{
NYI_ARM("float reg varargs");
// Don't assign the call target to any of the argument registers because
// we will use them to also pass floating point arguments as required
// by Arm64 ABI.
- ctrlExprInfo->info.setSrcCandidates(this, allRegs(TYP_INT) & ~(RBM_ARG_REGS));
+ ctrlExprCandidates = allRegs(TYP_INT) & ~(RBM_ARG_REGS);
}
- if (ctrlExprInfo != nullptr)
+ if (ctrlExpr != nullptr)
{
- useList.Append(ctrlExprInfo);
- info->srcCount++;
+ BuildUse(ctrlExpr, ctrlExprCandidates);
+ srcCount++;
}
-#ifdef _TARGET_ARM_
+ buildInternalRegisterUses();
- if (call->NeedsNullCheck())
- {
- info->internalIntCount++;
- }
-
-#endif // _TARGET_ARM_
+ // Now generate defs and kills.
+ regMaskTP killMask = getKillSetForCall(call);
+ BuildDefsWithKills(call, dstCount, dstCandidates, killMask);
+ return srcCount;
}
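
The candidate masks used above (e.g. allRegs(TYP_INT) & ~(RBM_ARG_REGS)) are ordinary bit-set arithmetic over registers. A small self-contained sketch, using made-up register encodings rather than the real target headers:

    #include <cstdint>
    #include <cstdio>

    using regMaskTP = uint64_t;

    constexpr regMaskTP bit(int r) { return regMaskTP(1) << r; }
    // Made-up encodings for illustration only; real values come from the target headers.
    constexpr regMaskTP RBM_ALL_INT  = 0xFFFF;                            // r0..r15
    constexpr regMaskTP RBM_ARG_REGS = bit(0) | bit(1) | bit(2) | bit(3); // r0..r3

    int main() {
        // Exclude the argument registers from the call-target candidates, as
        // BuildCall does for varargs calls that pass float args in int registers.
        regMaskTP ctrlExprCandidates = RBM_ALL_INT & ~RBM_ARG_REGS;
        std::printf("candidates = 0x%llx\n", (unsigned long long)ctrlExprCandidates); // 0xfff0
    }
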
//------------------------------------------------------------------------
@@ -385,20 +382,18 @@ void LinearScan::BuildCall(GenTreeCall* call)
// argNode - a GT_PUTARG_STK node
//
// Return Value:
-// None.
+// The number of sources consumed by this node.
//
// Notes:
// Set the child node(s) to be contained when we have a multireg arg
//
-void LinearScan::BuildPutArgStk(GenTreePutArgStk* argNode)
+int LinearScan::BuildPutArgStk(GenTreePutArgStk* argNode)
{
- TreeNodeInfo* info = currentNodeInfo;
assert(argNode->gtOper == GT_PUTARG_STK);
- GenTree* putArgChild = argNode->gtOp.gtOp1;
+ GenTree* putArgChild = argNode->gtGetOp1();
- info->srcCount = 0;
- info->dstCount = 0;
+ int srcCount = 0;
// Do we have a TYP_STRUCT argument (or a GT_FIELD_LIST), if so it must be a multireg pass-by-value struct
if ((putArgChild->TypeGet() == TYP_STRUCT) || (putArgChild->OperGet() == GT_FIELD_LIST))
@@ -411,24 +406,22 @@ void LinearScan::BuildPutArgStk(GenTreePutArgStk* argNode)
// We consume all of the items in the GT_FIELD_LIST
for (GenTreeFieldList* current = putArgChild->AsFieldList(); current != nullptr; current = current->Rest())
{
- appendLocationInfoToList(current->Current());
- info->srcCount++;
+ BuildUse(current->Current());
+ srcCount++;
}
}
else
{
+            // We can use an ldp/stp sequence, so we need two internal registers on ARM64 and one on ARM.
+ buildInternalIntRegisterDefForNode(argNode);
#ifdef _TARGET_ARM64_
- // We could use a ldp/stp sequence so we need two internal registers
- info->internalIntCount = 2;
-#else // _TARGET_ARM_
- // We could use a ldr/str sequence so we need a internal register
- info->internalIntCount = 1;
-#endif // _TARGET_ARM_
+ buildInternalIntRegisterDefForNode(argNode);
+#endif // _TARGET_ARM64_
if (putArgChild->OperGet() == GT_OBJ)
{
assert(putArgChild->isContained());
- GenTree* objChild = putArgChild->gtOp.gtOp1;
+ GenTree* objChild = putArgChild->gtGetOp1();
if (objChild->OperGet() == GT_LCL_VAR_ADDR)
{
// We will generate all of the code for the GT_PUTARG_STK, the GT_OBJ and the GT_LCL_VAR_ADDR
@@ -441,8 +434,8 @@ void LinearScan::BuildPutArgStk(GenTreePutArgStk* argNode)
// We will generate all of the code for the GT_PUTARG_STK and its child node
// as one contained operation
//
- appendLocationInfoToList(objChild);
- info->srcCount = 1;
+ BuildUse(objChild);
+ srcCount = 1;
}
}
else
@@ -455,8 +448,10 @@ void LinearScan::BuildPutArgStk(GenTreePutArgStk* argNode)
else
{
assert(!putArgChild->isContained());
- info->srcCount = GetOperandInfo(putArgChild);
+ srcCount = BuildOperandUses(putArgChild);
}
+ buildInternalRegisterUses();
+ return srcCount;
}
#ifdef _TARGET_ARM_
@@ -467,20 +462,20 @@ void LinearScan::BuildPutArgStk(GenTreePutArgStk* argNode)
// argNode - a GT_PUTARG_SPLIT node
//
// Return Value:
-// None.
+// The number of sources consumed by this node.
//
// Notes:
// Set the child node(s) to be contained
//
-void LinearScan::BuildPutArgSplit(GenTreePutArgSplit* argNode)
+int LinearScan::BuildPutArgSplit(GenTreePutArgSplit* argNode)
{
- TreeNodeInfo* info = currentNodeInfo;
+ int srcCount = 0;
assert(argNode->gtOper == GT_PUTARG_SPLIT);
- GenTree* putArgChild = argNode->gtOp.gtOp1;
+ GenTree* putArgChild = argNode->gtGetOp1();
// Registers for split argument corresponds to source
- info->dstCount = argNode->gtNumRegs;
+ int dstCount = argNode->gtNumRegs;
regNumber argReg = argNode->gtRegNum;
regMaskTP argMask = RBM_NONE;
@@ -488,8 +483,6 @@ void LinearScan::BuildPutArgSplit(GenTreePutArgSplit* argNode)
{
argMask |= genRegMask((regNumber)((unsigned)argReg + i));
}
- info->setDstCandidates(this, argMask);
- info->setSrcCandidates(this, argMask);
if (putArgChild->OperGet() == GT_FIELD_LIST)
{
@@ -507,21 +500,29 @@ void LinearScan::BuildPutArgSplit(GenTreePutArgSplit* argNode)
{
GenTree* node = fieldListPtr->gtGetOp1();
assert(!node->isContained());
- LocationInfoListNode* nodeInfo = getLocationInfo(node);
- unsigned currentRegCount = nodeInfo->info.dstCount;
- regMaskTP sourceMask = RBM_NONE;
+ // The only multi-reg nodes we should see are OperIsMultiRegOp()
+ unsigned currentRegCount;
+ if (node->OperIsMultiRegOp())
+ {
+ currentRegCount = node->AsMultiRegOp()->GetRegCount();
+ }
+ else
+ {
+ assert(!node->IsMultiRegNode());
+ currentRegCount = 1;
+ }
+ regMaskTP sourceMask = RBM_NONE;
if (sourceRegCount < argNode->gtNumRegs)
{
for (unsigned regIndex = 0; regIndex < currentRegCount; regIndex++)
{
sourceMask |= genRegMask((regNumber)((unsigned)argReg + sourceRegCount + regIndex));
}
- nodeInfo->info.setSrcCandidates(this, sourceMask);
}
sourceRegCount += currentRegCount;
- useList.Append(nodeInfo);
+ BuildUse(node, sourceMask);
}
- info->srcCount += sourceRegCount;
+ srcCount += sourceRegCount;
assert(putArgChild->isContained());
}
else
@@ -530,11 +531,9 @@ void LinearScan::BuildPutArgSplit(GenTreePutArgSplit* argNode)
assert(putArgChild->OperGet() == GT_OBJ);
// We can use a ldr/str sequence so we need an internal register
- info->internalIntCount = 1;
- regMaskTP internalMask = RBM_ALLINT & ~argMask;
- info->setInternalCandidates(this, internalMask);
+ buildInternalIntRegisterDefForNode(argNode, allRegs(TYP_INT) & ~argMask);
- GenTree* objChild = putArgChild->gtOp.gtOp1;
+ GenTree* objChild = putArgChild->gtGetOp1();
if (objChild->OperGet() == GT_LCL_VAR_ADDR)
{
// We will generate all of the code for the GT_PUTARG_SPLIT, the GT_OBJ and the GT_LCL_VAR_ADDR
@@ -544,10 +543,13 @@ void LinearScan::BuildPutArgSplit(GenTreePutArgSplit* argNode)
}
else
{
- info->srcCount = GetIndirInfo(putArgChild->AsIndir());
+ srcCount = BuildIndirUses(putArgChild->AsIndir());
}
assert(putArgChild->isContained());
}
+ buildInternalRegisterUses();
+ BuildDefs(argNode, dstCount, argMask);
+ return srcCount;
}
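
BuildPutArgSplit builds argMask as a run of consecutive registers starting at argReg. The same loop in isolation (genRegMaskSim is a stand-in for the JIT's genRegMask):

    #include <cstdint>
    #include <cstdio>

    using regMaskTP = uint64_t;

    regMaskTP genRegMaskSim(int reg) { return regMaskTP(1) << reg; }

    int main() {
        int argReg = 0, numRegs = 3; // e.g. a struct split across r0..r2
        regMaskTP argMask = 0;
        for (int i = 0; i < numRegs; i++) {
            argMask |= genRegMaskSim(argReg + i);
        }
        std::printf("argMask = 0x%llx\n", (unsigned long long)argMask); // 0x7
    }
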
#endif // _TARGET_ARM_
@@ -558,35 +560,21 @@ void LinearScan::BuildPutArgSplit(GenTreePutArgSplit* argNode)
// blkNode - The block store node of interest
//
// Return Value:
-// None.
+// The number of sources consumed by this node.
//
-void LinearScan::BuildBlockStore(GenTreeBlk* blkNode)
+int LinearScan::BuildBlockStore(GenTreeBlk* blkNode)
{
- TreeNodeInfo* info = currentNodeInfo;
- GenTree* dstAddr = blkNode->Addr();
- unsigned size = blkNode->gtBlkSize;
- GenTree* source = blkNode->Data();
-
- LocationInfoListNode* dstAddrInfo = nullptr;
- LocationInfoListNode* sourceInfo = nullptr;
- LocationInfoListNode* sizeInfo = nullptr;
-
- // Sources are dest address and initVal or source.
- // We may require an additional source or temp register for the size.
- if (!dstAddr->isContained())
- {
- info->srcCount++;
- dstAddrInfo = getLocationInfo(dstAddr);
- }
- assert(info->dstCount == 0);
+ GenTree* dstAddr = blkNode->Addr();
+ unsigned size = blkNode->gtBlkSize;
+ GenTree* source = blkNode->Data();
+ int srcCount = 0;
+
GenTree* srcAddrOrFill = nullptr;
bool isInitBlk = blkNode->OperIsInitBlkOp();
- regMaskTP dstAddrRegMask = RBM_NONE;
- regMaskTP sourceRegMask = RBM_NONE;
- regMaskTP blkSizeRegMask = RBM_NONE;
-
- short internalIntCount = 0;
+ regMaskTP dstAddrRegMask = RBM_NONE;
+ regMaskTP sourceRegMask = RBM_NONE;
+ regMaskTP blkSizeRegMask = RBM_NONE;
regMaskTP internalIntCandidates = RBM_NONE;
if (isInitBlk)
@@ -598,11 +586,6 @@ void LinearScan::BuildBlockStore(GenTreeBlk* blkNode)
initVal = initVal->gtGetOp1();
}
srcAddrOrFill = initVal;
- if (!initVal->isContained())
- {
- info->srcCount++;
- sourceInfo = getLocationInfo(initVal);
- }
if (blkNode->gtBlkOpKind == GenTreeBlk::BlkOpKindUnroll)
{
@@ -631,26 +614,23 @@ void LinearScan::BuildBlockStore(GenTreeBlk* blkNode)
assert(source->isContained());
srcAddrOrFill = source->gtGetOp1();
assert(!srcAddrOrFill->isContained());
- sourceInfo = getLocationInfo(srcAddrOrFill);
- info->srcCount++;
}
if (blkNode->OperGet() == GT_STORE_OBJ)
{
// CopyObj
// We don't need to materialize the struct size but we still need
// a temporary register to perform the sequence of loads and stores.
- internalIntCount = 1;
+ // We can't use the special Write Barrier registers, so exclude them from the mask
+ internalIntCandidates = allRegs(TYP_INT) & ~(RBM_WRITE_BARRIER_DST_BYREF | RBM_WRITE_BARRIER_SRC_BYREF);
+ buildInternalIntRegisterDefForNode(blkNode, internalIntCandidates);
if (size >= 2 * REGSIZE_BYTES)
{
// We will use ldp/stp to reduce code size and improve performance
// so we need to reserve an extra internal register
- internalIntCount++;
+ buildInternalIntRegisterDefForNode(blkNode, internalIntCandidates);
}
- // We can't use the special Write Barrier registers, so exclude them from the mask
- internalIntCandidates = RBM_ALLINT & ~(RBM_WRITE_BARRIER_DST_BYREF | RBM_WRITE_BARRIER_SRC_BYREF);
-
// If we have a dest address we want it in RBM_WRITE_BARRIER_DST_BYREF.
dstAddrRegMask = RBM_WRITE_BARRIER_DST_BYREF;
@@ -671,15 +651,14 @@ void LinearScan::BuildBlockStore(GenTreeBlk* blkNode)
// we should unroll the loop to improve CQ.
// For reference see the code in lsraxarch.cpp.
- internalIntCount = 1;
- internalIntCandidates = RBM_ALLINT;
+ buildInternalIntRegisterDefForNode(blkNode);
#ifdef _TARGET_ARM64_
if (size >= 2 * REGSIZE_BYTES)
{
// We will use ldp/stp to reduce code size and improve performance
// so we need to reserve an extra internal register
- internalIntCount++;
+ buildInternalIntRegisterDefForNode(blkNode);
}
#endif // _TARGET_ARM64_
}
@@ -696,63 +675,42 @@ void LinearScan::BuildBlockStore(GenTreeBlk* blkNode)
}
}
}
- if (dstAddrInfo != nullptr)
+
+ if ((size != 0) && (blkSizeRegMask != RBM_NONE))
{
- if (dstAddrRegMask != RBM_NONE)
- {
- dstAddrInfo->info.setSrcCandidates(this, dstAddrRegMask);
- }
- useList.Append(dstAddrInfo);
+ // Reserve a temp register for the block size argument.
+ buildInternalIntRegisterDefForNode(blkNode, blkSizeRegMask);
+ }
+
+ if (!dstAddr->isContained() && !blkNode->IsReverseOp())
+ {
+ srcCount++;
+ BuildUse(dstAddr, dstAddrRegMask);
}
- if (sourceRegMask != RBM_NONE)
+ if ((srcAddrOrFill != nullptr) && !srcAddrOrFill->isContained())
{
- if (sourceInfo != nullptr)
- {
- sourceInfo->info.setSrcCandidates(this, sourceRegMask);
- }
- else
- {
- // This is a local source; we'll use a temp register for its address.
- internalIntCandidates |= sourceRegMask;
- internalIntCount++;
- }
+ srcCount++;
+ BuildUse(srcAddrOrFill, sourceRegMask);
}
- if (sourceInfo != nullptr)
+ if (!dstAddr->isContained() && blkNode->IsReverseOp())
{
- useList.Add(sourceInfo, blkNode->IsReverseOp());
+ srcCount++;
+ BuildUse(dstAddr, dstAddrRegMask);
}
- if (blkNode->OperIs(GT_STORE_DYN_BLK))
+ if (size == 0)
{
+ assert(blkNode->OperIs(GT_STORE_DYN_BLK));
// The block size argument is a third argument to GT_STORE_DYN_BLK
- info->srcCount++;
-
+ srcCount++;
GenTree* blockSize = blkNode->AsDynBlk()->gtDynamicSize;
- sizeInfo = getLocationInfo(blockSize);
- useList.Add(sizeInfo, blkNode->AsDynBlk()->gtEvalSizeFirst);
+ BuildUse(blockSize, blkSizeRegMask);
}
- if (blkSizeRegMask != RBM_NONE)
- {
- if (size != 0)
- {
- // Reserve a temp register for the block size argument.
- internalIntCandidates |= blkSizeRegMask;
- internalIntCount++;
- }
- else
- {
- // The block size argument is a third argument to GT_STORE_DYN_BLK
- assert((blkNode->gtOper == GT_STORE_DYN_BLK) && (sizeInfo != nullptr));
- info->setSrcCount(3);
- sizeInfo->info.setSrcCandidates(this, blkSizeRegMask);
- }
- }
- if (internalIntCount != 0)
- {
- info->internalIntCount = internalIntCount;
- info->setInternalCandidates(this, internalIntCandidates);
- }
+ buildInternalRegisterUses();
+ regMaskTP killMask = getKillSetForBlockStore(blkNode);
+ BuildDefsWithKills(blkNode, 0, RBM_NONE, killMask);
+ return srcCount;
}
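
The reordered BuildUse calls above preserve evaluation order: the dstAddr use is emitted before or after the source depending on IsReverseOp(). A toy model of just that control flow, with all names invented:

    #include <cstdio>

    void buildUse(const char* which) { std::printf("use: %s\n", which); }

    void buildBlockStoreUses(bool dstContained, bool srcContained, bool reverseOp) {
        if (!dstContained && !reverseOp) buildUse("dstAddr");
        if (!srcContained)               buildUse("srcAddrOrFill");
        if (!dstContained && reverseOp)  buildUse("dstAddr");
    }

    int main() {
        buildBlockStoreUses(false, false, true); // reverse op: the source use comes first
    }
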
#endif // _TARGET_ARMARCH_
diff --git a/src/jit/lsrabuild.cpp b/src/jit/lsrabuild.cpp
index 71e7d85264..e962b1edc0 100644
--- a/src/jit/lsrabuild.cpp
+++ b/src/jit/lsrabuild.cpp
@@ -23,29 +23,78 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
#include "lsra.h"
//------------------------------------------------------------------------
-// LocationInfoListNodePool::LocationInfoListNodePool:
-// Creates a pool of `LocationInfoListNode` values.
+// RefInfoList
+//------------------------------------------------------------------------
+// removeListNode - retrieve the RefInfoListNode for the given GenTree node
+//
+// Notes:
+// The BuildNode methods use this helper to retrieve the RefPositions for child nodes
+//    from the useList being constructed. Note that if the caller knows the order of the operands,
+//    it is expected to retrieve them directly.
+
+RefInfoListNode* RefInfoList::removeListNode(GenTree* node)
+{
+ RefInfoListNode* prevListNode = nullptr;
+ for (RefInfoListNode *listNode = Begin(), *end = End(); listNode != end; listNode = listNode->Next())
+ {
+ if (listNode->treeNode == node)
+ {
+ assert(listNode->ref->getMultiRegIdx() == 0);
+ return removeListNode(listNode, prevListNode);
+ }
+ prevListNode = listNode;
+ }
+ assert(!"removeListNode didn't find the node");
+ unreached();
+}
+
+//------------------------------------------------------------------------
+// removeListNode - retrieve the RefInfoListNode for one reg of the given multireg GenTree node
+//
+// Notes:
+// The BuildNode methods use this helper to retrieve the RefPositions for child nodes
+//    from the useList being constructed. Note that if the caller knows the order of the operands,
+//    it is expected to retrieve them directly.
+
+RefInfoListNode* RefInfoList::removeListNode(GenTree* node, unsigned multiRegIdx)
+{
+ RefInfoListNode* prevListNode = nullptr;
+ for (RefInfoListNode *listNode = Begin(), *end = End(); listNode != end; listNode = listNode->Next())
+ {
+ if ((listNode->treeNode == node) && (listNode->ref->getMultiRegIdx() == multiRegIdx))
+ {
+ return removeListNode(listNode, prevListNode);
+ }
+ prevListNode = listNode;
+ }
+ assert(!"removeListNode didn't find the node");
+ unreached();
+}
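
Both removeListNode overloads are a linear unlink over a singly linked list. A minimal standalone version of the same walk (Node and removeNode are invented names):

    #include <cassert>
    #include <cstdio>

    struct Node { int key; Node* next; };

    Node* removeNode(Node*& head, int key) {
        Node* prev = nullptr;
        for (Node* cur = head; cur != nullptr; prev = cur, cur = cur->next) {
            if (cur->key == key) {
                (prev != nullptr ? prev->next : head) = cur->next; // unlink
                cur->next = nullptr;
                return cur;
            }
        }
        assert(!"removeNode didn't find the key");
        return nullptr;
    }

    int main() {
        Node c{3, nullptr}, b{2, &c}, a{1, &b};
        Node* head = &a;
        Node* hit  = removeNode(head, 2);
        std::printf("removed %d; head->next->key = %d\n", hit->key, head->next->key); // 3
    }
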
+
+//------------------------------------------------------------------------
+// RefInfoListNodePool::RefInfoListNodePool:
+// Creates a pool of `RefInfoListNode` values.
//
// Arguments:
// compiler - The compiler context.
// preallocate - The number of nodes to preallocate.
//
-LocationInfoListNodePool::LocationInfoListNodePool(Compiler* compiler, unsigned preallocate) : m_compiler(compiler)
+RefInfoListNodePool::RefInfoListNodePool(Compiler* compiler, unsigned preallocate) : m_compiler(compiler)
{
if (preallocate > 0)
{
- size_t preallocateSize = sizeof(LocationInfoListNode) * preallocate;
- LocationInfoListNode* preallocatedNodes =
- reinterpret_cast<LocationInfoListNode*>(compiler->compGetMem(preallocateSize, CMK_LSRA));
+ size_t preallocateSize = sizeof(RefInfoListNode) * preallocate;
+ RefInfoListNode* preallocatedNodes =
+ static_cast<RefInfoListNode*>(compiler->compGetMem(preallocateSize, CMK_LSRA));
- LocationInfoListNode* head = preallocatedNodes;
- head->m_next = nullptr;
+ RefInfoListNode* head = preallocatedNodes;
+ head->m_next = nullptr;
for (unsigned i = 1; i < preallocate; i++)
{
- LocationInfoListNode* node = &preallocatedNodes[i];
- node->m_next = head;
- head = node;
+ RefInfoListNode* node = &preallocatedNodes[i];
+ node->m_next = head;
+ head = node;
}
m_freeList = head;
@@ -53,32 +102,31 @@ LocationInfoListNodePool::LocationInfoListNodePool(Compiler* compiler, unsigned
}
//------------------------------------------------------------------------
-// LocationInfoListNodePool::GetNode: Fetches an unused node from the
+// RefInfoListNodePool::GetNode: Fetches an unused node from the
// pool.
//
// Arguments:
-// l - - The `LsraLocation` for the `LocationInfo` value.
-// i - The interval for the `LocationInfo` value.
-// t - The IR node for the `LocationInfo` value
-// regIdx - The register index for the `LocationInfo` value.
+//    r      - The `RefPosition` for the `RefInfo` value.
+//    t      - The IR node for the `RefInfo` value.
+//    regIdx - The register index for the `RefInfo` value.
//
// Returns:
-// A pooled or newly-allocated `LocationInfoListNode`, depending on the
+// A pooled or newly-allocated `RefInfoListNode`, depending on the
// contents of the pool.
-LocationInfoListNode* LocationInfoListNodePool::GetNode(LsraLocation l, Interval* i, GenTree* t, unsigned regIdx)
+RefInfoListNode* RefInfoListNodePool::GetNode(RefPosition* r, GenTree* t, unsigned regIdx)
{
- LocationInfoListNode* head = m_freeList;
+ RefInfoListNode* head = m_freeList;
if (head == nullptr)
{
- head = reinterpret_cast<LocationInfoListNode*>(m_compiler->compGetMem(sizeof(LocationInfoListNode)));
+ head = reinterpret_cast<RefInfoListNode*>(m_compiler->compGetMem(sizeof(RefInfoListNode)));
}
else
{
m_freeList = head->m_next;
}
- head->loc = l;
- head->interval = i;
+ head->ref = r;
head->treeNode = t;
head->m_next = nullptr;
@@ -86,23 +134,16 @@ LocationInfoListNode* LocationInfoListNodePool::GetNode(LsraLocation l, Interval
}
//------------------------------------------------------------------------
-// LocationInfoListNodePool::ReturnNodes: Returns a list of nodes to the node
-// pool and clears the given list.
+// RefInfoListNodePool::ReturnNode: Returns a node to the pool.
//
// Arguments:
-//    list - The list to return.
+//    listNode - The node to return.
//
-void LocationInfoListNodePool::ReturnNodes(LocationInfoList& list)
+void RefInfoListNodePool::ReturnNode(RefInfoListNode* listNode)
{
- assert(list.m_head != nullptr);
- assert(list.m_tail != nullptr);
-
- LocationInfoListNode* head = m_freeList;
- list.m_tail->m_next = head;
- m_freeList = list.m_head;
-
- list.m_head = nullptr;
- list.m_tail = nullptr;
+ listNode->m_next = m_freeList;
+ m_freeList = listNode;
}
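
RefInfoListNodePool is a classic free list: GetNode pops a node (allocating on a miss) and ReturnNode pushes one back. A minimal sketch of that pattern with invented names:

    #include <cstdio>

    struct Node { int payload = 0; Node* next = nullptr; };

    struct Pool {
        Node* freeList = nullptr;
        Node* get(int payload) {
            Node* n = freeList;
            if (n == nullptr) { n = new Node(); } else { freeList = n->next; }
            n->payload = payload;
            n->next    = nullptr;
            return n;
        }
        void ret(Node* n) { n->next = freeList; freeList = n; } // push onto the free list
    };

    int main() {
        Pool p;
        Node* a = p.get(1);
        p.ret(a);
        Node* b = p.get(2); // pops the node we just returned
        std::printf("reused=%d payload=%d\n", a == b, b->payload); // reused=1 payload=2
    }
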
//------------------------------------------------------------------------
@@ -511,6 +552,17 @@ RefPosition* LinearScan::newRefPosition(Interval* theInterval,
regMaskTP mask,
unsigned multiRegIdx /* = 0 */)
{
+ if (theInterval != nullptr)
+ {
+ if (mask == RBM_NONE)
+ {
+ mask = allRegs(theInterval->registerType);
+ }
+ }
+ else
+ {
+ assert(theRefType == RefTypeBB || theRefType == RefTypeKillGCRefs);
+ }
#ifdef DEBUG
if (theInterval != nullptr && regType(theInterval->registerType) == FloatRegisterType)
{
@@ -572,6 +624,41 @@ RefPosition* LinearScan::newRefPosition(Interval* theInterval,
return newRP;
}
+//---------------------------------------------------------------------------
+// newUseRefPosition: allocate and initialize a RefTypeUse RefPosition at currentLoc.
+//
+// Arguments:
+//    theInterval  - interval with which this RefPosition is associated.
+// theTreeNode - GenTree node for which this RefPosition is created
+// mask - Set of valid registers for this RefPosition
+// multiRegIdx - register position if this RefPosition corresponds to a
+// multi-reg call node.
+//
+// Return Value:
+// a new RefPosition
+//
+// Notes:
+// If the caller knows that 'theTreeNode' is NOT a candidate local, newRefPosition
+// can/should be called directly.
+//
+RefPosition* LinearScan::newUseRefPosition(Interval* theInterval,
+ GenTree* theTreeNode,
+ regMaskTP mask,
+ unsigned multiRegIdx)
+{
+ GenTree* treeNode = isCandidateLocalRef(theTreeNode) ? theTreeNode : nullptr;
+
+ RefPosition* pos = newRefPosition(theInterval, currentLoc, RefTypeUse, treeNode, mask, multiRegIdx);
+ if (theTreeNode->IsRegOptional())
+ {
+ pos->setAllocateIfProfitable(true);
+ }
+ return pos;
+}
+
//------------------------------------------------------------------------
// IsContainableMemoryOp: Checks whether this is a memory op that can be contained.
//
@@ -674,7 +761,170 @@ regMaskTP LinearScan::getKillSetForStoreInd(GenTreeStoreInd* tree)
killMask = compiler->compHelperCallKillSet(helper);
}
}
+ return killMask;
+}
+
+//------------------------------------------------------------------------
+// getKillSetForMul: Determine the liveness kill set for a multiply node.
+//
+// Arguments:
+// tree - the multiply node
+//
+// Return Value: a register mask of the registers killed
+//
+regMaskTP LinearScan::getKillSetForMul(GenTreeOp* mulNode)
+{
+ regMaskTP killMask = RBM_NONE;
+#ifdef _TARGET_XARCH_
+ assert(mulNode->OperIsMul());
+ if (!mulNode->OperIs(GT_MUL) || (((mulNode->gtFlags & GTF_UNSIGNED) != 0) && mulNode->gtOverflowEx()))
+ {
+ killMask = RBM_RAX | RBM_RDX;
+ }
+#endif // _TARGET_XARCH_
+ return killMask;
+}
+//------------------------------------------------------------------------
+// getKillSetForModDiv: Determine the liveness kill set for a mod or div node.
+//
+// Arguments:
+//    node - the mod or div node as a GenTreeOp
+//
+// Return Value: a register mask of the registers killed
+//
+regMaskTP LinearScan::getKillSetForModDiv(GenTreeOp* node)
+{
+ regMaskTP killMask = RBM_NONE;
+#ifdef _TARGET_XARCH_
+ assert(node->OperIs(GT_MOD, GT_DIV, GT_UMOD, GT_UDIV));
+ if (!varTypeIsFloating(node->TypeGet()))
+ {
+ // Both RAX and RDX are killed by the operation
+ killMask = RBM_RAX | RBM_RDX;
+ }
+#endif // _TARGET_XARCH_
+ return killMask;
+}
+
+//------------------------------------------------------------------------
+// getKillSetForCall: Determine the liveness kill set for a call node.
+//
+// Arguments:
+//    call - the GenTreeCall node
+//
+// Return Value: a register mask of the registers killed
+//
+regMaskTP LinearScan::getKillSetForCall(GenTreeCall* call)
+{
+ regMaskTP killMask = RBM_NONE;
+#ifdef _TARGET_X86_
+ if (compiler->compFloatingPointUsed)
+ {
+ if (call->TypeGet() == TYP_DOUBLE)
+ {
+ needDoubleTmpForFPCall = true;
+ }
+ else if (call->TypeGet() == TYP_FLOAT)
+ {
+ needFloatTmpForFPCall = true;
+ }
+ }
+#endif // _TARGET_X86_
+#if defined(_TARGET_X86_) || defined(_TARGET_ARM_)
+ if (call->IsHelperCall())
+ {
+ CorInfoHelpFunc helpFunc = compiler->eeGetHelperNum(call->gtCallMethHnd);
+ killMask = compiler->compHelperCallKillSet(helpFunc);
+ }
+ else
+#endif // defined(_TARGET_X86_) || defined(_TARGET_ARM_)
+ {
+ // if there is no FP used, we can ignore the FP kills
+ if (compiler->compFloatingPointUsed)
+ {
+ killMask = RBM_CALLEE_TRASH;
+ }
+ else
+ {
+ killMask = RBM_INT_CALLEE_TRASH;
+ }
+#ifdef _TARGET_ARM_
+ if (call->IsVirtualStub())
+ {
+ killMask |= compiler->virtualStubParamInfo->GetRegMask();
+ }
+#else // !_TARGET_ARM_
+ // Verify that the special virtual stub call registers are in the kill mask.
+ // We don't just add them unconditionally to the killMask because for most architectures
+ // they are already in the RBM_CALLEE_TRASH set,
+ // and we don't want to introduce extra checks and calls in this hot function.
+ assert(!call->IsVirtualStub() || ((killMask & compiler->virtualStubParamInfo->GetRegMask()) ==
+ compiler->virtualStubParamInfo->GetRegMask()));
+#endif // !_TARGET_ARM_
+ }
+ return killMask;
+}
+
+//------------------------------------------------------------------------
+// getKillSetForBlockStore: Determine the liveness kill set for a block store node.
+//
+// Arguments:
+//    blkNode - the block store node as a GenTreeBlk
+//
+// Return Value: a register mask of the registers killed
+//
+regMaskTP LinearScan::getKillSetForBlockStore(GenTreeBlk* blkNode)
+{
+ assert(blkNode->OperIsStore());
+ regMaskTP killMask = RBM_NONE;
+
+ if ((blkNode->OperGet() == GT_STORE_OBJ) && blkNode->OperIsCopyBlkOp())
+ {
+ assert(blkNode->AsObj()->gtGcPtrCount != 0);
+ killMask = compiler->compHelperCallKillSet(CORINFO_HELP_ASSIGN_BYREF);
+ }
+ else
+ {
+ bool isCopyBlk = varTypeIsStruct(blkNode->Data());
+ switch (blkNode->gtBlkOpKind)
+ {
+ case GenTreeBlk::BlkOpKindHelper:
+ if (isCopyBlk)
+ {
+ killMask = compiler->compHelperCallKillSet(CORINFO_HELP_MEMCPY);
+ }
+ else
+ {
+ killMask = compiler->compHelperCallKillSet(CORINFO_HELP_MEMSET);
+ }
+ break;
+
+#ifdef _TARGET_XARCH_
+ case GenTreeBlk::BlkOpKindRepInstr:
+ if (isCopyBlk)
+ {
+ // rep movs kills RCX, RDI and RSI
+ killMask = RBM_RCX | RBM_RDI | RBM_RSI;
+ }
+ else
+ {
+ // rep stos kills RCX and RDI.
+ // (Note that the Data() node, if not constant, will be assigned to
+                    // RCX, but it's fine that this kills it, as the value is not available
+ // after this node in any case.)
+ killMask = RBM_RDI | RBM_RCX;
+ }
+ break;
+#else
+ case GenTreeBlk::BlkOpKindRepInstr:
+#endif
+ case GenTreeBlk::BlkOpKindUnroll:
+ case GenTreeBlk::BlkOpKindInvalid:
+ // for these 'gtBlkOpKind' kinds, we leave 'killMask' = RBM_NONE
+ break;
+ }
+ }
return killMask;
}
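
The refactor pulls the per-oper kill sets out of one big switch into helpers, so getKillSetForNode (now DEBUG-only) just routes to them. A compact model of that dispatch shape, using made-up masks:

    #include <cstdint>
    #include <cstdio>

    using regMaskTP = uint64_t;
    enum Oper { OP_MUL, OP_DIV, OP_CALL, OP_OTHER };

    // Made-up encodings for illustration only.
    constexpr regMaskTP RBM_RAX          = regMaskTP(1) << 0;
    constexpr regMaskTP RBM_RDX          = regMaskTP(1) << 3;
    constexpr regMaskTP RBM_CALLEE_TRASH = 0x0F07;

    regMaskTP killSetForMulDiv() { return RBM_RAX | RBM_RDX; }
    regMaskTP killSetForCall()   { return RBM_CALLEE_TRASH; }

    regMaskTP killSetForNode(Oper oper) {
        switch (oper) {
            case OP_MUL:
            case OP_DIV:  return killSetForMulDiv();
            case OP_CALL: return killSetForCall();
            default:      return 0;
        }
    }

    int main() {
        std::printf("call kills = 0x%llx\n", (unsigned long long)killSetForNode(OP_CALL));
    }
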
@@ -717,10 +967,38 @@ regMaskTP LinearScan::getKillSetForHWIntrinsic(GenTreeHWIntrinsic* node)
#endif // FEATURE_HW_INTRINSICS
//------------------------------------------------------------------------
+// getKillSetForReturn: Determine the liveness kill set for a return node.
+//
+// Arguments:
+// NONE (this kill set is independent of the details of the specific return.)
+//
+// Return Value: a register mask of the registers killed
+//
+regMaskTP LinearScan::getKillSetForReturn()
+{
+ return compiler->compIsProfilerHookNeeded() ? compiler->compHelperCallKillSet(CORINFO_HELP_PROF_FCN_LEAVE)
+ : RBM_NONE;
+}
+
+//------------------------------------------------------------------------
+// getKillSetForProfilerHook: Determine the liveness kill set for a profiler hook.
+//
+// Arguments:
+// NONE (this kill set is independent of the details of the specific node.)
+//
+// Return Value: a register mask of the registers killed
+//
+regMaskTP LinearScan::getKillSetForProfilerHook()
+{
+ return compiler->compIsProfilerHookNeeded() ? compiler->compHelperCallKillSet(CORINFO_HELP_PROF_FCN_TAILCALL)
+ : RBM_NONE;
+}
+
+#ifdef DEBUG
+//------------------------------------------------------------------------
// getKillSetForNode: Return the registers killed by the given tree node.
//
// Arguments:
-// compiler - the compiler context to use
// tree - the tree for which the kill set is needed.
//
// Return Value: a register mask of the registers killed
@@ -730,140 +1008,34 @@ regMaskTP LinearScan::getKillSetForNode(GenTree* tree)
regMaskTP killMask = RBM_NONE;
switch (tree->OperGet())
{
-#ifdef _TARGET_XARCH_
case GT_MUL:
- // We use the 128-bit multiply when performing an overflow checking unsigned multiply
- //
- if (((tree->gtFlags & GTF_UNSIGNED) != 0) && tree->gtOverflowEx())
- {
- // Both RAX and RDX are killed by the operation
- killMask = RBM_RAX | RBM_RDX;
- }
- break;
-
case GT_MULHI:
-#if defined(_TARGET_X86_)
+#if !defined(_TARGET_64BIT_)
case GT_MUL_LONG:
#endif
- killMask = RBM_RAX | RBM_RDX;
+ killMask = getKillSetForMul(tree->AsOp());
break;
case GT_MOD:
case GT_DIV:
case GT_UMOD:
case GT_UDIV:
- if (!varTypeIsFloating(tree->TypeGet()))
- {
- // Both RAX and RDX are killed by the operation
- killMask = RBM_RAX | RBM_RDX;
- }
+ killMask = getKillSetForModDiv(tree->AsOp());
break;
-#endif // _TARGET_XARCH_
case GT_STORE_OBJ:
- if (tree->OperIsCopyBlkOp())
- {
- assert(tree->AsObj()->gtGcPtrCount != 0);
- killMask = compiler->compHelperCallKillSet(CORINFO_HELP_ASSIGN_BYREF);
- break;
- }
- __fallthrough;
-
case GT_STORE_BLK:
case GT_STORE_DYN_BLK:
- {
- GenTreeBlk* blkNode = tree->AsBlk();
- bool isCopyBlk = varTypeIsStruct(blkNode->Data());
- switch (blkNode->gtBlkOpKind)
- {
- case GenTreeBlk::BlkOpKindHelper:
- if (isCopyBlk)
- {
- killMask = compiler->compHelperCallKillSet(CORINFO_HELP_MEMCPY);
- }
- else
- {
- killMask = compiler->compHelperCallKillSet(CORINFO_HELP_MEMSET);
- }
- break;
-
-#ifdef _TARGET_XARCH_
- case GenTreeBlk::BlkOpKindRepInstr:
- if (isCopyBlk)
- {
- // rep movs kills RCX, RDI and RSI
- killMask = RBM_RCX | RBM_RDI | RBM_RSI;
- }
- else
- {
- // rep stos kills RCX and RDI.
- // (Note that the Data() node, if not constant, will be assigned to
- // RCX, but it's find that this kills it, as the value is not available
- // after this node in any case.)
- killMask = RBM_RDI | RBM_RCX;
- }
- break;
-#else
- case GenTreeBlk::BlkOpKindRepInstr:
-#endif
- case GenTreeBlk::BlkOpKindUnroll:
- case GenTreeBlk::BlkOpKindInvalid:
- // for these 'gtBlkOpKind' kinds, we leave 'killMask' = RBM_NONE
- break;
- }
- }
- break;
+ killMask = getKillSetForBlockStore(tree->AsBlk());
+ break;
case GT_RETURNTRAP:
killMask = compiler->compHelperCallKillSet(CORINFO_HELP_STOP_FOR_GC);
break;
+
case GT_CALL:
-#ifdef _TARGET_X86_
- if (compiler->compFloatingPointUsed)
- {
- if (tree->TypeGet() == TYP_DOUBLE)
- {
- needDoubleTmpForFPCall = true;
- }
- else if (tree->TypeGet() == TYP_FLOAT)
- {
- needFloatTmpForFPCall = true;
- }
- }
-#endif // _TARGET_X86_
-#if defined(_TARGET_X86_) || defined(_TARGET_ARM_)
- if (tree->IsHelperCall())
- {
- GenTreeCall* call = tree->AsCall();
- CorInfoHelpFunc helpFunc = compiler->eeGetHelperNum(call->gtCallMethHnd);
- killMask = compiler->compHelperCallKillSet(helpFunc);
- }
- else
-#endif // defined(_TARGET_X86_) || defined(_TARGET_ARM_)
- {
- // if there is no FP used, we can ignore the FP kills
- if (compiler->compFloatingPointUsed)
- {
- killMask = RBM_CALLEE_TRASH;
- }
- else
- {
- killMask = RBM_INT_CALLEE_TRASH;
- }
-#ifdef _TARGET_ARM_
- if (tree->AsCall()->IsVirtualStub())
- {
- killMask |= compiler->virtualStubParamInfo->GetRegMask();
- }
-#else // !_TARGET_ARM_
- // Verify that the special virtual stub call registers are in the kill mask.
- // We don't just add them unconditionally to the killMask because for most architectures
- // they are already in the RBM_CALLEE_TRASH set,
- // and we don't want to introduce extra checks and calls in this hot function.
- assert(!tree->AsCall()->IsVirtualStub() || ((killMask & compiler->virtualStubParamInfo->GetRegMask()) ==
- compiler->virtualStubParamInfo->GetRegMask()));
-#endif
- }
+ killMask = getKillSetForCall(tree->AsCall());
+
break;
case GT_STOREIND:
killMask = getKillSetForStoreInd(tree->AsStoreInd());
@@ -875,17 +1047,11 @@ regMaskTP LinearScan::getKillSetForNode(GenTree* tree)
// profiler callback would trash these registers. See vm\amd64\asmhelpers.asm for
// more details.
case GT_RETURN:
- if (compiler->compIsProfilerHookNeeded())
- {
- killMask = compiler->compHelperCallKillSet(CORINFO_HELP_PROF_FCN_LEAVE);
- }
+ killMask = getKillSetForReturn();
break;
case GT_PROF_HOOK:
- if (compiler->compIsProfilerHookNeeded())
- {
- killMask = compiler->compHelperCallKillSet(CORINFO_HELP_PROF_FCN_TAILCALL);
- }
+ killMask = getKillSetForProfilerHook();
break;
#endif // PROFILING_SUPPORTED
@@ -901,6 +1067,7 @@ regMaskTP LinearScan::getKillSetForNode(GenTree* tree)
}
return killMask;
}
+#endif // DEBUG
//------------------------------------------------------------------------
// buildKillPositionsForNode:
@@ -921,10 +1088,9 @@ regMaskTP LinearScan::getKillSetForNode(GenTree* tree)
// If we generate any kills, we will mark all currentLiveVars as being preferenced
// to avoid the killed registers. This is somewhat conservative.
-bool LinearScan::buildKillPositionsForNode(GenTree* tree, LsraLocation currentLoc)
+bool LinearScan::buildKillPositionsForNode(GenTree* tree, LsraLocation currentLoc, regMaskTP killMask)
{
- regMaskTP killMask = getKillSetForNode(tree);
- bool isCallKill = ((killMask == RBM_INT_CALLEE_TRASH) || (killMask == RBM_CALLEE_TRASH));
+ bool isCallKill = ((killMask == RBM_INT_CALLEE_TRASH) || (killMask == RBM_CALLEE_TRASH));
if (killMask != RBM_NONE)
{
// The killMask identifies a set of registers that will be used during codegen.
@@ -1013,96 +1179,91 @@ RefPosition* LinearScan::defineNewInternalTemp(GenTree* tree, RegisterType regTy
{
Interval* current = newInterval(regType);
current->isInternal = true;
- return newRefPosition(current, currentLoc, RefTypeDef, tree, regMask, 0);
+ RefPosition* newDef = newRefPosition(current, currentLoc, RefTypeDef, tree, regMask, 0);
+ assert(internalCount < MaxInternalCount);
+ internalDefs[internalCount++] = newDef;
+ return newDef;
}
//------------------------------------------------------------------------
-// buildInternalRegisterDefsForNode - build Def positions for internal
-// registers required for tree node.
+// buildInternalRegisterDefForNode - Create an Interval for an internal int register, and a def RefPosition
//
// Arguments:
-// tree - Gentree node that needs internal registers
-// temps - in-out array which is populated with ref positions
-// created for Def of internal registers
+// tree - Gentree node that needs internal registers
+// internalCands - The mask of valid registers
//
// Returns:
-// The total number of Def positions created for internal registers of tree no.
-int LinearScan::buildInternalRegisterDefsForNode(GenTree* tree, TreeNodeInfo* info, RefPosition* temps[])
+// The def RefPosition created for this internal temp.
+//
+RefPosition* LinearScan::buildInternalIntRegisterDefForNode(GenTree* tree, regMaskTP internalCands)
{
- int count;
- int internalIntCount = info->internalIntCount;
- regMaskTP internalCands = info->getInternalCandidates(this);
-
- // If the number of internal integer registers required is the same as the number of candidate integer registers in
- // the candidate set, then they must be handled as fixed registers.
- // (E.g. for the integer registers that floating point arguments must be copied into for a varargs call.)
- bool fixedRegs = false;
- regMaskTP internalIntCandidates = (internalCands & allRegs(TYP_INT));
- if (((int)genCountBits(internalIntCandidates)) == internalIntCount)
+ bool fixedReg = false;
+ // The candidate set should contain only integer registers.
+ assert((internalCands & ~allRegs(TYP_INT)) == RBM_NONE);
+ if (genMaxOneBit(internalCands))
{
- fixedRegs = true;
+ fixedReg = true;
}
- for (count = 0; count < internalIntCount; count++)
- {
- regMaskTP internalIntCands = (internalCands & allRegs(TYP_INT));
- if (fixedRegs)
- {
- internalIntCands = genFindLowestBit(internalIntCands);
- internalCands &= ~internalIntCands;
- }
- temps[count] = defineNewInternalTemp(tree, IntRegisterType, internalIntCands);
- }
+ RefPosition* defRefPosition = defineNewInternalTemp(tree, IntRegisterType, internalCands);
+ return defRefPosition;
+}
- int internalFloatCount = info->internalFloatCount;
- for (int i = 0; i < internalFloatCount; i++)
+//------------------------------------------------------------------------
+// buildInternalFloatRegisterDefForNode - Create an Interval for an internal fp register, and a def RefPosition
+//
+// Arguments:
+// tree - Gentree node that needs internal registers
+// internalCands - The mask of valid registers
+//
+// Returns:
+// The def RefPosition created for this internal temp.
+//
+RefPosition* LinearScan::buildInternalFloatRegisterDefForNode(GenTree* tree, regMaskTP internalCands)
+{
+ bool fixedReg = false;
+ // The candidate set should contain only float registers.
+ assert((internalCands & ~allRegs(TYP_FLOAT)) == RBM_NONE);
+ if (genMaxOneBit(internalCands))
{
- regMaskTP internalFPCands = (internalCands & internalFloatRegCandidates());
- temps[count++] = defineNewInternalTemp(tree, FloatRegisterType, internalFPCands);
+ fixedReg = true;
}
- assert(count < MaxInternalRegisters);
- assert(count == (internalIntCount + internalFloatCount));
- return count;
+ RefPosition* defRefPosition = defineNewInternalTemp(tree, FloatRegisterType, internalCands);
+ return defRefPosition;
}
//------------------------------------------------------------------------
-// buildInternalRegisterUsesForNode - adds Use positions for internal
+// buildInternalRegisterUses - adds use positions for internal
// registers required for tree node.
//
-// Arguments:
-// tree - Gentree node that needs internal registers
-// defs - int array containing Def positions of internal
-// registers.
-// total - Total number of Def positions in 'defs' array.
-//
-// Returns:
-// Void.
-void LinearScan::buildInternalRegisterUsesForNode(GenTree* tree, TreeNodeInfo* info, RefPosition* defs[], int total)
+// Notes:
+// During the BuildNode process, calls to buildInternalIntRegisterDefForNode and
+// buildInternalFloatRegisterDefForNode put new RefPositions in the 'internalDefs'
+// array, and increment 'internalCount'. This method must be called to add corresponding
+// uses. It then resets the 'internalCount' for the handling of the next node.
+//
+// If the internal registers must differ from the target register, 'setInternalRegsDelayFree'
+// must be set to true, so that the uses may be marked 'delayRegFree'.
+//     Note that if a node has both float and int temps, generally the target will either be
+// int *or* float, and it is not really necessary to set this on the other type, but it does
+// no harm as it won't restrict the register selection.
+//
+void LinearScan::buildInternalRegisterUses()
{
- assert(total < MaxInternalRegisters);
-
- // defs[] has been populated by buildInternalRegisterDefsForNode
- // now just add uses to the defs previously added.
- for (int i = 0; i < total; i++)
+ assert(internalCount <= MaxInternalCount);
+ for (int i = 0; i < internalCount; i++)
{
- RefPosition* prevRefPosition = defs[i];
- assert(prevRefPosition != nullptr);
- regMaskTP mask = prevRefPosition->registerAssignment;
- if (prevRefPosition->isPhysRegRef)
+ RefPosition* def = internalDefs[i];
+ regMaskTP mask = def->registerAssignment;
+ RefPosition* use = newRefPosition(def->getInterval(), currentLoc, RefTypeUse, def->treeNode, mask, 0);
+ if (setInternalRegsDelayFree)
{
- newRefPosition(defs[i]->getReg()->regNum, currentLoc, RefTypeUse, tree, mask);
- }
- else
- {
- RefPosition* newest = newRefPosition(defs[i]->getInterval(), currentLoc, RefTypeUse, tree, mask, 0);
-
- if (info->isInternalRegDelayFree)
- {
- newest->delayRegFree = true;
- }
+ use->delayRegFree = true;
+ pendingDelayFree = true;
}
}
+ // internalCount = 0;
}
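
The internalDefs/internalCount protocol documented above pairs each recorded def with a use once the node is finished. A toy version of that bookkeeping (Scan is invented; exactly where the real code resets the count is an implementation detail):

    #include <cstdio>

    const int MaxInternalCount = 4;

    struct Scan {
        int  internalDefs[MaxInternalCount] = {};
        int  internalCount                  = 0;
        bool setInternalRegsDelayFree       = false;

        void defInternal(int reg) { internalDefs[internalCount++] = reg; } // record the def
        void buildInternalUses() {
            for (int i = 0; i < internalCount; i++) {
                std::printf("use of internal temp in r%d%s\n", internalDefs[i],
                            setInternalRegsDelayFree ? " (delay-free)" : "");
            }
            internalCount = 0; // start clean for the next node (toy model)
        }
    };

    int main() {
        Scan s;
        s.defInternal(12);
        s.defInternal(13);
        s.buildInternalUses();
    }
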
#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
@@ -1120,17 +1281,16 @@ void LinearScan::buildInternalRegisterUsesForNode(GenTree* tree, TreeNodeInfo* i
// Notes: The returned set is used by buildUpperVectorRestoreRefPositions.
//
VARSET_VALRET_TP
-LinearScan::buildUpperVectorSaveRefPositions(GenTree* tree, LsraLocation currentLoc)
+LinearScan::buildUpperVectorSaveRefPositions(GenTree* tree, LsraLocation currentLoc, regMaskTP fpCalleeKillSet)
{
assert(enregisterLocalVars);
VARSET_TP liveLargeVectors(VarSetOps::MakeEmpty(compiler));
- regMaskTP fpCalleeKillSet = RBM_NONE;
if (!VarSetOps::IsEmpty(compiler, largeVectorVars))
{
// We actually need to find any calls that kill the upper-half of the callee-save vector registers.
// But we will use as a proxy any node that kills floating point registers.
// (Note that some calls are masquerading as other nodes at this point so we can't just check for calls.)
- fpCalleeKillSet = getKillSetForNode(tree);
+ // This check should have been done by the caller.
if ((fpCalleeKillSet & RBM_FLT_CALLEE_TRASH) != RBM_NONE)
{
VarSetOps::AssignNoCopy(compiler, liveLargeVectors,
@@ -1311,529 +1471,49 @@ void LinearScan::buildRefPositionsForNode(GenTree* tree, BasicBlock* block, Lsra
}
#endif // DEBUG
- // If the node produces a value that will be consumed by a parent node, its TreeNodeInfo will
- // be allocated in the LocationInfoListNode. Otherwise, we'll just use a local value that will
- // be thrown away when we're done.
- LocationInfoListNode* locationInfo = nullptr;
- TreeNodeInfo tempInfo;
- TreeNodeInfo* info = nullptr;
- int consume = 0;
- int produce = 0;
- if (!tree->isContained())
- {
- if (tree->IsValue())
- {
- locationInfo = listNodePool.GetNode(currentLoc, nullptr, tree);
- currentNodeInfo = &locationInfo->info;
- }
- else
- {
- currentNodeInfo = &tempInfo;
- }
- info = currentNodeInfo;
- info->Initialize(this, tree);
- BuildNode(tree);
- assert(info->IsValid(this));
- consume = info->srcCount;
- produce = info->dstCount;
-#ifdef DEBUG
- if (VERBOSE)
- {
- printf(" +");
- info->dump(this);
- tree->dumpLIRFlags();
- printf("\n");
- }
-#endif // DEBUG
- }
-
-#ifdef DEBUG
- if (VERBOSE)
- {
- if (tree->isContained())
- {
- JITDUMP("Contained\n");
- }
- else if (tree->OperIs(GT_LCL_VAR, GT_LCL_FLD) && tree->IsUnusedValue())
- {
- JITDUMP("Unused\n");
- }
- else
- {
- JITDUMP(" consume=%d produce=%d\n", consume, produce);
- }
- }
-#endif // DEBUG
-
- assert(((consume == 0) && (produce == 0)) || (ComputeAvailableSrcCount(tree) == consume));
-
- if (tree->OperIs(GT_LCL_VAR, GT_LCL_FLD))
- {
- LclVarDsc* const varDsc = &compiler->lvaTable[tree->AsLclVarCommon()->gtLclNum];
- if (isCandidateVar(varDsc))
- {
- assert(consume == 0);
-
- // We handle tracked variables differently from non-tracked ones. If it is tracked,
- // we simply add a use or def of the tracked variable. Otherwise, for a use we need
- // to actually add the appropriate references for loading or storing the variable.
- //
- // It won't actually get used or defined until the appropriate ancestor tree node
- // is processed, unless this is marked "isLocalDefUse" because it is a stack-based argument
- // to a call
-
- assert(varDsc->lvTracked);
- unsigned varIndex = varDsc->lvVarIndex;
-
- if (!tree->IsUnusedValue() && !tree->isContained())
- {
- assert(produce != 0);
-
- locationInfo->interval = getIntervalForLocalVar(varIndex);
- defList.Append(locationInfo);
- }
- return;
- }
- }
if (tree->isContained())
{
- return;
- }
-
- // Handle the case of local variable assignment
- Interval* varDefInterval = nullptr;
-
- GenTree* defNode = tree;
-
- // noAdd means the node creates a def but for purposes of map
- // management do not add it because data is not flowing up the
- // tree
-
- bool noAdd = info->isLocalDefUse;
- RefPosition* prevPos = nullptr;
-
- bool isSpecialPutArg = false;
-
- assert(!tree->OperIsAssignment());
- if (tree->OperIsLocalStore())
- {
- GenTreeLclVarCommon* const store = tree->AsLclVarCommon();
- assert((consume > 1) || (regType(store->gtOp1->TypeGet()) == regType(store->TypeGet())));
-
- LclVarDsc* varDsc = &compiler->lvaTable[store->gtLclNum];
- if (isCandidateVar(varDsc))
+#ifdef _TARGET_XARCH_
+        // On XArch we can have contained candidate lclVars if they are part of an RMW
+ // address computation. In this case we need to check whether it is a last use.
+ if (tree->IsLocal() && ((tree->gtFlags & GTF_VAR_DEATH) != 0))
{
- // We always push the tracked lclVar intervals
- assert(varDsc->lvTracked);
- unsigned varIndex = varDsc->lvVarIndex;
- varDefInterval = getIntervalForLocalVar(varIndex);
- assert((store->gtFlags & GTF_VAR_DEF) != 0);
- defNode = tree;
- if (produce == 0)
- {
- produce = 1;
- noAdd = true;
- }
-
- assert(consume <= MAX_RET_REG_COUNT);
- if (consume == 1)
+ LclVarDsc* const varDsc = &compiler->lvaTable[tree->AsLclVarCommon()->gtLclNum];
+ if (isCandidateVar(varDsc))
{
- // Get the location info for the register defined by the first operand.
- LocationInfoListNode& operandInfo = *(useList.Begin());
- assert(operandInfo.treeNode == tree->gtGetOp1());
-
- Interval* srcInterval = operandInfo.interval;
- if (srcInterval->relatedInterval == nullptr)
- {
- // Preference the source to the dest, unless this is a non-last-use localVar.
- // Note that the last-use info is not correct, but it is a better approximation than preferencing
- // the source to the dest, if the source's lifetime extends beyond the dest.
- if (!srcInterval->isLocalVar || (operandInfo.treeNode->gtFlags & GTF_VAR_DEATH) != 0)
- {
- srcInterval->assignRelatedInterval(varDefInterval);
- }
- }
- else if (!srcInterval->isLocalVar)
- {
- // Preference the source to dest, if src is not a local var.
- srcInterval->assignRelatedInterval(varDefInterval);
- }
+ assert(varDsc->lvTracked);
+ unsigned varIndex = varDsc->lvVarIndex;
+ VarSetOps::RemoveElemD(compiler, currentLiveVars, varIndex);
}
}
- else if (store->gtOp1->OperIs(GT_BITCAST))
- {
- store->gtType = store->gtOp1->gtType = store->gtOp1->AsUnOp()->gtOp1->TypeGet();
-
- // Get the location info for the register defined by the first operand.
- LocationInfoListNode& operandInfo = *(useList.Begin());
- assert(operandInfo.treeNode == tree->gtGetOp1());
-
- Interval* srcInterval = operandInfo.interval;
- srcInterval->registerType = regType(store->TypeGet());
-
- RefPosition* srcDefPosition = srcInterval->firstRefPosition;
- assert(srcDefPosition != nullptr);
- assert(srcDefPosition->refType == RefTypeDef);
- assert(srcDefPosition->treeNode == store->gtOp1);
-
- srcDefPosition->registerAssignment = allRegs(store->TypeGet());
- operandInfo.info.setSrcCandidates(this, allRegs(store->TypeGet()));
- }
- }
- else if (noAdd && produce == 0)
- {
- // Dead nodes may remain after tree rationalization, decomposition or lowering.
- // They should be marked as UnusedValue.
- // TODO-Cleanup: Identify and remove these dead nodes prior to register allocation.
- assert(!noAdd || (produce != 0));
- }
-
- Interval* prefSrcInterval = nullptr;
-
- // If this is a binary operator that will be encoded with 2 operand fields
- // (i.e. the target is read-modify-write), preference the dst to op1.
-
- bool hasDelayFreeSrc = info->hasDelayFreeSrc;
-
-#if defined(DEBUG) && defined(_TARGET_X86_)
- // On x86, `LSRA_LIMIT_CALLER` is too restrictive to allow the use of special put args: this stress mode
- // leaves only three registers allocatable--eax, ecx, and edx--of which the latter two are also used for the
- // first two integral arguments to a call. This can leave us with too few registers to succesfully allocate in
- // situations like the following:
- //
- // t1026 = lclVar ref V52 tmp35 u:3 REG NA <l:$3a1, c:$98d>
- //
- // /--* t1026 ref
- // t1352 = * putarg_reg ref REG NA
- //
- // t342 = lclVar int V14 loc6 u:4 REG NA $50c
- //
- // t343 = const int 1 REG NA $41
- //
- // /--* t342 int
- // +--* t343 int
- // t344 = * + int REG NA $495
- //
- // t345 = lclVar int V04 arg4 u:2 REG NA $100
- //
- // /--* t344 int
- // +--* t345 int
- // t346 = * % int REG NA $496
- //
- // /--* t346 int
- // t1353 = * putarg_reg int REG NA
- //
- // t1354 = lclVar ref V52 tmp35 (last use) REG NA
- //
- // /--* t1354 ref
- // t1355 = * lea(b+0) byref REG NA
- //
- // Here, the first `putarg_reg` would normally be considered a special put arg, which would remove `ecx` from the
- // set of allocatable registers, leaving only `eax` and `edx`. The allocator will then fail to allocate a register
- // for the def of `t345` if arg4 is not a register candidate: the corresponding ref position will be constrained to
- // { `ecx`, `ebx`, `esi`, `edi` }, which `LSRA_LIMIT_CALLER` will further constrain to `ecx`, which will not be
- // available due to the special put arg.
- const bool supportsSpecialPutArg = getStressLimitRegs() != LSRA_LIMIT_CALLER;
-#else
- const bool supportsSpecialPutArg = true;
-#endif
-
- if (supportsSpecialPutArg)
- {
- if ((tree->OperGet() == GT_PUTARG_REG) && isCandidateLocalRef(tree->gtGetOp1()) &&
- (tree->gtGetOp1()->gtFlags & GTF_VAR_DEATH) == 0)
- {
- // This is the case for a "pass-through" copy of a lclVar. In the case where it is a non-last-use,
- // we don't want the def of the copy to kill the lclVar register, if it is assigned the same register
- // (which is actually what we hope will happen).
- JITDUMP("Setting putarg_reg as a pass-through of a non-last use lclVar\n");
-
- // Get the register information for the first operand of the node.
- LocationInfoListNode* operandDef = useList.Begin();
- assert(operandDef->treeNode == tree->gtGetOp1());
-
- // Preference the destination to the interval of the first register defined by the first operand.
- Interval* srcInterval = operandDef->interval;
- assert(srcInterval->isLocalVar);
- prefSrcInterval = srcInterval;
- isSpecialPutArg = true;
- INDEBUG(specialPutArgCount++);
- }
- else if (tree->IsCall())
- {
- INDEBUG(specialPutArgCount = 0);
- }
+#else // _TARGET_XARCH_
+ assert(!isCandidateLocalRef(tree));
+#endif // _TARGET_XARCH_
+ JITDUMP("Contained\n");
+ return;
}
- RefPosition* internalRefs[MaxInternalRegisters];
-
#ifdef DEBUG
// If we are constraining the registers for allocation, we will modify all the RefPositions
// we've built for this node after we've created them. In order to do that, we'll remember
// the last RefPosition prior to those created for this node.
RefPositionIterator refPositionMark = refPositions.backPosition();
+ int oldDefListCount = defList.Count();
#endif // DEBUG
- // Make intervals for all the 'internal' register requirements for this node,
- // where internal means additional registers required temporarily.
- // Create a RefTypeDef RefPosition for each such interval.
- int internalCount = buildInternalRegisterDefsForNode(tree, info, internalRefs);
-
- // Make use RefPositions for all used values.
- int consumed = 0;
- for (LocationInfoListNode *listNode = useList.Begin(), *end = useList.End(); listNode != end;
- listNode = listNode->Next())
- {
- LocationInfo& locInfo = *static_cast<LocationInfo*>(listNode);
-
- // For tree temps, a use is always a last use and the end of the range;
- // this is set by default in newRefPosition
- GenTree* const useNode = locInfo.treeNode;
- assert(useNode != nullptr);
-
- Interval* srcInterval = locInfo.interval;
- TreeNodeInfo& useNodeInfo = locInfo.info;
- if (useNodeInfo.isTgtPref)
- {
- prefSrcInterval = srcInterval;
- }
-
- const bool delayRegFree = (hasDelayFreeSrc && useNodeInfo.isDelayFree);
-
- regMaskTP candidates = useNodeInfo.getSrcCandidates(this);
-#ifdef _TARGET_ARM_
- regMaskTP allCandidates = candidates;
-
- if (useNode->OperIsPutArgSplit() || useNode->OperIsMultiRegOp())
- {
- // get i-th candidate, set bits in useCandidates must be in sequential order.
- candidates = genFindLowestReg(allCandidates);
- allCandidates &= ~candidates;
- }
-#endif // _TARGET_ARM_
-
- assert((candidates & allRegs(srcInterval->registerType)) != 0);
-
- GenTree* refPosNode;
- if (srcInterval->isLocalVar)
- {
- // We have only approximate last-use information at this point. This is because the
- // execution order doesn't actually reflect the true order in which the localVars
- // are referenced - but the order of the RefPositions will, so we recompute it after
- // RefPositions are built.
- // Use the old value for setting currentLiveVars - note that we do this with the
- // not-quite-correct setting of lastUse. However, this is OK because
- // 1) this is only for preferencing, which doesn't require strict correctness, and
- // for determing which largeVectors require having their upper-half saved & restored.
- // (Issue #17481 tracks the issue that this system results in excessive spills and
- // should be changed.)
- // 2) the cases where these out-of-order uses occur should not overlap a kill (they are
- // only known to occur within a single expression).
- if ((useNode->gtFlags & GTF_VAR_DEATH) != 0)
- {
- VarSetOps::RemoveElemD(compiler, currentLiveVars, srcInterval->getVarIndex(compiler));
- }
- refPosNode = useNode;
- }
- else
- {
- // For non-localVar uses we record nothing, as nothing needs to be written back to the tree.
- refPosNode = nullptr;
- }
-
- RefPosition* pos = newRefPosition(srcInterval, currentLoc, RefTypeUse, refPosNode, candidates, 0);
- if (delayRegFree)
- {
- pos->delayRegFree = true;
- }
-
- if (useNode->IsRegOptional())
- {
- pos->setAllocateIfProfitable(true);
- }
- consumed++;
-
- // Create additional use RefPositions for multi-reg nodes.
- for (int idx = 1; idx < locInfo.info.dstCount; idx++)
- {
- noway_assert(srcInterval->relatedInterval != nullptr);
- srcInterval = srcInterval->relatedInterval;
-#ifdef _TARGET_ARM_
- if (useNode->OperIsPutArgSplit() ||
- (compiler->opts.compUseSoftFP && (useNode->OperIsPutArgReg() || useNode->OperGet() == GT_BITCAST)))
- {
- // get first candidate, set bits in useCandidates must be in sequential order.
- candidates = genFindLowestReg(allCandidates);
- allCandidates &= ~candidates;
- }
-#endif // _TARGET_ARM_
- RefPosition* pos = newRefPosition(srcInterval, currentLoc, RefTypeUse, refPosNode, candidates, idx);
- consumed++;
- }
- }
-
- assert(consumed == consume);
- if (consume != 0)
- {
- listNodePool.ReturnNodes(useList);
- }
-
- buildInternalRegisterUsesForNode(tree, info, internalRefs, internalCount);
-
- RegisterType registerType = getDefType(tree);
- regMaskTP candidates = info->getDstCandidates(this);
- regMaskTP useCandidates = info->getSrcCandidates(this);
+ int consume = BuildNode(tree);
#ifdef DEBUG
- if (VERBOSE && produce)
- {
- printf("Def candidates ");
- dumpRegMask(candidates);
- printf(", Use candidates ");
- dumpRegMask(useCandidates);
- printf("\n");
- }
-#endif // DEBUG
+ int newDefListCount = defList.Count();
+ int produce = newDefListCount - oldDefListCount;
+ assert((consume == 0) || (ComputeAvailableSrcCount(tree) == consume));
#if defined(_TARGET_AMD64_)
// Multi-reg call node is the only node that could produce multi-reg value
assert(produce <= 1 || (tree->IsMultiRegCall() && produce == MAX_RET_REG_COUNT));
-#endif // _TARGET_xxx_
-
- // Add kill positions before adding def positions
- buildKillPositionsForNode(tree, currentLoc + 1);
-
-#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
- VARSET_TP liveLargeVectors(VarSetOps::UninitVal());
- if (enregisterLocalVars && (RBM_FLT_CALLEE_SAVED != RBM_NONE))
- {
- // Build RefPositions for saving any live large vectors.
- // This must be done after the kills, so that we know which large vectors are still live.
- VarSetOps::AssignNoCopy(compiler, liveLargeVectors, buildUpperVectorSaveRefPositions(tree, currentLoc + 1));
- }
-#endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE
-
- ReturnTypeDesc* retTypeDesc = nullptr;
- bool isMultiRegCall = tree->IsMultiRegCall();
- if (isMultiRegCall)
- {
- retTypeDesc = tree->AsCall()->GetReturnTypeDesc();
- assert((int)genCountBits(candidates) == produce);
- assert(candidates == retTypeDesc->GetABIReturnRegs());
- }
-
- // push defs
- LocationInfoList locationInfoList;
- LsraLocation defLocation = currentLoc + 1;
- Interval* interval = varDefInterval;
- // For nodes that define multiple registers, subsequent intervals will be linked using the 'relatedInterval' field.
- // Keep track of the previous interval allocated, for that purpose.
- Interval* prevInterval = nullptr;
- for (int i = 0; i < produce; i++)
- {
- regMaskTP currCandidates = candidates;
-
- // In case of multi-reg call node, registerType is given by
- // the type of ith position return register.
- if (isMultiRegCall)
- {
- registerType = retTypeDesc->GetReturnRegType((unsigned)i);
- currCandidates = genRegMask(retTypeDesc->GetABIReturnReg(i));
- useCandidates = allRegs(registerType);
- }
-
-#ifdef _TARGET_ARM_
- // If oper is GT_PUTARG_REG, set bits in useCandidates must be in sequential order.
- if (tree->OperIsPutArgSplit() || tree->OperIsMultiRegOp())
- {
- // get i-th candidate
- currCandidates = genFindLowestReg(candidates);
- candidates &= ~currCandidates;
- }
-#endif // _TARGET_ARM_
-
- if (interval == nullptr)
- {
- // Make a new interval
- interval = newInterval(registerType);
- if (hasDelayFreeSrc || info->isInternalRegDelayFree)
- {
- interval->hasInterferingUses = true;
- }
- else if (tree->OperIsConst())
- {
- assert(!tree->IsReuseRegVal());
- interval->isConstant = true;
- }
-
- if ((currCandidates & useCandidates) != RBM_NONE)
- {
- interval->updateRegisterPreferences(currCandidates & useCandidates);
- }
-
- if (isSpecialPutArg)
- {
- interval->isSpecialPutArg = true;
- }
- }
- else
- {
- assert(registerTypesEquivalent(interval->registerType, registerType));
- assert(interval->isLocalVar);
- if ((tree->gtFlags & GTF_VAR_DEATH) == 0)
- {
- VarSetOps::AddElemD(compiler, currentLiveVars, interval->getVarIndex(compiler));
- }
- }
-
- if (prefSrcInterval != nullptr)
- {
- interval->assignRelatedIntervalIfUnassigned(prefSrcInterval);
- }
-
- // for assignments, we want to create a refposition for the def
- // but not push it
- if (!noAdd)
- {
- if (i == 0)
- {
- locationInfo->interval = interval;
- prevInterval = interval;
- defList.Append(locationInfo);
- }
- else
- {
- // This is the 2nd or subsequent register defined by a multi-reg node.
- // Connect them using 'relatedInterval'.
- noway_assert(prevInterval != nullptr);
- prevInterval->relatedInterval = interval;
- prevInterval = interval;
- prevInterval->isMultiReg = true;
- interval->isMultiReg = true;
- }
- }
-
- RefPosition* pos = newRefPosition(interval, defLocation, RefTypeDef, defNode, currCandidates, (unsigned)i);
- if (info->isLocalDefUse)
- {
- // This must be an unused value, OR it is a special node for which we allocate
- // a target register even though it produces no value.
- assert(defNode->IsUnusedValue() || (defNode->gtOper == GT_LOCKADD));
- pos->isLocalDefUse = true;
- pos->lastUse = true;
- }
- interval->updateRegisterPreferences(currCandidates);
- interval->updateRegisterPreferences(useCandidates);
- interval = nullptr;
- }
+#endif // _TARGET_AMD64_
-#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
- // SaveDef position must be at the same location as Def position of call node.
- if (enregisterLocalVars)
- {
- buildUpperVectorRestoreRefPositions(tree, defLocation, liveLargeVectors);
- }
-#endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE
+#endif // DEBUG
#ifdef DEBUG
// If we are constraining registers, modify all the RefPositions we've just built to specify the
@@ -1841,12 +1521,29 @@ void LinearScan::buildRefPositionsForNode(GenTree* tree, BasicBlock* block, Lsra
if ((getStressLimitRegs() != LSRA_LIMIT_NONE) || (getSelectionHeuristics() != LSRA_SELECT_DEFAULT))
{
// The number of registers required for a tree node is the sum of
- // consume + produce + internalCount + specialPutArgCount.
+ // { RefTypeUses } + { RefTypeDef for the node itself } + specialPutArgCount
// This is the minimum set of registers that needs to be ensured in the candidate set of ref positions created.
//
- unsigned minRegCount =
- consume + produce + info->internalIntCount + info->internalFloatCount + specialPutArgCount;
+ // First, we count them.
+ unsigned minRegCount = 0;
+ RefPositionIterator iter = refPositionMark;
+ for (iter++; iter != refPositions.end(); iter++)
+ {
+ RefPosition* newRefPosition = &(*iter);
+ if (newRefPosition->isIntervalRef())
+ {
+ if ((newRefPosition->refType == RefTypeUse) ||
+ ((newRefPosition->refType == RefTypeDef) && !newRefPosition->getInterval()->isInternal))
+ {
+ minRegCount++;
+ }
+ if (newRefPosition->getInterval()->isSpecialPutArg)
+ {
+ minRegCount++;
+ }
+ }
+ }
for (refPositionMark++; refPositionMark != refPositions.end(); refPositionMark++)
{
RefPosition* newRefPosition = &(*refPositionMark);
@@ -1873,6 +1570,10 @@ void LinearScan::buildRefPositionsForNode(GenTree* tree, BasicBlock* block, Lsra
minRegCountForRef += genCountBits(killMask);
}
}
+ else if ((newRefPosition->refType == RefTypeDef) && newRefPosition->getInterval()->isSpecialPutArg)
+ {
+ minRegCountForRef++;
+ }
newRefPosition->minRegCandidateCount = minRegCountForRef;
if (newRefPosition->IsActualRef() && doReverseCallerCallee())
{
@@ -2454,19 +2155,20 @@ void LinearScan::buildIntervals()
// Clear the "last use" flag on any vars that are live-out from this block.
{
- VarSetOps::Iter iter(compiler, block->bbLiveOut);
+ VARSET_TP bbLiveDefs(VarSetOps::Intersection(compiler, registerCandidateVars, block->bbLiveOut));
+ VarSetOps::Iter iter(compiler, bbLiveDefs);
unsigned varIndex = 0;
while (iter.NextElem(&varIndex))
{
unsigned varNum = compiler->lvaTrackedToVarNum[varIndex];
LclVarDsc* const varDsc = &compiler->lvaTable[varNum];
- if (isCandidateVar(varDsc))
+ assert(isCandidateVar(varDsc));
+ RefPosition* const lastRP = getIntervalForLocalVar(varIndex)->lastRefPosition;
+ // We should be able to assert that lastRP is non-null if it is live-out, but sometimes liveness
+ // lies.
+ if ((lastRP != nullptr) && (lastRP->bbNum == block->bbNum))
{
- RefPosition* const lastRP = getIntervalForLocalVar(varIndex)->lastRefPosition;
- if ((lastRP != nullptr) && (lastRP->bbNum == block->bbNum))
- {
- lastRP->lastUse = false;
- }
+ lastRP->lastUse = false;
}
}
}
@@ -2600,285 +2302,424 @@ void LinearScan::validateIntervals()
#endif // DEBUG
//------------------------------------------------------------------------
-// GetIndirInfo: Get the source registers for an indirection that might be contained.
+// BuildDef: Build a RefTypeDef RefPosition for the given node
//
// Arguments:
-// node - The node of interest
+// tree - The node that defines a register
+// dstCandidates - The candidate registers for the definition
+// multiRegIdx - The index of the definition, defaults to zero.
+// Only non-zero for multi-reg nodes.
//
// Return Value:
-// The number of source registers used by the *parent* of this node.
+// The newly created RefPosition.
//
// Notes:
-// Adds the defining node for each register to the useList.
+// Adds the RefInfo for the definition to the defList.
//
-int LinearScan::GetIndirInfo(GenTreeIndir* indirTree)
+RefPosition* LinearScan::BuildDef(GenTree* tree, regMaskTP dstCandidates, int multiRegIdx)
{
- GenTree* const addr = indirTree->gtOp1;
- if (!addr->isContained())
+ assert(!tree->isContained());
+ RegisterType type = getDefType(tree);
+#ifdef FEATURE_MULTIREG_ARGS_OR_RET
+ if (tree->TypeGet() == TYP_STRUCT)
{
- appendLocationInfoToList(addr);
- return 1;
+ // We require a fixed set of candidates for this case.
+ assert(isSingleRegister(dstCandidates));
+ type = (dstCandidates & allRegs(TYP_FLOAT)) != RBM_NONE ? TYP_FLOAT : TYP_INT;
}
- if (!addr->OperIs(GT_LEA))
+#else
+ assert(tree->TypeGet() != TYP_STRUCT);
+#endif
+
+ Interval* interval = newInterval(type);
+ if (tree->gtRegNum != REG_NA)
{
- return 0;
+ if (!tree->IsMultiRegNode() || (multiRegIdx == 0))
+ {
+ assert((dstCandidates == RBM_NONE) || (dstCandidates == genRegMask(tree->gtRegNum)));
+ dstCandidates = genRegMask(tree->gtRegNum);
+ }
+ else
+ {
+ assert(isSingleRegister(dstCandidates));
+ }
}
-
- GenTreeAddrMode* const addrMode = addr->AsAddrMode();
-
- unsigned srcCount = 0;
- if ((addrMode->Base() != nullptr) && !addrMode->Base()->isContained())
+#ifdef _TARGET_X86_
+ else if (varTypeIsByte(tree))
{
- appendLocationInfoToList(addrMode->Base());
- srcCount++;
+ if (dstCandidates == RBM_NONE)
+ {
+ dstCandidates = allRegs(TYP_INT);
+ }
+ dstCandidates &= ~RBM_NON_BYTE_REGS;
+ assert(dstCandidates != RBM_NONE);
}
- if ((addrMode->Index() != nullptr) && !addrMode->Index()->isContained())
+#endif // _TARGET_X86_
+ if (pendingDelayFree)
{
- appendLocationInfoToList(addrMode->Index());
- srcCount++;
+ interval->hasInterferingUses = true;
+ // pendingDelayFree = false;
}
- return srcCount;
+ RefPosition* defRefPosition =
+ newRefPosition(interval, currentLoc + 1, RefTypeDef, tree, dstCandidates, multiRegIdx);
+ if (tree->IsUnusedValue())
+ {
+ defRefPosition->isLocalDefUse = true;
+ defRefPosition->lastUse = true;
+ }
+ else
+ {
+ RefInfoListNode* refInfo = listNodePool.GetNode(defRefPosition, tree);
+ defList.Append(refInfo);
+ }
+ if (tgtPrefUse != nullptr)
+ {
+ interval->assignRelatedIntervalIfUnassigned(tgtPrefUse->getInterval());
+ }
+ return defRefPosition;
}
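
For illustration, the candidate narrowing in BuildDef reduces to a few mask operations; a standalone C++ sketch, not part of this change, with a plain uint64_t standing in for regMaskTP (the mask values and narrowDstCandidates are hypothetical):

    #include <cassert>
    #include <cstdint>
    #include <cstdio>

    using RegMask = uint64_t;

    const RegMask RBM_NONE          = 0;
    const RegMask RBM_NON_BYTE_REGS = 0xF0; // pretend the high nibble is non-byteable

    // Mirrors BuildDef: a pre-assigned register fixes the candidate set; an x86
    // byte-typed def must otherwise avoid the non-byteable registers.
    RegMask narrowDstCandidates(RegMask dstCandidates, int assignedReg, bool isByteType, RegMask allIntRegs)
    {
        if (assignedReg >= 0)
        {
            RegMask fixedMask = RegMask(1) << assignedReg;
            assert((dstCandidates == RBM_NONE) || (dstCandidates == fixedMask));
            return fixedMask;
        }
        if (isByteType)
        {
            if (dstCandidates == RBM_NONE)
            {
                dstCandidates = allIntRegs;
            }
            dstCandidates &= ~RBM_NON_BYTE_REGS;
            assert(dstCandidates != RBM_NONE);
        }
        return dstCandidates;
    }

    int main()
    {
        // No assigned register, byte type: 0xFF & ~0xF0 == 0x0F
        printf("0x%llx\n", (unsigned long long)narrowDstCandidates(RBM_NONE, -1, true, 0xFF));
        return 0;
    }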
//------------------------------------------------------------------------
-// GetOperandInfo: Get the source registers for an operand that might be contained.
+// BuildDefs: Build one or more RefTypeDef RefPositions for the given node
//
// Arguments:
-// node - The node of interest
-// useList - The list of uses for the node that we're currently processing
-//
-// Return Value:
-// The number of source registers used by the *parent* of this node.
+// tree - The node that defines a register
+// dstCount - The number of registers defined by the node
+// dstCandidates - the candidate registers for the definition
//
// Notes:
-// Adds the defining node for each register to the given useList.
+// Adds the RefInfo for the definitions to the defList.
//
-int LinearScan::GetOperandInfo(GenTree* node)
+void LinearScan::BuildDefs(GenTree* tree, int dstCount, regMaskTP dstCandidates)
{
- if (!node->isContained())
+ bool fixedReg = false;
+ if ((dstCount > 1) && (dstCandidates != RBM_NONE) && (genCountBits(dstCandidates) == dstCount))
{
- appendLocationInfoToList(node);
- return 1;
+ fixedReg = true;
}
-
-#if !defined(_TARGET_64BIT_)
- if (node->OperIs(GT_LONG))
+ ReturnTypeDesc* retTypeDesc = nullptr;
+ if (tree->IsMultiRegCall())
{
- return appendBinaryLocationInfoToList(node->AsOp());
- }
-#endif // !defined(_TARGET_64BIT_)
- if (node->OperIsIndir())
- {
- const unsigned srcCount = GetIndirInfo(node->AsIndir());
- return srcCount;
+ retTypeDesc = tree->AsCall()->GetReturnTypeDesc();
}
- if (node->OperIsHWIntrinsic())
+ for (int i = 0; i < dstCount; i++)
{
- appendLocationInfoToList(node->gtGetOp1());
- return 1;
+ regMaskTP thisDstCandidates;
+ if (fixedReg)
+ {
+ // In case of multi-reg call node, we have to query the ith position return register.
+ // For all other cases of multi-reg definitions, the registers must be in sequential order.
+ if (retTypeDesc != nullptr)
+ {
+ thisDstCandidates = genRegMask(tree->AsCall()->GetReturnTypeDesc()->GetABIReturnReg(i));
+ assert((dstCandidates & thisDstCandidates) != RBM_NONE);
+ }
+ else
+ {
+ thisDstCandidates = genFindLowestBit(dstCandidates);
+ }
+ dstCandidates &= ~thisDstCandidates;
+ }
+ else
+ {
+ thisDstCandidates = dstCandidates;
+ }
+ BuildDef(tree, thisDstCandidates, i);
}
-
- return 0;
}
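
The "fixedReg" path above peels one candidate per definition, lowest set bit first. A minimal runnable sketch, with a plain uint64_t standing in for regMaskTP (the mask value is hypothetical):

    #include <cstdint>
    #include <cstdio>

    int main()
    {
        uint64_t dstCandidates = 0x16; // 0b10110: exactly three candidates for a 3-reg def
        int      dstCount      = 3;
        for (int i = 0; i < dstCount; i++)
        {
            uint64_t thisDstCandidates = dstCandidates & (~dstCandidates + 1); // genFindLowestBit
            dstCandidates &= ~thisDstCandidates;
            printf("def %d -> mask 0x%llx\n", i, (unsigned long long)thisDstCandidates);
        }
        return 0;
    }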
//------------------------------------------------------------------------
-// GetOperandInfo: Get the source registers for an operand that might be contained.
+// BuildDefsWithKills: Build one or more RefTypeDef RefPositions for the given node,
+// as well as kills as specified by the given mask.
//
// Arguments:
-// node - The node of interest
-// useList - The list of uses for the node that we're currently processing
-//
-// Return Value:
-// The number of source registers used by the *parent* of this node.
+// tree - The node that defines a register
+// dstCount - The number of registers defined by the node
+// dstCandidates - The candidate registers for the definition
+// killMask - The mask of registers killed by this node
//
// Notes:
-// Adds the defining node for each register to the useList.
+// Adds the RefInfo for the definitions to the defList.
+// The def and kill functionality is folded into a single method so that the
+// save and restores of upper vector registers can be bracketed around the def.
//
-int LinearScan::GetOperandInfo(GenTree* node, LocationInfoListNode** pFirstInfo)
+void LinearScan::BuildDefsWithKills(GenTree* tree, int dstCount, regMaskTP dstCandidates, regMaskTP killMask)
{
- LocationInfoListNode* prevLast = useList.Last();
- int srcCount = GetOperandInfo(node);
- if (prevLast == nullptr)
+ // Generate Kill RefPositions
+ assert(killMask == getKillSetForNode(tree));
+#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
+ VARSET_TP liveLargeVectors(VarSetOps::UninitVal());
+ bool doLargeVectorRestore = false;
+#endif
+ if (killMask != RBM_NONE)
{
- *pFirstInfo = useList.Begin();
+ buildKillPositionsForNode(tree, currentLoc + 1, killMask);
+#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
+ if (enregisterLocalVars && (RBM_FLT_CALLEE_SAVED != RBM_NONE))
+ {
+ // Build RefPositions for saving any live large vectors.
+ // This must be done after the kills, so that we know which large vectors are still live.
+ VarSetOps::AssignNoCopy(compiler, liveLargeVectors,
+ buildUpperVectorSaveRefPositions(tree, currentLoc + 1, killMask));
+ doLargeVectorRestore = true;
+ }
+#endif
}
- else
+
+ // Now, create the Def(s)
+ BuildDefs(tree, dstCount, dstCandidates);
+
+#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
+ // Finally, generate the UpperVectorRestores
+ if (doLargeVectorRestore)
{
- *pFirstInfo = prevLast->Next();
+ buildUpperVectorRestoreRefPositions(tree, currentLoc, liveLargeVectors);
}
- return srcCount;
+#endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE
}
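
The bracketing that BuildDefsWithKills performs, reduced to its control flow (a toy sketch; the printf steps stand in for the real RefPosition builders):

    #include <cstdio>

    int main()
    {
        bool haveKills = true, enregisterLocalVars = true;
        bool doRestore = false;

        if (haveKills)
        {
            printf("1. kill RefPositions at currentLoc + 1\n");
            if (enregisterLocalVars)
            {
                // The save set is computed after the kills, against the post-kill live set.
                printf("2. upper-vector saves for still-live large vectors\n");
                doRestore = true;
            }
        }
        printf("3. def RefPositions\n");
        if (doRestore)
        {
            printf("4. upper-vector restores bracketing the defs\n");
        }
        return 0;
    }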
-void TreeNodeInfo::Initialize(LinearScan* lsra, GenTree* node)
+//------------------------------------------------------------------------
+// BuildUse: Remove the RefInfoListNode for the given multi-reg index of the given node from
+// the defList, and build a use RefPosition for the associated Interval.
+//
+// Arguments:
+// operand - The node of interest
+// candidates - The register candidates for the use
+// multiRegIdx - The index of the multireg def/use
+//
+// Return Value:
+// The newly created use RefPosition
+//
+// Notes:
+// The node must not be contained, and must have been processed by buildRefPositionsForNode().
+//
+RefPosition* LinearScan::BuildUse(GenTree* operand, regMaskTP candidates, int multiRegIdx)
{
- _dstCount = 0;
- _srcCount = 0;
- _internalIntCount = 0;
- _internalFloatCount = 0;
-
- isLocalDefUse = false;
- isDelayFree = false;
- hasDelayFreeSrc = false;
- isTgtPref = false;
- isInternalRegDelayFree = false;
+ assert(!operand->isContained());
+ Interval* interval;
+ bool regOptional = operand->IsRegOptional();
- regMaskTP dstCandidates;
-
- // if there is a reg indicated on the tree node, use that for dstCandidates
- // the exception is the NOP, which sometimes show up around late args.
- // TODO-Cleanup: get rid of those NOPs.
- if (node->gtRegNum == REG_STK)
+ if (operand->gtRegNum != REG_NA)
{
- dstCandidates = RBM_NONE;
+ candidates = genRegMask(operand->gtRegNum);
}
- else if (node->gtRegNum == REG_NA || node->gtOper == GT_NOP)
+ if (isCandidateLocalRef(operand))
{
-#ifdef ARM_SOFTFP
- if (node->OperGet() == GT_PUTARG_REG)
- {
- dstCandidates = lsra->allRegs(TYP_INT);
- }
- else
-#endif
+ interval = getIntervalForLocalVarNode(operand->AsLclVarCommon());
+
+ // We have only approximate last-use information at this point. This is because the
+ // execution order doesn't actually reflect the true order in which the localVars
+ // are referenced - but the order of the RefPositions will, so we recompute it after
+ // RefPositions are built.
+ // Use the old value for setting currentLiveVars - note that we do this with the
+ // not-quite-correct setting of lastUse. However, this is OK because
+ // 1) this is only for preferencing, which doesn't require strict correctness, and
+ // 2) the cases where these out-of-order uses occur should not overlap a kill.
+ // TODO-Throughput: clean this up once we have the execution order correct. At that point
+ // we can update currentLiveVars at the same place that we create the RefPosition.
+ if ((operand->gtFlags & GTF_VAR_DEATH) != 0)
{
- dstCandidates = lsra->allRegs(node->TypeGet());
+ unsigned varIndex = interval->getVarIndex(compiler);
+ VarSetOps::RemoveElemD(compiler, currentLiveVars, varIndex);
}
}
else
{
- dstCandidates = genRegMask(node->gtRegNum);
+ RefInfoListNode* refInfo = defList.removeListNode(operand, multiRegIdx);
+ RefPosition* defRefPos = refInfo->ref;
+ assert(defRefPos->multiRegIdx == multiRegIdx);
+ interval = defRefPos->getInterval();
+ listNodePool.ReturnNode(refInfo);
+ operand = nullptr;
}
-
- setDstCandidates(lsra, dstCandidates);
- srcCandsIndex = dstCandsIndex;
-
- setInternalCandidates(lsra, lsra->allRegs(TYP_INT));
-
-#ifdef DEBUG
- isInitialized = true;
-#endif
-
- assert(IsValid(lsra));
+ RefPosition* useRefPos = newRefPosition(interval, currentLoc, RefTypeUse, operand, candidates, multiRegIdx);
+ useRefPos->setAllocateIfProfitable(regOptional);
+ return useRefPos;
}
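
The defList handshake between BuildDef and BuildUse can be modeled in isolation: a def appends a (node, multiRegIdx, interval) entry, and the matching use removes it. A standalone sketch with ints standing in for GenTree* and Interval* (all names hypothetical, not this file's types):

    #include <cassert>
    #include <vector>

    struct DefEntry
    {
        int node;        // stands in for the defining GenTree*
        int multiRegIdx; // which register of a multi-reg def
        int interval;    // stands in for the Interval* created by the def
    };

    std::vector<DefEntry> defList;

    void buildDef(int node, int multiRegIdx, int interval)
    {
        defList.push_back({node, multiRegIdx, interval});
    }

    int buildUse(int node, int multiRegIdx)
    {
        for (int i = 0; i < (int)defList.size(); i++)
        {
            if ((defList[i].node == node) && (defList[i].multiRegIdx == multiRegIdx))
            {
                int interval = defList[i].interval;
                defList.erase(defList.begin() + i);
                return interval;
            }
        }
        assert(!"use without a matching def");
        return -1;
    }

    int main()
    {
        buildDef(/* node */ 1, /* idx */ 0, /* interval */ 10);
        buildDef(/* node */ 1, /* idx */ 1, /* interval */ 11);
        assert(buildUse(1, 1) == 11); // multi-reg uses may be consumed out of order
        assert(buildUse(1, 0) == 10);
        return 0;
    }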
//------------------------------------------------------------------------
-// getSrcCandidates: Get the source candidates (candidates for the consumer
-// of the node) from the TreeNodeInfo
+// BuildIndirUses: Build Use RefPositions for an indirection that might be contained
//
// Arguments:
-// lsra - the LinearScan object
+// indirTree - The indirection node of interest
//
// Return Value:
-// The set of registers (as a register mask) that are candidates for the
-// consumer of the node
+// The number of source registers used by the *parent* of this node.
//
// Notes:
-// The LinearScan object maintains the mapping from the indices kept in the
-// TreeNodeInfo to the actual register masks.
+// This method may only be used if the candidates are the same for all sources.
//
-regMaskTP TreeNodeInfo::getSrcCandidates(LinearScan* lsra)
+int LinearScan::BuildIndirUses(GenTreeIndir* indirTree, regMaskTP candidates)
{
- return lsra->GetRegMaskForIndex(srcCandsIndex);
-}
+ GenTree* const addr = indirTree->gtOp1;
+ if (!addr->isContained())
+ {
+ BuildUse(addr, candidates);
+ return 1;
+ }
+ if (!addr->OperIs(GT_LEA))
+ {
+ return 0;
+ }
-//------------------------------------------------------------------------
-// setSrcCandidates: Set the source candidates (candidates for the consumer
-// of the node) on the TreeNodeInfo
-//
-// Arguments:
-// lsra - the LinearScan object
-//
-// Notes: see getSrcCandidates
-//
-void TreeNodeInfo::setSrcCandidates(LinearScan* lsra, regMaskTP mask)
-{
- LinearScan::RegMaskIndex i = lsra->GetIndexForRegMask(mask);
- assert(FitsIn<unsigned char>(i));
- srcCandsIndex = (unsigned char)i;
+ GenTreeAddrMode* const addrMode = addr->AsAddrMode();
+
+ unsigned srcCount = 0;
+ if ((addrMode->Base() != nullptr) && !addrMode->Base()->isContained())
+ {
+ BuildUse(addrMode->Base(), candidates);
+ srcCount++;
+ }
+ if ((addrMode->Index() != nullptr) && !addrMode->Index()->isContained())
+ {
+ BuildUse(addrMode->Index(), candidates);
+ srcCount++;
+ }
+ return srcCount;
}
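
The source counting above, in isolation: a non-contained address is one source, and a contained LEA contributes one source per non-contained base/index. A runnable sketch over a hypothetical node model (not the JIT's GenTree):

    #include <cstdio>

    struct Node
    {
        bool  contained;
        bool  isLea;
        Node* base;
        Node* index;
    };

    int countIndirSources(Node* addr)
    {
        if (!addr->contained)
            return 1;
        if (!addr->isLea)
            return 0;
        int srcCount = 0;
        if ((addr->base != nullptr) && !addr->base->contained)
            srcCount++;
        if ((addr->index != nullptr) && !addr->index->contained)
            srcCount++;
        return srcCount;
    }

    int main()
    {
        Node base{false, false, nullptr, nullptr};
        Node lea{true, true, &base, nullptr}; // contained [base+0] address mode
        printf("%d\n", countIndirSources(&lea)); // prints 1
        return 0;
    }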
//------------------------------------------------------------------------
-// getDstCandidates: Get the dest candidates (candidates for the definition
-// of the node) from the TreeNodeInfo
+// BuildOperandUses: Build Use RefPositions for an operand that might be contained.
//
// Arguments:
-// lsra - the LinearScan object
+// node - The node of interest
//
// Return Value:
-// The set of registers (as a register mask) that are candidates for the
-// node itself
-//
-// Notes: see getSrcCandidates
+// The number of source registers used by the *parent* of this node.
//
-regMaskTP TreeNodeInfo::getDstCandidates(LinearScan* lsra)
+int LinearScan::BuildOperandUses(GenTree* node, regMaskTP candidates)
{
- return lsra->GetRegMaskForIndex(dstCandsIndex);
+ if (!node->isContained())
+ {
+ BuildUse(node, candidates);
+ return 1;
+ }
+
+#if !defined(_TARGET_64BIT_)
+ if (node->OperIs(GT_LONG))
+ {
+ return BuildBinaryUses(node->AsOp(), candidates);
+ }
+#endif // !defined(_TARGET_64BIT_)
+ if (node->OperIsIndir())
+ {
+ return BuildIndirUses(node->AsIndir(), candidates);
+ }
+ if (node->OperIsHWIntrinsic())
+ {
+ BuildUse(node->gtGetOp1(), candidates);
+ return 1;
+ }
+
+ return 0;
}
//------------------------------------------------------------------------
-// setDstCandidates: Set the dest candidates (candidates for the definition
-// of the node) on the TreeNodeInfo
+// setDelayFree: Mark a RefPosition as delayRegFree, and set pendingDelayFree
//
// Arguments:
-// lsra - the LinearScan object
+// use - The use RefPosition to mark
//
-// Notes: see getSrcCandidates
-//
-void TreeNodeInfo::setDstCandidates(LinearScan* lsra, regMaskTP mask)
+void LinearScan::setDelayFree(RefPosition* use)
{
- LinearScan::RegMaskIndex i = lsra->GetIndexForRegMask(mask);
- assert(FitsIn<unsigned char>(i));
- dstCandsIndex = (unsigned char)i;
+ use->delayRegFree = true;
+ pendingDelayFree = true;
}
//------------------------------------------------------------------------
-// getInternalCandidates: Get the internal candidates (candidates for the internal
-// temporary registers used by a node) from the TreeNodeInfo
+// BuildDelayFreeUses: Build Use RefPositions for an operand that might be contained,
+// and which need to be marked delayRegFree
//
// Arguments:
-// lsra - the LinearScan object
+// node - The node of interest
//
// Return Value:
-// The set of registers (as a register mask) that are candidates for the
-// internal temporary registers.
-//
-// Notes: see getSrcCandidates
+// The number of source registers used by the *parent* of this node.
//
-regMaskTP TreeNodeInfo::getInternalCandidates(LinearScan* lsra)
+int LinearScan::BuildDelayFreeUses(GenTree* node, regMaskTP candidates)
{
- return lsra->GetRegMaskForIndex(internalCandsIndex);
-}
+ RefPosition* use;
+ if (!node->isContained())
+ {
+ use = BuildUse(node, candidates);
+ setDelayFree(use);
+ return 1;
+ }
+ else if (!node->OperIsIndir())
+ {
+ return 0;
+ }
+ GenTreeIndir* indirTree = node->AsIndir();
+ GenTree* addr = indirTree->gtOp1;
+ if (!addr->isContained())
+ {
+ use = BuildUse(addr, candidates);
+ setDelayFree(use);
+ return 1;
+ }
+ if (!addr->OperIs(GT_LEA))
+ {
+ return 0;
+ }
-//------------------------------------------------------------------------
-// getInternalCandidates: Set the internal candidates (candidates for the internal
-// temporary registers used by a node) on the TreeNodeInfo
-//
-// Arguments:
-// lsra - the LinearScan object
-//
-// Notes: see getSrcCandidates
-//
-void TreeNodeInfo::setInternalCandidates(LinearScan* lsra, regMaskTP mask)
-{
- LinearScan::RegMaskIndex i = lsra->GetIndexForRegMask(mask);
- assert(FitsIn<unsigned char>(i));
- internalCandsIndex = (unsigned char)i;
+ GenTreeAddrMode* const addrMode = addr->AsAddrMode();
+
+ unsigned srcCount = 0;
+ if ((addrMode->Base() != nullptr) && !addrMode->Base()->isContained())
+ {
+ use = BuildUse(addrMode->Base(), candidates);
+ setDelayFree(use);
+ srcCount++;
+ }
+ if ((addrMode->Index() != nullptr) && !addrMode->Index()->isContained())
+ {
+ use = BuildUse(addrMode->Index(), candidates);
+ setDelayFree(use);
+ srcCount++;
+ }
+ return srcCount;
}
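
The effect of delayRegFree can be modeled as the use's register staying busy one location longer, so it overlaps (and cannot be reused by) the def at currentLoc + 1. A toy sketch, assuming this simplified model rather than the allocator's actual representation:

    #include <cassert>

    struct Use
    {
        int lastBusyLoc; // last location at which the use occupies its register
    };

    Use makeUse(int loc, bool delayRegFree)
    {
        // A normal use frees its register after its own location; a delay-free
        // use holds it through loc + 1, where it conflicts with the node's def.
        return Use{delayRegFree ? loc + 1 : loc};
    }

    int main()
    {
        int useLoc = 10;
        int defLoc = useLoc + 1; // defs are built at currentLoc + 1
        Use normal  = makeUse(useLoc, false);
        Use delayed = makeUse(useLoc, true);
        assert(normal.lastBusyLoc < defLoc);   // the def may reuse this register
        assert(delayed.lastBusyLoc == defLoc); // the def must get a different register
        return 0;
    }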
//------------------------------------------------------------------------
-// addInternalCandidates: Add internal candidates (candidates for the internal
-// temporary registers used by a node) on the TreeNodeInfo
+// BuildBinaryUses: Get the RefInfoListNodes for the operands of the
+// given node, and build uses for them.
//
// Arguments:
-// lsra - the LinearScan object
+// node - a GenTreeOp
//
-// Notes: see getSrcCandidates
+// Return Value:
+// The number of actual register operands.
//
-void TreeNodeInfo::addInternalCandidates(LinearScan* lsra, regMaskTP mask)
+// Notes:
+// The operands must already have been processed by buildRefPositionsForNode, and their
+// RefInfoListNodes placed in the defList.
+//
+int LinearScan::BuildBinaryUses(GenTreeOp* node, regMaskTP candidates)
{
- LinearScan::RegMaskIndex i = lsra->GetIndexForRegMask(lsra->GetRegMaskForIndex(internalCandsIndex) | mask);
- assert(FitsIn<unsigned char>(i));
- internalCandsIndex = (unsigned char)i;
+#ifdef _TARGET_XARCH_
+ if (node->OperIsBinary() && isRMWRegOper(node))
+ {
+ return BuildRMWUses(node, candidates);
+ }
+#endif // _TARGET_XARCH_
+ int srcCount = 0;
+ GenTree* op1 = node->gtOp1;
+ GenTree* op2 = node->gtGetOp2IfPresent();
+ if (node->IsReverseOp() && (op2 != nullptr))
+ {
+ srcCount += BuildOperandUses(op2, candidates);
+ op2 = nullptr;
+ }
+ if (op1 != nullptr)
+ {
+ srcCount += BuildOperandUses(op1, candidates);
+ }
+ if (op2 != nullptr)
+ {
+ srcCount += BuildOperandUses(op2, candidates);
+ }
+ return srcCount;
}
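
The operand ordering above honors GTF_REVERSE_OPS by building op2's use first; in isolation (hypothetical stand-ins for the operand nodes):

    #include <cstdio>

    void buildBinaryUses(const char* op1, const char* op2, bool reverseOps)
    {
        if (reverseOps && (op2 != nullptr))
        {
            printf("use %s\n", op2); // op2 evaluates first, so its use comes first
            op2 = nullptr;
        }
        if (op1 != nullptr)
        {
            printf("use %s\n", op1);
        }
        if (op2 != nullptr)
        {
            printf("use %s\n", op2);
        }
    }

    int main()
    {
        buildBinaryUses("op1", "op2", /* reverseOps */ true); // prints op2, then op1
        return 0;
    }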
//------------------------------------------------------------------------
@@ -2893,12 +2734,24 @@ void TreeNodeInfo::addInternalCandidates(LinearScan* lsra, regMaskTP mask)
// - Handling of contained immediates.
// - Requesting an internal register for SIMD12 stores.
//
-void LinearScan::BuildStoreLoc(GenTreeLclVarCommon* storeLoc)
+int LinearScan::BuildStoreLoc(GenTreeLclVarCommon* storeLoc)
{
- TreeNodeInfo* info = currentNodeInfo;
- GenTree* op1 = storeLoc->gtGetOp1();
+ GenTree* op1 = storeLoc->gtGetOp1();
+ int srcCount;
+ RefPosition* singleUseRef = nullptr;
+ LclVarDsc* varDsc = &compiler->lvaTable[storeLoc->gtLclNum];
- assert(info->dstCount == 0);
+// First, define internal registers.
+#ifdef FEATURE_SIMD
+ RefPosition* internalFloatDef = nullptr;
+ if (varTypeIsSIMD(storeLoc) && !op1->IsCnsIntOrI() && (storeLoc->TypeGet() == TYP_SIMD12))
+ {
+ // Need an additional register to extract upper 4 bytes of Vector3.
+ internalFloatDef = buildInternalFloatRegisterDefForNode(storeLoc, allSIMDRegs());
+ }
+#endif // FEATURE_SIMD
+
+ // Second, use source registers.
if (op1->IsMultiRegCall())
{
@@ -2911,13 +2764,12 @@ void LinearScan::BuildStoreLoc(GenTreeLclVarCommon* storeLoc)
GenTreeCall* call = op1->AsCall();
ReturnTypeDesc* retTypeDesc = call->GetReturnTypeDesc();
unsigned regCount = retTypeDesc->GetReturnRegCount();
- info->srcCount = regCount;
+ srcCount = regCount;
- // Call node srcCandidates = Bitwise-OR(allregs(GetReturnRegType(i))) for all i=0..RetRegCount-1
- regMaskTP srcCandidates = allMultiRegCallNodeRegs(call);
- LocationInfoListNode* locInfo = getLocationInfo(op1);
- locInfo->info.setSrcCandidates(this, srcCandidates);
- useList.Append(locInfo);
+ for (int i = 0; i < srcCount; ++i)
+ {
+ BuildUse(op1, RBM_NONE, i);
+ }
}
#ifndef _TARGET_64BIT_
else if (varTypeIsLong(op1))
@@ -2929,77 +2781,125 @@ void LinearScan::BuildStoreLoc(GenTreeLclVarCommon* storeLoc)
// eax (and killing edx). This only works because it always occurs as var = GT_MUL_LONG (ensured by
// DecomposeMul), and therefore edx won't be reused before the store.
// TODO-X86-Cleanup: GT_MUL_LONG should be a multireg node on x86, just as on ARM.
- info->srcCount = 1;
+ srcCount = 1;
+ singleUseRef = BuildUse(op1);
#else
- info->srcCount = 2;
+ srcCount = 2;
+ BuildUse(op1, allRegs(TYP_INT), 0);
+ BuildUse(op1, allRegs(TYP_INT), 1);
#endif
- appendLocationInfoToList(op1);
}
else
{
assert(op1->OperIs(GT_LONG));
- assert(op1->isContained() && !op1->gtOp.gtOp1->isContained() && !op1->gtOp.gtOp2->isContained());
- info->srcCount = appendBinaryLocationInfoToList(op1->AsOp());
- assert(info->srcCount == 2);
+ assert(op1->isContained() && !op1->gtGetOp1()->isContained() && !op1->gtGetOp2()->isContained());
+ srcCount = BuildBinaryUses(op1->AsOp());
+ assert(srcCount == 2);
}
}
#endif // !_TARGET_64BIT_
else if (op1->isContained())
{
- info->srcCount = 0;
+ srcCount = 0;
}
else
{
- info->srcCount = 1;
- appendLocationInfoToList(op1);
+ srcCount = 1;
+ regMaskTP srcCandidates = RBM_NONE;
+#ifdef _TARGET_X86_
+ if (varTypeIsByte(storeLoc))
+ {
+ srcCandidates = allByteRegs();
+ }
+#endif // _TARGET_X86_
+ singleUseRef = BuildUse(op1, srcCandidates);
}
+// Third, use internal registers.
#ifdef FEATURE_SIMD
- if (varTypeIsSIMD(storeLoc))
+ buildInternalRegisterUses();
+#endif // FEATURE_SIMD
+
+ // Fourth, define destination registers.
+
+ // Add the lclVar to currentLiveVars (if it will remain live)
+ if (isCandidateVar(varDsc))
{
- if (!op1->isContained() && (storeLoc->TypeGet() == TYP_SIMD12))
+ assert(varDsc->lvTracked);
+ unsigned varIndex = varDsc->lvVarIndex;
+ Interval* varDefInterval = getIntervalForLocalVar(varIndex);
+ if ((storeLoc->gtFlags & GTF_VAR_DEATH) == 0)
{
-// Need an additional register to extract upper 4 bytes of Vector3.
-#ifdef _TARGET_XARCH_
- info->internalFloatCount = 1;
- info->setInternalCandidates(this, allSIMDRegs());
-#elif defined(_TARGET_ARM64_)
- info->internalIntCount = 1;
-#else
-#error "Unknown target architecture for STORE_LCL_VAR of SIMD12"
-#endif
+ VarSetOps::AddElemD(compiler, currentLiveVars, varIndex);
}
+ if (singleUseRef != nullptr)
+ {
+ Interval* srcInterval = singleUseRef->getInterval();
+ if (srcInterval->relatedInterval == nullptr)
+ {
+ // Preference the source to the dest, unless this is a non-last-use localVar.
+ // Note that the last-use info is not yet correct at this point, but using it is a
+ // better approximation than unconditionally preferencing the source to the dest,
+ // since the source's lifetime may extend beyond the dest.
+ if (!srcInterval->isLocalVar || (singleUseRef->treeNode->gtFlags & GTF_VAR_DEATH) != 0)
+ {
+ srcInterval->assignRelatedInterval(varDefInterval);
+ }
+ }
+ else if (!srcInterval->isLocalVar)
+ {
+ // Preference the source to dest, if src is not a local var.
+ srcInterval->assignRelatedInterval(varDefInterval);
+ }
+ }
+ newRefPosition(varDefInterval, currentLoc + 1, RefTypeDef, storeLoc, allRegs(storeLoc->TypeGet()));
}
-#endif // FEATURE_SIMD
+ else
+ {
+ if (storeLoc->gtOp1->OperIs(GT_BITCAST))
+ {
+ storeLoc->gtType = storeLoc->gtOp1->gtType = storeLoc->gtOp1->AsUnOp()->gtOp1->TypeGet();
+ RegisterType registerType = regType(storeLoc->TypeGet());
+ noway_assert(singleUseRef != nullptr);
+
+ Interval* srcInterval = singleUseRef->getInterval();
+ srcInterval->registerType = registerType;
+
+ RefPosition* srcDefPosition = srcInterval->firstRefPosition;
+ assert(srcDefPosition != nullptr);
+ assert(srcDefPosition->refType == RefTypeDef);
+ assert(srcDefPosition->treeNode == storeLoc->gtOp1);
+
+ srcDefPosition->registerAssignment = allRegs(registerType);
+ singleUseRef->registerAssignment = allRegs(registerType);
+ }
+ }
+
+ return srcCount;
}
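
The preferencing rule in the candidate-var path above reduces to a small predicate; a sketch with bools standing in for the interval state (the helper name is hypothetical):

    #include <cassert>

    // Preference the source interval to the store's lclVar interval unless the
    // source is itself a local that stays live past this store.
    bool shouldPreferenceSrcToDst(bool srcHasRelated, bool srcIsLocalVar, bool srcIsLastUse)
    {
        if (!srcHasRelated)
        {
            return !srcIsLocalVar || srcIsLastUse;
        }
        return !srcIsLocalVar;
    }

    int main()
    {
        assert(shouldPreferenceSrcToDst(false, true, true));   // dying local: preference
        assert(!shouldPreferenceSrcToDst(false, true, false)); // live-on local: don't
        assert(shouldPreferenceSrcToDst(true, false, false));  // temp with a related interval: still preference
        return 0;
    }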
//------------------------------------------------------------------------
-// BuildSimple: Sets the srcCount for all the trees
-// without special handling based on the tree node type.
+// BuildSimple: Builds use RefPositions for trees requiring no special handling
//
// Arguments:
// tree - The node of interest
//
// Return Value:
-// None.
+// The number of use RefPositions created
//
-void LinearScan::BuildSimple(GenTree* tree)
+int LinearScan::BuildSimple(GenTree* tree)
{
- TreeNodeInfo* info = currentNodeInfo;
- unsigned kind = tree->OperKind();
- assert(info->dstCount == (tree->IsValue() ? 1 : 0));
- if (kind & (GTK_CONST | GTK_LEAF))
+ unsigned kind = tree->OperKind();
+ int srcCount = 0;
+ if ((kind & (GTK_CONST | GTK_LEAF)) == 0)
{
- info->srcCount = 0;
+ assert((kind & GTK_SMPOP) != 0);
+ srcCount = BuildBinaryUses(tree->AsOp());
}
- else if (kind & (GTK_SMPOP))
+ if (tree->IsValue() && !tree->IsUnusedValue())
{
- info->srcCount = appendBinaryLocationInfoToList(tree->AsOp());
- }
- else
- {
- unreached();
+ BuildDef(tree);
}
+ return srcCount;
}
//------------------------------------------------------------------------
@@ -3011,25 +2911,20 @@ void LinearScan::BuildSimple(GenTree* tree)
// Return Value:
-// None.
+// The number of sources consumed by this node.
//
-void LinearScan::BuildReturn(GenTree* tree)
+int LinearScan::BuildReturn(GenTree* tree)
{
- TreeNodeInfo* info = currentNodeInfo;
- assert(info->dstCount == 0);
- GenTree* op1 = tree->gtGetOp1();
+ int srcCount = 0;
+ GenTree* op1 = tree->gtGetOp1();
#if !defined(_TARGET_64BIT_)
if (tree->TypeGet() == TYP_LONG)
{
assert((op1->OperGet() == GT_LONG) && op1->isContained());
- GenTree* loVal = op1->gtGetOp1();
- GenTree* hiVal = op1->gtGetOp2();
- info->srcCount = 2;
- LocationInfoListNode* loValInfo = getLocationInfo(loVal);
- LocationInfoListNode* hiValInfo = getLocationInfo(hiVal);
- loValInfo->info.setSrcCandidates(this, RBM_LNGRET_LO);
- hiValInfo->info.setSrcCandidates(this, RBM_LNGRET_HI);
- useList.Append(loValInfo);
- useList.Append(hiValInfo);
+ GenTree* loVal = op1->gtGetOp1();
+ GenTree* hiVal = op1->gtGetOp2();
+ srcCount = 2;
+ BuildUse(loVal, RBM_LNGRET_LO);
+ BuildUse(hiVal, RBM_LNGRET_HI);
}
else
#endif // !defined(_TARGET_64BIT_)
@@ -3037,19 +2932,27 @@ void LinearScan::BuildReturn(GenTree* tree)
{
regMaskTP useCandidates = RBM_NONE;
- info->srcCount = 1;
+ srcCount = 1;
#if FEATURE_MULTIREG_RET
if (varTypeIsStruct(tree))
{
// op1 has to be either an lclvar or a multi-reg returning call
- if (op1->OperGet() != GT_LCL_VAR)
+ if (op1->OperGet() == GT_LCL_VAR)
+ {
+ BuildUse(op1, useCandidates);
+ }
+ else
{
noway_assert(op1->IsMultiRegCall());
ReturnTypeDesc* retTypeDesc = op1->AsCall()->GetReturnTypeDesc();
- info->srcCount = retTypeDesc->GetReturnRegCount();
+ srcCount = retTypeDesc->GetReturnRegCount();
useCandidates = retTypeDesc->GetABIReturnRegs();
+ for (int i = 0; i < srcCount; i++)
+ {
+ BuildUse(op1, useCandidates, i);
+ }
}
}
else
@@ -3075,15 +2978,68 @@ void LinearScan::BuildReturn(GenTree* tree)
useCandidates = RBM_INTRET;
break;
}
+ BuildUse(op1, useCandidates);
}
-
- LocationInfoListNode* locationInfo = getLocationInfo(op1);
- if (useCandidates != RBM_NONE)
- {
- locationInfo->info.setSrcCandidates(this, useCandidates);
- }
- useList.Append(locationInfo);
}
+ // No kills or defs
+
+ return srcCount;
+}
+
+//------------------------------------------------------------------------
+// supportsSpecialPutArg: Determine if we can support specialPutArgs
+//
+// Return Value:
+// True iff specialPutArg intervals can be supported.
+//
+// Notes:
+// See below.
+//
+
+bool LinearScan::supportsSpecialPutArg()
+{
+#if defined(DEBUG) && defined(_TARGET_X86_)
+ // On x86, `LSRA_LIMIT_CALLER` is too restrictive to allow the use of special put args: this stress mode
+ // leaves only three registers allocatable--eax, ecx, and edx--of which the latter two are also used for the
+ // first two integral arguments to a call. This can leave us with too few registers to succesfully allocate in
+ // situations like the following:
+ //
+ // t1026 = lclVar ref V52 tmp35 u:3 REG NA <l:$3a1, c:$98d>
+ //
+ // /--* t1026 ref
+ // t1352 = * putarg_reg ref REG NA
+ //
+ // t342 = lclVar int V14 loc6 u:4 REG NA $50c
+ //
+ // t343 = const int 1 REG NA $41
+ //
+ // /--* t342 int
+ // +--* t343 int
+ // t344 = * + int REG NA $495
+ //
+ // t345 = lclVar int V04 arg4 u:2 REG NA $100
+ //
+ // /--* t344 int
+ // +--* t345 int
+ // t346 = * % int REG NA $496
+ //
+ // /--* t346 int
+ // t1353 = * putarg_reg int REG NA
+ //
+ // t1354 = lclVar ref V52 tmp35 (last use) REG NA
+ //
+ // /--* t1354 ref
+ // t1355 = * lea(b+0) byref REG NA
+ //
+ // Here, the first `putarg_reg` would normally be considered a special put arg, which would remove `ecx` from the
+ // set of allocatable registers, leaving only `eax` and `edx`. The allocator will then fail to allocate a register
+ // for the def of `t345` if arg4 is not a register candidate: the corresponding ref position will be constrained to
+ // { `ecx`, `ebx`, `esi`, `edi` }, which `LSRA_LIMIT_CALLER` will further constrain to `ecx`, which will not be
+ // available due to the special put arg.
+ return getStressLimitRegs() != LSRA_LIMIT_CALLER;
+#else
+ return true;
+#endif
}
//------------------------------------------------------------------------
@@ -3099,38 +3055,58 @@ void LinearScan::BuildReturn(GenTree* tree)
// Return Value:
-// None.
+// The number of sources consumed by this node.
//
-void LinearScan::BuildPutArgReg(GenTreeUnOp* node)
+int LinearScan::BuildPutArgReg(GenTreeUnOp* node)
{
- TreeNodeInfo* info = currentNodeInfo;
assert(node != nullptr);
assert(node->OperIsPutArgReg());
- info->srcCount = 1;
regNumber argReg = node->gtRegNum;
assert(argReg != REG_NA);
+ bool isSpecialPutArg = false;
+ int srcCount = 1;
// Set the register requirements for the node.
regMaskTP argMask = genRegMask(argReg);
+ // To avoid redundant moves, have the argument operand computed in the
+ // register in which the argument is passed to the call.
+ GenTree* op1 = node->gtOp1;
+ RefPosition* use = BuildUse(op1, argMask);
+
+ if (supportsSpecialPutArg() && isCandidateLocalRef(op1) && ((op1->gtFlags & GTF_VAR_DEATH) == 0))
+ {
+ // This is the case for a "pass-through" copy of a lclVar. In the case where it is a non-last-use,
+ // we don't want the def of the copy to kill the lclVar register, if it is assigned the same register
+ // (which is actually what we hope will happen).
+ JITDUMP("Setting putarg_reg as a pass-through of a non-last use lclVar\n");
+
+ // Preference the destination to the interval of the first register defined by the first operand.
+ assert(use->getInterval()->isLocalVar);
+ isSpecialPutArg = true;
+ tgtPrefUse = use;
+ }
+
#ifdef _TARGET_ARM_
// If type of node is `long` then it is actually `double`.
// The actual `long` types must have been transformed as a field list with two fields.
if (node->TypeGet() == TYP_LONG)
{
- info->srcCount++;
- info->dstCount = info->srcCount;
+ srcCount++;
+ regMaskTP argMaskHi = genRegMask(REG_NEXT(argReg));
assert(genRegArgNext(argReg) == REG_NEXT(argReg));
- argMask |= genRegMask(REG_NEXT(argReg));
+ use = BuildUse(op1, argMaskHi, 1);
+ BuildDef(node, argMask, 0);
+ BuildDef(node, argMaskHi, 1);
}
+ else
#endif // _TARGET_ARM_
- info->setDstCandidates(this, argMask);
- info->setSrcCandidates(this, argMask);
-
- // To avoid redundant moves, have the argument operand computed in the
- // register in which the argument is passed to the call.
- LocationInfoListNode* op1Info = getLocationInfo(node->gtOp.gtOp1);
- op1Info->info.setSrcCandidates(this, info->getSrcCandidates(this));
- op1Info->info.isDelayFree = true;
- useList.Append(op1Info);
+ {
+ RefPosition* def = BuildDef(node, argMask);
+ if (isSpecialPutArg)
+ {
+ def->getInterval()->isSpecialPutArg = true;
+ }
+ }
+ return srcCount;
}
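
Two pieces of BuildPutArgReg in isolation: the special putarg test, and the ARM TYP_LONG pairing where the high half takes REG_NEXT(argReg). A standalone sketch under a hypothetical mask model (not this file's regMaskTP/regNumber types):

    #include <cassert>
    #include <cstdint>

    using RegMask = uint64_t;

    RegMask regMask(int reg) { return RegMask(1) << reg; }

    // Mirrors the test above: a pass-through copy of a candidate lclVar that
    // is not a last use becomes a special putarg.
    bool isSpecialPutArg(bool supportsSpecial, bool isCandidateLocal, bool isLastUse)
    {
        return supportsSpecial && isCandidateLocal && !isLastUse;
    }

    int main()
    {
        // ARM TYP_LONG: the high half goes in the next register after argReg.
        int     argReg = 2;                  // say, r2
        RegMask loMask = regMask(argReg);
        RegMask hiMask = regMask(argReg + 1); // REG_NEXT(argReg)
        assert((loMask & hiMask) == 0);

        assert(isSpecialPutArg(true, true, false));
        assert(!isSpecialPutArg(true, true, true)); // last use: the copy may kill the register
        return 0;
    }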
//------------------------------------------------------------------------
@@ -3152,15 +3128,15 @@ void LinearScan::BuildPutArgReg(GenTreeUnOp* node)
void LinearScan::HandleFloatVarArgs(GenTreeCall* call, GenTree* argNode, bool* callHasFloatRegArgs)
{
#if FEATURE_VARARG
- TreeNodeInfo* info = currentNodeInfo;
if (call->IsVarargs() && varTypeIsFloating(argNode))
{
*callHasFloatRegArgs = true;
+ // We'll have to return the internal def and then later create a use for it.
regNumber argReg = argNode->gtRegNum;
regNumber targetReg = compiler->getCallArgIntRegister(argReg);
- info->setInternalIntCount(info->internalIntCount + 1);
- info->addInternalCandidates(this, genRegMask(targetReg));
+
+ buildInternalIntRegisterDefForNode(call, genRegMask(targetReg));
}
#endif // FEATURE_VARARG
}
@@ -3171,33 +3147,27 @@ void LinearScan::HandleFloatVarArgs(GenTreeCall* call, GenTree* argNode, bool* c
// Arguments:
// tree - The STORE_IND for which a write barrier is required
//
-void LinearScan::BuildGCWriteBarrier(GenTree* tree)
+int LinearScan::BuildGCWriteBarrier(GenTree* tree)
{
- TreeNodeInfo* info = currentNodeInfo;
- GenTree* dst = tree;
- GenTree* addr = tree->gtOp.gtOp1;
- GenTree* src = tree->gtOp.gtOp2;
- LocationInfoListNode* addrInfo = getLocationInfo(addr);
- LocationInfoListNode* srcInfo = getLocationInfo(src);
+ GenTree* dst = tree;
+ GenTree* addr = tree->gtGetOp1();
+ GenTree* src = tree->gtGetOp2();
// In the case where we are doing a helper assignment, even if the dst
// is an indir through an lea, we need to actually instantiate the
// lea in a register
assert(!addr->isContained() && !src->isContained());
- useList.Append(addrInfo);
- useList.Append(srcInfo);
- info->srcCount = 2;
- assert(info->dstCount == 0);
- bool customSourceRegs = false;
+ int srcCount = 2;
+ regMaskTP addrCandidates = RBM_ARG_0;
+ regMaskTP srcCandidates = RBM_ARG_1;
#if defined(_TARGET_ARM64_)
// the 'addr' goes into x14 (REG_WRITE_BARRIER_DST)
// the 'src' goes into x15 (REG_WRITE_BARRIER_SRC)
//
- addrInfo->info.setSrcCandidates(this, RBM_WRITE_BARRIER_DST);
- srcInfo->info.setSrcCandidates(this, RBM_WRITE_BARRIER_SRC);
- customSourceRegs = true;
+ addrCandidates = RBM_WRITE_BARRIER_DST;
+ srcCandidates = RBM_WRITE_BARRIER_SRC;
#elif defined(_TARGET_X86_) && NOGC_WRITE_BARRIERS
@@ -3207,26 +3177,18 @@ void LinearScan::BuildGCWriteBarrier(GenTree* tree)
// Special write barrier:
// op1 (addr) goes into REG_WRITE_BARRIER (rdx) and
// op2 (src) goes into any int register.
- addrInfo->info.setSrcCandidates(this, RBM_WRITE_BARRIER);
- srcInfo->info.setSrcCandidates(this, RBM_WRITE_BARRIER_SRC);
- customSourceRegs = true;
+ addrCandidates = RBM_WRITE_BARRIER;
+ srcCandidates = RBM_WRITE_BARRIER_SRC;
}
#endif // defined(_TARGET_X86_) && NOGC_WRITE_BARRIERS
- if (!customSourceRegs)
- {
- // For the standard JIT Helper calls:
- // op1 (addr) goes into REG_ARG_0 and
- // op2 (src) goes into REG_ARG_1
- addrInfo->info.setSrcCandidates(this, RBM_ARG_0);
- srcInfo->info.setSrcCandidates(this, RBM_ARG_1);
- }
+ BuildUse(addr, addrCandidates);
+ BuildUse(src, srcCandidates);
- // Both src and dst must reside in a register, which they should since we haven't set
- // either of them as contained.
- assert(addrInfo->info.dstCount == 1);
- assert(srcInfo->info.dstCount == 1);
+ regMaskTP killMask = getKillSetForStoreInd(tree->AsStoreInd());
+ buildKillPositionsForNode(tree, currentLoc + 1, killMask);
+ return 2;
}
//------------------------------------------------------------------------
@@ -3238,22 +3200,71 @@ void LinearScan::BuildGCWriteBarrier(GenTree* tree)
// Return Value:
-// None.
+// The number of sources consumed by this node.
//
-void LinearScan::BuildCmp(GenTree* tree)
+int LinearScan::BuildCmp(GenTree* tree)
{
- TreeNodeInfo* info = currentNodeInfo;
assert(tree->OperIsCompare() || tree->OperIs(GT_CMP) || tree->OperIs(GT_JCMP));
-
- info->srcCount = 0;
- assert((info->dstCount == 1) || (tree->TypeGet() == TYP_VOID));
+ regMaskTP dstCandidates = RBM_NONE;
+ regMaskTP op1Candidates = RBM_NONE;
+ regMaskTP op2Candidates = RBM_NONE;
+ GenTree* op1 = tree->gtGetOp1();
+ GenTree* op2 = tree->gtGetOp2();
#ifdef _TARGET_X86_
// If the compare is used by a jump, we just need to set the condition codes. If not, then we need
// to store the result into the low byte of a register, which requires the dst be a byteable register.
- // We always set the dst candidates, though, because if this is compare is consumed by a jump, they
- // won't be used. We might be able to use GTF_RELOP_JMP_USED to determine this case, but it's not clear
- // that flag is maintained until this location (especially for decomposed long compares).
- info->setDstCandidates(this, RBM_BYTE_REGS);
+ if (tree->TypeGet() != TYP_VOID)
+ {
+ dstCandidates = allByteRegs();
+ }
+ bool needByteRegs = false;
+ if (varTypeIsByte(tree))
+ {
+ if (!varTypeIsFloating(op1))
+ {
+ needByteRegs = true;
+ }
+ }
+ // Example1: GT_EQ(int, op1 of type ubyte, op2 of type ubyte) - in this case codegen uses
+ // ubyte as the result of comparison and if the result needs to be materialized into a reg
+ // simply zero extend it to TYP_INT size. Here is an example of generated code:
+ // cmp dl, byte ptr[addr mode]
+ // movzx edx, dl
+ else if (varTypeIsByte(op1) && varTypeIsByte(op2))
+ {
+ needByteRegs = true;
+ }
+ // Example2: GT_EQ(int, op1 of type ubyte, op2 is GT_CNS_INT) - in this case codegen uses
+ // ubyte as the result of the comparison and if the result needs to be materialized into a reg
+ // simply zero extend it to TYP_INT size.
+ else if (varTypeIsByte(op1) && op2->IsCnsIntOrI())
+ {
+ needByteRegs = true;
+ }
+ // Example3: GT_EQ(int, op1 is GT_CNS_INT, op2 of type ubyte) - in this case codegen uses
+ // ubyte as the result of the comparison and if the result needs to be materialized into a reg
+ // simply zero extend it to TYP_INT size.
+ else if (op1->IsCnsIntOrI() && varTypeIsByte(op2))
+ {
+ needByteRegs = true;
+ }
+ if (needByteRegs)
+ {
+ if (!op1->isContained())
+ {
+ op1Candidates = allByteRegs();
+ }
+ if (!op2->isContained())
+ {
+ op2Candidates = allByteRegs();
+ }
+ }
#endif // _TARGET_X86_
- info->srcCount = appendBinaryLocationInfoToList(tree->AsOp());
+ int srcCount = BuildOperandUses(op1, op1Candidates);
+ srcCount += BuildOperandUses(op2, op2Candidates);
+ if (tree->TypeGet() != TYP_VOID)
+ {
+ BuildDef(tree, dstCandidates);
+ }
+ return srcCount;
}
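
The x86 byte-register decision above is a pure function of the operand shapes; a sketch as a standalone predicate (parameter names are hypothetical, standing in for the varTypeIsByte/IsCnsIntOrI queries):

    #include <cassert>

    bool needByteRegs(bool treeIsByte, bool op1IsFloat, bool op1IsByte, bool op2IsByte, bool op2IsConst, bool op1IsConst)
    {
        if (treeIsByte)
            return !op1IsFloat;        // byte-sized result, non-floating compare
        if (op1IsByte && op2IsByte)
            return true;               // ubyte vs ubyte
        if (op1IsByte && op2IsConst)
            return true;               // ubyte vs constant
        if (op1IsConst && op2IsByte)
            return true;               // constant vs ubyte
        return false;
    }

    int main()
    {
        assert(needByteRegs(false, false, true, true, false, false));   // ubyte == ubyte
        assert(needByteRegs(false, false, true, false, true, false));   // ubyte == const
        assert(!needByteRegs(false, false, false, false, false, false)); // int == int
        return 0;
    }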
diff --git a/src/jit/lsraxarch.cpp b/src/jit/lsraxarch.cpp
index cebe6a13bf..d11b396a3b 100644
--- a/src/jit/lsraxarch.cpp
+++ b/src/jit/lsraxarch.cpp
@@ -27,49 +27,61 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
#include "lower.h"
//------------------------------------------------------------------------
-// BuildNode: Set register requirements for a node
+// BuildNode: Build the RefPositions for a node
//
// Arguments:
// treeNode - the node of interest
//
+// Return Value:
+// The number of sources consumed by this node.
+//
// Notes:
// Preconditions:
-// LSRA Has been initialized and there is a TreeNodeInfo node
-// already allocated and initialized for every tree in the IR.
+// LSRA has been initialized.
+//
// Postconditions:
-// Every TreeNodeInfo instance has the right annotations on register
-// requirements needed by LSRA to build the Interval Table (source,
-// destination and internal [temp] register counts).
+// RefPositions have been built for all the register defs and uses required
+// for this node.
//
-void LinearScan::BuildNode(GenTree* tree)
+int LinearScan::BuildNode(GenTree* tree)
{
- TreeNodeInfo* info = currentNodeInfo;
assert(!tree->isContained());
+ Interval* prefSrcInterval = nullptr;
+ int srcCount;
+ int dstCount = 0;
+ regMaskTP dstCandidates = RBM_NONE;
+ regMaskTP killMask = RBM_NONE;
+ bool isLocalDefUse = false;
+ // Reset the build-related members of LinearScan.
+ clearBuildState();
+
+ // Set the default dstCount. This may be modified below.
if (tree->IsValue())
{
- info->dstCount = 1;
+ dstCount = 1;
if (tree->IsUnusedValue())
{
- info->isLocalDefUse = true;
+ isLocalDefUse = true;
}
}
else
{
- info->dstCount = 0;
+ dstCount = 0;
}
// floating type generates AVX instruction (vmovss etc.), set the flag
SetContainsAVXFlags(varTypeIsFloating(tree->TypeGet()));
+
switch (tree->OperGet())
{
default:
- BuildSimple(tree);
+ srcCount = BuildSimple(tree);
break;
case GT_LCL_VAR:
// Because we do containment analysis before we redo dataflow and identify register
- // candidates, the containment analysis only !lvDoNotEnregister to estimate register
+ // candidates, the containment analysis only uses !lvDoNotEnregister to estimate register
// candidates.
// If there is a lclVar that is estimated to be register candidate but
// is not, if they were marked regOptional they should now be marked contained instead.
@@ -82,52 +94,83 @@ void LinearScan::BuildNode(GenTree* tree)
{
tree->ClearRegOptional();
tree->SetContained();
- info->dstCount = 0;
- return;
+ INDEBUG(dumpNodeInfo(tree, dstCandidates, 0, 0));
+ return 0;
}
}
__fallthrough;
case GT_LCL_FLD:
- info->srcCount = 0;
-
+ {
+ // We handle tracked variables differently from non-tracked ones. If it is tracked,
+ // we will simply add a use of the tracked variable at its parent/consumer.
+ // Otherwise, for a use we need to actually add the appropriate references for loading
+ // or storing the variable.
+ //
+ // A tracked variable won't actually get used until the appropriate ancestor tree node
+ // is processed, unless this is marked "isLocalDefUse" because it is a stack-based argument
+ // to a call or an orphaned dead node.
+ //
+ LclVarDsc* const varDsc = &compiler->lvaTable[tree->AsLclVarCommon()->gtLclNum];
+ if (isCandidateVar(varDsc))
+ {
+ INDEBUG(dumpNodeInfo(tree, dstCandidates, 0, 1));
+ return 0;
+ }
+ srcCount = 0;
#ifdef FEATURE_SIMD
// Need an additional register to read upper 4 bytes of Vector3.
if (tree->TypeGet() == TYP_SIMD12)
{
// We need an internal register different from targetReg in which 'tree' produces its result
// because both targetReg and internal reg will be in use at the same time.
- info->internalFloatCount = 1;
- info->isInternalRegDelayFree = true;
- info->setInternalCandidates(this, allSIMDRegs());
+ buildInternalFloatRegisterDefForNode(tree, allSIMDRegs());
+ setInternalRegsDelayFree = true;
+ buildInternalRegisterUses();
}
#endif
- break;
+ BuildDef(tree);
+ }
+ break;
case GT_STORE_LCL_FLD:
case GT_STORE_LCL_VAR:
- BuildStoreLoc(tree->AsLclVarCommon());
+ srcCount = BuildStoreLoc(tree->AsLclVarCommon());
break;
case GT_FIELD_LIST:
// These should always be contained. We don't correctly allocate or
// generate code for a non-contained GT_FIELD_LIST.
noway_assert(!"Non-contained GT_FIELD_LIST");
+ srcCount = 0;
break;
case GT_LIST:
case GT_ARGPLACE:
case GT_NO_OP:
+ srcCount = 0;
+ assert(dstCount == 0);
+ break;
+
case GT_START_NONGC:
case GT_PROF_HOOK:
- info->srcCount = 0;
- assert(info->dstCount == 0);
+ srcCount = 0;
+ assert(dstCount == 0);
+ killMask = getKillSetForProfilerHook();
+ BuildDefsWithKills(tree, 0, RBM_NONE, killMask);
break;
+ case GT_CNS_INT:
+ case GT_CNS_LNG:
case GT_CNS_DBL:
- info->srcCount = 0;
- assert(info->dstCount == 1);
- break;
+ {
+ srcCount = 0;
+ assert(dstCount == 1);
+ assert(!tree->IsReuseRegVal());
+ RefPosition* def = BuildDef(tree);
+ def->getInterval()->isConstant = true;
+ }
+ break;
#if !defined(_TARGET_64BIT_)
@@ -136,11 +179,11 @@ void LinearScan::BuildNode(GenTree* tree)
// An unused GT_LONG node needs to consume its sources, but need not produce a register.
tree->gtType = TYP_VOID;
tree->ClearUnusedValue();
- info->isLocalDefUse = false;
- info->srcCount = 2;
- info->dstCount = 0;
- appendLocationInfoToList(tree->gtGetOp1());
- appendLocationInfoToList(tree->gtGetOp2());
+ isLocalDefUse = false;
+ srcCount = 2;
+ dstCount = 0;
+ BuildUse(tree->gtGetOp1());
+ BuildUse(tree->gtGetOp2());
break;
#endif // !defined(_TARGET_64BIT_)
@@ -149,31 +192,27 @@ void LinearScan::BuildNode(GenTree* tree)
case GT_COMMA:
case GT_QMARK:
case GT_COLON:
- info->srcCount = 0;
- assert(info->dstCount == 0);
+ srcCount = 0;
unreached();
break;
case GT_RETURN:
- BuildReturn(tree);
+ srcCount = BuildReturn(tree);
+ killMask = getKillSetForReturn();
+ BuildDefsWithKills(tree, 0, RBM_NONE, killMask);
break;
case GT_RETFILT:
- assert(info->dstCount == 0);
+ assert(dstCount == 0);
if (tree->TypeGet() == TYP_VOID)
{
- info->srcCount = 0;
+ srcCount = 0;
}
else
{
assert(tree->TypeGet() == TYP_INT);
-
- info->srcCount = 1;
-
- info->setSrcCandidates(this, RBM_INTRET);
- LocationInfoListNode* locationInfo = getLocationInfo(tree->gtOp.gtOp1);
- locationInfo->info.setSrcCandidates(this, RBM_INTRET);
- useList.Append(locationInfo);
+ srcCount = 1;
+ BuildUse(tree->gtGetOp1(), RBM_INTRET);
}
break;
@@ -181,67 +220,72 @@ void LinearScan::BuildNode(GenTree* tree)
// a child), but must be considered to produce a dummy value if it
// has a type but no child
case GT_NOP:
- info->srcCount = 0;
- assert((tree->gtOp.gtOp1 == nullptr) || tree->isContained());
- if (tree->TypeGet() != TYP_VOID && tree->gtOp.gtOp1 == nullptr)
+ srcCount = 0;
+ assert((tree->gtGetOp1() == nullptr) || tree->isContained());
+ if (tree->TypeGet() != TYP_VOID && tree->gtGetOp1() == nullptr)
{
- assert(info->dstCount == 1);
+ assert(dstCount == 1);
+ BuildDef(tree);
}
else
{
- assert(info->dstCount == 0);
+ assert(dstCount == 0);
}
break;
case GT_JTRUE:
{
- info->srcCount = 0;
- assert(info->dstCount == 0);
+ srcCount = 0;
+ assert(dstCount == 0);
GenTree* cmp = tree->gtGetOp1();
assert(!cmp->IsValue());
}
break;
case GT_JCC:
- info->srcCount = 0;
- assert(info->dstCount == 0);
+ srcCount = 0;
+ assert(dstCount == 0);
break;
case GT_SETCC:
- info->srcCount = 0;
- assert(info->dstCount == 1);
-#ifdef _TARGET_X86_
- info->setDstCandidates(this, RBM_BYTE_REGS);
-#endif // _TARGET_X86_
+ srcCount = 0;
+ assert(dstCount == 1);
+ // This defines a byte value (note that on x64 allByteRegs() is defined as RBM_ALLINT).
+ BuildDef(tree, allByteRegs());
break;
case GT_JMP:
- info->srcCount = 0;
- assert(info->dstCount == 0);
+ srcCount = 0;
+ assert(dstCount == 0);
break;
case GT_SWITCH:
// This should never occur since switch nodes must not be visible at this
// point in the JIT.
- info->srcCount = 0;
+ srcCount = 0;
noway_assert(!"Switch must be lowered at this point");
break;
case GT_JMPTABLE:
- info->srcCount = 0;
- assert(info->dstCount == 1);
+ srcCount = 0;
+ assert(dstCount == 1);
+ BuildDef(tree);
break;
case GT_SWITCH_TABLE:
- info->internalIntCount = 1;
- assert(info->dstCount == 0);
- info->srcCount = appendBinaryLocationInfoToList(tree->AsOp());
- assert(info->srcCount == 2);
- break;
+ {
+ assert(dstCount == 0);
+ buildInternalIntRegisterDefForNode(tree);
+ srcCount = BuildBinaryUses(tree->AsOp());
+ buildInternalRegisterUses();
+ assert(srcCount == 2);
+ }
+ break;
case GT_ASG:
noway_assert(!"We should never hit any assignment operator in lowering");
- info->srcCount = 0;
+ srcCount = 0;
break;
#if !defined(_TARGET_64BIT_)
@@ -255,23 +299,32 @@ void LinearScan::BuildNode(GenTree* tree)
case GT_AND:
case GT_OR:
case GT_XOR:
+ srcCount = BuildBinaryUses(tree->AsOp());
+ assert(dstCount == 1);
+ BuildDef(tree);
+ break;
+
case GT_BT:
- info->srcCount = appendBinaryLocationInfoToList(tree->AsOp());
+ srcCount = BuildBinaryUses(tree->AsOp());
+ assert(dstCount == 0);
break;
case GT_RETURNTRAP:
+ {
// This just turns into a compare of its child with an int + a conditional call.
- info->srcCount = GetOperandInfo(tree->gtOp.gtOp1);
- assert(info->dstCount == 0);
- info->internalIntCount = 1;
- info->setInternalCandidates(this, allRegs(TYP_INT));
- break;
+ RefPosition* internalDef = buildInternalIntRegisterDefForNode(tree);
+ srcCount = BuildOperandUses(tree->gtGetOp1());
+ buildInternalRegisterUses();
+ killMask = compiler->compHelperCallKillSet(CORINFO_HELP_STOP_FOR_GC);
+ BuildDefsWithKills(tree, 0, RBM_NONE, killMask);
+ }
+ break;
case GT_MOD:
case GT_DIV:
case GT_UMOD:
case GT_UDIV:
- BuildModDiv(tree->AsOp());
+ srcCount = BuildModDiv(tree->AsOp());
break;
case GT_MUL:
@@ -279,42 +332,39 @@ void LinearScan::BuildNode(GenTree* tree)
#if defined(_TARGET_X86_)
case GT_MUL_LONG:
#endif
- BuildMul(tree->AsOp());
+ srcCount = BuildMul(tree->AsOp());
break;
case GT_INTRINSIC:
- BuildIntrinsic(tree->AsOp());
+ srcCount = BuildIntrinsic(tree->AsOp());
break;
#ifdef FEATURE_SIMD
case GT_SIMD:
- BuildSIMD(tree->AsSIMD());
+ srcCount = BuildSIMD(tree->AsSIMD());
break;
#endif // FEATURE_SIMD
#ifdef FEATURE_HW_INTRINSICS
case GT_HWIntrinsic:
- BuildHWIntrinsic(tree->AsHWIntrinsic());
+ srcCount = BuildHWIntrinsic(tree->AsHWIntrinsic());
break;
#endif // FEATURE_HW_INTRINSICS
case GT_CAST:
- BuildCast(tree);
+ srcCount = BuildCast(tree);
break;
case GT_BITCAST:
{
- LocationInfoListNode* locationInfo = getLocationInfo(tree->gtOp.gtOp1);
- locationInfo->info.isTgtPref = true;
- useList.Append(locationInfo);
- info->srcCount = 1;
- info->dstCount = 1;
+ assert(dstCount == 1);
+ tgtPrefUse = BuildUse(tree->gtGetOp1());
+ BuildDef(tree);
+ srcCount = 1;
}
break;
case GT_NEG:
- info->srcCount = GetOperandInfo(tree->gtOp.gtOp1);
-
// TODO-XArch-CQ:
// SSE instruction set doesn't have an instruction to negate a number.
// The recommended way is to xor the float/double number with a bitmask.
@@ -333,13 +383,21 @@ void LinearScan::BuildNode(GenTree* tree)
// Jit64 and could possibly result in compat issues (?).
if (varTypeIsFloating(tree))
{
- info->internalFloatCount = 1;
- info->setInternalCandidates(this, internalFloatRegCandidates());
+                buildInternalFloatRegisterDefForNode(tree, internalFloatRegCandidates());
+ srcCount = BuildOperandUses(tree->gtGetOp1());
+ buildInternalRegisterUses();
}
+ else
+ {
+ srcCount = BuildOperandUses(tree->gtGetOp1());
+ }
+ BuildDef(tree);
break;
case GT_NOT:
- info->srcCount = GetOperandInfo(tree->gtOp.gtOp1);
+ srcCount = BuildOperandUses(tree->gtGetOp1());
+ BuildDef(tree);
break;
case GT_LSH:
@@ -351,7 +409,7 @@ void LinearScan::BuildNode(GenTree* tree)
case GT_LSH_HI:
case GT_RSH_LO:
#endif
- (void)BuildShiftRotate(tree);
+ srcCount = BuildShiftRotate(tree);
break;
case GT_EQ:
@@ -363,57 +421,94 @@ void LinearScan::BuildNode(GenTree* tree)
case GT_TEST_EQ:
case GT_TEST_NE:
case GT_CMP:
- BuildCmp(tree);
+ srcCount = BuildCmp(tree);
break;
case GT_CKFINITE:
- appendLocationInfoToList(tree->gtOp.gtOp1);
- info->srcCount = 1;
- assert(info->dstCount == 1);
- info->internalIntCount = 1;
- break;
+ {
+ assert(dstCount == 1);
+            buildInternalIntRegisterDefForNode(tree);
+ srcCount = BuildOperandUses(tree->gtGetOp1());
+ buildInternalRegisterUses();
+ BuildDef(tree);
+ }
+ break;
case GT_CMPXCHG:
{
- info->srcCount = 3;
- assert(info->dstCount == 1);
-
- // comparand is preferenced to RAX.
- // Remaining two operands can be in any reg other than RAX.
- LocationInfoListNode* locationInfo = getLocationInfo(tree->gtCmpXchg.gtOpLocation);
- locationInfo->info.setSrcCandidates(this, allRegs(TYP_INT) & ~RBM_RAX);
- useList.Append(locationInfo);
- LocationInfoListNode* valueInfo = getLocationInfo(tree->gtCmpXchg.gtOpValue);
- valueInfo->info.setSrcCandidates(this, allRegs(TYP_INT) & ~RBM_RAX);
- useList.Append(valueInfo);
- info->setDstCandidates(this, RBM_RAX);
- LocationInfoListNode* comparandInfo = getLocationInfo(tree->gtCmpXchg.gtOpComparand);
- comparandInfo->info.setSrcCandidates(this, RBM_RAX);
- useList.Append(comparandInfo);
+ srcCount = 3;
+ assert(dstCount == 1);
+
+ // Comparand is preferenced to RAX.
+ // The remaining two operands can be in any reg other than RAX.
+ BuildUse(tree->gtCmpXchg.gtOpLocation, allRegs(TYP_INT) & ~RBM_RAX);
+ BuildUse(tree->gtCmpXchg.gtOpValue, allRegs(TYP_INT) & ~RBM_RAX);
+ BuildUse(tree->gtCmpXchg.gtOpComparand, RBM_RAX);
+ BuildDef(tree, RBM_RAX);
}
break;
+ case GT_XADD:
+ case GT_XCHG:
case GT_LOCKADD:
- info->srcCount = appendBinaryLocationInfoToList(tree->AsOp());
- assert(info->dstCount == (tree->TypeGet() == TYP_VOID) ? 0 : 1);
- break;
+ {
+ // TODO-XArch-Cleanup: We should make the indirection explicit on these nodes so that we don't have
+ // to special case them.
+ // These tree nodes will have their op1 marked as isDelayFree=true.
+ // That is, op1's reg remains in use until the subsequent instruction.
+ GenTree* addr = tree->gtGetOp1();
+ GenTree* data = tree->gtGetOp2();
+ assert(!addr->isContained());
+ RefPosition* addrUse = BuildUse(addr);
+ setDelayFree(addrUse);
+ tgtPrefUse = addrUse;
+ srcCount = 1;
+ dstCount = 1;
+ if (!data->isContained())
+ {
+                BuildUse(data);
+ srcCount = 2;
+ }
+
+ if (tree->TypeGet() == TYP_VOID)
+ {
+ // Right now a GT_XADD node could be morphed into a
+ // GT_LOCKADD of TYP_VOID. See gtExtractSideEffList().
+ // Note that it is advantageous to use GT_LOCKADD
+                // instead of GT_XADD, as the former uses "lock add",
+                // which allows its second operand to be a contained
+                // immediate, whereas the xadd instruction requires its
+                // second operand to be in a register.
+                // Give it an artificial type and mark it as an unused value.
+                // This results in a Def position being created, but it is not considered consumed by its parent node.
+ tree->gtType = TYP_INT;
+ isLocalDefUse = true;
+ tree->SetUnusedValue();
+ }
+ BuildDef(tree);
+ }
+ break;
case GT_PUTARG_REG:
- BuildPutArgReg(tree->AsUnOp());
+ srcCount = BuildPutArgReg(tree->AsUnOp());
break;
case GT_CALL:
- BuildCall(tree->AsCall());
+ srcCount = BuildCall(tree->AsCall());
+ if (tree->AsCall()->HasMultiRegRetVal())
+ {
+ dstCount = tree->AsCall()->GetReturnTypeDesc()->GetReturnRegCount();
+ }
break;
case GT_ADDR:
{
// For a GT_ADDR, the child node should not be evaluated into a register
- GenTree* child = tree->gtOp.gtOp1;
+ GenTree* child = tree->gtGetOp1();
assert(!isCandidateLocalRef(child));
assert(child->isContained());
- assert(info->dstCount == 1);
- info->srcCount = 0;
+ assert(dstCount == 1);
+ srcCount = 0;
}
break;
@@ -424,28 +519,29 @@ void LinearScan::BuildNode(GenTree* tree)
case GT_DYN_BLK:
// These should all be eliminated prior to Lowering.
assert(!"Non-store block node in Lowering");
- info->srcCount = 0;
+ srcCount = 0;
break;
#ifdef FEATURE_PUT_STRUCT_ARG_STK
case GT_PUTARG_STK:
- BuildPutArgStk(tree->AsPutArgStk());
+ srcCount = BuildPutArgStk(tree->AsPutArgStk());
break;
#endif // FEATURE_PUT_STRUCT_ARG_STK
case GT_STORE_BLK:
case GT_STORE_OBJ:
case GT_STORE_DYN_BLK:
- BuildBlockStore(tree->AsBlk());
+ srcCount = BuildBlockStore(tree->AsBlk());
break;
case GT_INIT_VAL:
// Always a passthrough of its child's value.
assert(!"INIT_VAL should always be contained");
+ srcCount = 0;
break;
case GT_LCLHEAP:
- BuildLclHeap(tree);
+ srcCount = BuildLclHeap(tree);
break;
case GT_ARR_BOUNDS_CHECK:
@@ -456,101 +552,113 @@ void LinearScan::BuildNode(GenTree* tree)
case GT_HW_INTRINSIC_CHK:
#endif // FEATURE_HW_INTRINSICS
// Consumes arrLen & index - has no result
- info->srcCount = 2;
- assert(info->dstCount == 0);
- info->srcCount = GetOperandInfo(tree->AsBoundsChk()->gtIndex);
- info->srcCount += GetOperandInfo(tree->AsBoundsChk()->gtArrLen);
+ assert(dstCount == 0);
+ srcCount = BuildOperandUses(tree->AsBoundsChk()->gtIndex);
+ srcCount += BuildOperandUses(tree->AsBoundsChk()->gtArrLen);
break;
case GT_ARR_ELEM:
// These must have been lowered to GT_ARR_INDEX
noway_assert(!"We should never see a GT_ARR_ELEM after Lowering.");
- info->srcCount = 0;
+ srcCount = 0;
break;
case GT_ARR_INDEX:
{
- info->srcCount = 2;
- assert(info->dstCount == 1);
+ srcCount = 2;
+ assert(dstCount == 1);
assert(!tree->AsArrIndex()->ArrObj()->isContained());
assert(!tree->AsArrIndex()->IndexExpr()->isContained());
// For GT_ARR_INDEX, the lifetime of the arrObj must be extended because it is actually used multiple
// times while the result is being computed.
- LocationInfoListNode* arrObjInfo = getLocationInfo(tree->AsArrIndex()->ArrObj());
- arrObjInfo->info.isDelayFree = true;
- useList.Append(arrObjInfo);
- useList.Append(getLocationInfo(tree->AsArrIndex()->IndexExpr()));
- info->hasDelayFreeSrc = true;
+ RefPosition* arrObjUse = BuildUse(tree->AsArrIndex()->ArrObj());
+ setDelayFree(arrObjUse);
+ BuildUse(tree->AsArrIndex()->IndexExpr());
+ BuildDef(tree);
}
break;
case GT_ARR_OFFSET:
+ {
// This consumes the offset, if any, the arrObj and the effective index,
// and produces the flattened offset for this dimension.
- assert(info->dstCount == 1);
+ assert(dstCount == 1);
+ srcCount = 0;
+ RefPosition* internalDef = nullptr;
if (tree->gtArrOffs.gtOffset->isContained())
{
- info->srcCount = 2;
+ srcCount = 2;
}
else
{
// Here we simply need an internal register, which must be different
// from any of the operand's registers, but may be the same as targetReg.
- info->srcCount = 3;
- info->internalIntCount = 1;
- appendLocationInfoToList(tree->AsArrOffs()->gtOffset);
+ srcCount = 3;
+ internalDef = buildInternalIntRegisterDefForNode(tree);
+ BuildUse(tree->AsArrOffs()->gtOffset);
}
- appendLocationInfoToList(tree->AsArrOffs()->gtIndex);
- appendLocationInfoToList(tree->AsArrOffs()->gtArrObj);
- break;
+ BuildUse(tree->AsArrOffs()->gtIndex);
+ BuildUse(tree->AsArrOffs()->gtArrObj);
+ if (internalDef != nullptr)
+ {
+ buildInternalRegisterUses();
+ }
+ BuildDef(tree);
+ }
+ break;
case GT_LEA:
// The LEA usually passes its operands through to the GT_IND, in which case it will
// be contained, but we may be instantiating an address, in which case we set them here.
- info->srcCount = 0;
- assert(info->dstCount == 1);
+ srcCount = 0;
+ assert(dstCount == 1);
if (tree->AsAddrMode()->HasBase())
{
- info->srcCount++;
- appendLocationInfoToList(tree->AsAddrMode()->Base());
+ srcCount++;
+ BuildUse(tree->AsAddrMode()->Base());
}
if (tree->AsAddrMode()->HasIndex())
{
- info->srcCount++;
- appendLocationInfoToList(tree->AsAddrMode()->Index());
+ srcCount++;
+ BuildUse(tree->AsAddrMode()->Index());
}
+ BuildDef(tree);
break;
case GT_STOREIND:
if (compiler->codeGen->gcInfo.gcIsWriteBarrierAsgNode(tree))
{
- BuildGCWriteBarrier(tree);
+ srcCount = BuildGCWriteBarrier(tree);
break;
}
- BuildIndir(tree->AsIndir());
+ srcCount = BuildIndir(tree->AsIndir());
break;
case GT_NULLCHECK:
- assert(info->dstCount == 0);
- appendLocationInfoToList(tree->gtOp.gtOp1);
- info->srcCount = 1;
+ {
+ assert(dstCount == 0);
+ regMaskTP indirCandidates = RBM_NONE;
+ BuildUse(tree->gtGetOp1(), indirCandidates);
+ srcCount = 1;
break;
+ }
case GT_IND:
- BuildIndir(tree->AsIndir());
- assert(info->dstCount == 1);
+ srcCount = BuildIndir(tree->AsIndir());
+ assert(dstCount == 1);
break;
case GT_CATCH_ARG:
- info->srcCount = 0;
- assert(info->dstCount == 1);
- info->setDstCandidates(this, RBM_EXCEPTION_OBJECT);
+ srcCount = 0;
+ assert(dstCount == 1);
+ BuildDef(tree, RBM_EXCEPTION_OBJECT);
break;
#if !FEATURE_EH_FUNCLETS
case GT_END_LFIN:
- info->srcCount = 0;
- assert(info->dstCount == 0);
+ srcCount = 0;
+ assert(dstCount == 0);
break;
#endif
@@ -561,12 +669,12 @@ void LinearScan::BuildNode(GenTree* tree)
break;
case GT_INDEX_ADDR:
- assert(info->dstCount == 1);
- info->srcCount = appendBinaryLocationInfoToList(tree->AsOp());
-
+ {
+ assert(dstCount == 1);
+ RefPosition* internalDef = nullptr;
if (tree->AsIndexAddr()->Index()->TypeGet() == TYP_I_IMPL)
{
- info->internalIntCount = 1;
+ internalDef = buildInternalIntRegisterDefForNode(tree);
}
else
{
@@ -579,212 +687,56 @@ void LinearScan::BuildNode(GenTree* tree)
break;
default:
- info->internalIntCount = 1;
+ internalDef = buildInternalIntRegisterDefForNode(tree);
break;
}
}
- break;
- } // end switch (tree->OperGet())
-
- // If op2 of a binary-op gets marked as contained, then binary-op srcCount will be 1.
- // Even then we would like to set isTgtPref on Op1.
- if (tree->OperIsBinary() && info->srcCount >= 1)
- {
- if (isRMWRegOper(tree))
- {
- GenTree* op1 = tree->gtOp.gtOp1;
- GenTree* op2 = tree->gtOp.gtOp2;
-
- // Commutative opers like add/mul/and/or/xor could reverse the order of
- // operands if it is safe to do so. In such a case we would like op2 to be
- // target preferenced instead of op1.
- if (tree->OperIsCommutative() && op1->isContained() && op2 != nullptr)
- {
- op1 = op2;
- op2 = tree->gtOp.gtOp1;
- }
-
- // If we have a read-modify-write operation, we want to preference op1 to the target,
- // if it is not contained.
- if (!op1->isContained() && !op1->OperIs(GT_LIST))
- {
- useList.GetTreeNodeInfo(op1).isTgtPref = true;
- }
-
- // Is this a non-commutative operator, or is op2 a contained memory op?
- // In either case, we need to make op2 remain live until the op is complete, by marking
- // the source(s) associated with op2 as "delayFree" if this node defines a register.
- // Note that if op2 of a binary RMW operator is a memory op, even if the operator
- // is commutative, codegen cannot reverse them.
- // TODO-XArch-CQ: This is not actually the case for all RMW binary operators, but there's
- // more work to be done to correctly reverse the operands if they involve memory
- // operands. Also, we may need to handle more cases than GT_IND, especially once
- // we've modified the register allocator to not require all nodes to be assigned
- // a register (e.g. a spilled lclVar can often be referenced directly from memory).
- // Note that we may have a null op2, even with 2 sources, if op1 is a base/index memory op.
-
- GenTree* delayUseSrc = nullptr;
- // TODO-XArch-Cleanup: We should make the indirection explicit on these nodes so that we don't have
- // to special case them.
- if (tree->OperGet() == GT_XADD || tree->OperGet() == GT_XCHG || tree->OperGet() == GT_LOCKADD)
- {
- // These tree nodes will have their op1 marked as isDelayFree=true.
- // Hence these tree nodes should have a Def position so that op1's reg
- // gets freed at DefLoc+1.
- if (tree->TypeGet() == TYP_VOID)
- {
- // Right now a GT_XADD node could be morphed into a
- // GT_LOCKADD of TYP_VOID. See gtExtractSideEffList().
- // Note that it is advantageous to use GT_LOCKADD
- // instead of of GT_XADD as the former uses lock.add,
- // which allows its second operand to be a contained
- // immediate wheres xadd instruction requires its
- // second operand to be in a register.
- assert(info->dstCount == 0);
-
- // Give it an artificial type and mark it as an unused value.
- // This results in a Def position created but not considered consumed by its parent node.
- tree->gtType = TYP_INT;
- info->dstCount = 1;
- info->isLocalDefUse = true;
- tree->SetUnusedValue();
- }
- else
- {
- assert(info->dstCount != 0);
- }
-
- delayUseSrc = op1;
- }
- else if ((info->dstCount != 0) && (op2 != nullptr) &&
- (!tree->OperIsCommutative() || (op2->isContained() && !op2->IsCnsIntOrI())))
+ srcCount = BuildBinaryUses(tree->AsOp());
+ if (internalDef != nullptr)
{
- delayUseSrc = op2;
- }
- if ((delayUseSrc != nullptr) && CheckAndSetDelayFree(delayUseSrc))
- {
- info->hasDelayFreeSrc = true;
+ buildInternalRegisterUses();
}
+ BuildDef(tree);
}
- }
+ break;
- BuildCheckByteable(tree);
+ } // end switch (tree->OperGet())
- // We need to be sure that we've set info->srcCount and info->dstCount appropriately
- assert((info->dstCount < 2) || (tree->IsMultiRegCall() && info->dstCount == MAX_RET_REG_COUNT));
- assert(info->isLocalDefUse == (tree->IsValue() && tree->IsUnusedValue()));
- assert(!tree->IsUnusedValue() || (info->dstCount != 0));
- assert(info->dstCount == tree->GetRegisterDstCount());
+ // We need to be sure that we've set srcCount and dstCount appropriately
+ assert((dstCount < 2) || (tree->IsMultiRegCall() && dstCount == MAX_RET_REG_COUNT));
+ assert(isLocalDefUse == (tree->IsValue() && tree->IsUnusedValue()));
+ assert(!tree->IsUnusedValue() || (dstCount != 0));
+ assert(dstCount == tree->GetRegisterDstCount());
+ INDEBUG(dumpNodeInfo(tree, dstCandidates, srcCount, dstCount));
+ return srcCount;
}
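
Taken together, the rewritten BuildNode illustrates the new protocol: RefPositions are created eagerly, in a fixed per-node order, and the source count is returned rather than stored in a TreeNodeInfo. A minimal standalone sketch of that ordering (a toy printf model, not the JIT's real types):

    #include <cstdio>

    static int location = 0;

    static void refPosition(const char* kind)
    {
        // Each RefPosition is appended at the current location, in build order.
        printf("%d: %s\n", location++, kind);
    }

    // Toy BuildNode for a binary op: internal defs first, operand uses next,
    // internal uses after the operand uses, and the node's own def last.
    static int buildBinaryNode()
    {
        int srcCount = 0;
        refPosition("internal def"); // buildInternalIntRegisterDefForNode
        refPosition("use op1");      // BuildUse / BuildOperandUses
        srcCount++;
        refPosition("use op2");
        srcCount++;
        refPosition("internal use"); // buildInternalRegisterUses
        refPosition("def");          // BuildDef / BuildDefsWithKills
        return srcCount;             // returned, not stored in a TreeNodeInfo
    }

    int main()
    {
        printf("srcCount = %d\n", buildBinaryNode());
        return 0;
    }
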
-//---------------------------------------------------------------------
-// CheckAndSetDelayFree - Set isDelayFree on the given operand or its child(ren), if appropriate
-//
-// Arguments
-// delayUseSrc - a node that may have a delayed use
-//
-// Return Value:
-// True iff the node or one of its children has been marked isDelayFree
-//
-// Notes:
-// Only register operands should be marked isDelayFree, not contained immediates or memory.
-//
-bool LinearScan::CheckAndSetDelayFree(GenTree* delayUseSrc)
+GenTree* LinearScan::getTgtPrefOperand(GenTreeOp* tree)
{
- // If delayUseSrc is an indirection and it doesn't produce a result, then we need to set "delayFree'
- // on the base & index, if any.
- // Otherwise, we set it on delayUseSrc itself.
- bool returnValue = false;
- if (delayUseSrc->isContained())
- {
- // If delayUseSrc is a non-Indir contained node (e.g. a local) there's no register use to delay.
- if (delayUseSrc->isIndir())
- {
- GenTree* base = delayUseSrc->AsIndir()->Base();
- GenTree* index = delayUseSrc->AsIndir()->Index();
- if ((base != nullptr) && !base->isContained())
- {
- useList.GetTreeNodeInfo(base).isDelayFree = true;
- returnValue = true;
- }
- if (index != nullptr)
- {
- assert(!index->isContained());
- useList.GetTreeNodeInfo(index).isDelayFree = true;
- returnValue = true;
- }
- }
- }
- else
+    // Even when op2 of a binary-op is marked as contained (so the op consumes only one
+    // register source), we would still like to set isTgtPref on op1.
+ if (tree->OperIsBinary() && isRMWRegOper(tree))
{
- useList.GetTreeNodeInfo(delayUseSrc).isDelayFree = true;
- returnValue = true;
- }
- return returnValue;
-}
-
-//------------------------------------------------------------------------
-// BuildCheckByteable: Check the tree to see if "byte-able" registers are
-// required, and set the tree node info accordingly.
-//
-// Arguments:
-// tree - The node of interest
-//
-// Return Value:
-// None.
-//
-void LinearScan::BuildCheckByteable(GenTree* tree)
-{
-#ifdef _TARGET_X86_
- TreeNodeInfo* info = currentNodeInfo;
- // Exclude RBM_NON_BYTE_REGS from dst candidates of tree node and src candidates of operands
- // if the tree node is a byte type.
- //
- // Though this looks conservative in theory, in practice we could not think of a case where
- // the below logic leads to conservative register specification. In future when or if we find
- // one such case, this logic needs to be fine tuned for that case(s).
+ GenTree* op1 = tree->gtGetOp1();
+ GenTree* op2 = tree->gtGetOp2();
- if (ExcludeNonByteableRegisters(tree))
- {
- regMaskTP regMask;
- if (info->dstCount > 0)
+ // Commutative opers like add/mul/and/or/xor could reverse the order of
+ // operands if it is safe to do so. In such a case we would like op2 to be
+ // target preferenced instead of op1.
+ if (tree->OperIsCommutative() && op1->isContained() && op2 != nullptr)
{
- regMask = info->getDstCandidates(this);
- assert(regMask != RBM_NONE);
- info->setDstCandidates(this, regMask & ~RBM_NON_BYTE_REGS);
+ op1 = op2;
+ op2 = tree->gtGetOp1();
}
- if (tree->OperIsSimple())
+ // If we have a read-modify-write operation, we want to preference op1 to the target,
+ // if it is not contained.
+ if (!op1->isContained() && !op1->OperIs(GT_LIST))
{
- GenTree* op = tree->gtOp.gtOp1;
- // We need byte registers on the operands of most simple operators that produce a byte result.
- // However, indirections are simple operators but do not require their address in a byte register.
- if ((op != nullptr) && !tree->OperIsIndir())
- {
- // No need to set src candidates on a contained child operand.
- if (!op->isContained())
- {
- TreeNodeInfo& op1Info = useList.GetTreeNodeInfo(op);
- regMask = op1Info.getSrcCandidates(this);
- assert(regMask != RBM_NONE);
- op1Info.setSrcCandidates(this, regMask & ~RBM_NON_BYTE_REGS);
- }
- }
-
- if (tree->OperIsBinary() && (tree->gtOp.gtOp2 != nullptr))
- {
- op = tree->gtOp.gtOp2;
- if (!op->isContained())
- {
- TreeNodeInfo& op2Info = useList.GetTreeNodeInfo(op);
- regMask = op2Info.getSrcCandidates(this);
- assert(regMask != RBM_NONE);
- op2Info.setSrcCandidates(this, regMask & ~RBM_NON_BYTE_REGS);
- }
- }
+ return op1;
}
}
-#endif //_TARGET_X86_
+ return nullptr;
}
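
To make the rule concrete, here is a minimal standalone sketch of the preferencing decision (a toy struct rather than GenTree; the field names are hypothetical):

    #include <cstdio>

    struct BinOpShape
    {
        bool isRMW;
        bool commutative;
        bool op1Contained;
    };

    // Returns which operand (1 or 2) to preference to the target register, or 0
    // for none: an RMW op is encoded as "mov tgt, op1; op tgt, op2", so op1 gets
    // the preference unless it is contained and the operands can be swapped.
    static int tgtPrefOperand(const BinOpShape& s)
    {
        if (!s.isRMW)
            return 0;
        if (s.op1Contained)
            return s.commutative ? 2 : 0;
        return 1;
    }

    int main()
    {
        printf("%d\n", tgtPrefOperand({true, false, false}));  // 1: plain RMW add
        printf("%d\n", tgtPrefOperand({true, true, true}));    // 2: operands swapped
        printf("%d\n", tgtPrefOperand({false, false, false})); // 0: e.g. a compare
        return 0;
    }
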
//------------------------------------------------------------------------------
@@ -805,7 +757,7 @@ bool LinearScan::isRMWRegOper(GenTree* tree)
// For now, We assume that most binary operators are of the RMW form.
assert(tree->OperIsBinary());
- if (tree->OperIsCompare() || tree->OperIs(GT_CMP))
+ if (tree->OperIsCompare() || tree->OperIs(GT_CMP) || tree->OperIs(GT_BT))
{
return false;
}
@@ -818,11 +770,15 @@ bool LinearScan::isRMWRegOper(GenTree* tree)
case GT_ARR_INDEX:
case GT_STORE_BLK:
case GT_STORE_OBJ:
+ case GT_SWITCH_TABLE:
+#ifdef _TARGET_X86_
+ case GT_LONG:
+#endif
return false;
// x86/x64 does support a three op multiply when op2|op1 is a contained immediate
case GT_MUL:
- return (!tree->gtOp.gtOp2->isContainedIntOrIImmed() && !tree->gtOp.gtOp1->isContainedIntOrIImmed());
+ return (!tree->gtGetOp2()->isContainedIntOrIImmed() && !tree->gtGetOp1()->isContainedIntOrIImmed());
#ifdef FEATURE_HW_INTRINSICS
case GT_HWIntrinsic:
@@ -834,6 +790,114 @@ bool LinearScan::isRMWRegOper(GenTree* tree)
}
}
+//------------------------------------------------------------------------
+// BuildRMWUses: Build the RefPosition uses for a read-modify-write node.
+//
+// Arguments:
+//    node       - The node of interest
+//    candidates - The register candidates for the uses, or RBM_NONE
+//
+// Return Value:
+//    The number of sources consumed by this node.
+//
+int LinearScan::BuildRMWUses(GenTreeOp* node, regMaskTP candidates)
+{
+ int srcCount = 0;
+ GenTree* op1 = node->gtOp1;
+ GenTree* op2 = node->gtGetOp2IfPresent();
+ bool isReverseOp = node->IsReverseOp();
+ regMaskTP op1Candidates = candidates;
+ regMaskTP op2Candidates = candidates;
+
+#ifdef _TARGET_X86_
+ if (varTypeIsByte(node))
+ {
+ regMaskTP byteCandidates = (candidates == RBM_NONE) ? allByteRegs() : (candidates & allByteRegs());
+ if (!op1->isContained())
+ {
+ assert(byteCandidates != RBM_NONE);
+ op1Candidates = byteCandidates;
+ }
+ if (node->OperIsCommutative() && !op2->isContained())
+ {
+ assert(byteCandidates != RBM_NONE);
+ op2Candidates = byteCandidates;
+ }
+ }
+#endif // _TARGET_X86_
+
+ GenTree* tgtPrefOperand = getTgtPrefOperand(node);
+ assert((tgtPrefOperand == nullptr) || (tgtPrefOperand == op1) || node->OperIsCommutative());
+ assert(!isReverseOp || node->OperIsCommutative());
+
+ // Determine which operand, if any, should be delayRegFree. Normally, this would be op2,
+ // but if we have a commutative operator and op1 is a contained memory op, it would be op1.
+    // We need to make the delayRegFree operand remain live until the op is complete, by marking
+    // the source(s) associated with that operand as "delayFree".
+ // Note that if op2 of a binary RMW operator is a memory op, even if the operator
+ // is commutative, codegen cannot reverse them.
+ // TODO-XArch-CQ: This is not actually the case for all RMW binary operators, but there's
+ // more work to be done to correctly reverse the operands if they involve memory
+ // operands. Also, we may need to handle more cases than GT_IND, especially once
+ // we've modified the register allocator to not require all nodes to be assigned
+ // a register (e.g. a spilled lclVar can often be referenced directly from memory).
+ // Note that we may have a null op2, even with 2 sources, if op1 is a base/index memory op.
+ GenTree* delayUseOperand = op2;
+ if (node->OperIsCommutative())
+ {
+ if (op1->isContained() && op2 != nullptr)
+ {
+ delayUseOperand = op1;
+ }
+ else if (!op2->isContained() || op2->IsCnsIntOrI())
+ {
+ // If we have a commutative operator and op2 is not a memory op, we don't need
+ // to set delayRegFree on either operand because codegen can swap them.
+ delayUseOperand = nullptr;
+ }
+ }
+ else if (op1->isContained())
+ {
+ delayUseOperand = nullptr;
+ }
+ if (delayUseOperand != nullptr)
+ {
+ assert(delayUseOperand != tgtPrefOperand);
+ }
+
+ if (isReverseOp)
+ {
+ op1 = op2;
+ op2 = node->gtOp1;
+ }
+
+ // Build first use
+ if (tgtPrefOperand == op1)
+ {
+ assert(!op1->isContained());
+ tgtPrefUse = BuildUse(op1, op1Candidates);
+ srcCount++;
+ }
+ else if (delayUseOperand == op1)
+ {
+ srcCount += BuildDelayFreeUses(op1, op1Candidates);
+ }
+ else
+ {
+ srcCount += BuildOperandUses(op1, op1Candidates);
+ }
+ // Build second use
+ if (op2 != nullptr)
+ {
+ if (tgtPrefOperand == op2)
+ {
+ assert(!op2->isContained());
+ tgtPrefUse = BuildUse(op2, op2Candidates);
+ srcCount++;
+ }
+ else if (delayUseOperand == op2)
+ {
+ srcCount += BuildDelayFreeUses(op2, op2Candidates);
+ }
+ else
+ {
+ srcCount += BuildOperandUses(op2, op2Candidates);
+ }
+ }
+ return srcCount;
+}
+
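
The delay-free marking above is easiest to see as a live-range question. A standalone sketch with toy intervals (not the allocator's real Interval class), assuming a use normally ends at the node's location and a delay-free use is extended through the def:

    #include <cstdio>

    struct ToyRange
    {
        int start;
        int end;
    };

    // Two ranges may share a physical register only if they do not overlap.
    static bool mayShareRegister(ToyRange use, ToyRange def)
    {
        return use.end < def.start;
    }

    int main()
    {
        ToyRange def     = {11, 20}; // the RMW node's def
        ToyRange normal  = {5, 10};  // ordinary use: ends before the def
        ToyRange delayed = {5, 11};  // delay-free use: extended through the def
        printf("normal  can reuse reg: %d\n", mayShareRegister(normal, def));  // 1
        printf("delayed can reuse reg: %d\n", mayShareRegister(delayed, def)); // 0
        return 0;
    }
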
//------------------------------------------------------------------------
// BuildShiftRotate: Set the NodeInfo for a shift or rotate.
//
@@ -841,36 +905,29 @@ bool LinearScan::isRMWRegOper(GenTree* tree)
// tree - The node of interest
//
// Return Value:
-// None.
+// The number of sources consumed by this node.
//
int LinearScan::BuildShiftRotate(GenTree* tree)
{
- TreeNodeInfo* info = currentNodeInfo;
// For shift operations, we need that the number
// of bits moved gets stored in CL in case
// the number of bits to shift is not a constant.
- int srcCount = 0;
- GenTree* shiftBy = tree->gtOp.gtOp2;
- GenTree* source = tree->gtOp.gtOp1;
- LocationInfoListNode* shiftByInfo = nullptr;
+ int srcCount = 0;
+ GenTree* shiftBy = tree->gtGetOp2();
+ GenTree* source = tree->gtGetOp1();
+ regMaskTP srcCandidates = RBM_NONE;
+ regMaskTP dstCandidates = RBM_NONE;
+
// x64 can encode 8 bits of shift and it will use 5 or 6. (the others are masked off)
// We will allow whatever can be encoded - hope you know what you are doing.
if (shiftBy->isContained())
{
- srcCount += GetOperandInfo(source);
+ assert(shiftBy->OperIsConst());
}
else
{
- srcCount++;
- shiftByInfo = getLocationInfo(shiftBy);
- shiftByInfo->info.setSrcCandidates(this, RBM_RCX);
- info->setDstCandidates(this, allRegs(TYP_INT) & ~RBM_RCX);
- LocationInfoListNode* sourceInfo;
- srcCount += GetOperandInfo(source, &sourceInfo);
- for (; sourceInfo != nullptr; sourceInfo = sourceInfo->Next())
- {
- sourceInfo->info.setSrcCandidates(this, allRegs(TYP_INT) & ~RBM_RCX);
- }
+ srcCandidates = allRegs(TYP_INT) & ~RBM_RCX;
+ dstCandidates = allRegs(TYP_INT) & ~RBM_RCX;
}
// Note that Rotate Left/Right instructions don't set ZF and SF flags.
@@ -887,40 +944,54 @@ int LinearScan::BuildShiftRotate(GenTree* tree)
#ifdef _TARGET_X86_
// The first operand of a GT_LSH_HI and GT_RSH_LO oper is a GT_LONG so that
- // we can have a three operand form. Increment the srcCount.
+ // we can have a three operand form.
if (tree->OperGet() == GT_LSH_HI || tree->OperGet() == GT_RSH_LO)
{
assert((source->OperGet() == GT_LONG) && source->isContained());
- GenTree* sourceLo = source->gtOp.gtOp1;
- LocationInfoListNode* sourceLoInfo = useList.Begin();
- LocationInfoListNode* sourceHiInfo = useList.GetSecond(INDEBUG(source->gtGetOp2()));
+ GenTree* sourceLo = source->gtGetOp1();
+ GenTree* sourceHi = source->gtGetOp2();
+ assert(!sourceLo->isContained() && !sourceHi->isContained());
+ RefPosition* sourceLoUse = BuildUse(sourceLo, srcCandidates);
+ RefPosition* sourceHiUse = BuildUse(sourceHi, srcCandidates);
- info->hasDelayFreeSrc = true;
- if (tree->OperGet() == GT_LSH_HI)
+ if (!tree->isContained())
{
- sourceLoInfo->info.isDelayFree = true;
- }
- else
- {
- sourceHiInfo->info.isDelayFree = true;
+ if (tree->OperGet() == GT_LSH_HI)
+ {
+ setDelayFree(sourceLoUse);
+ }
+ else
+ {
+ setDelayFree(sourceHiUse);
+ }
}
+        srcCount += 2;
}
+ else
#endif
- if (shiftByInfo != nullptr)
+ if (!source->isContained())
{
- if (tree->IsReverseOp())
- {
- useList.Prepend(shiftByInfo);
- }
- else
+ tgtPrefUse = BuildUse(source, srcCandidates);
+ srcCount++;
+ }
+ else
+ {
+ srcCount += BuildOperandUses(source, srcCandidates);
+ }
+ if (!tree->isContained())
+ {
+ if (!shiftBy->isContained())
{
- useList.Append(shiftByInfo);
+ srcCount += BuildDelayFreeUses(shiftBy, RBM_RCX);
}
+ BuildDef(tree, dstCandidates);
}
- if (!tree->isContained())
+ else
{
- info->srcCount = srcCount;
+ if (!shiftBy->isContained())
+ {
+ srcCount += BuildOperandUses(shiftBy, RBM_RCX);
+ }
}
return srcCount;
}
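
A standalone sketch of the candidate masks this function builds for a variable-count shift, using toy one-bit-per-register masks (the real RBM_* constants come from the target description):

    #include <cstdio>

    typedef unsigned ToyRegMask;
    const ToyRegMask TOY_RAX = 1u << 0;
    const ToyRegMask TOY_RCX = 1u << 1;
    const ToyRegMask TOY_RDX = 1u << 2;
    const ToyRegMask TOY_RBX = 1u << 3;
    const ToyRegMask TOY_ALL = TOY_RAX | TOY_RCX | TOY_RDX | TOY_RBX;

    int main()
    {
        // The variable count must land in RCX (CL); everything else avoids RCX.
        ToyRegMask shiftByCandidates = TOY_RCX;
        ToyRegMask srcCandidates     = TOY_ALL & ~TOY_RCX;
        ToyRegMask dstCandidates     = TOY_ALL & ~TOY_RCX;
        printf("shiftBy=0x%x src=0x%x dst=0x%x\n", shiftByCandidates, srcCandidates, dstCandidates);
        return 0;
    }
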
@@ -932,51 +1003,38 @@ int LinearScan::BuildShiftRotate(GenTree* tree)
// call - The call node of interest
//
// Return Value:
-// None.
+// The number of sources consumed by this node.
//
-void LinearScan::BuildCall(GenTreeCall* call)
+int LinearScan::BuildCall(GenTreeCall* call)
{
- TreeNodeInfo* info = currentNodeInfo;
bool hasMultiRegRetVal = false;
ReturnTypeDesc* retTypeDesc = nullptr;
+ int srcCount = 0;
+ int dstCount = 0;
+ regMaskTP dstCandidates = RBM_NONE;
assert(!call->isContained());
- info->srcCount = 0;
if (call->TypeGet() != TYP_VOID)
{
hasMultiRegRetVal = call->HasMultiRegRetVal();
if (hasMultiRegRetVal)
{
// dst count = number of registers in which the value is returned by call
- retTypeDesc = call->GetReturnTypeDesc();
- info->dstCount = retTypeDesc->GetReturnRegCount();
+ retTypeDesc = call->GetReturnTypeDesc();
+ dstCount = retTypeDesc->GetReturnRegCount();
}
else
{
- assert(info->dstCount == 1);
+ dstCount = 1;
}
}
- else
- {
- assert(info->dstCount == 0);
- }
- GenTree* ctrlExpr = call->gtControlExpr;
- LocationInfoListNode* ctrlExprInfo = nullptr;
+ GenTree* ctrlExpr = call->gtControlExpr;
if (call->gtCallType == CT_INDIRECT)
{
ctrlExpr = call->gtCallAddr;
}
- // If this is a varargs call, we will clear the internal candidates in case we need
- // to reserve some integer registers for copying float args.
- // We have to do this because otherwise the default candidates are allRegs, and adding
- // the individual specific registers will have no effect.
- if (call->IsVarargs())
- {
- info->setInternalCandidates(this, RBM_NONE);
- }
-
RegisterType registerType = call->TypeGet();
// Set destination candidates for return value of the call.
@@ -988,31 +1046,32 @@ void LinearScan::BuildCall(GenTreeCall* call)
// The x86 CORINFO_HELP_INIT_PINVOKE_FRAME helper uses a custom calling convention that returns with
// TCB in REG_PINVOKE_TCB. AMD64/ARM64 use the standard calling convention. fgMorphCall() sets the
// correct argument registers.
- info->setDstCandidates(this, RBM_PINVOKE_TCB);
+ dstCandidates = RBM_PINVOKE_TCB;
}
else
#endif // _TARGET_X86_
if (hasMultiRegRetVal)
{
assert(retTypeDesc != nullptr);
- info->setDstCandidates(this, retTypeDesc->GetABIReturnRegs());
+ dstCandidates = retTypeDesc->GetABIReturnRegs();
+ assert((int)genCountBits(dstCandidates) == dstCount);
}
else if (varTypeIsFloating(registerType))
{
#ifdef _TARGET_X86_
// The return value will be on the X87 stack, and we will need to move it.
- info->setDstCandidates(this, allRegs(registerType));
+ dstCandidates = allRegs(registerType);
#else // !_TARGET_X86_
- info->setDstCandidates(this, RBM_FLOATRET);
+ dstCandidates = RBM_FLOATRET;
#endif // !_TARGET_X86_
}
else if (registerType == TYP_LONG)
{
- info->setDstCandidates(this, RBM_LNGRET);
+ dstCandidates = RBM_LNGRET;
}
else
{
- info->setDstCandidates(this, RBM_INTRET);
+ dstCandidates = RBM_INTRET;
}
// number of args to a call =
@@ -1022,11 +1081,28 @@ void LinearScan::BuildCall(GenTreeCall* call)
bool callHasFloatRegArgs = false;
bool isVarArgs = call->IsVarargs();
- // First, count reg args
+ // First, determine internal registers.
+ // We will need one for any float arguments to a varArgs call.
for (GenTree* list = call->gtCallLateArgs; list; list = list->MoveNext())
{
- assert(list->OperIsList());
+ GenTree* argNode = list->Current();
+ if (argNode->OperIsPutArgReg())
+ {
+ HandleFloatVarArgs(call, argNode, &callHasFloatRegArgs);
+ }
+ else if (argNode->OperGet() == GT_FIELD_LIST)
+ {
+ for (GenTreeFieldList* entry = argNode->AsFieldList(); entry != nullptr; entry = entry->Rest())
+ {
+ assert(entry->Current()->OperIsPutArgReg());
+ HandleFloatVarArgs(call, entry->Current(), &callHasFloatRegArgs);
+ }
+ }
+ }
+ // Now, count reg args
+ for (GenTree* list = call->gtCallLateArgs; list; list = list->MoveNext())
+ {
// By this point, lowering has ensured that all call arguments are one of the following:
// - an arg setup store
// - an arg placeholder
@@ -1041,9 +1117,8 @@ void LinearScan::BuildCall(GenTreeCall* call)
// Each register argument corresponds to one source.
if (argNode->OperIsPutArgReg())
{
- info->srcCount++;
- HandleFloatVarArgs(call, argNode, &callHasFloatRegArgs);
- appendLocationInfoToList(argNode);
+ srcCount++;
+ BuildUse(argNode, genRegMask(argNode->gtRegNum));
}
#ifdef UNIX_AMD64_ABI
else if (argNode->OperGet() == GT_FIELD_LIST)
@@ -1051,9 +1126,8 @@ void LinearScan::BuildCall(GenTreeCall* call)
for (GenTreeFieldList* entry = argNode->AsFieldList(); entry != nullptr; entry = entry->Rest())
{
assert(entry->Current()->OperIsPutArgReg());
- info->srcCount++;
- HandleFloatVarArgs(call, argNode, &callHasFloatRegArgs);
- appendLocationInfoToList(entry->Current());
+ srcCount++;
+ BuildUse(entry->Current(), genRegMask(entry->Current()->gtRegNum));
}
}
#endif // UNIX_AMD64_ABI
@@ -1077,8 +1151,8 @@ void LinearScan::BuildCall(GenTreeCall* call)
// Note that if it is a SIMD type the argument will be in a register.
if (argNode->TypeGet() == TYP_STRUCT)
{
- assert(argNode->gtOp.gtOp1 != nullptr && argNode->gtOp.gtOp1->OperGet() == GT_OBJ);
- assert(argNode->gtOp.gtOp1->isContained());
+ assert(argNode->gtGetOp1() != nullptr && argNode->gtGetOp1()->OperGet() == GT_OBJ);
+ assert(argNode->gtGetOp1()->isContained());
}
#endif // FEATURE_PUT_STRUCT_ARG_STK
continue;
@@ -1117,39 +1191,30 @@ void LinearScan::BuildCall(GenTreeCall* call)
GenTree* args = call->gtCallArgs;
while (args)
{
- GenTree* arg = args->gtOp.gtOp1;
+ GenTree* arg = args->gtGetOp1();
if (!(arg->gtFlags & GTF_LATE_ARG) && !arg)
{
if (arg->IsValue() && !arg->isContained())
{
- // argInfo->isLocalDefUse = true;
assert(arg->IsUnusedValue());
}
- // assert(argInfo->dstCount == 0);
}
- args = args->gtOp.gtOp2;
+ args = args->gtGetOp2();
}
// set reg requirements on call target represented as control sequence.
if (ctrlExpr != nullptr)
{
- LocationInfoListNode* ctrlExprInfo = nullptr;
- int ctrlExprCount = GetOperandInfo(ctrlExpr);
- if (ctrlExprCount != 0)
- {
- assert(ctrlExprCount == 1);
- ctrlExprInfo = useList.Last();
- info->srcCount++;
- }
+ regMaskTP ctrlExprCandidates = RBM_NONE;
// In case of fast tail implemented as jmp, make sure that gtControlExpr is
// computed into a register.
if (call->IsFastTailCall())
{
- assert(!ctrlExpr->isContained() && ctrlExprInfo != nullptr);
+ assert(!ctrlExpr->isContained());
// Fast tail call - make sure that call target is always computed in RAX
// so that epilog sequence can generate "jmp rax" to achieve fast tail call.
- ctrlExprInfo->info.setSrcCandidates(this, RBM_RAX);
+ ctrlExprCandidates = RBM_RAX;
}
#ifdef _TARGET_X86_
else if (call->IsVirtualStub() && (call->gtCallType == CT_INDIRECT))
@@ -1161,24 +1226,31 @@ void LinearScan::BuildCall(GenTreeCall* call)
//
// Where EAX is also used as an argument to the stub dispatch helper. Make
// sure that the call target address is computed into EAX in this case.
- assert(ctrlExprInfo != nullptr);
assert(ctrlExpr->isIndir() && ctrlExpr->isContained());
- ctrlExprInfo->info.setSrcCandidates(this, RBM_VIRTUAL_STUB_TARGET);
+ ctrlExprCandidates = RBM_VIRTUAL_STUB_TARGET;
}
#endif // _TARGET_X86_
#if FEATURE_VARARG
// If it is a fast tail call, it is already preferenced to use RAX.
// Therefore, no need set src candidates on call tgt again.
- if (call->IsVarargs() && callHasFloatRegArgs && !call->IsFastTailCall() && (ctrlExprInfo != nullptr))
+ if (call->IsVarargs() && callHasFloatRegArgs && !call->IsFastTailCall())
{
// Don't assign the call target to any of the argument registers because
// we will use them to also pass floating point arguments as required
// by Amd64 ABI.
- ctrlExprInfo->info.setSrcCandidates(this, allRegs(TYP_INT) & ~(RBM_ARG_REGS));
+ ctrlExprCandidates = allRegs(TYP_INT) & ~(RBM_ARG_REGS);
}
#endif // !FEATURE_VARARG
+ srcCount += BuildOperandUses(ctrlExpr, ctrlExprCandidates);
}
+
+ buildInternalRegisterUses();
+
+ // Now generate defs and kills.
+ regMaskTP killMask = getKillSetForCall(call);
+ BuildDefsWithKills(call, dstCount, dstCandidates, killMask);
+ return srcCount;
}
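
The kill mask recorded by BuildDefsWithKills is just a register set that the call clobbers: anything live across the call in a killed (caller-saved) register must be spilled or relocated. A standalone sketch with toy masks and a hypothetical caller-saved set:

    #include <cstdio>

    typedef unsigned ToyRegMask;
    const ToyRegMask TOY_RAX = 1, TOY_RCX = 2, TOY_RDX = 4, TOY_RBX = 8;
    const ToyRegMask TOY_CALLEE_TRASH = TOY_RAX | TOY_RCX | TOY_RDX; // hypothetical kill set

    int main()
    {
        ToyRegMask liveAcrossCall = TOY_RBX | TOY_RDX;
        printf("survives=0x%x endangered=0x%x\n",
               liveAcrossCall & ~TOY_CALLEE_TRASH,  // RBX keeps its register
               liveAcrossCall & TOY_CALLEE_TRASH);  // RDX must be spilled or moved
        return 0;
    }
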
//------------------------------------------------------------------------
@@ -1188,28 +1260,15 @@ void LinearScan::BuildCall(GenTreeCall* call)
// blkNode - The block store node of interest
//
// Return Value:
-// None.
+// The number of sources consumed by this node.
//
-void LinearScan::BuildBlockStore(GenTreeBlk* blkNode)
+int LinearScan::BuildBlockStore(GenTreeBlk* blkNode)
{
- TreeNodeInfo* info = currentNodeInfo;
- GenTree* dstAddr = blkNode->Addr();
- unsigned size = blkNode->gtBlkSize;
- GenTree* source = blkNode->Data();
-
- LocationInfoListNode* dstAddrInfo = nullptr;
- LocationInfoListNode* sourceInfo = nullptr;
- LocationInfoListNode* sizeInfo = nullptr;
+ GenTree* dstAddr = blkNode->Addr();
+ unsigned size = blkNode->gtBlkSize;
+ GenTree* source = blkNode->Data();
+ int srcCount = 0;
- // Sources are dest address, initVal or source.
- // We may require an additional source or temp register for the size.
- if (!dstAddr->isContained())
- {
- info->srcCount++;
- dstAddrInfo = getLocationInfo(dstAddr);
- }
- assert(info->dstCount == 0);
- info->setInternalCandidates(this, RBM_NONE);
GenTree* srcAddrOrFill = nullptr;
bool isInitBlk = blkNode->OperIsInitBlkOp();
@@ -1226,11 +1285,6 @@ void LinearScan::BuildBlockStore(GenTreeBlk* blkNode)
initVal = initVal->gtGetOp1();
}
srcAddrOrFill = initVal;
- if (!initVal->isContained())
- {
- info->srcCount++;
- sourceInfo = getLocationInfo(initVal);
- }
switch (blkNode->gtBlkOpKind)
{
@@ -1239,8 +1293,7 @@ void LinearScan::BuildBlockStore(GenTreeBlk* blkNode)
if (size >= XMM_REGSIZE_BYTES)
{
// Reserve an XMM register to fill it with a pack of 16 init value constants.
- info->internalFloatCount = 1;
- info->setInternalCandidates(this, internalFloatRegCandidates());
+ buildInternalFloatRegisterDefForNode(blkNode, internalFloatRegCandidates());
// use XMM register to fill with constants, it's AVX instruction and set the flag
SetContainsAVXFlags();
}
@@ -1251,7 +1304,7 @@ void LinearScan::BuildBlockStore(GenTreeBlk* blkNode)
// a "mov byte ptr [dest], val". If the fill size is odd, we will try to do this
// when unrolling, so only allow byteable registers as the source value. (We could
// consider just using BlkOpKindRepInstr instead.)
- sourceRegMask = RBM_BYTE_REGS;
+ sourceRegMask = allByteRegs();
}
#endif // _TARGET_X86_
break;
@@ -1273,9 +1326,9 @@ void LinearScan::BuildBlockStore(GenTreeBlk* blkNode)
sourceRegMask = RBM_ARG_1;
blkSizeRegMask = RBM_ARG_2;
#else // !_TARGET_AMD64_
- dstAddrRegMask = RBM_RDI;
- sourceRegMask = RBM_RAX;
- blkSizeRegMask = RBM_RCX;
+ dstAddrRegMask = RBM_RDI;
+ sourceRegMask = RBM_RAX;
+ blkSizeRegMask = RBM_RCX;
#endif // !_TARGET_AMD64_
break;
@@ -1290,11 +1343,6 @@ void LinearScan::BuildBlockStore(GenTreeBlk* blkNode)
{
assert(source->isContained());
srcAddrOrFill = source->gtGetOp1();
- if (!srcAddrOrFill->isContained())
- {
- sourceInfo = getLocationInfo(srcAddrOrFill);
- info->srcCount++;
- }
}
if (blkNode->OperGet() == GT_STORE_OBJ)
{
@@ -1320,7 +1368,6 @@ void LinearScan::BuildBlockStore(GenTreeBlk* blkNode)
// RBM_NON_BYTE_REGS from internal candidates.
if ((size & (XMM_REGSIZE_BYTES - 1)) != 0)
{
- info->internalIntCount++;
regMaskTP regMask = allRegs(TYP_INT);
#ifdef _TARGET_X86_
@@ -1329,7 +1376,7 @@ void LinearScan::BuildBlockStore(GenTreeBlk* blkNode)
regMask &= ~RBM_NON_BYTE_REGS;
}
#endif
- info->setInternalCandidates(this, regMask);
+ buildInternalIntRegisterDefForNode(blkNode, regMask);
}
if (size >= XMM_REGSIZE_BYTES)
@@ -1337,8 +1384,7 @@ void LinearScan::BuildBlockStore(GenTreeBlk* blkNode)
// If we have a buffer larger than XMM_REGSIZE_BYTES,
// reserve an XMM register to use it for a
// series of 16-byte loads and stores.
- info->internalFloatCount = 1;
- info->addInternalCandidates(this, internalFloatRegCandidates());
+ buildInternalFloatRegisterDefForNode(blkNode, internalFloatRegCandidates());
// Uses XMM reg for load and store and hence check to see whether AVX instructions
// are used for codegen, set ContainsAVX flag
SetContainsAVXFlags();
@@ -1362,9 +1408,9 @@ void LinearScan::BuildBlockStore(GenTreeBlk* blkNode)
sourceRegMask = RBM_ARG_1;
blkSizeRegMask = RBM_ARG_2;
#else // !_TARGET_AMD64_
- dstAddrRegMask = RBM_RDI;
- sourceRegMask = RBM_RAX;
- blkSizeRegMask = RBM_RCX;
+ dstAddrRegMask = RBM_RDI;
+ sourceRegMask = RBM_RAX;
+ blkSizeRegMask = RBM_RCX;
#endif // !_TARGET_AMD64_
break;
@@ -1372,60 +1418,48 @@ void LinearScan::BuildBlockStore(GenTreeBlk* blkNode)
unreached();
}
}
+ if ((srcAddrOrFill == nullptr) && (sourceRegMask != RBM_NONE))
+ {
+ // This is a local source; we'll use a temp register for its address.
+ assert(source->isContained() && source->OperIsLocal());
+ buildInternalIntRegisterDefForNode(blkNode, sourceRegMask);
+ }
}
- if (dstAddrInfo != nullptr)
+ if ((size != 0) && (blkSizeRegMask != RBM_NONE))
{
- if (dstAddrRegMask != RBM_NONE)
- {
- dstAddrInfo->info.setSrcCandidates(this, dstAddrRegMask);
- }
- useList.Append(dstAddrInfo);
+ // Reserve a temp register for the block size argument.
+ buildInternalIntRegisterDefForNode(blkNode, blkSizeRegMask);
}
- if (sourceRegMask != RBM_NONE)
+
+ if (!dstAddr->isContained() && !blkNode->IsReverseOp())
{
- if (sourceInfo != nullptr)
- {
- sourceInfo->info.setSrcCandidates(this, sourceRegMask);
- }
- else
- {
- // This is a local source; we'll use a temp register for its address.
- info->addInternalCandidates(this, sourceRegMask);
- info->internalIntCount++;
- }
+ srcCount++;
+ BuildUse(dstAddr, dstAddrRegMask);
}
- if (sourceInfo != nullptr)
+ if ((srcAddrOrFill != nullptr) && !srcAddrOrFill->isContained())
{
- useList.Add(sourceInfo, blkNode->IsReverseOp());
+ srcCount++;
+ BuildUse(srcAddrOrFill, sourceRegMask);
}
-
- if (blkNode->OperIs(GT_STORE_DYN_BLK))
+ if (!dstAddr->isContained() && blkNode->IsReverseOp())
{
- // The block size argument is a third argument to GT_STORE_DYN_BLK
- info->srcCount++;
-
- GenTree* blockSize = blkNode->AsDynBlk()->gtDynamicSize;
- sizeInfo = getLocationInfo(blockSize);
- useList.Add(sizeInfo, blkNode->AsDynBlk()->gtEvalSizeFirst);
+ srcCount++;
+ BuildUse(dstAddr, dstAddrRegMask);
}
- if (blkSizeRegMask != RBM_NONE)
+ if (size == 0)
{
- if (size != 0)
- {
- // Reserve a temp register for the block size argument.
- info->addInternalCandidates(this, blkSizeRegMask);
- info->internalIntCount++;
- }
- else
- {
- // The block size argument is a third argument to GT_STORE_DYN_BLK
- assert((blkNode->gtOper == GT_STORE_DYN_BLK) && (sizeInfo != nullptr));
- info->setSrcCount(3);
- sizeInfo->info.setSrcCandidates(this, blkSizeRegMask);
- }
+ assert(blkNode->OperIs(GT_STORE_DYN_BLK));
+ // The block size argument is a third argument to GT_STORE_DYN_BLK
+ srcCount++;
+ GenTree* blockSize = blkNode->AsDynBlk()->gtDynamicSize;
+ BuildUse(blockSize, blkSizeRegMask);
}
+ buildInternalRegisterUses();
+ regMaskTP killMask = getKillSetForBlockStore(blkNode);
+ BuildDefsWithKills(blkNode, 0, RBM_NONE, killMask);
+ return srcCount;
}
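
For the unrolled block-op kinds above, the internal registers follow mechanically from the size. A standalone sketch, assuming XMM_REGSIZE_BYTES == 16 (on x86 the int temp is additionally restricted to byteable registers when the tail contains single bytes):

    #include <cstdio>

    static void unrollTemps(unsigned size)
    {
        bool xmmTemp = (size >= 16);       // 16-byte chunks via an XMM register
        bool intTemp = (size % 16u) != 0;  // ordinary moves for the remainder
        printf("size=%3u xmmTemp=%d intTemp=%d\n", size, xmmTemp, intTemp);
    }

    int main()
    {
        unrollTemps(64); // chunks only
        unrollTemps(70); // chunks plus a 6-byte tail
        unrollTemps(8);  // below XMM size: int moves only
        return 0;
    }
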
#ifdef FEATURE_PUT_STRUCT_ARG_STK
@@ -1436,23 +1470,21 @@ void LinearScan::BuildBlockStore(GenTreeBlk* blkNode)
// tree - The node of interest
//
// Return Value:
-// None.
+// The number of sources consumed by this node.
//
-void LinearScan::BuildPutArgStk(GenTreePutArgStk* putArgStk)
+int LinearScan::BuildPutArgStk(GenTreePutArgStk* putArgStk)
{
- TreeNodeInfo* info = currentNodeInfo;
- info->srcCount = 0;
- assert(info->dstCount == 0);
-
+ int srcCount = 0;
if (putArgStk->gtOp1->gtOper == GT_FIELD_LIST)
{
- putArgStk->gtOp1->SetContained();
+ assert(putArgStk->gtOp1->isContained());
#ifdef _TARGET_X86_
- unsigned fieldCount = 0;
- bool needsByteTemp = false;
- bool needsSimdTemp = false;
- unsigned prevOffset = putArgStk->getArgSize();
+ RefPosition* simdTemp = nullptr;
+ RefPosition* intTemp = nullptr;
+ unsigned prevOffset = putArgStk->getArgSize();
+    // We need to iterate over the fields twice: once to determine the need for internal temps,
+    // and once to actually build the uses.
for (GenTreeFieldList* current = putArgStk->gtOp1->AsFieldList(); current != nullptr; current = current->Rest())
{
GenTree* const fieldNode = current->Current();
@@ -1464,24 +1496,28 @@ void LinearScan::BuildPutArgStk(GenTreePutArgStk* putArgStk)
// Note that we need to check the GT_FIELD_LIST type, not 'fieldType'. This is because the
// GT_FIELD_LIST will be TYP_SIMD12 whereas the fieldType might be TYP_SIMD16 for lclVar, where
// we "round up" to 16.
- if (current->gtFieldType == TYP_SIMD12)
+ if ((current->gtFieldType == TYP_SIMD12) && (simdTemp == nullptr))
{
- needsSimdTemp = true;
+ simdTemp = buildInternalFloatRegisterDefForNode(putArgStk);
}
#endif // defined(FEATURE_SIMD)
- // We can treat as a slot any field that is stored at a slot boundary, where the previous
- // field is not in the same slot. (Note that we store the fields in reverse order.)
- const bool fieldIsSlot = ((fieldOffset % 4) == 0) && ((prevOffset - fieldOffset) >= 4);
- if (!fieldIsSlot)
+ if (putArgStk->gtPutArgStkKind == GenTreePutArgStk::Kind::Push)
{
- if (varTypeIsByte(fieldType))
+ // We can treat as a slot any field that is stored at a slot boundary, where the previous
+ // field is not in the same slot. (Note that we store the fields in reverse order.)
+ const bool fieldIsSlot = ((fieldOffset % 4) == 0) && ((prevOffset - fieldOffset) >= 4);
+ if (intTemp == nullptr)
+ {
+ intTemp = buildInternalIntRegisterDefForNode(putArgStk);
+ }
+ if (!fieldIsSlot && varTypeIsByte(fieldType))
{
// If this field is a slot--i.e. it is an integer field that is 4-byte aligned and takes up 4 bytes
// (including padding)--we can store the whole value rather than just the byte. Otherwise, we will
// need a byte-addressable register for the store. We will enforce this requirement on an internal
// register, which we can use to copy multiple byte values.
- needsByteTemp = true;
+ intTemp->registerAssignment &= allByteRegs();
}
}
@@ -1490,38 +1526,19 @@ void LinearScan::BuildPutArgStk(GenTreePutArgStk* putArgStk)
putArgStk->gtNumberReferenceSlots++;
}
prevOffset = fieldOffset;
- fieldCount++;
- if (!fieldNode->isContained())
- {
- appendLocationInfoToList(fieldNode);
- info->srcCount++;
- }
}
-
- if (putArgStk->gtPutArgStkKind == GenTreePutArgStk::Kind::Push)
+ for (GenTreeFieldList* current = putArgStk->gtOp1->AsFieldList(); current != nullptr; current = current->Rest())
{
- // If any of the fields cannot be stored with an actual push, we may need a temporary
- // register to load the value before storing it to the stack location.
- info->internalIntCount = 1;
- regMaskTP regMask = allRegs(TYP_INT);
- if (needsByteTemp)
+ GenTree* const fieldNode = current->Current();
+ if (!fieldNode->isContained())
{
- regMask &= ~RBM_NON_BYTE_REGS;
+ BuildUse(fieldNode);
+ srcCount++;
}
- info->setInternalCandidates(this, regMask);
}
+ buildInternalRegisterUses();
-#if defined(FEATURE_SIMD)
- // For PutArgStk of a TYP_SIMD12, we need a SIMD temp register.
- if (needsSimdTemp)
- {
- assert(info->dstCount == 0);
- info->internalFloatCount += 1;
- info->addInternalCandidates(this, allSIMDRegs());
- }
-#endif // defined(FEATURE_SIMD)
-
- return;
+ return srcCount;
#endif // _TARGET_X86_
}
@@ -1532,25 +1549,22 @@ void LinearScan::BuildPutArgStk(GenTreePutArgStk* putArgStk)
// For PutArgStk of a TYP_SIMD12, we need an extra register.
if (putArgStk->isSIMD12())
{
- appendLocationInfoToList(putArgStk->gtOp1);
- info->srcCount = 1;
- info->internalFloatCount = 1;
- info->setInternalCandidates(this, allSIMDRegs());
- return;
+ buildInternalFloatRegisterDefForNode(putArgStk, internalFloatRegCandidates());
+ BuildUse(putArgStk->gtOp1);
+ srcCount = 1;
+ buildInternalRegisterUses();
+ return srcCount;
}
#endif // defined(FEATURE_SIMD) && defined(_TARGET_X86_)
if (type != TYP_STRUCT)
{
- BuildSimple(putArgStk);
- return;
+ return BuildSimple(putArgStk);
}
GenTree* dst = putArgStk;
GenTree* srcAddr = nullptr;
- info->srcCount = GetOperandInfo(src);
-
// If we have a buffer between XMM_REGSIZE_BYTES and CPBLK_UNROLL_LIMIT bytes, we'll use SSE2.
// Structs and buffer with sizes <= CPBLK_UNROLL_LIMIT bytes are occurring in more than 95% of
// our framework assemblies, so this is the main code generation scheme we'll use.
@@ -1567,7 +1581,6 @@ void LinearScan::BuildPutArgStk(GenTreePutArgStk* putArgStk)
// RBM_NON_BYTE_REGS from internal candidates.
if ((putArgStk->gtNumberReferenceSlots == 0) && (size & (XMM_REGSIZE_BYTES - 1)) != 0)
{
- info->internalIntCount++;
regMaskTP regMask = allRegs(TYP_INT);
#ifdef _TARGET_X86_
@@ -1576,7 +1589,7 @@ void LinearScan::BuildPutArgStk(GenTreePutArgStk* putArgStk)
regMask &= ~RBM_NON_BYTE_REGS;
}
#endif
- info->setInternalCandidates(this, regMask);
+ buildInternalIntRegisterDefForNode(putArgStk, regMask);
}
#ifdef _TARGET_X86_
@@ -1588,20 +1601,24 @@ void LinearScan::BuildPutArgStk(GenTreePutArgStk* putArgStk)
// If we have a buffer larger than or equal to XMM_REGSIZE_BYTES on x64/ux,
// or larger than or equal to 8 bytes on x86, reserve an XMM register to use it for a
// series of 16-byte loads and stores.
- info->internalFloatCount = 1;
- info->addInternalCandidates(this, internalFloatRegCandidates());
+ buildInternalFloatRegisterDefForNode(putArgStk, internalFloatRegCandidates());
SetContainsAVXFlags();
}
break;
case GenTreePutArgStk::Kind::RepInstr:
- info->internalIntCount += 3;
- info->setInternalCandidates(this, (RBM_RDI | RBM_RCX | RBM_RSI));
+ buildInternalIntRegisterDefForNode(putArgStk, RBM_RDI);
+ buildInternalIntRegisterDefForNode(putArgStk, RBM_RCX);
+ buildInternalIntRegisterDefForNode(putArgStk, RBM_RSI);
break;
default:
unreached();
}
+
+ srcCount = BuildOperandUses(src);
+ buildInternalRegisterUses();
+ return srcCount;
}
#endif // FEATURE_PUT_STRUCT_ARG_STK
@@ -1612,13 +1629,11 @@ void LinearScan::BuildPutArgStk(GenTreePutArgStk* putArgStk)
// tree - The node of interest
//
// Return Value:
-// None.
+// The number of sources consumed by this node.
//
-void LinearScan::BuildLclHeap(GenTree* tree)
+int LinearScan::BuildLclHeap(GenTree* tree)
{
- TreeNodeInfo* info = currentNodeInfo;
- info->srcCount = 1;
- assert(info->dstCount == 1);
+ int srcCount = 1;
// Need a variable number of temp regs (see genLclHeap() in codegenamd64.cpp):
// Here '-' means don't care.
@@ -1636,16 +1651,16 @@ void LinearScan::BuildLclHeap(GenTree* tree)
// Note: Here we don't need internal register to be different from targetReg.
// Rather, require it to be different from operand's reg.
- GenTree* size = tree->gtOp.gtOp1;
+ GenTree* size = tree->gtGetOp1();
if (size->IsCnsIntOrI())
{
assert(size->isContained());
- info->srcCount = 0;
+ srcCount = 0;
size_t sizeVal = size->gtIntCon.gtIconVal;
if (sizeVal == 0)
{
- info->internalIntCount = 0;
+            // No internal registers are needed for a zero-sized allocation.
}
else
{
@@ -1658,46 +1673,40 @@ void LinearScan::BuildLclHeap(GenTree* tree)
// we will generate 'push 0'.
assert((sizeVal % REGSIZE_BYTES) == 0);
size_t cntRegSizedWords = sizeVal / REGSIZE_BYTES;
- if (cntRegSizedWords <= 6)
- {
- info->internalIntCount = 0;
- }
- else if (!compiler->info.compInitMem)
+ if (cntRegSizedWords > 6)
{
- // No need to initialize allocated stack space.
- if (sizeVal < compiler->eeGetPageSize())
+ if (!compiler->info.compInitMem)
{
+ // No need to initialize allocated stack space.
+ if (sizeVal < compiler->eeGetPageSize())
+ {
#ifdef _TARGET_X86_
- info->internalIntCount = 1; // x86 needs a register here to avoid generating "sub" on ESP.
-#else // !_TARGET_X86_
- info->internalIntCount = 0;
-#endif // !_TARGET_X86_
- }
- else
- {
- // We need two registers: regCnt and RegTmp
- info->internalIntCount = 2;
+ // x86 needs a register here to avoid generating "sub" on ESP.
+ buildInternalIntRegisterDefForNode(tree);
+#endif
+ }
+ else
+ {
+ // We need two registers: regCnt and RegTmp
+ buildInternalIntRegisterDefForNode(tree);
+ buildInternalIntRegisterDefForNode(tree);
+ }
}
}
- else
- {
- // >6 and need to zero initialize allocated stack space.
- info->internalIntCount = 0;
- }
}
}
else
{
- appendLocationInfoToList(size);
if (!compiler->info.compInitMem)
{
- info->internalIntCount = 2;
- }
- else
- {
- info->internalIntCount = 0;
+ buildInternalIntRegisterDefForNode(tree);
+ buildInternalIntRegisterDefForNode(tree);
}
+ BuildUse(size);
}
+ buildInternalRegisterUses();
+ BuildDef(tree);
+ return srcCount;
}
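
The temp-register table this function implements collapses to a small decision function. A standalone sketch of the x86 flavor, assuming REGSIZE_BYTES == 4 and a 4KB page (amd64 differs only in the constant, below-page-size case, which needs no temp there):

    #include <cstdio>

    static int lclHeapTemps(bool sizeIsConst, unsigned sizeVal, bool initMem)
    {
        if (!sizeIsConst)
            return initMem ? 0 : 2;          // regCnt plus a temp for adjusting ESP
        if (sizeVal == 0 || (sizeVal / 4) <= 6 || initMem)
            return 0;                        // zero size, or small/zeroed blocks use pushes
        return (sizeVal < 4096) ? 1 : 2;     // one temp below a page, two when probing
    }

    int main()
    {
        printf("%d\n", lclHeapTemps(true, 16, false));   // 0: four reg-sized pushes
        printf("%d\n", lclHeapTemps(true, 8192, false)); // 2: must probe page by page
        printf("%d\n", lclHeapTemps(false, 0, false));   // 2: non-constant size
        return 0;
    }
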
//------------------------------------------------------------------------
@@ -1707,20 +1716,19 @@ void LinearScan::BuildLclHeap(GenTree* tree)
// tree - The node of interest
//
// Return Value:
-// None.
+// The number of sources consumed by this node.
//
-void LinearScan::BuildModDiv(GenTree* tree)
+int LinearScan::BuildModDiv(GenTree* tree)
{
- TreeNodeInfo* info = currentNodeInfo;
- GenTree* op1 = tree->gtGetOp1();
- GenTree* op2 = tree->gtGetOp2();
-
- assert(info->dstCount == 1);
+ GenTree* op1 = tree->gtGetOp1();
+ GenTree* op2 = tree->gtGetOp2();
+ regMaskTP dstCandidates = RBM_NONE;
+ RefPosition* internalDef = nullptr;
+ int srcCount = 0;
if (varTypeIsFloating(tree->TypeGet()))
{
- info->srcCount = appendBinaryLocationInfoToList(tree->AsOp());
- return;
+ return BuildSimple(tree);
}
// Amd64 Div/Idiv instruction:
@@ -1731,13 +1739,13 @@ void LinearScan::BuildModDiv(GenTree* tree)
{
// We are interested in just the remainder.
// RAX is used as a trashable register during computation of remainder.
- info->setDstCandidates(this, RBM_RDX);
+ dstCandidates = RBM_RDX;
}
else
{
// We are interested in just the quotient.
// RDX gets used as trashable register during computation of quotient
- info->setDstCandidates(this, RBM_RAX);
+ dstCandidates = RBM_RAX;
}
#ifdef _TARGET_X86_
@@ -1748,38 +1756,34 @@ void LinearScan::BuildModDiv(GenTree* tree)
// To avoid reg move would like to have op1's low part in RAX and high part in RDX.
GenTree* loVal = op1->gtGetOp1();
GenTree* hiVal = op1->gtGetOp2();
+ assert(!loVal->isContained() && !hiVal->isContained());
assert(op2->IsCnsIntOrI());
assert(tree->OperGet() == GT_UMOD);
// This situation also requires an internal register.
- info->internalIntCount = 1;
- info->setInternalCandidates(this, allRegs(TYP_INT));
+ buildInternalIntRegisterDefForNode(tree);
- LocationInfoListNode* loValInfo = getLocationInfo(loVal);
- LocationInfoListNode* hiValInfo = getLocationInfo(hiVal);
- loValInfo->info.setSrcCandidates(this, RBM_EAX);
- hiValInfo->info.setSrcCandidates(this, RBM_EDX);
- useList.Append(loValInfo);
- useList.Append(hiValInfo);
- info->srcCount = 2;
+ BuildUse(loVal, RBM_EAX);
+ BuildUse(hiVal, RBM_EDX);
+ srcCount = 2;
}
else
#endif
{
- // If possible would like to have op1 in RAX to avoid a register move
- LocationInfoListNode* op1Info = getLocationInfo(op1);
- op1Info->info.setSrcCandidates(this, RBM_RAX);
- useList.Append(op1Info);
- info->srcCount = 1;
+ // If possible would like to have op1 in RAX to avoid a register move.
+ RefPosition* op1Use = BuildUse(op1, RBM_EAX);
+ tgtPrefUse = op1Use;
+ srcCount = 1;
}
- LocationInfoListNode* op2Info;
- info->srcCount += GetOperandInfo(op2, &op2Info);
- for (; op2Info != nullptr; op2Info = op2Info->Next())
- {
- op2Info->info.setSrcCandidates(this, allRegs(TYP_INT) & ~(RBM_RAX | RBM_RDX));
- }
+ srcCount += BuildDelayFreeUses(op2, allRegs(TYP_INT) & ~(RBM_RAX | RBM_RDX));
+
+ buildInternalRegisterUses();
+
+ regMaskTP killMask = getKillSetForModDiv(tree->AsOp());
+ BuildDefsWithKills(tree, 1, dstCandidates, killMask);
+ return srcCount;
}
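
The fixed-register choreography here mirrors the hardware convention: x86 div/idiv consumes its dividend from RDX:RAX and writes the quotient to RAX and the remainder to RDX. A standalone sketch of that convention (assumes a compiler providing unsigned __int128, e.g. gcc or clang targeting x64):

    #include <cstdio>
    #include <cstdint>

    int main()
    {
        uint64_t rdx = 0, rax = 100; // the 128-bit dividend lives in RDX:RAX
        unsigned __int128 dividend = ((unsigned __int128)rdx << 64) | rax;
        uint64_t divisor = 7;
        rax = (uint64_t)(dividend / divisor); // quotient is written to RAX
        rdx = (uint64_t)(dividend % divisor); // remainder is written to RDX
        printf("quotient=%llu remainder=%llu\n",
               (unsigned long long)rax, (unsigned long long)rdx);
        return 0;
    }
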
//------------------------------------------------------------------------
@@ -1789,18 +1793,15 @@ void LinearScan::BuildModDiv(GenTree* tree)
// tree - The node of interest
//
// Return Value:
-// None.
+// The number of sources consumed by this node.
//
-void LinearScan::BuildIntrinsic(GenTree* tree)
+int LinearScan::BuildIntrinsic(GenTree* tree)
{
- TreeNodeInfo* info = currentNodeInfo;
// Both operand and its result must be of floating point type.
GenTree* op1 = tree->gtGetOp1();
assert(varTypeIsFloating(op1));
assert(op1->TypeGet() == tree->TypeGet());
-
- info->srcCount = GetOperandInfo(op1);
- assert(info->dstCount == 1);
+ RefPosition* internalFloatDef = nullptr;
switch (tree->gtIntrinsic.gtIntrinsicId)
{
@@ -1822,8 +1823,7 @@ void LinearScan::BuildIntrinsic(GenTree* tree)
// memory operands we can avoid the need for an internal register.
if (tree->gtIntrinsic.gtIntrinsicId == CORINFO_INTRINSIC_Abs)
{
- info->internalFloatCount = 1;
- info->setInternalCandidates(this, internalFloatRegCandidates());
+ internalFloatDef = buildInternalFloatRegisterDefForNode(tree, internalFloatRegCandidates());
}
break;
@@ -1845,6 +1845,23 @@ void LinearScan::BuildIntrinsic(GenTree* tree)
unreached();
break;
}
+ assert(tree->gtGetOp2IfPresent() == nullptr);
+ int srcCount;
+ if (op1->isContained())
+ {
+ srcCount = BuildOperandUses(op1);
+ }
+ else
+ {
+ tgtPrefUse = BuildUse(op1);
+ srcCount = 1;
+ }
+ if (internalFloatDef != nullptr)
+ {
+ buildInternalRegisterUses();
+ }
+ BuildDef(tree);
+ return srcCount;
}
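
The contained/non-contained split above follows the general pattern for operand uses. A standalone sketch (a toy operand shape, not GenTree) of how many register sources an operand contributes, in the spirit of BuildOperandUses:

    #include <cstdio>

    struct ToyOperand
    {
        bool contained;
        bool isIndir;
        bool hasBase;
        bool hasIndex;
    };

    static int operandUses(const ToyOperand& op)
    {
        if (!op.contained)
            return 1; // the operand itself occupies a register
        if (op.isIndir)
            return (op.hasBase ? 1 : 0) + (op.hasIndex ? 1 : 0);
        return 0; // e.g. a contained immediate or a spilled local
    }

    int main()
    {
        printf("%d\n", operandUses({false, false, false, false})); // 1: register operand
        printf("%d\n", operandUses({true, true, true, true}));     // 2: contained [base+index]
        printf("%d\n", operandUses({true, false, false, false}));  // 0: contained immediate
        return 0;
    }
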
#ifdef FEATURE_SIMD
@@ -1855,34 +1872,29 @@ void LinearScan::BuildIntrinsic(GenTree* tree)
// tree - The GT_SIMD node of interest
//
// Return Value:
-// None.
-
-void LinearScan::BuildSIMD(GenTreeSIMD* simdTree)
+// The number of sources consumed by this node.
+//
+int LinearScan::BuildSIMD(GenTreeSIMD* simdTree)
{
- TreeNodeInfo* info = currentNodeInfo;
// Only SIMDIntrinsicInit can be contained. Other than that,
// only SIMDIntrinsicOpEquality and SIMDIntrinsicOpInEquality can have 0 dstCount.
+ int dstCount = simdTree->IsValue() ? 1 : 0;
+ bool buildUses = true;
+ regMaskTP dstCandidates = RBM_NONE;
+
if (simdTree->isContained())
{
assert(simdTree->gtSIMDIntrinsicID == SIMDIntrinsicInit);
}
- else if (info->dstCount != 1)
+ else if (dstCount != 1)
{
assert((simdTree->gtSIMDIntrinsicID == SIMDIntrinsicOpEquality) ||
(simdTree->gtSIMDIntrinsicID == SIMDIntrinsicOpInEquality));
}
SetContainsAVXFlags(true, simdTree->gtSIMDSize);
- GenTree* op1 = simdTree->gtOp.gtOp1;
- GenTree* op2 = simdTree->gtOp.gtOp2;
- info->srcCount = 0;
- if (!op1->OperIs(GT_LIST))
- {
- info->srcCount += GetOperandInfo(op1);
- }
- if ((op2 != nullptr) && !op2->isContained())
- {
- info->srcCount += GetOperandInfo(op2);
- }
+ GenTree* op1 = simdTree->gtGetOp1();
+ GenTree* op2 = simdTree->gtGetOp2();
+ int srcCount = 0;
switch (simdTree->gtSIMDIntrinsicID)
{
@@ -1904,18 +1916,24 @@ void LinearScan::BuildSIMD(GenTreeSIMD* simdTree)
if (op1lo->isContained())
{
+ srcCount = 0;
assert(op1hi->isContained());
assert((op1lo->IsIntegralConst(0) && op1hi->IsIntegralConst(0)) ||
(op1lo->IsIntegralConst(-1) && op1hi->IsIntegralConst(-1)));
- assert(info->srcCount == 0);
}
else
{
- assert(info->srcCount == 2);
- info->internalFloatCount = 1;
- info->setInternalCandidates(this, allSIMDRegs());
- info->isInternalRegDelayFree = true;
+ srcCount = 2;
+ buildInternalFloatRegisterDefForNode(simdTree);
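+ // The internal register must be different from the targetReg in which the result is produced.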
+ setInternalRegsDelayFree = true;
+ }
+
+ if (srcCount == 2)
+ {
+ BuildUse(op1lo, RBM_EAX);
+ BuildUse(op1hi, RBM_EDX);
}
+ buildUses = false;
}
#endif // !defined(_TARGET_64BIT_)
}
@@ -1924,34 +1942,31 @@ void LinearScan::BuildSIMD(GenTreeSIMD* simdTree)
case SIMDIntrinsicInitN:
{
var_types baseType = simdTree->gtSIMDBaseType;
- info->srcCount = (short)(simdTree->gtSIMDSize / genTypeSize(baseType));
- int initCount = 0;
+ srcCount = (int)(simdTree->gtSIMDSize / genTypeSize(baseType));
+ // Need an internal register to stitch together all the values into a single vector in a SIMD reg.
+ buildInternalFloatRegisterDefForNode(simdTree);
+ int initCount = 0;
for (GenTree* list = op1; list != nullptr; list = list->gtGetOp2())
{
assert(list->OperGet() == GT_LIST);
GenTree* listItem = list->gtGetOp1();
assert(listItem->TypeGet() == baseType);
assert(!listItem->isContained());
- appendLocationInfoToList(listItem);
+ BuildUse(listItem);
initCount++;
}
- assert(initCount == info->srcCount);
-
- // Need an internal register to stitch together all the values into a single vector in a SIMD reg.
- info->internalFloatCount = 1;
- info->setInternalCandidates(this, allSIMDRegs());
+ assert(initCount == srcCount);
+ buildUses = false;
}
break;
case SIMDIntrinsicInitArray:
// We have an array and an index, which may be contained.
- assert(info->srcCount == (simdTree->gtGetOp2()->isContained() ? 1 : 2));
break;
case SIMDIntrinsicDiv:
// SSE2 has no instruction support for division on integer vectors
noway_assert(varTypeIsFloating(simdTree->gtSIMDBaseType));
- assert(info->srcCount == 2);
break;
case SIMDIntrinsicAbs:
@@ -1962,13 +1977,11 @@ void LinearScan::BuildSIMD(GenTreeSIMD* simdTree)
assert(simdTree->gtSIMDBaseType == TYP_INT || simdTree->gtSIMDBaseType == TYP_SHORT ||
simdTree->gtSIMDBaseType == TYP_BYTE);
assert(compiler->getSIMDSupportLevel() >= SIMD_SSE4_Supported);
- assert(info->srcCount == 1);
break;
case SIMDIntrinsicSqrt:
// SSE2 has no instruction support for sqrt on integer vectors.
noway_assert(varTypeIsFloating(simdTree->gtSIMDBaseType));
- assert(info->srcCount == 1);
break;
case SIMDIntrinsicAdd:
@@ -1980,26 +1993,22 @@ void LinearScan::BuildSIMD(GenTreeSIMD* simdTree)
case SIMDIntrinsicBitwiseXor:
case SIMDIntrinsicMin:
case SIMDIntrinsicMax:
- assert(info->srcCount == 2);
-
// SSE2 32-bit integer multiplication requires two temp regs
if (simdTree->gtSIMDIntrinsicID == SIMDIntrinsicMul && simdTree->gtSIMDBaseType == TYP_INT &&
compiler->getSIMDSupportLevel() == SIMD_SSE2_Supported)
{
- info->internalFloatCount = 2;
- info->setInternalCandidates(this, allSIMDRegs());
+ buildInternalFloatRegisterDefForNode(simdTree);
+ buildInternalFloatRegisterDefForNode(simdTree);
}
break;
case SIMDIntrinsicEqual:
- assert(info->srcCount == 2);
break;
// SSE2 doesn't support < and <= directly on int vectors.
// Instead we need to use > and >= with swapped operands.
case SIMDIntrinsicLessThan:
case SIMDIntrinsicLessThanOrEqual:
- assert(info->srcCount == 2);
noway_assert(!varTypeIsIntegral(simdTree->gtSIMDBaseType));
break;
@@ -2008,7 +2017,6 @@ void LinearScan::BuildSIMD(GenTreeSIMD* simdTree)
// Instead we need to use < and <= with swapped operands.
case SIMDIntrinsicGreaterThan:
noway_assert(!varTypeIsFloating(simdTree->gtSIMDBaseType));
- assert(info->srcCount == 2);
break;
case SIMDIntrinsicOpEquality:
@@ -2018,21 +2026,17 @@ void LinearScan::BuildSIMD(GenTreeSIMD* simdTree)
// If the second operand is contained then ContainCheckSIMD has determined
// that PTEST can be used. We only need a single source register and no
// internal registers.
- assert(info->srcCount == 1);
}
else
{
// Can't use PTEST so we need 2 source registers, 1 internal SIMD register
// (to hold the result of PCMPEQD or other similar SIMD compare instruction)
// and one internal INT register (to hold the result of PMOVMSKB).
- assert(info->srcCount == 2);
- info->internalFloatCount = 1;
- info->setInternalCandidates(this, allSIMDRegs());
- info->internalIntCount = 1;
- info->addInternalCandidates(this, allRegs(TYP_INT));
+ buildInternalIntRegisterDefForNode(simdTree);
+ buildInternalFloatRegisterDefForNode(simdTree);
}
// These SIMD nodes only set the condition flags.
- info->dstCount = 0;
+ dstCount = 0;
break;
case SIMDIntrinsicDotProduct:
@@ -2052,11 +2056,10 @@ void LinearScan::BuildSIMD(GenTreeSIMD* simdTree)
if (varTypeIsFloating(simdTree->gtSIMDBaseType))
{
if ((compiler->getSIMDSupportLevel() == SIMD_SSE2_Supported) ||
- (simdTree->gtOp.gtOp1->TypeGet() == TYP_SIMD32))
+ (simdTree->gtGetOp1()->TypeGet() == TYP_SIMD32))
{
- info->internalFloatCount = 1;
- info->isInternalRegDelayFree = true;
- info->setInternalCandidates(this, allSIMDRegs());
+ buildInternalFloatRegisterDefForNode(simdTree);
+ setInternalRegsDelayFree = true;
}
// else don't need scratch reg(s).
}
@@ -2064,13 +2067,15 @@ void LinearScan::BuildSIMD(GenTreeSIMD* simdTree)
{
assert(simdTree->gtSIMDBaseType == TYP_INT && compiler->getSIMDSupportLevel() >= SIMD_SSE4_Supported);
- // No need to set isInternalRegDelayFree since targetReg is a
+ // No need for setInternalRegsDelayFree since targetReg is
// an int type reg and guaranteed to be different from xmm/ymm
// regs.
- info->internalFloatCount = (compiler->getSIMDSupportLevel() == SIMD_AVX2_Supported) ? 2 : 1;
- info->setInternalCandidates(this, allSIMDRegs());
+ buildInternalFloatRegisterDefForNode(simdTree);
+ if (compiler->getSIMDSupportLevel() == SIMD_AVX2_Supported)
+ {
+ buildInternalFloatRegisterDefForNode(simdTree);
+ }
}
- assert(info->srcCount == 2);
break;
case SIMDIntrinsicGetItem:
@@ -2081,8 +2086,8 @@ void LinearScan::BuildSIMD(GenTreeSIMD* simdTree)
// The result is baseType of SIMD struct.
// op1 may be a contained memory op, but if so we will consume its address.
// op2 may be a contained constant.
- op1 = simdTree->gtOp.gtOp1;
- op2 = simdTree->gtOp.gtOp2;
+ op1 = simdTree->gtGetOp1();
+ op2 = simdTree->gtGetOp2();
if (!op1->isContained())
{
@@ -2115,10 +2120,38 @@ void LinearScan::BuildSIMD(GenTreeSIMD* simdTree)
if (needFloatTemp)
{
- info->internalFloatCount = 1;
- info->setInternalCandidates(this, allSIMDRegs());
+ buildInternalFloatRegisterDefForNode(simdTree);
}
}
+#ifdef _TARGET_X86_
+ // This logic is duplicated from genSIMDIntrinsicGetItem().
+ // When we generate code for a SIMDIntrinsicGetItem, under certain circumstances we need to
+ // generate a movzx/movsx. On x86, these require byteable registers. So figure out which
+ // cases will require this, so the non-byteable registers can be excluded.
+
+ var_types baseType = simdTree->gtSIMDBaseType;
+ if (op2->IsCnsIntOrI() && varTypeIsSmallInt(baseType))
+ {
+ bool ZeroOrSignExtnReqd = true;
+ unsigned baseSize = genTypeSize(baseType);
+ if (baseSize == 1)
+ {
+ if ((op2->gtIntCon.gtIconVal % 2) == 1)
+ {
+ ZeroOrSignExtnReqd = (baseType == TYP_BYTE);
+ }
+ }
+ else
+ {
+ assert(baseSize == 2);
+ ZeroOrSignExtnReqd = (baseType == TYP_SHORT);
+ }
+ if (ZeroOrSignExtnReqd)
+ {
+ dstCandidates = allByteRegs();
+ }
+ }
+#endif // _TARGET_X86_
}
}
break;
@@ -2127,107 +2160,87 @@ void LinearScan::BuildSIMD(GenTreeSIMD* simdTree)
case SIMDIntrinsicSetY:
case SIMDIntrinsicSetZ:
case SIMDIntrinsicSetW:
- assert(info->srcCount == 2);
-
// We need an internal integer register for SSE2 codegen
if (compiler->getSIMDSupportLevel() == SIMD_SSE2_Supported)
{
- info->internalIntCount = 1;
- info->setInternalCandidates(this, allRegs(TYP_INT));
+ buildInternalIntRegisterDefForNode(simdTree);
}
break;
case SIMDIntrinsicCast:
- assert(info->srcCount == 1);
break;
case SIMDIntrinsicConvertToSingle:
- assert(info->srcCount == 1);
if (simdTree->gtSIMDBaseType == TYP_UINT)
{
// We need an internal register different from targetReg.
- info->isInternalRegDelayFree = true;
- info->internalIntCount = 1;
- info->internalFloatCount = 2;
- info->setInternalCandidates(this, allSIMDRegs() | allRegs(TYP_INT));
+ setInternalRegsDelayFree = true;
+ buildInternalFloatRegisterDefForNode(simdTree);
+ buildInternalFloatRegisterDefForNode(simdTree);
+ // We also need an integer register.
+ buildInternalIntRegisterDefForNode(simdTree);
}
break;
case SIMDIntrinsicConvertToInt32:
- assert(info->srcCount == 1);
break;
case SIMDIntrinsicWidenLo:
case SIMDIntrinsicWidenHi:
- assert(info->srcCount == 1);
if (varTypeIsIntegral(simdTree->gtSIMDBaseType))
{
// We need an internal register different from targetReg.
- info->isInternalRegDelayFree = true;
- info->internalFloatCount = 1;
- info->setInternalCandidates(this, allSIMDRegs());
+ setInternalRegsDelayFree = true;
+ buildInternalFloatRegisterDefForNode(simdTree);
}
break;
case SIMDIntrinsicConvertToInt64:
- assert(info->srcCount == 1);
// We need an internal register different from targetReg.
- info->isInternalRegDelayFree = true;
- info->internalIntCount = 1;
+ setInternalRegsDelayFree = true;
+ buildInternalFloatRegisterDefForNode(simdTree);
if (compiler->getSIMDSupportLevel() == SIMD_AVX2_Supported)
{
- info->internalFloatCount = 2;
+ buildInternalFloatRegisterDefForNode(simdTree);
}
- else
- {
- info->internalFloatCount = 1;
- }
- info->setInternalCandidates(this, allSIMDRegs() | allRegs(TYP_INT));
+ // We also need an integer register.
+ buildInternalIntRegisterDefForNode(simdTree);
break;
case SIMDIntrinsicConvertToDouble:
- assert(info->srcCount == 1);
// We need an internal register different from targetReg.
- info->isInternalRegDelayFree = true;
- info->internalIntCount = 1;
+ setInternalRegsDelayFree = true;
+ buildInternalFloatRegisterDefForNode(simdTree);
#ifdef _TARGET_X86_
if (simdTree->gtSIMDBaseType == TYP_LONG)
{
- info->internalFloatCount = 3;
+ buildInternalFloatRegisterDefForNode(simdTree);
+ buildInternalFloatRegisterDefForNode(simdTree);
}
else
#endif
if ((compiler->getSIMDSupportLevel() == SIMD_AVX2_Supported) || (simdTree->gtSIMDBaseType == TYP_ULONG))
{
- info->internalFloatCount = 2;
+ buildInternalFloatRegisterDefForNode(simdTree);
}
- else
- {
- info->internalFloatCount = 1;
- }
- info->setInternalCandidates(this, allSIMDRegs() | allRegs(TYP_INT));
+ // We also need an integer register.
+ buildInternalIntRegisterDefForNode(simdTree);
break;
case SIMDIntrinsicNarrow:
- assert(info->srcCount == 2);
// We need an internal register different from targetReg.
- info->isInternalRegDelayFree = true;
+ setInternalRegsDelayFree = true;
+ buildInternalFloatRegisterDefForNode(simdTree);
if ((compiler->getSIMDSupportLevel() == SIMD_AVX2_Supported) && (simdTree->gtSIMDBaseType != TYP_DOUBLE))
{
- info->internalFloatCount = 2;
- }
- else
- {
- info->internalFloatCount = 1;
+ buildInternalFloatRegisterDefForNode(simdTree);
}
- info->setInternalCandidates(this, allSIMDRegs());
break;
case SIMDIntrinsicShuffleSSE2:
- assert(info->srcCount == 1);
// Second operand is an integer constant and marked as contained.
- assert(simdTree->gtOp.gtOp2->isContainedIntOrIImmed());
+ assert(simdTree->gtGetOp2()->isContainedIntOrIImmed());
break;
case SIMDIntrinsicGetX:
@@ -2245,6 +2258,23 @@ void LinearScan::BuildSIMD(GenTreeSIMD* simdTree)
noway_assert(!"Unimplemented SIMD node type.");
unreached();
}
+ if (buildUses)
+ {
+ assert(!op1->OperIs(GT_LIST));
+ assert(srcCount == 0);
+ // This is overly conservative, but is here for zero diffs.
+ srcCount = BuildRMWUses(simdTree);
+ }
+ buildInternalRegisterUses();
+ if (dstCount == 1)
+ {
+ BuildDef(simdTree, dstCandidates);
+ }
+ else
+ {
+ assert(dstCount == 0);
+ }
+ return srcCount;
}
#endif // FEATURE_SIMD
@@ -2256,11 +2286,10 @@ void LinearScan::BuildSIMD(GenTreeSIMD* simdTree)
// tree - The GT_HWIntrinsic node of interest
//
// Return Value:
-// None.
-
-void LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree)
+// The number of sources consumed by this node.
+//
+int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree)
{
- TreeNodeInfo* info = currentNodeInfo;
NamedIntrinsic intrinsicID = intrinsicTree->gtHWIntrinsicId;
var_types baseType = intrinsicTree->gtSIMDBaseType;
InstructionSet isa = Compiler::isaOfHWIntrinsic(intrinsicID);
@@ -2272,31 +2301,32 @@ void LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree)
{
SetContainsAVXFlags(true, 32);
}
+ GenTree* op1 = intrinsicTree->gtGetOp1();
+ GenTree* op2 = intrinsicTree->gtGetOp2();
+ GenTree* op3 = nullptr;
+ int srcCount = 0;
- GenTree* op1 = intrinsicTree->gtOp.gtOp1;
- GenTree* op2 = intrinsicTree->gtOp.gtOp2;
- info->srcCount = 0;
-
- if (op1 != nullptr)
+ if ((op1 != nullptr) && op1->OperIsList())
{
- if (op1->OperIsList())
- {
- for (GenTreeArgList* list = op1->AsArgList(); list != nullptr; list = list->Rest())
- {
- info->srcCount += GetOperandInfo(list->Current());
- }
- }
- else
- {
- info->srcCount += GetOperandInfo(op1);
- }
+ // op2 must be null, and the argument list must contain exactly three arguments.
+ assert(op2 == nullptr);
+ noway_assert(op1->AsArgList()->Rest() != nullptr);
+ noway_assert(op1->AsArgList()->Rest()->Rest() != nullptr);
+ assert(op1->AsArgList()->Rest()->Rest()->Rest() == nullptr);
+ op2 = op1->AsArgList()->Rest()->Current();
+ op3 = op1->AsArgList()->Rest()->Rest()->Current();
+ op1 = op1->AsArgList()->Current();
+ assert(numArgs >= 3);
}
-
- if (op2 != nullptr)
+ else
{
- info->srcCount += GetOperandInfo(op2);
+ assert(numArgs == ((op2 == nullptr) ? 1 : 2));
}
+ int dstCount = intrinsicTree->IsValue() ? 1 : 0;
+ bool buildUses = true;
+ regMaskTP dstCandidates = RBM_NONE;
+
if ((category == HW_Category_IMM) && ((flags & HW_Flag_NoJmpTableIMM) == 0))
{
GenTree* lastOp = Compiler::lastOpOfHWIntrinsic(intrinsicTree, numArgs);
@@ -2308,25 +2338,19 @@ void LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree)
// We need two extra registers when lastOp isn't a constant so
// the offset into the jump table for the fallback path
// can be computed.
-
- info->internalIntCount = 2;
- info->setInternalCandidates(this, allRegs(TYP_INT));
+ buildInternalIntRegisterDefForNode(intrinsicTree);
+ buildInternalIntRegisterDefForNode(intrinsicTree);
}
}
- // Check for "srcCount >= 2" to match against 3+ operand nodes where one is constant
- if ((op2 == nullptr) && (info->srcCount >= 2) && intrinsicTree->isRMWHWIntrinsic(compiler))
- {
- // TODO-XArch-CQ: This is currently done in order to handle intrinsics which have more than
- // two arguments but which still have RMW semantics (such as NI_SSE41_Insert). We should make
- // this handling more general and move it back out to LinearScan::BuildNode.
-
- assert(numArgs > 2);
- LocationInfoListNode* op2Info = useList.Begin()->Next();
- op2Info->info.isDelayFree = true;
- info->hasDelayFreeSrc = true;
- }
+ // Determine whether this is an RMW operation where op2 must be marked delayFree so that it
+ // is not allocated the same register as the target.
+ bool isRMW = intrinsicTree->isRMWHWIntrinsic(compiler);
+ // Create internal temps, and handle any other special requirements.
+ // Note that the default case for building uses will handle the RMW flag, but if the uses
+ // are built in the individual cases, buildUses is set to false, and any RMW handling (delayFree)
+ // must be handled within the case.
switch (intrinsicID)
{
case NI_SSE_CompareEqualOrderedScalar:
@@ -2337,17 +2361,14 @@ void LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree)
case NI_SSE2_CompareEqualUnorderedScalar:
case NI_SSE2_CompareNotEqualOrderedScalar:
case NI_SSE2_CompareNotEqualUnorderedScalar:
- info->internalIntCount = 1;
- info->setInternalCandidates(this, RBM_BYTE_REGS);
- info->isInternalRegDelayFree = true;
+ buildInternalIntRegisterDefForNode(intrinsicTree, allByteRegs());
+ setInternalRegsDelayFree = true;
break;
case NI_SSE_SetScalarVector128:
case NI_SSE2_SetScalarVector128:
- // Need an internal register to stitch together all the values into a single vector in a SIMD reg.
- info->internalFloatCount = 1;
- info->setInternalCandidates(this, allSIMDRegs());
- info->isInternalRegDelayFree = true;
+ buildInternalFloatRegisterDefForNode(intrinsicTree);
+ setInternalRegsDelayFree = true;
break;
case NI_SSE_ConvertToSingle:
@@ -2357,9 +2378,10 @@ void LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree)
case NI_AVX_GetLowerHalf:
case NI_AVX_StaticCast:
{
- assert(info->srcCount == 1);
- assert(info->dstCount == 1);
- useList.Last()->info.isTgtPref = true;
+ srcCount = 1;
+ assert(dstCount == 1);
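+ // These unary ops prefer the result in op1's register, to avoid a copy.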
+ tgtPrefUse = BuildUse(op1);
+ buildUses = false;
break;
}
@@ -2367,12 +2389,11 @@ void LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree)
{
if (varTypeIsIntegral(baseType))
{
- info->internalFloatCount = 1;
+ buildInternalFloatRegisterDefForNode(intrinsicTree, allSIMDRegs());
if (!compiler->compSupports(InstructionSet_AVX2) && varTypeIsByte(baseType))
{
- info->internalFloatCount += 1;
+ buildInternalFloatRegisterDefForNode(intrinsicTree, allSIMDRegs());
}
- info->setInternalCandidates(this, allSIMDRegs());
}
break;
}
@@ -2380,40 +2401,48 @@ void LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree)
case NI_SSE2_MaskMove:
{
// SSE2 MaskMove hardcodes the destination (op3) in DI/EDI/RDI
- LocationInfoListNode* op3Info = useList.Begin()->Next()->Next();
- op3Info->info.setSrcCandidates(this, RBM_EDI);
+ // MaskMove doesn't have RMW semantics.
+ assert(!isRMW);
+ BuildUse(op1);
+ BuildUse(op2);
+ BuildUse(op3, RBM_EDI);
+ buildUses = false;
break;
}
case NI_SSE41_BlendVariable:
if (!compiler->canUseVexEncoding())
{
+ assert(numArgs == 3);
+ assert(isRMW);
// SSE4.1 blendv* hardcode the mask vector (op3) in XMM0
- LocationInfoListNode* op2Info = useList.Begin()->Next();
- LocationInfoListNode* op3Info = op2Info->Next();
- op2Info->info.isDelayFree = true;
- op3Info->info.isDelayFree = true;
- op3Info->info.setSrcCandidates(this, RBM_XMM0);
- info->hasDelayFreeSrc = true;
+ BuildUse(op1);
+ RefPosition* op2Use = BuildUse(op2);
+ setDelayFree(op2Use);
+ RefPosition* op3Use = BuildUse(op3, RBM_XMM0);
+ setDelayFree(op3Use);
+ buildUses = false;
+ srcCount = 3;
}
break;
case NI_SSE41_TestAllOnes:
{
- info->internalFloatCount = 1;
- info->setInternalCandidates(this, allSIMDRegs());
+ buildInternalFloatRegisterDefForNode(intrinsicTree);
break;
}
case NI_SSE41_Extract:
if (baseType == TYP_FLOAT)
{
- info->internalIntCount += 1;
+ buildInternalIntRegisterDefForNode(intrinsicTree);
}
#ifdef _TARGET_X86_
else if (varTypeIsByte(baseType))
{
- info->setDstCandidates(this, RBM_BYTE_REGS);
+ dstCandidates = allByteRegs();
}
#endif
break;
@@ -2425,12 +2454,18 @@ void LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree)
//
// TODO - currently we use the BaseType to bring the type of the second argument
// to the code generator. May encode the overload info in other way.
- var_types srcType = intrinsicTree->gtSIMDBaseType;
+ var_types srcType = intrinsicTree->gtSIMDBaseType;
+ regMaskTP op2Candidates = RBM_NONE;
+ BuildUse(op1);
if (varTypeIsByte(srcType))
{
- LocationInfoListNode* op2Info = useList.GetSecond(INDEBUG(intrinsicTree->gtGetOp2()));
- op2Info->info.setSrcCandidates(this, RBM_BYTE_REGS);
+ op2Candidates = allByteRegs();
}
+ RefPosition* op2Use = BuildUse(op2, op2Candidates);
+ assert(isRMW);
+ setDelayFree(op2Use);
+ srcCount = 2;
+ buildUses = false;
break;
}
#endif // _TARGET_X86_
@@ -2439,6 +2474,48 @@ void LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree)
assert((intrinsicID > NI_HW_INTRINSIC_START) && (intrinsicID < NI_HW_INTRINSIC_END));
break;
}
+
+ if (buildUses)
+ {
+ if (numArgs > 3)
+ {
+ srcCount = 0;
+ assert(!intrinsicTree->isRMWHWIntrinsic(compiler));
+ assert(op1->OperIs(GT_LIST));
+ for (GenTreeArgList* list = op1->AsArgList(); list != nullptr; list = list->Rest())
+ {
+ srcCount += BuildOperandUses(list->Current());
+ }
+ assert(srcCount == numArgs);
+ }
+ else
+ {
+ if (op1 != nullptr)
+ {
+ srcCount += BuildOperandUses(op1);
+ if (op2 != nullptr)
+ {
+ srcCount += (isRMW) ? BuildDelayFreeUses(op2) : BuildOperandUses(op2);
+ if (op3 != nullptr)
+ {
+ srcCount += (isRMW) ? BuildDelayFreeUses(op3) : BuildOperandUses(op3);
+ }
+ }
+ }
+ }
+ }
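+ // Build the uses for any internal registers defined above; they are used at this node's location.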
+ buildInternalRegisterUses();
+ if (dstCount == 1)
+ {
+ BuildDef(intrinsicTree, dstCandidates);
+ }
+ else
+ {
+ assert(dstCount == 0);
+ }
+ return srcCount;
}
#endif
@@ -2449,11 +2526,10 @@ void LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree)
// tree - The node of interest
//
// Return Value:
-// None.
+// The number of sources consumed by this node.
//
-void LinearScan::BuildCast(GenTree* tree)
+int LinearScan::BuildCast(GenTree* tree)
{
- TreeNodeInfo* info = currentNodeInfo;
// TODO-XArch-CQ: Int-To-Int conversions - castOp cannot be a memory op and must have an assigned register.
// see CodeGen::genIntToIntCast()
@@ -2464,25 +2540,36 @@ void LinearScan::BuildCast(GenTree* tree)
var_types castToType = tree->CastToType();
GenTree* castOp = tree->gtCast.CastOp();
var_types castOpType = castOp->TypeGet();
+ regMaskTP candidates = RBM_NONE;
- info->srcCount = GetOperandInfo(castOp);
- assert(info->dstCount == 1);
if (tree->gtFlags & GTF_UNSIGNED)
{
castOpType = genUnsignedType(castOpType);
}
+#ifdef _TARGET_X86_
+ if (varTypeIsByte(castToType))
+ {
+ candidates = allByteRegs();
+ }
+#endif // _TARGET_X86_
+
// some overflow checks need a temp reg:
// - GT_CAST from INT64/UINT64 to UINT32
if (tree->gtOverflow() && (castToType == TYP_UINT))
{
if (genTypeSize(castOpType) == 8)
{
// Here we don't need the internal register to be different from targetReg;
// rather, we require it to be different from the operand's register.
- info->internalIntCount = 1;
+ buildInternalIntRegisterDefForNode(tree);
}
}
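+ // The cast source may be a contained memory operand; in that case BuildOperandUses consumes its address components.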
+ int srcCount = BuildOperandUses(castOp, candidates);
+ buildInternalRegisterUses();
+ BuildDef(tree, candidates);
+ return srcCount;
}
//-----------------------------------------------------------------------------------------
@@ -2491,20 +2578,44 @@ void LinearScan::BuildCast(GenTree* tree)
// Arguments:
// indirTree - GT_IND or GT_STOREIND gentree node
//
-void LinearScan::BuildIndir(GenTreeIndir* indirTree)
+// Return Value:
+// The number of sources consumed by this node.
+//
+int LinearScan::BuildIndir(GenTreeIndir* indirTree)
{
- TreeNodeInfo* info = currentNodeInfo;
// If this is the rhs of a block copy (i.e. non-enregisterable struct),
// it has no register requirements.
if (indirTree->TypeGet() == TYP_STRUCT)
{
- return;
+ return 0;
}
- int indirSrcCount = GetIndirInfo(indirTree);
+#ifdef FEATURE_SIMD
+ RefPosition* internalFloatDef = nullptr;
+ if (indirTree->TypeGet() == TYP_SIMD12)
+ {
+ // If indirTree is of TYP_SIMD12, addr is not contained. See comment in LowerIndir().
+ assert(!indirTree->Addr()->isContained());
+
+ // Vector3 is read/written as two reads/writes: 8 byte and 4 byte.
+ // To assemble the vector properly we would need an additional
+ // XMM register.
+ internalFloatDef = buildInternalFloatRegisterDefForNode(indirTree);
+
+ // In case of GT_IND we need an internal register different from targetReg and
+ // both of the registers are used at the same time.
+ if (indirTree->OperGet() == GT_IND)
+ {
+ setInternalRegsDelayFree = true;
+ }
+ }
+#endif // FEATURE_SIMD
+
+ regMaskTP indirCandidates = RBM_NONE;
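+ // Build uses for the components of the indirection's address (base and index, if present).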
+ int srcCount = BuildIndirUses(indirTree, indirCandidates);
if (indirTree->gtOper == GT_STOREIND)
{
- GenTree* source = indirTree->gtOp.gtOp2;
+ GenTree* source = indirTree->gtGetOp2();
if (indirTree->AsStoreInd()->IsRMWMemoryOp())
{
// Because 'source' is contained, we haven't yet determined its special register requirements, if any.
@@ -2515,50 +2626,49 @@ void LinearScan::BuildIndir(GenTreeIndir* indirTree)
if (source->OperIsShiftOrRotate())
{
- info->srcCount += BuildShiftRotate(source);
+ srcCount += BuildShiftRotate(source);
}
else
{
- info->srcCount += appendBinaryLocationInfoToList(source->AsOp());
- }
- if (indirTree->AsStoreInd()->IsRMWDstOp1())
- {
- otherIndir = source->gtGetOp1()->AsIndir();
- if (source->OperIsBinary())
+ regMaskTP srcCandidates = RBM_NONE;
+
+#ifdef _TARGET_X86_
+ // Determine if we need byte regs for the non-mem source, if any.
+ // Note that BuildShiftRotate (above) will handle the byte requirement as needed,
+ // but STOREIND isn't itself an RMW op, so we have to explicitly set it for that case.
+
+ GenTree* nonMemSource = nullptr;
+
+ if (indirTree->AsStoreInd()->IsRMWDstOp1())
{
- nonMemSource = source->gtOp.gtOp2;
+ otherIndir = source->gtGetOp1()->AsIndir();
+ if (source->OperIsBinary())
+ {
+ nonMemSource = source->gtGetOp2();
+ }
}
- }
- else if (indirTree->AsStoreInd()->IsRMWDstOp2())
- {
- otherIndir = source->gtGetOp2()->AsIndir();
- nonMemSource = source->gtOp.gtOp1;
- }
- if (otherIndir != nullptr)
- {
- // Any lclVars in the addressing mode of this indirection are contained.
- // If they are marked as lastUse, transfer the last use flag to the store indir.
- GenTree* base = otherIndir->Base();
- GenTree* dstBase = indirTree->Base();
- CheckAndMoveRMWLastUse(base, dstBase);
- GenTree* index = otherIndir->Index();
- GenTree* dstIndex = indirTree->Index();
- CheckAndMoveRMWLastUse(index, dstIndex);
- }
- if (nonMemSource != nullptr)
- {
- assert(!nonMemSource->isContained() || (!nonMemSource->isMemoryOp() && !nonMemSource->IsLocal()));
-#ifdef _TARGET_X86_
- if (varTypeIsByte(indirTree) && !nonMemSource->isContained())
+ else if (indirTree->AsStoreInd()->IsRMWDstOp2())
{
- // If storeInd is of TYP_BYTE, set source to byteable registers.
- TreeNodeInfo& nonMemSourceInfo = useList.GetTreeNodeInfo(nonMemSource);
- regMaskTP regMask = nonMemSourceInfo.getSrcCandidates(this);
- regMask &= ~RBM_NON_BYTE_REGS;
- assert(regMask != RBM_NONE);
- nonMemSourceInfo.setSrcCandidates(this, regMask);
+ otherIndir = source->gtGetOp2()->AsIndir();
+ nonMemSource = source->gtGetOp1();
+ }
+ if ((nonMemSource != nullptr) && !nonMemSource->isContained() && varTypeIsByte(indirTree))
+ {
+ srcCandidates = RBM_BYTE_REGS;
}
#endif
+ if (otherIndir != nullptr)
+ {
+ // Any lclVars in the addressing mode of this indirection are contained.
+ // If they are marked as lastUse, transfer the last use flag to the store indir.
+ GenTree* base = otherIndir->Base();
+ GenTree* dstBase = indirTree->Base();
+ CheckAndMoveRMWLastUse(base, dstBase);
+ GenTree* index = otherIndir->Index();
+ GenTree* dstIndex = indirTree->Index();
+ CheckAndMoveRMWLastUse(index, dstIndex);
+ }
+ srcCount += BuildBinaryUses(source->AsOp(), srcCandidates);
}
}
else
@@ -2566,49 +2676,25 @@ void LinearScan::BuildIndir(GenTreeIndir* indirTree)
#ifdef _TARGET_X86_
if (varTypeIsByte(indirTree) && !source->isContained())
{
- // If storeInd is of TYP_BYTE, set source to byteable registers.
- LocationInfoListNode* sourceInfo = getLocationInfo(source);
- regMaskTP regMask = sourceInfo->info.getSrcCandidates(this);
- regMask &= ~RBM_NON_BYTE_REGS;
- assert(regMask != RBM_NONE);
- sourceInfo->info.setSrcCandidates(this, regMask);
- useList.Append(sourceInfo);
- info->srcCount++;
+ BuildUse(source, allByteRegs());
+ srcCount++;
}
else
#endif
{
- info->srcCount += GetOperandInfo(source);
+ srcCount += BuildOperandUses(source);
}
}
}
- info->srcCount += indirSrcCount;
-
#ifdef FEATURE_SIMD
- if (indirTree->TypeGet() == TYP_SIMD12)
- {
- // If indirTree is of TYP_SIMD12, addr is not contained. See comment in LowerIndir().
- assert(!indirTree->Addr()->isContained());
-
- // Vector3 is read/written as two reads/writes: 8 byte and 4 byte.
- // To assemble the vector properly we would need an additional
- // XMM register.
- info->internalFloatCount = 1;
-
- // In case of GT_IND we need an internal register different from targetReg and
- // both of the registers are used at the same time.
- if (indirTree->OperGet() == GT_IND)
- {
- info->isInternalRegDelayFree = true;
- }
-
- info->setInternalCandidates(this, allSIMDRegs());
-
- return;
- }
+ buildInternalRegisterUses();
#endif // FEATURE_SIMD
- assert(indirTree->Addr()->gtOper != GT_ARR_ELEM);
+ if (indirTree->gtOper != GT_STOREIND)
+ {
+ BuildDef(indirTree);
+ }
+ return srcCount;
}
//------------------------------------------------------------------------
@@ -2618,27 +2704,23 @@ void LinearScan::BuildIndir(GenTreeIndir* indirTree)
// tree - The node of interest
//
// Return Value:
-// None.
+// The number of sources consumed by this node.
//
-void LinearScan::BuildMul(GenTree* tree)
+int LinearScan::BuildMul(GenTree* tree)
{
- TreeNodeInfo* info = currentNodeInfo;
-#if defined(_TARGET_X86_)
- assert(tree->OperIs(GT_MUL, GT_MULHI, GT_MUL_LONG));
-#else
- assert(tree->OperIs(GT_MUL, GT_MULHI));
-#endif
- GenTree* op1 = tree->gtOp.gtOp1;
- GenTree* op2 = tree->gtOp.gtOp2;
- info->srcCount = appendBinaryLocationInfoToList(tree->AsOp());
- assert(info->dstCount == 1);
+ assert(tree->OperIsMul());
+ GenTree* op1 = tree->gtGetOp1();
+ GenTree* op2 = tree->gtGetOp2();
- // Case of float/double mul.
+ // Only non-floating-point multiplies have special register requirements.
if (varTypeIsFloating(tree->TypeGet()))
{
- return;
+ return BuildSimple(tree);
}
+ int srcCount = BuildBinaryUses(tree->AsOp());
+ regMaskTP dstCandidates = RBM_NONE;
+
bool isUnsignedMultiply = ((tree->gtFlags & GTF_UNSIGNED) != 0);
bool requiresOverflowCheck = tree->gtOverflowEx();
@@ -2666,19 +2748,19 @@ void LinearScan::BuildMul(GenTree* tree)
// Here we set RAX as the only destination candidate
// In LSRA we set the kill set for this operation to RBM_RAX|RBM_RDX
//
- info->setDstCandidates(this, RBM_RAX);
+ dstCandidates = RBM_RAX;
}
else if (tree->OperGet() == GT_MULHI)
{
// Have to use the encoding RDX:RAX = RAX * rm. Since we only care about the
// upper 32 bits of the result set the destination candidate to REG_RDX.
- info->setDstCandidates(this, RBM_RDX);
+ dstCandidates = RBM_RDX;
}
#if defined(_TARGET_X86_)
else if (tree->OperGet() == GT_MUL_LONG)
{
// Have to use the encoding RDX:RAX = RAX * rm.
- info->setDstCandidates(this, RBM_RAX);
+ dstCandidates = RBM_RAX;
}
#endif
GenTree* containedMemOp = nullptr;
@@ -2691,10 +2773,9 @@ void LinearScan::BuildMul(GenTree* tree)
{
containedMemOp = op2;
}
- if ((containedMemOp != nullptr) && CheckAndSetDelayFree(containedMemOp))
- {
- info->hasDelayFreeSrc = true;
- }
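+ // Widening multiplies write RDX:RAX; record those registers as killed when building the def.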
+ regMaskTP killMask = getKillSetForMul(tree->AsOp());
+ BuildDefsWithKills(tree, 1, dstCandidates, killMask);
+ return srcCount;
}
//------------------------------------------------------------------------------
@@ -2717,117 +2798,4 @@ void LinearScan::SetContainsAVXFlags(bool isFloatingPointType /* = true */, unsi
}
}
-#ifdef _TARGET_X86_
-//------------------------------------------------------------------------
-// ExcludeNonByteableRegisters: Determines if we need to exclude non-byteable registers for
-// various reasons
-//
-// Arguments:
-// tree - The node of interest
-//
-// Return Value:
-// If we need to exclude non-byteable registers
-//
-bool LinearScan::ExcludeNonByteableRegisters(GenTree* tree)
-{
- // Example1: GT_STOREIND(byte, addr, op2) - storeind of byte sized value from op2 into mem 'addr'
- // Storeind itself will not produce any value and hence dstCount=0. But op2 could be TYP_INT
- // value. In this case we need to exclude esi/edi from the src candidates of op2.
- if (varTypeIsByte(tree))
- {
- return true;
- }
- // Example2: GT_CAST(int <- bool <- int) - here type of GT_CAST node is int and castToType is bool.
- else if ((tree->OperGet() == GT_CAST) && varTypeIsByte(tree->CastToType()))
- {
- return true;
- }
- else if (tree->OperIsCompare() || tree->OperIs(GT_CMP))
- {
- GenTree* op1 = tree->gtGetOp1();
- GenTree* op2 = tree->gtGetOp2();
-
- // Example3: GT_EQ(int, op1 of type ubyte, op2 of type ubyte) - in this case codegen uses
- // ubyte as the result of comparison and if the result needs to be materialized into a reg
- // simply zero extend it to TYP_INT size. Here is an example of generated code:
- // cmp dl, byte ptr[addr mode]
- // movzx edx, dl
- if (varTypeIsByte(op1) && varTypeIsByte(op2))
- {
- return true;
- }
- // Example4: GT_EQ(int, op1 of type ubyte, op2 is GT_CNS_INT) - in this case codegen uses
- // ubyte as the result of the comparison and if the result needs to be materialized into a reg
- // simply zero extend it to TYP_INT size.
- else if (varTypeIsByte(op1) && op2->IsCnsIntOrI())
- {
- return true;
- }
- // Example4: GT_EQ(int, op1 is GT_CNS_INT, op2 of type ubyte) - in this case codegen uses
- // ubyte as the result of the comparison and if the result needs to be materialized into a reg
- // simply zero extend it to TYP_INT size.
- else if (op1->IsCnsIntOrI() && varTypeIsByte(op2))
- {
- return true;
- }
- else
- {
- return false;
- }
- }
-#ifdef FEATURE_SIMD
- else if (tree->OperGet() == GT_SIMD)
- {
- GenTreeSIMD* simdNode = tree->AsSIMD();
- switch (simdNode->gtSIMDIntrinsicID)
- {
- case SIMDIntrinsicOpEquality:
- case SIMDIntrinsicOpInEquality:
- // We manifest it into a byte register, so the target must be byteable.
- return true;
-
- case SIMDIntrinsicGetItem:
- {
- // This logic is duplicated from genSIMDIntrinsicGetItem().
- // When we generate code for a SIMDIntrinsicGetItem, under certain circumstances we need to
- // generate a movzx/movsx. On x86, these require byteable registers. So figure out which
- // cases will require this, so the non-byteable registers can be excluded.
-
- GenTree* op1 = simdNode->gtGetOp1();
- GenTree* op2 = simdNode->gtGetOp2();
- var_types baseType = simdNode->gtSIMDBaseType;
- if (!isContainableMemoryOp(op1) && op2->IsCnsIntOrI() && varTypeIsSmallInt(baseType))
- {
- bool ZeroOrSignExtnReqd = true;
- unsigned baseSize = genTypeSize(baseType);
- if (baseSize == 1)
- {
- if ((op2->gtIntCon.gtIconVal % 2) == 1)
- {
- ZeroOrSignExtnReqd = (baseType == TYP_BYTE);
- }
- }
- else
- {
- assert(baseSize == 2);
- ZeroOrSignExtnReqd = (baseType == TYP_SHORT);
- }
- return ZeroOrSignExtnReqd;
- }
- break;
- }
-
- default:
- break;
- }
- return false;
- }
-#endif // FEATURE_SIMD
- else
- {
- return false;
- }
-}
-#endif // _TARGET_X86_
-
#endif // _TARGET_XARCH_
diff --git a/src/jit/nodeinfo.h b/src/jit/nodeinfo.h
deleted file mode 100644
index d689037630..0000000000
--- a/src/jit/nodeinfo.h
+++ /dev/null
@@ -1,153 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-// See the LICENSE file in the project root for more information.
-
-#ifndef _NODEINFO_H_
-#define _NODEINFO_H_
-
-struct GenTree;
-
-class LinearScan;
-typedef unsigned int LsraLocation;
-
-class TreeNodeInfo
-{
-public:
- TreeNodeInfo()
- {
- _dstCount = 0;
- _srcCount = 0;
- _internalIntCount = 0;
- _internalFloatCount = 0;
-
- srcCandsIndex = 0;
- dstCandsIndex = 0;
- internalCandsIndex = 0;
- isLocalDefUse = false;
- isDelayFree = false;
- hasDelayFreeSrc = false;
- isTgtPref = false;
- isInternalRegDelayFree = false;
-#ifdef DEBUG
- isInitialized = false;
-#endif
- }
-
- // dst
- __declspec(property(put = setDstCount, get = getDstCount)) int dstCount;
- void setDstCount(int count)
- {
- assert(count <= MAX_RET_REG_COUNT);
- _dstCount = (char)count;
- }
- int getDstCount()
- {
- return _dstCount;
- }
-
- // src
- __declspec(property(put = setSrcCount, get = getSrcCount)) int srcCount;
- void setSrcCount(int count)
- {
- _srcCount = (char)count;
- assert(_srcCount == count);
- }
- int getSrcCount()
- {
- return _srcCount;
- }
-
- // internalInt
- __declspec(property(put = setInternalIntCount, get = getInternalIntCount)) int internalIntCount;
- void setInternalIntCount(int count)
- {
- _internalIntCount = (char)count;
- assert(_internalIntCount == count);
- }
- int getInternalIntCount()
- {
- return _internalIntCount;
- }
-
- // internalFloat
- __declspec(property(put = setInternalFloatCount, get = getInternalFloatCount)) int internalFloatCount;
- void setInternalFloatCount(int count)
- {
- _internalFloatCount = (char)count;
- assert(_internalFloatCount == count);
- }
- int getInternalFloatCount()
- {
- return _internalFloatCount;
- }
-
- // SrcCandidates are constraints of the consuming (parent) operation applied to this node
- // (i.e. what registers it is constrained to consume).
- regMaskTP getSrcCandidates(LinearScan* lsra);
- void setSrcCandidates(LinearScan* lsra, regMaskTP mask);
- // DstCandidates are constraints of this node (i.e. what registers it is constrained to produce).
- regMaskTP getDstCandidates(LinearScan* lsra);
- void setDstCandidates(LinearScan* lsra, regMaskTP mask);
- // InternalCandidates are constraints of the registers used as temps in the evaluation of this node.
- regMaskTP getInternalCandidates(LinearScan* lsra);
- void setInternalCandidates(LinearScan* lsra, regMaskTP mask);
- void addInternalCandidates(LinearScan* lsra, regMaskTP mask);
-
-public:
- unsigned char srcCandsIndex;
- unsigned char dstCandsIndex;
- unsigned char internalCandsIndex;
-
-private:
- unsigned char _srcCount : 5;
- unsigned char _dstCount : 3;
- unsigned char _internalIntCount : 3;
- unsigned char _internalFloatCount : 3;
-
-public:
- // isLocalDefUse identifies trees that produce a value that is not consumed elsewhere.
- // Examples include stack arguments to a call (they are immediately stored), lhs of comma
- // nodes, or top-level nodes that are non-void.
- unsigned char isLocalDefUse : 1;
-
- // isDelayFree is set when the register defined by this node will interfere with the destination
- // of the consuming node, and therefore it must not be freed immediately after use.
- unsigned char isDelayFree : 1;
-
- // hasDelayFreeSrc is set when this node has sources that are marked "isDelayFree". This is because,
- // we may eventually "contain" this node, in which case we don't want it's children (which have
- // already been marked "isDelayFree" to be handled that way when allocating.
- unsigned char hasDelayFreeSrc : 1;
-
- // isTgtPref is set to true when we have a rmw op, where we would like the result to be allocated
- // in the same register as op1.
- unsigned char isTgtPref : 1;
-
- // Whether internal register needs to be different from targetReg
- // in which result is produced.
- unsigned char isInternalRegDelayFree : 1;
-
-#ifdef DEBUG
- // isInitialized is set when the tree node is handled.
- unsigned char isInitialized : 1;
-#endif
-
-public:
- // Initializes the TreeNodeInfo value with the given values.
- void Initialize(LinearScan* lsra, GenTree* node);
-
-#ifdef DEBUG
- void dump(LinearScan* lsra);
-
- // This method checks to see whether the information has been initialized,
- // and is in a consistent state
- bool IsValid(LinearScan* lsra)
- {
- return (isInitialized &&
- ((getSrcCandidates(lsra) | getInternalCandidates(lsra) | getDstCandidates(lsra)) &
- ~(RBM_ALLFLOAT | RBM_ALLINT)) == 0);
- }
-#endif // DEBUG
-};
-
-#endif // _NODEINFO_H_