author     Carol Eidt <carol.eidt@microsoft.com>  2018-01-28 16:20:51 -0800
committer  Carol Eidt <carol.eidt@microsoft.com>  2018-02-01 14:12:14 -0800
commit     6b0f3f88535968aaeddcf3da950fe5988ed3d459 (patch)
tree       2cdd92831047cd1a7a75ce14cb6b4f8a47e1e281 /src
parent     01c93b958e67a3e02d00be287cb2e15e7e9ea3e3 (diff)
Refactor RefPosition and Interval Building
Move code for building `RefPosition`s and `Interval`s out of lsra.cpp into lsrabuild.cpp. Also, move common code from lsraarm*.cpp and lsraxarch.cpp to lsrabuild.cpp. Maintain the `ListNodePool` on the `LinearScan` object to be used by all the building methods. Rename `TreeNodeInfoInit` methods to `Build`, to more accurately reflect the next round of changes where they will build the `RefPosition`s directly.
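A rough sketch of the direction of the rename (hypothetical, simplified signatures; the real methods live on `LinearScan` and take the JIT's own types):

    // Before: per-target lsra*.cpp files filled in a TreeNodeInfo that
    // lsra.cpp later turned into RefPositions and Intervals.
    void TreeNodeInfoInit(GenTree* tree, TreeNodeInfo* info);

    // After this change the methods are named Build*; in the next round of
    // changes they will create the RefPositions directly, with no
    // intermediate TreeNodeInfo.
    void BuildNode(GenTree* tree);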
Diffstat (limited to 'src')
-rw-r--r--  src/jit/CMakeLists.txt  |    1
-rw-r--r--  src/jit/compiler.hpp    |   33
-rw-r--r--  src/jit/lowerxarch.cpp  |    4
-rw-r--r--  src/jit/lsra.cpp        | 2688
-rw-r--r--  src/jit/lsra.h          |  164
-rw-r--r--  src/jit/lsraarm.cpp     |  134
-rw-r--r--  src/jit/lsraarm64.cpp   |  121
-rw-r--r--  src/jit/lsraarmarch.cpp |  249
-rw-r--r--  src/jit/lsrabuild.cpp   | 3205
-rw-r--r--  src/jit/lsraxarch.cpp   |  504
10 files changed, 3522 insertions, 3581 deletions
diff --git a/src/jit/CMakeLists.txt b/src/jit/CMakeLists.txt
index 74380e66d4..f4626ec562 100644
--- a/src/jit/CMakeLists.txt
+++ b/src/jit/CMakeLists.txt
@@ -53,6 +53,7 @@ set( JIT_SOURCES
loopcloning.cpp
lower.cpp
lsra.cpp
+ lsrabuild.cpp
morph.cpp
objectalloc.cpp
optcse.cpp
diff --git a/src/jit/compiler.hpp b/src/jit/compiler.hpp
index 5a2a2342e8..f905e73587 100644
--- a/src/jit/compiler.hpp
+++ b/src/jit/compiler.hpp
@@ -150,9 +150,9 @@ unsigned __int64 genFindHighestBit(unsigned __int64 mask)
#endif // 0
/*****************************************************************************
- *
- * Return true if the given 64-bit value has exactly zero or one bits set.
- */
+*
+* Return true if the given 64-bit value has exactly zero or one bits set.
+*/
template <typename T>
inline BOOL genMaxOneBit(T value)
@@ -161,9 +161,9 @@ inline BOOL genMaxOneBit(T value)
}
/*****************************************************************************
- *
- * Return true if the given 32-bit value has exactly zero or one bits set.
- */
+*
+* Return true if the given 32-bit value has exactly zero or one bits set.
+*/
inline BOOL genMaxOneBit(unsigned value)
{
@@ -171,6 +171,27 @@ inline BOOL genMaxOneBit(unsigned value)
}
/*****************************************************************************
+*
+* Return true if the given 64-bit value has exactly one bit set.
+*/
+
+template <typename T>
+inline BOOL genExactlyOneBit(T value)
+{
+ return ((value != 0) && genMaxOneBit(value));
+}
+
+/*****************************************************************************
+*
+* Return true if the given 32-bit value has exactly one bit set.
+*/
+
+inline BOOL genExactlyOneBit(unsigned value)
+{
+ return ((value != 0) && genMaxOneBit(value));
+}
+
+/*****************************************************************************
*
* Given a value that has exactly one bit set, return the position of that
* bit, in other words return the logarithm in base 2 of the given value.
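The new `genExactlyOneBit` helpers build on `genMaxOneBit`, whose body falls outside this hunk. A minimal standalone sketch of the pair, assuming `genMaxOneBit` uses the usual clear-the-lowest-set-bit test:

    #include <cassert>

    template <typename T>
    bool maxOneBit(T value) // true if zero or one bits are set
    {
        // Clearing the lowest set bit leaves zero iff at most one bit was set.
        return (value & (value - 1)) == 0;
    }

    template <typename T>
    bool exactlyOneBit(T value) // true iff value is a power of two
    {
        return (value != 0) && maxOneBit(value);
    }

    int main()
    {
        assert(maxOneBit(0u) && maxOneBit(8u) && !maxOneBit(6u));
        assert(!exactlyOneBit(0u) && exactlyOneBit(8u) && !exactlyOneBit(6u));
        return 0;
    }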
diff --git a/src/jit/lowerxarch.cpp b/src/jit/lowerxarch.cpp
index 70782f19fc..d212d86163 100644
--- a/src/jit/lowerxarch.cpp
+++ b/src/jit/lowerxarch.cpp
@@ -1500,8 +1500,8 @@ void Lowering::ContainCheckIndir(GenTreeIndir* node)
//
// Workaround:
// Note that LowerVirtualStubCall() sets addr->gtRegNum to VirtualStubParam.reg and Lowering::doPhase()
- // sets destination candidates on such nodes and resets addr->gtRegNum to REG_NA before calling
- // TreeNodeInfoInit(). Ideally we should set a flag on addr nodes that shouldn't be marked as contained
+ // sets destination candidates on such nodes and resets addr->gtRegNum to REG_NA.
+ // Ideally we should set a flag on addr nodes that shouldn't be marked as contained
// (in LowerVirtualStubCall()), but we don't have any GTF_* flags left for that purpose. As a workaround
// an explicit check is made here.
//
diff --git a/src/jit/lsra.cpp b/src/jit/lsra.cpp
index 641a9a1d11..07ce5503cb 100644
--- a/src/jit/lsra.cpp
+++ b/src/jit/lsra.cpp
@@ -333,36 +333,6 @@ regMaskTP LinearScan::internalFloatRegCandidates()
}
/*****************************************************************************
- * Register types
- *****************************************************************************/
-template <class T>
-RegisterType regType(T type)
-{
-#ifdef FEATURE_SIMD
- if (varTypeIsSIMD(type))
- {
- return FloatRegisterType;
- }
-#endif // FEATURE_SIMD
- return varTypeIsFloating(TypeGet(type)) ? FloatRegisterType : IntRegisterType;
-}
-
-bool useFloatReg(var_types type)
-{
- return (regType(type) == FloatRegisterType);
-}
-
-bool registerTypesEquivalent(RegisterType a, RegisterType b)
-{
- return varTypeIsIntegralOrI(a) == varTypeIsIntegralOrI(b);
-}
-
-bool isSingleRegister(regMaskTP regMask)
-{
- return (regMask != RBM_NONE && genMaxOneBit(regMask));
-}
-
-/*****************************************************************************
* Inline functions for RegRecord
*****************************************************************************/
@@ -470,198 +440,6 @@ regMaskTP LinearScan::stressLimitRegs(RefPosition* refPosition, regMaskTP mask)
}
#endif // DEBUG
-// TODO-Cleanup: Consider adding an overload that takes a varDsc, and can appropriately
-// set such fields as isStructField
-
-Interval* LinearScan::newInterval(RegisterType theRegisterType)
-{
- intervals.emplace_back(theRegisterType, allRegs(theRegisterType));
- Interval* newInt = &intervals.back();
-
-#ifdef DEBUG
- newInt->intervalIndex = static_cast<unsigned>(intervals.size() - 1);
-#endif // DEBUG
-
- DBEXEC(VERBOSE, newInt->dump());
- return newInt;
-}
-
-RefPosition* LinearScan::newRefPositionRaw(LsraLocation nodeLocation, GenTree* treeNode, RefType refType)
-{
- refPositions.emplace_back(curBBNum, nodeLocation, treeNode, refType);
- RefPosition* newRP = &refPositions.back();
-#ifdef DEBUG
- newRP->rpNum = static_cast<unsigned>(refPositions.size() - 1);
-#endif // DEBUG
- return newRP;
-}
-
-//------------------------------------------------------------------------
-// resolveConflictingDefAndUse: Resolve the situation where we have conflicting def and use
-// register requirements on a single-def, single-use interval.
-//
-// Arguments:
-// defRefPosition - The interval definition
-// useRefPosition - The (sole) interval use
-//
-// Return Value:
-// None.
-//
-// Assumptions:
-// The two RefPositions are for the same interval, which is a tree-temp.
-//
-// Notes:
-// We require some special handling for the case where the use is a "delayRegFree" case of a fixedReg.
-// In that case, if we change the registerAssignment on the useRefPosition, we will lose the fact that,
-// even if we assign a different register (and rely on codegen to do the copy), that fixedReg also needs
-// to remain busy until the Def register has been allocated. In that case, we don't allow Case 1 or Case 4
-// below.
-// Here are the cases we consider (in this order):
-// 1. If The defRefPosition specifies a single register, and there are no conflicting
-// FixedReg uses of it between the def and use, we use that register, and the code generator
-// will insert the copy. Note that it cannot be in use because there is a FixedRegRef for the def.
-// 2. If the useRefPosition specifies a single register, and it is not in use, and there are no
-// conflicting FixedReg uses of it between the def and use, we use that register, and the code generator
-// will insert the copy.
-// 3. If the defRefPosition specifies a single register (but there are conflicts, as determined
-// in 1.), and there are no conflicts with the useRefPosition register (if it's a single register),
-// we set the register requirements on the defRefPosition to the use registers, and the
-// code generator will insert a copy on the def. We can't rely on the code generator to put a copy
-// on the use if it has multiple possible candidates, as it won't know which one has been allocated.
-// 4. If the useRefPosition specifies a single register, and there are no conflicts with the register
-// on the defRefPosition, we leave the register requirements on the defRefPosition as-is, and set
-// the useRefPosition to the def registers, for similar reasons to case #3.
-// 5. If both the defRefPosition and the useRefPosition specify single registers, but both have conflicts,
- // We set the candidates on defRefPosition to be all regs of the appropriate type, and since they are
-// single registers, codegen can insert the copy.
-// 6. Finally, if the RefPositions specify disjoint subsets of the registers (or the use is fixed but
-// has a conflict), we must insert a copy. The copy will be inserted before the use if the
-// use is not fixed (in the fixed case, the code generator will insert the use).
-//
-// TODO-CQ: We get bad register allocation in case #3 in the situation where no register is
-// available for the lifetime. We end up allocating a register that must be spilled, and it probably
-// won't be the register that is actually defined by the target instruction. So, we have to copy it
-// and THEN spill it. In this case, we should be using the def requirement. But we need to change
-// the interface to this method a bit to make that work (e.g. returning a candidate set to use, but
-// leaving the registerAssignment as-is on the def, so that if we find that we need to spill anyway
-// we can use the fixed-reg on the def.
-//
-
-void LinearScan::resolveConflictingDefAndUse(Interval* interval, RefPosition* defRefPosition)
-{
- assert(!interval->isLocalVar);
-
- RefPosition* useRefPosition = defRefPosition->nextRefPosition;
- regMaskTP defRegAssignment = defRefPosition->registerAssignment;
- regMaskTP useRegAssignment = useRefPosition->registerAssignment;
- RegRecord* defRegRecord = nullptr;
- RegRecord* useRegRecord = nullptr;
- regNumber defReg = REG_NA;
- regNumber useReg = REG_NA;
- bool defRegConflict = false;
- bool useRegConflict = false;
-
- // If the useRefPosition is a "delayRegFree", we can't change the registerAssignment
- // on it, or we will fail to ensure that the fixedReg is busy at the time the target
- // (of the node that uses this interval) is allocated.
- bool canChangeUseAssignment = !useRefPosition->isFixedRegRef || !useRefPosition->delayRegFree;
-
- INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_DEFUSE_CONFLICT));
- if (!canChangeUseAssignment)
- {
- INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_DEFUSE_FIXED_DELAY_USE));
- }
- if (defRefPosition->isFixedRegRef)
- {
- defReg = defRefPosition->assignedReg();
- defRegRecord = getRegisterRecord(defReg);
- if (canChangeUseAssignment)
- {
- RefPosition* currFixedRegRefPosition = defRegRecord->recentRefPosition;
- assert(currFixedRegRefPosition != nullptr &&
- currFixedRegRefPosition->nodeLocation == defRefPosition->nodeLocation);
-
- if (currFixedRegRefPosition->nextRefPosition == nullptr ||
- currFixedRegRefPosition->nextRefPosition->nodeLocation > useRefPosition->getRefEndLocation())
- {
- // This is case #1. Use the defRegAssignment
- INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_DEFUSE_CASE1));
- useRefPosition->registerAssignment = defRegAssignment;
- return;
- }
- else
- {
- defRegConflict = true;
- }
- }
- }
- if (useRefPosition->isFixedRegRef)
- {
- useReg = useRefPosition->assignedReg();
- useRegRecord = getRegisterRecord(useReg);
- RefPosition* currFixedRegRefPosition = useRegRecord->recentRefPosition;
-
- // We know that useRefPosition is a fixed use, so the nextRefPosition must not be null.
- RefPosition* nextFixedRegRefPosition = useRegRecord->getNextRefPosition();
- assert(nextFixedRegRefPosition != nullptr &&
- nextFixedRegRefPosition->nodeLocation <= useRefPosition->nodeLocation);
-
- // First, check to see if there are any conflicting FixedReg references between the def and use.
- if (nextFixedRegRefPosition->nodeLocation == useRefPosition->nodeLocation)
- {
- // OK, no conflicting FixedReg references.
- // Now, check to see whether it is currently in use.
- if (useRegRecord->assignedInterval != nullptr)
- {
- RefPosition* possiblyConflictingRef = useRegRecord->assignedInterval->recentRefPosition;
- LsraLocation possiblyConflictingRefLocation = possiblyConflictingRef->getRefEndLocation();
- if (possiblyConflictingRefLocation >= defRefPosition->nodeLocation)
- {
- useRegConflict = true;
- }
- }
- if (!useRegConflict)
- {
- // This is case #2. Use the useRegAssignment
- INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_DEFUSE_CASE2));
- defRefPosition->registerAssignment = useRegAssignment;
- return;
- }
- }
- else
- {
- useRegConflict = true;
- }
- }
- if (defRegRecord != nullptr && !useRegConflict)
- {
- // This is case #3.
- INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_DEFUSE_CASE3));
- defRefPosition->registerAssignment = useRegAssignment;
- return;
- }
- if (useRegRecord != nullptr && !defRegConflict && canChangeUseAssignment)
- {
- // This is case #4.
- INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_DEFUSE_CASE4));
- useRefPosition->registerAssignment = defRegAssignment;
- return;
- }
- if (defRegRecord != nullptr && useRegRecord != nullptr)
- {
- // This is case #5.
- INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_DEFUSE_CASE5));
- RegisterType regType = interval->registerType;
- assert((getRegisterType(interval, defRefPosition) == regType) &&
- (getRegisterType(interval, useRefPosition) == regType));
- regMaskTP candidates = allRegs(regType);
- defRefPosition->registerAssignment = candidates;
- return;
- }
- INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_DEFUSE_CASE6));
- return;
-}
-
//------------------------------------------------------------------------
// conflictingFixedRegReference: Determine whether the current RegRecord has a
// fixed register use that conflicts with 'refPosition'
@@ -708,242 +486,6 @@ bool RegRecord::conflictingFixedRegReference(RefPosition* refPosition)
return false;
}
-void LinearScan::applyCalleeSaveHeuristics(RefPosition* rp)
-{
-#ifdef _TARGET_AMD64_
- if (compiler->opts.compDbgEnC)
- {
- // We only use RSI and RDI for EnC code, so we don't want to favor callee-save regs.
- return;
- }
-#endif // _TARGET_AMD64_
-
- Interval* theInterval = rp->getInterval();
-
-#ifdef DEBUG
- if (!doReverseCallerCallee())
-#endif // DEBUG
- {
- // Set preferences so that this register set will be preferred for earlier refs
- theInterval->updateRegisterPreferences(rp->registerAssignment);
- }
-}
-
-//------------------------------------------------------------------------
-// checkConflictingDefUse: Ensure that we have consistent def/use on SDSU temps.
-//
-// Arguments:
-// useRP - The use RefPosition of a tree temp (SDSU Interval)
-//
-// Notes:
-// There are a couple of cases where this may over-constrain allocation:
-// 1. In the case of a non-commutative rmw def (in which the rmw source must be delay-free), or
-// 2. In the case where the defining node requires a temp distinct from the target (also a
-// delay-free case).
-// In those cases, if we propagate a single-register restriction from the consumer to the producer
-// the delayed uses will not see a fixed reference in the PhysReg at that position, and may
-// incorrectly allocate that register.
-// TODO-CQ: This means that we may often require a copy at the use of this node's result.
-// This case could be moved to BuildRefPositionsForNode, at the point where the def RefPosition is
-// created, causing a RefTypeFixedRef to be added at that location. This, however, results in
-// more PhysReg RefPositions (a throughput impact), and a large number of diffs that require
-// further analysis to determine benefit.
-// See Issue #11274.
-//
-void LinearScan::checkConflictingDefUse(RefPosition* useRP)
-{
- assert(useRP->refType == RefTypeUse);
- Interval* theInterval = useRP->getInterval();
- assert(!theInterval->isLocalVar);
-
- RefPosition* defRP = theInterval->firstRefPosition;
-
- // All defs must have a valid treeNode, but we check it below to be conservative.
- assert(defRP->treeNode != nullptr);
- regMaskTP prevAssignment = defRP->registerAssignment;
- regMaskTP newAssignment = (prevAssignment & useRP->registerAssignment);
- if (newAssignment != RBM_NONE)
- {
- if (!isSingleRegister(newAssignment) || !theInterval->hasInterferingUses)
- {
- defRP->registerAssignment = newAssignment;
- }
- }
- else
- {
- theInterval->hasConflictingDefUse = true;
- }
-}
-
-void LinearScan::associateRefPosWithInterval(RefPosition* rp)
-{
- Referenceable* theReferent = rp->referent;
-
- if (theReferent != nullptr)
- {
- // All RefPositions except the dummy ones at the beginning of blocks
-
- if (rp->isIntervalRef())
- {
- Interval* theInterval = rp->getInterval();
-
- applyCalleeSaveHeuristics(rp);
-
- if (theInterval->isLocalVar)
- {
- if (RefTypeIsUse(rp->refType))
- {
- RefPosition* const prevRP = theInterval->recentRefPosition;
- if ((prevRP != nullptr) && (prevRP->bbNum == rp->bbNum))
- {
- prevRP->lastUse = false;
- }
- }
-
- rp->lastUse = (rp->refType != RefTypeExpUse) && (rp->refType != RefTypeParamDef) &&
- (rp->refType != RefTypeZeroInit) && !extendLifetimes();
- }
- else if (rp->refType == RefTypeUse)
- {
- checkConflictingDefUse(rp);
- rp->lastUse = true;
- }
- }
-
- RefPosition* prevRP = theReferent->recentRefPosition;
- if (prevRP != nullptr)
- {
- prevRP->nextRefPosition = rp;
- }
- else
- {
- theReferent->firstRefPosition = rp;
- }
- theReferent->recentRefPosition = rp;
- theReferent->lastRefPosition = rp;
- }
- else
- {
- assert((rp->refType == RefTypeBB) || (rp->refType == RefTypeKillGCRefs));
- }
-}
-
-//---------------------------------------------------------------------------
-// newRefPosition: allocate and initialize a new RefPosition.
-//
-// Arguments:
-// reg - reg number that identifies RegRecord to be associated
-// with this RefPosition
-// theLocation - LSRA location of RefPosition
-// theRefType - RefPosition type
-// theTreeNode - GenTree node for which this RefPosition is created
-// mask - Set of valid registers for this RefPosition
-// multiRegIdx - register position if this RefPosition corresponds to a
-// multi-reg call node.
-//
-// Return Value:
-// a new RefPosition
-//
-RefPosition* LinearScan::newRefPosition(
- regNumber reg, LsraLocation theLocation, RefType theRefType, GenTree* theTreeNode, regMaskTP mask)
-{
- RefPosition* newRP = newRefPositionRaw(theLocation, theTreeNode, theRefType);
-
- newRP->setReg(getRegisterRecord(reg));
- newRP->registerAssignment = mask;
-
- newRP->setMultiRegIdx(0);
- newRP->setAllocateIfProfitable(false);
-
- associateRefPosWithInterval(newRP);
-
- DBEXEC(VERBOSE, newRP->dump());
- return newRP;
-}
-
-//---------------------------------------------------------------------------
-// newRefPosition: allocate and initialize a new RefPosition.
-//
-// Arguments:
-// theInterval - interval to which RefPosition is associated with.
-// theLocation - LSRA location of RefPosition
-// theRefType - RefPosition type
-// theTreeNode - GenTree node for which this RefPosition is created
-// mask - Set of valid registers for this RefPosition
-// multiRegIdx - register position if this RefPosition corresponds to a
-// multi-reg call node.
-//
-// Return Value:
-// a new RefPosition
-//
-RefPosition* LinearScan::newRefPosition(Interval* theInterval,
- LsraLocation theLocation,
- RefType theRefType,
- GenTree* theTreeNode,
- regMaskTP mask,
- unsigned multiRegIdx /* = 0 */)
-{
-#ifdef DEBUG
- if (theInterval != nullptr && regType(theInterval->registerType) == FloatRegisterType)
- {
- // In the case we're using floating point registers we must make sure
- // this flag was set previously in the compiler since this will mandate
- // whether LSRA will take into consideration FP reg killsets.
- assert(compiler->compFloatingPointUsed || ((mask & RBM_FLT_CALLEE_SAVED) == 0));
- }
-#endif // DEBUG
-
- // If this reference is constrained to a single register (and it's not a dummy
- // or Kill reftype already), add a RefTypeFixedReg at this location so that its
- // availability can be more accurately determined
-
- bool isFixedRegister = isSingleRegister(mask);
- bool insertFixedRef = false;
- if (isFixedRegister)
- {
- // Insert a RefTypeFixedReg for any normal def or use (not ParamDef or BB)
- if (theRefType == RefTypeUse || theRefType == RefTypeDef)
- {
- insertFixedRef = true;
- }
- }
-
- if (insertFixedRef)
- {
- regNumber physicalReg = genRegNumFromMask(mask);
- RefPosition* pos = newRefPosition(physicalReg, theLocation, RefTypeFixedReg, nullptr, mask);
- assert(theInterval != nullptr);
- assert((allRegs(theInterval->registerType) & mask) != 0);
- }
-
- RefPosition* newRP = newRefPositionRaw(theLocation, theTreeNode, theRefType);
-
- newRP->setInterval(theInterval);
-
- // Spill info
- newRP->isFixedRegRef = isFixedRegister;
-
-#ifndef _TARGET_AMD64_
- // We don't need this for AMD64 because the PInvoke method epilog code is explicit
- // at register allocation time.
- if (theInterval != nullptr && theInterval->isLocalVar && compiler->info.compCallUnmanaged &&
- theInterval->varNum == compiler->genReturnLocal)
- {
- mask &= ~(RBM_PINVOKE_TCB | RBM_PINVOKE_FRAME);
- noway_assert(mask != RBM_NONE);
- }
-#endif // !_TARGET_AMD64_
- newRP->registerAssignment = mask;
-
- newRP->setMultiRegIdx(multiRegIdx);
- newRP->setAllocateIfProfitable(false);
-
- associateRefPosWithInterval(newRP);
-
- DBEXEC(VERBOSE, newRP->dump());
- return newRP;
-}
-
/*****************************************************************************
* Inline functions for Interval
*****************************************************************************/
@@ -1122,6 +664,7 @@ LinearScan::LinearScan(Compiler* theCompiler)
#endif // MEASURE_MEM_ALLOC
, intervals(LinearScanMemoryAllocatorInterval(theCompiler))
, refPositions(LinearScanMemoryAllocatorRefPosition(theCompiler))
+ , listNodePool(theCompiler)
{
#ifdef DEBUG
maxNodeLocation = 0;
@@ -1949,42 +1492,6 @@ void LinearScan::identifyCandidatesExceptionDataflow()
}
}
-//------------------------------------------------------------------------
-// IsContainableMemoryOp: Checks whether this is a memory op that can be contained.
-//
-// Arguments:
-// node - the node of interest.
-//
-// Return value:
-// True if this will definitely be a memory reference that could be contained.
-//
-// Notes:
-// This differs from the isMemoryOp() method on GenTree because it checks for
-// the case of doNotEnregister local. This won't include locals that
-// for some other reason do not become register candidates, nor those that get
-// spilled.
-// Also, because we usually call this before we redo dataflow, any new lclVars
-// introduced after the last dataflow analysis will not yet be marked lvTracked,
-// so we don't use that.
-//
-bool LinearScan::isContainableMemoryOp(GenTree* node)
-{
- if (node->isMemoryOp())
- {
- return true;
- }
- if (node->IsLocal())
- {
- if (!enregisterLocalVars)
- {
- return true;
- }
- LclVarDsc* varDsc = &compiler->lvaTable[node->AsLclVar()->gtLclNum];
- return varDsc->lvDoNotEnregister;
- }
- return false;
-}
-
bool LinearScan::isRegCandidate(LclVarDsc* varDsc)
{
// We shouldn't be called if opt settings do not permit register variables.
@@ -2618,24 +2125,6 @@ VarToRegMap LinearScan::setInVarToRegMap(unsigned int bbNum, VarToRegMap srcVarT
return inVarToRegMap;
}
-// given a tree node
-RefType refTypeForLocalRefNode(GenTree* node)
-{
- assert(node->IsLocal());
-
- // We don't support updates
- assert((node->gtFlags & GTF_VAR_USEASG) == 0);
-
- if (node->gtFlags & GTF_VAR_DEF)
- {
- return RefTypeDef;
- }
- else
- {
- return RefTypeUse;
- }
-}
-
//------------------------------------------------------------------------
// checkLastUses: Check correctness of last use flags
//
@@ -2776,1486 +2265,6 @@ void LinearScan::checkLastUses(BasicBlock* block)
}
#endif // DEBUG
-void LinearScan::addRefsForPhysRegMask(regMaskTP mask, LsraLocation currentLoc, RefType refType, bool isLastUse)
-{
- for (regNumber reg = REG_FIRST; mask; reg = REG_NEXT(reg), mask >>= 1)
- {
- if (mask & 1)
- {
- // This assumes that these are all "special" RefTypes that
- // don't need to be recorded on the tree (hence treeNode is nullptr)
- RefPosition* pos = newRefPosition(reg, currentLoc, refType, nullptr,
- genRegMask(reg)); // This MUST occupy the physical register (obviously)
-
- if (isLastUse)
- {
- pos->lastUse = true;
- }
- }
- }
-}
-
-//------------------------------------------------------------------------
-// getKillSetForStoreInd: Determine the liveness kill set for a GT_STOREIND node.
-// If the GT_STOREIND will generate a write barrier, determine the specific kill
-// set required by the case-specific, platform-specific write barrier. If no
-// write barrier is required, the kill set will be RBM_NONE.
-//
-// Arguments:
-// tree - the GT_STOREIND node
-//
-// Return Value: a register mask of the registers killed
-//
-regMaskTP LinearScan::getKillSetForStoreInd(GenTreeStoreInd* tree)
-{
- assert(tree->OperIs(GT_STOREIND));
-
- regMaskTP killMask = RBM_NONE;
-
- GenTree* data = tree->Data();
-
- GCInfo::WriteBarrierForm writeBarrierForm = compiler->codeGen->gcInfo.gcIsWriteBarrierCandidate(tree, data);
- if (writeBarrierForm != GCInfo::WBF_NoBarrier)
- {
- if (compiler->codeGen->genUseOptimizedWriteBarriers(writeBarrierForm))
- {
- // We can't determine the exact helper to be used at this point, because it depends on
- // the allocated register for the `data` operand. However, all the (x86) optimized
- // helpers have the same kill set: EDX.
- killMask = RBM_CALLEE_TRASH_NOGC;
- }
- else
- {
- // Figure out which helper we're going to use, and then get the kill set for that helper.
- CorInfoHelpFunc helper =
- compiler->codeGen->genWriteBarrierHelperForWriteBarrierForm(tree, writeBarrierForm);
- killMask = compiler->compHelperCallKillSet(helper);
- }
- }
-
- return killMask;
-}
-
-//------------------------------------------------------------------------
-// getKillSetForNode: Return the registers killed by the given tree node.
-//
-// Arguments:
-// compiler - the compiler context to use
-// tree - the tree for which the kill set is needed.
-//
-// Return Value: a register mask of the registers killed
-//
-regMaskTP LinearScan::getKillSetForNode(GenTree* tree)
-{
- regMaskTP killMask = RBM_NONE;
- switch (tree->OperGet())
- {
-#ifdef _TARGET_XARCH_
- case GT_MUL:
- // We use the 128-bit multiply when performing an overflow checking unsigned multiply
- //
- if (((tree->gtFlags & GTF_UNSIGNED) != 0) && tree->gtOverflowEx())
- {
- // Both RAX and RDX are killed by the operation
- killMask = RBM_RAX | RBM_RDX;
- }
- break;
-
- case GT_MULHI:
-#if defined(_TARGET_X86_) && !defined(LEGACY_BACKEND)
- case GT_MUL_LONG:
-#endif
- killMask = RBM_RAX | RBM_RDX;
- break;
-
- case GT_MOD:
- case GT_DIV:
- case GT_UMOD:
- case GT_UDIV:
- if (!varTypeIsFloating(tree->TypeGet()))
- {
- // Both RAX and RDX are killed by the operation
- killMask = RBM_RAX | RBM_RDX;
- }
- break;
-#endif // _TARGET_XARCH_
-
- case GT_STORE_OBJ:
- if (tree->OperIsCopyBlkOp())
- {
- assert(tree->AsObj()->gtGcPtrCount != 0);
- killMask = compiler->compHelperCallKillSet(CORINFO_HELP_ASSIGN_BYREF);
- break;
- }
- __fallthrough;
-
- case GT_STORE_BLK:
- case GT_STORE_DYN_BLK:
- {
- GenTreeBlk* blkNode = tree->AsBlk();
- bool isCopyBlk = varTypeIsStruct(blkNode->Data());
- switch (blkNode->gtBlkOpKind)
- {
- case GenTreeBlk::BlkOpKindHelper:
- if (isCopyBlk)
- {
- killMask = compiler->compHelperCallKillSet(CORINFO_HELP_MEMCPY);
- }
- else
- {
- killMask = compiler->compHelperCallKillSet(CORINFO_HELP_MEMSET);
- }
- break;
-
-#ifdef _TARGET_XARCH_
- case GenTreeBlk::BlkOpKindRepInstr:
- if (isCopyBlk)
- {
- // rep movs kills RCX, RDI and RSI
- killMask = RBM_RCX | RBM_RDI | RBM_RSI;
- }
- else
- {
- // rep stos kills RCX and RDI.
- // (Note that the Data() node, if not constant, will be assigned to
- // RCX, but it's fine that this kills it, as the value is not available
- // after this node in any case.)
- killMask = RBM_RDI | RBM_RCX;
- }
- break;
-#else
- case GenTreeBlk::BlkOpKindRepInstr:
-#endif
- case GenTreeBlk::BlkOpKindUnroll:
- case GenTreeBlk::BlkOpKindInvalid:
- // for these 'gtBlkOpKind' kinds, we leave 'killMask' = RBM_NONE
- break;
- }
- }
- break;
-
- case GT_RETURNTRAP:
- killMask = compiler->compHelperCallKillSet(CORINFO_HELP_STOP_FOR_GC);
- break;
- case GT_CALL:
-#ifdef _TARGET_X86_
- if (compiler->compFloatingPointUsed)
- {
- if (tree->TypeGet() == TYP_DOUBLE)
- {
- needDoubleTmpForFPCall = true;
- }
- else if (tree->TypeGet() == TYP_FLOAT)
- {
- needFloatTmpForFPCall = true;
- }
- }
-#endif // _TARGET_X86_
-#if defined(_TARGET_X86_) || defined(_TARGET_ARM_)
- if (tree->IsHelperCall())
- {
- GenTreeCall* call = tree->AsCall();
- CorInfoHelpFunc helpFunc = compiler->eeGetHelperNum(call->gtCallMethHnd);
- killMask = compiler->compHelperCallKillSet(helpFunc);
- }
- else
-#endif // defined(_TARGET_X86_) || defined(_TARGET_ARM_)
- {
- // if there is no FP used, we can ignore the FP kills
- if (compiler->compFloatingPointUsed)
- {
- killMask = RBM_CALLEE_TRASH;
- }
- else
- {
- killMask = RBM_INT_CALLEE_TRASH;
- }
-#ifdef _TARGET_ARM_
- if (tree->AsCall()->IsVirtualStub())
- {
- killMask |= compiler->virtualStubParamInfo->GetRegMask();
- }
-#else // !_TARGET_ARM_
- // Verify that the special virtual stub call registers are in the kill mask.
- // We don't just add them unconditionally to the killMask because for most architectures
- // they are already in the RBM_CALLEE_TRASH set,
- // and we don't want to introduce extra checks and calls in this hot function.
- assert(!tree->AsCall()->IsVirtualStub() || ((killMask & compiler->virtualStubParamInfo->GetRegMask()) ==
- compiler->virtualStubParamInfo->GetRegMask()));
-#endif
- }
- break;
- case GT_STOREIND:
- killMask = getKillSetForStoreInd(tree->AsStoreInd());
- break;
-
-#if defined(PROFILING_SUPPORTED)
- // If this method requires profiler ELT hook then mark these nodes as killing
- // callee trash registers (excluding RAX and XMM0). The reason for this is that
- // profiler callback would trash these registers. See vm\amd64\asmhelpers.asm for
- // more details.
- case GT_RETURN:
- if (compiler->compIsProfilerHookNeeded())
- {
- killMask = compiler->compHelperCallKillSet(CORINFO_HELP_PROF_FCN_LEAVE);
- }
- break;
-
- case GT_PROF_HOOK:
- if (compiler->compIsProfilerHookNeeded())
- {
- killMask = compiler->compHelperCallKillSet(CORINFO_HELP_PROF_FCN_TAILCALL);
- }
- break;
-#endif // PROFILING_SUPPORTED
-
- default:
- // for all other 'tree->OperGet()' kinds, leave 'killMask' = RBM_NONE
- break;
- }
- return killMask;
-}
-
-//------------------------------------------------------------------------
-// buildKillPositionsForNode:
-// Given some tree node add refpositions for all the registers this node kills
-//
-// Arguments:
-// tree - the tree for which kill positions should be generated
-// currentLoc - the location at which the kills should be added
-//
-// Return Value:
-// true - kills were inserted
-// false - no kills were inserted
-//
-// Notes:
-// The return value is needed because if we have any kills, we need to make sure that
-// all defs are located AFTER the kills. On the other hand, if there aren't kills,
-// the multiple defs for a regPair are in different locations.
-// If we generate any kills, we will mark all currentLiveVars as being preferenced
-// to avoid the killed registers. This is somewhat conservative.
-
-bool LinearScan::buildKillPositionsForNode(GenTree* tree, LsraLocation currentLoc)
-{
- regMaskTP killMask = getKillSetForNode(tree);
- bool isCallKill = ((killMask == RBM_INT_CALLEE_TRASH) || (killMask == RBM_CALLEE_TRASH));
- if (killMask != RBM_NONE)
- {
- // The killMask identifies a set of registers that will be used during codegen.
- // Mark these as modified here, so when we do final frame layout, we'll know about
- // all these registers. This is especially important if killMask contains
- // callee-saved registers, which affect the frame size since we need to save/restore them.
- // In the case where we have a copyBlk with GC pointers, we may need to call the
- // CORINFO_HELP_ASSIGN_BYREF helper, which kills callee-saved RSI and RDI; if
- // LSRA doesn't assign RSI/RDI, they wouldn't get marked as modified until codegen,
- // which is too late.
- compiler->codeGen->regSet.rsSetRegsModified(killMask DEBUGARG(true));
-
- addRefsForPhysRegMask(killMask, currentLoc, RefTypeKill, true);
-
- // TODO-CQ: It appears to be valuable for both fp and int registers to avoid killing the callee
- // save regs on infrequently executed paths. However, it results in a large number of asmDiffs,
- // many of which appear to be regressions (because there is more spill on the infrequent path),
- // but are not really regressions, because the frequent path becomes smaller. Validating these diffs will need
- // to be done before making this change.
- // if (!blockSequence[curBBSeqNum]->isRunRarely())
- if (enregisterLocalVars)
- {
- VarSetOps::Iter iter(compiler, currentLiveVars);
- unsigned varIndex = 0;
- while (iter.NextElem(&varIndex))
- {
- unsigned varNum = compiler->lvaTrackedToVarNum[varIndex];
- LclVarDsc* varDsc = compiler->lvaTable + varNum;
-#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
- if (varTypeNeedsPartialCalleeSave(varDsc->lvType))
- {
- if (!VarSetOps::IsMember(compiler, largeVectorCalleeSaveCandidateVars, varIndex))
- {
- continue;
- }
- }
- else
-#endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE
- if (varTypeIsFloating(varDsc) &&
- !VarSetOps::IsMember(compiler, fpCalleeSaveCandidateVars, varIndex))
- {
- continue;
- }
- Interval* interval = getIntervalForLocalVar(varIndex);
- if (isCallKill)
- {
- interval->preferCalleeSave = true;
- }
- regMaskTP newPreferences = allRegs(interval->registerType) & (~killMask);
-
- if (newPreferences != RBM_NONE)
- {
- interval->updateRegisterPreferences(newPreferences);
- }
- else
- {
- // If there are no callee-saved registers, the call could kill all the registers.
- // This is a valid state, so in that case assert should not trigger. The RA will spill in order to
- // free a register later.
- assert(compiler->opts.compDbgEnC || (calleeSaveRegs(varDsc->lvType)) == RBM_NONE);
- }
- }
- }
-
- if (compiler->killGCRefs(tree))
- {
- RefPosition* pos = newRefPosition((Interval*)nullptr, currentLoc, RefTypeKillGCRefs, tree,
- (allRegs(TYP_REF) & ~RBM_ARG_REGS));
- }
- return true;
- }
-
- return false;
-}
-
-//----------------------------------------------------------------------------
-// defineNewInternalTemp: Defines a ref position for an internal temp.
-//
-// Arguments:
-// tree - Gentree node requiring an internal register
-// regType - Register type
-// currentLoc - Location of the temp Def position
-// regMask - register mask of candidates for temp
-//
-RefPosition* LinearScan::defineNewInternalTemp(GenTree* tree, RegisterType regType, regMaskTP regMask)
-{
- Interval* current = newInterval(regType);
- current->isInternal = true;
- return newRefPosition(current, currentLoc, RefTypeDef, tree, regMask, 0);
-}
-
-//------------------------------------------------------------------------
-// buildInternalRegisterDefsForNode - build Def positions for internal
-// registers required for tree node.
-//
-// Arguments:
-// tree - Gentree node that needs internal registers
-// temps - in-out array which is populated with ref positions
-// created for Def of internal registers
-//
-// Returns:
- // The total number of Def positions created for the internal registers of the tree node.
-int LinearScan::buildInternalRegisterDefsForNode(GenTree* tree, TreeNodeInfo* info, RefPosition* temps[])
-{
- int count;
- int internalIntCount = info->internalIntCount;
- regMaskTP internalCands = info->getInternalCandidates(this);
-
- // If the number of internal integer registers required is the same as the number of candidate integer registers in
- // the candidate set, then they must be handled as fixed registers.
- // (E.g. for the integer registers that floating point arguments must be copied into for a varargs call.)
- bool fixedRegs = false;
- regMaskTP internalIntCandidates = (internalCands & allRegs(TYP_INT));
- if (((int)genCountBits(internalIntCandidates)) == internalIntCount)
- {
- fixedRegs = true;
- }
-
- for (count = 0; count < internalIntCount; count++)
- {
- regMaskTP internalIntCands = (internalCands & allRegs(TYP_INT));
- if (fixedRegs)
- {
- internalIntCands = genFindLowestBit(internalIntCands);
- internalCands &= ~internalIntCands;
- }
- temps[count] = defineNewInternalTemp(tree, IntRegisterType, internalIntCands);
- }
-
- int internalFloatCount = info->internalFloatCount;
- for (int i = 0; i < internalFloatCount; i++)
- {
- regMaskTP internalFPCands = (internalCands & internalFloatRegCandidates());
- temps[count++] = defineNewInternalTemp(tree, FloatRegisterType, internalFPCands);
- }
-
- assert(count < MaxInternalRegisters);
- assert(count == (internalIntCount + internalFloatCount));
- return count;
-}
-
-//------------------------------------------------------------------------
-// buildInternalRegisterUsesForNode - adds Use positions for internal
-// registers required for tree node.
-//
-// Arguments:
-// tree - Gentree node that needs internal registers
- // defs - array containing the Def positions of the internal
- // registers.
-// total - Total number of Def positions in 'defs' array.
-//
-// Returns:
-// Void.
-void LinearScan::buildInternalRegisterUsesForNode(GenTree* tree, TreeNodeInfo* info, RefPosition* defs[], int total)
-{
- assert(total < MaxInternalRegisters);
-
- // defs[] has been populated by buildInternalRegisterDefsForNode
- // now just add uses to the defs previously added.
- for (int i = 0; i < total; i++)
- {
- RefPosition* prevRefPosition = defs[i];
- assert(prevRefPosition != nullptr);
- regMaskTP mask = prevRefPosition->registerAssignment;
- if (prevRefPosition->isPhysRegRef)
- {
- newRefPosition(defs[i]->getReg()->regNum, currentLoc, RefTypeUse, tree, mask);
- }
- else
- {
- RefPosition* newest = newRefPosition(defs[i]->getInterval(), currentLoc, RefTypeUse, tree, mask, 0);
-
- if (info->isInternalRegDelayFree)
- {
- newest->delayRegFree = true;
- }
- }
- }
-}
-
-RegisterType LinearScan::getDefType(GenTree* tree)
-{
- return tree->TypeGet();
-}
-
-//------------------------------------------------------------------------
-// LocationInfoListNodePool: manages a pool of `LocationInfoListNode`
-// values to decrease overall memory usage
-// during `buildIntervals`.
-//
-// `buildIntervals` involves creating a list of location info values per
-// node that either directly produces a set of registers or that is a
-// contained node with register-producing sources. However, these lists
-// are short-lived: they are destroyed once the use of the corresponding
-// node is processed. As such, there is typically only a small number of
-// `LocationInfoListNode` values in use at any given time. Pooling these
-// values avoids otherwise frequent allocations.
-class LocationInfoListNodePool final
-{
- LocationInfoListNode* m_freeList;
- Compiler* m_compiler;
-
-public:
- //------------------------------------------------------------------------
- // LocationInfoListNodePool::LocationInfoListNodePool:
- // Creates a pool of `LocationInfoListNode` values.
- //
- // Arguments:
- // compiler - The compiler context.
- // preallocate - The number of nodes to preallocate.
- //
- LocationInfoListNodePool(Compiler* compiler, unsigned preallocate = 0) : m_compiler(compiler)
- {
- if (preallocate > 0)
- {
- size_t preallocateSize = sizeof(LocationInfoListNode) * preallocate;
- LocationInfoListNode* preallocatedNodes =
- reinterpret_cast<LocationInfoListNode*>(compiler->compGetMem(preallocateSize, CMK_LSRA));
-
- LocationInfoListNode* head = preallocatedNodes;
- head->m_next = nullptr;
-
- for (unsigned i = 1; i < preallocate; i++)
- {
- LocationInfoListNode* node = &preallocatedNodes[i];
- node->m_next = head;
- head = node;
- }
-
- m_freeList = head;
- }
- }
-
- //------------------------------------------------------------------------
- // LocationInfoListNodePool::GetNode: Fetches an unused node from the
- // pool.
- //
- // Arguments:
- // l - The `LsraLocation` for the `LocationInfo` value.
- // i - The interval for the `LocationInfo` value.
- // t - The IR node for the `LocationInfo` value
- // regIdx - The register index for the `LocationInfo` value.
- //
- // Returns:
- // A pooled or newly-allocated `LocationInfoListNode`, depending on the
- // contents of the pool.
- LocationInfoListNode* GetNode(LsraLocation l, Interval* i, GenTree* t, unsigned regIdx = 0)
- {
- LocationInfoListNode* head = m_freeList;
- if (head == nullptr)
- {
- head = reinterpret_cast<LocationInfoListNode*>(m_compiler->compGetMem(sizeof(LocationInfoListNode)));
- }
- else
- {
- m_freeList = head->m_next;
- }
-
- head->loc = l;
- head->interval = i;
- head->treeNode = t;
- head->m_next = nullptr;
-
- return head;
- }
-
- //------------------------------------------------------------------------
- // LocationInfoListNodePool::ReturnNodes: Returns a list of nodes to the
- // pool.
- //
- // Arguments:
- // list - The list to return.
- //
- void ReturnNodes(LocationInfoList& list)
- {
- assert(list.m_head != nullptr);
- assert(list.m_tail != nullptr);
-
- LocationInfoListNode* head = m_freeList;
- list.m_tail->m_next = head;
- m_freeList = list.m_head;
-
- list.m_head = nullptr;
- list.m_tail = nullptr;
- }
-};
-
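The pool above is a standard intrusive free list over arena-allocated nodes: `GetNode` pops (or allocates) a node, and `ReturnNodes` splices a whole list back in O(1). A simplified, self-contained sketch of the pattern (illustrative names, plain `new` in place of the JIT's arena allocator):

    struct Node
    {
        int   payload;
        Node* next;
    };

    class NodePool
    {
        Node* m_freeList = nullptr;

    public:
        Node* Get(int payload)
        {
            Node* n = m_freeList;
            if (n == nullptr)
            {
                n = new Node; // the JIT allocates from its arena instead
            }
            else
            {
                m_freeList = n->next;
            }
            n->payload = payload;
            n->next    = nullptr;
            return n;
        }

        // Splice an entire list [head..tail] back onto the free list in O(1).
        void Return(Node* head, Node* tail)
        {
            tail->next = m_freeList;
            m_freeList = head;
        }
    };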
-#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
-VARSET_VALRET_TP
-LinearScan::buildUpperVectorSaveRefPositions(GenTree* tree, LsraLocation currentLoc)
-{
- assert(enregisterLocalVars);
- VARSET_TP liveLargeVectors(VarSetOps::MakeEmpty(compiler));
- regMaskTP fpCalleeKillSet = RBM_NONE;
- if (!VarSetOps::IsEmpty(compiler, largeVectorVars))
- {
- // We actually need to find any calls that kill the upper-half of the callee-save vector registers.
- // But we will use as a proxy any node that kills floating point registers.
- // (Note that some calls are masquerading as other nodes at this point so we can't just check for calls.)
- fpCalleeKillSet = getKillSetForNode(tree);
- if ((fpCalleeKillSet & RBM_FLT_CALLEE_TRASH) != RBM_NONE)
- {
- VarSetOps::AssignNoCopy(compiler, liveLargeVectors,
- VarSetOps::Intersection(compiler, currentLiveVars, largeVectorVars));
- VarSetOps::Iter iter(compiler, liveLargeVectors);
- unsigned varIndex = 0;
- while (iter.NextElem(&varIndex))
- {
- Interval* varInterval = getIntervalForLocalVar(varIndex);
- Interval* tempInterval = newInterval(varInterval->registerType);
- tempInterval->isInternal = true;
- RefPosition* pos =
- newRefPosition(tempInterval, currentLoc, RefTypeUpperVectorSaveDef, tree, RBM_FLT_CALLEE_SAVED);
- // We are going to save the existing relatedInterval of varInterval on tempInterval, so that we can set
- // the tempInterval as the relatedInterval of varInterval, so that we can build the corresponding
- // RefTypeUpperVectorSaveUse RefPosition. We will then restore the relatedInterval onto varInterval,
- // and set varInterval as the relatedInterval of tempInterval.
- tempInterval->relatedInterval = varInterval->relatedInterval;
- varInterval->relatedInterval = tempInterval;
- }
- }
- }
- return liveLargeVectors;
-}
-
-void LinearScan::buildUpperVectorRestoreRefPositions(GenTree* tree,
- LsraLocation currentLoc,
- VARSET_VALARG_TP liveLargeVectors)
-{
- assert(enregisterLocalVars);
- if (!VarSetOps::IsEmpty(compiler, liveLargeVectors))
- {
- VarSetOps::Iter iter(compiler, liveLargeVectors);
- unsigned varIndex = 0;
- while (iter.NextElem(&varIndex))
- {
- Interval* varInterval = getIntervalForLocalVar(varIndex);
- Interval* tempInterval = varInterval->relatedInterval;
- assert(tempInterval->isInternal == true);
- RefPosition* pos =
- newRefPosition(tempInterval, currentLoc, RefTypeUpperVectorSaveUse, tree, RBM_FLT_CALLEE_SAVED);
- // Restore the relatedInterval onto varInterval, and set varInterval as the relatedInterval
- // of tempInterval.
- varInterval->relatedInterval = tempInterval->relatedInterval;
- tempInterval->relatedInterval = varInterval;
- }
- }
-}
-#endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE
-
-#ifdef DEBUG
-//------------------------------------------------------------------------
-// ComputeOperandDstCount: computes the number of registers defined by a
-// node.
-//
-// For most nodes, this is simple:
-// - Nodes that do not produce values (e.g. stores and other void-typed
-// nodes) and nodes that immediately use the registers they define
-// produce no registers
-// - Nodes that are marked as defining N registers define N registers.
-//
-// For contained nodes, however, things are more complicated: for purposes
-// of bookkeeping, a contained node is treated as producing the transitive
-// closure of the registers produced by its sources.
-//
-// Arguments:
-// operand - The operand for which to compute a register count.
-//
-// Returns:
-// The number of registers defined by `operand`.
-//
-static int ComputeOperandDstCount(GenTree* operand)
-{
- // GT_ARGPLACE is the only non-LIR node that is currently in the trees at this stage, though
- // note that it is not in the linear order. It seems best to check for !IsLIR() rather than
- // GT_ARGPLACE directly, since it's that characteristic that makes it irrelevant for this method.
- if (!operand->IsLIR())
- {
- return 0;
- }
- if (operand->isContained())
- {
- int dstCount = 0;
- for (GenTree* op : operand->Operands())
- {
- dstCount += ComputeOperandDstCount(op);
- }
-
- return dstCount;
- }
- if (operand->IsUnusedValue())
- {
- // Operands that define an unused value do not produce any registers.
- return 0;
- }
- if (operand->IsValue())
- {
- // Operands that are values and are not contained consume all of their operands
- // and produce one or more registers.
- return operand->GetRegisterDstCount();
- }
- else
- {
- // This must be one of the operand types that are neither contained nor produce a value.
- // Stores and void-typed operands may be encountered when processing call nodes, which contain
- // pointers to argument setup stores.
- assert(operand->OperIsStore() || operand->OperIsBlkOp() || operand->OperIsPutArgStk() ||
- operand->OperIsCompare() || operand->OperIs(GT_CMP) || operand->IsSIMDEqualityOrInequality() ||
- operand->TypeGet() == TYP_VOID);
- return 0;
- }
-}
-
-//------------------------------------------------------------------------
-// ComputeAvailableSrcCount: computes the number of registers available as
-// sources for a node.
-//
-// This is simply the sum of the number of registers produced by each
-// operand to the node.
-//
-// Arguments:
-// node - The node for which to compute a source count.
-//
- // Returns:
-// The number of registers available as sources for `node`.
-//
-static int ComputeAvailableSrcCount(GenTree* node)
-{
- int numSources = 0;
- for (GenTree* operand : node->Operands())
- {
- numSources += ComputeOperandDstCount(operand);
- }
-
- return numSources;
-}
-#endif // DEBUG
-
-void LinearScan::buildRefPositionsForNode(GenTree* tree,
- BasicBlock* block,
- LocationInfoListNodePool& listNodePool,
- LsraLocation currentLoc)
-{
-#ifdef _TARGET_ARM_
- assert(!isRegPairType(tree->TypeGet()));
-#endif // _TARGET_ARM_
-
- // The LIR traversal doesn't visit GT_LIST or GT_ARGPLACE nodes.
- // GT_CLS_VAR nodes should have been eliminated by rationalizer.
- assert(tree->OperGet() != GT_ARGPLACE);
- assert(tree->OperGet() != GT_LIST);
- assert(tree->OperGet() != GT_CLS_VAR);
-
- // The LIR traversal visits only the first node in a GT_FIELD_LIST.
- assert((tree->OperGet() != GT_FIELD_LIST) || tree->AsFieldList()->IsFieldListHead());
-
- // The set of internal temporary registers used by this node are stored in the
- // gtRsvdRegs register mask. Clear it out.
- tree->gtRsvdRegs = RBM_NONE;
-
-#ifdef DEBUG
- if (VERBOSE)
- {
- dumpDefList();
- compiler->gtDispTree(tree, nullptr, nullptr, true);
- }
-#endif // DEBUG
-
- // If the node produces a value that will be consumed by a parent node, its TreeNodeInfo will
- // be allocated in the LocationInfoListNode. Otherwise, we'll just use a local value that will
- // be thrown away when we're done.
- LocationInfoListNode* locationInfo = nullptr;
- TreeNodeInfo tempInfo;
- TreeNodeInfo* info = nullptr;
- if (!tree->isContained() && tree->IsValue())
- {
- locationInfo = listNodePool.GetNode(currentLoc, nullptr, tree);
- info = &locationInfo->info;
- }
- else
- {
- info = &tempInfo;
- }
- info->Initialize(this, tree);
- TreeNodeInfoInit(tree, info);
-
-#ifdef DEBUG
- if (VERBOSE)
- {
- printf(" +");
- info->dump(this);
- tree->dumpLIRFlags();
- printf("\n");
- }
-#endif // DEBUG
-
- assert(info->IsValid(this));
- int consume = info->srcCount;
- int produce = info->dstCount;
-
-#ifdef DEBUG
- if (VERBOSE)
- {
- if (tree->isContained())
- {
- JITDUMP("Contained\n");
- }
- else if (tree->OperIs(GT_LCL_VAR, GT_LCL_FLD) && info->isLocalDefUse)
- {
- JITDUMP("Unused\n");
- }
- else
- {
- JITDUMP(" consume=%d produce=%d\n", consume, produce);
- }
- }
-#endif // DEBUG
-
- assert(((consume == 0) && (produce == 0)) || (ComputeAvailableSrcCount(tree) == consume));
-
- if (tree->OperIs(GT_LCL_VAR, GT_LCL_FLD))
- {
- LclVarDsc* const varDsc = &compiler->lvaTable[tree->AsLclVarCommon()->gtLclNum];
- if (isCandidateVar(varDsc))
- {
- assert(consume == 0);
-
- // We handle tracked variables differently from non-tracked ones. If it is tracked,
- // we simply add a use or def of the tracked variable. Otherwise, for a use we need
- // to actually add the appropriate references for loading or storing the variable.
- //
- // It won't actually get used or defined until the appropriate ancestor tree node
- // is processed, unless this is marked "isLocalDefUse" because it is a stack-based argument
- // to a call
-
- assert(varDsc->lvTracked);
- unsigned varIndex = varDsc->lvVarIndex;
-
- // We have only approximate last-use information at this point. This is because the
- // execution order doesn't actually reflect the true order in which the localVars
- // are referenced - but the order of the RefPositions will, so we recompute it after
- // RefPositions are built.
- // Use the old value for setting currentLiveVars - note that we do this with the
- // not-quite-correct setting of lastUse. However, this is OK because
- // 1) this is only for preferencing, which doesn't require strict correctness, and
- // 2) the cases where these out-of-order uses occur should not overlap a kill.
- // TODO-Throughput: clean this up once we have the execution order correct. At that point
- // we can update currentLiveVars at the same place that we create the RefPosition.
- if ((tree->gtFlags & GTF_VAR_DEATH) != 0)
- {
- VarSetOps::RemoveElemD(compiler, currentLiveVars, varIndex);
- }
-
- if (!info->isLocalDefUse && !tree->isContained())
- {
- assert(produce != 0);
-
- locationInfo->interval = getIntervalForLocalVar(varIndex);
- defList.Append(locationInfo);
- }
- return;
- }
- }
-
- // Handle the case of local variable assignment
- Interval* varDefInterval = nullptr;
- RefType defRefType = RefTypeDef;
-
- GenTree* defNode = tree;
-
- // noAdd means the node creates a def, but for purposes of map
- // management we do not add it, because data is not flowing up the
- // tree.
-
- bool noAdd = info->isLocalDefUse;
- RefPosition* prevPos = nullptr;
-
- bool isSpecialPutArg = false;
-
- assert(!tree->OperIsAssignment());
- if (tree->OperIsLocalStore())
- {
- GenTreeLclVarCommon* const store = tree->AsLclVarCommon();
- assert((consume > 1) || (regType(store->gtOp1->TypeGet()) == regType(store->TypeGet())));
-
- LclVarDsc* varDsc = &compiler->lvaTable[store->gtLclNum];
- if (isCandidateVar(varDsc))
- {
- // We always push the tracked lclVar intervals
- assert(varDsc->lvTracked);
- unsigned varIndex = varDsc->lvVarIndex;
- varDefInterval = getIntervalForLocalVar(varIndex);
- defRefType = refTypeForLocalRefNode(tree);
- defNode = tree;
- if (produce == 0)
- {
- produce = 1;
- noAdd = true;
- }
-
- assert(consume <= MAX_RET_REG_COUNT);
- if (consume == 1)
- {
- // Get the location info for the register defined by the first operand.
- LocationInfoListNode& operandInfo = *(useList.Begin());
- assert(operandInfo.treeNode == tree->gtGetOp1());
-
- Interval* srcInterval = operandInfo.interval;
- if (srcInterval->relatedInterval == nullptr)
- {
- // Preference the source to the dest, unless this is a non-last-use localVar.
- // Note that the last-use info is not correct, but it is a better approximation than preferencing
- // the source to the dest, if the source's lifetime extends beyond the dest.
- if (!srcInterval->isLocalVar || (operandInfo.treeNode->gtFlags & GTF_VAR_DEATH) != 0)
- {
- srcInterval->assignRelatedInterval(varDefInterval);
- }
- }
- else if (!srcInterval->isLocalVar)
- {
- // Preference the source to dest, if src is not a local var.
- srcInterval->assignRelatedInterval(varDefInterval);
- }
- }
-
- if ((tree->gtFlags & GTF_VAR_DEATH) == 0)
- {
- VarSetOps::AddElemD(compiler, currentLiveVars, varIndex);
- }
- }
- else if (store->gtOp1->OperIs(GT_BITCAST))
- {
- store->gtType = store->gtOp1->gtType = store->gtOp1->AsUnOp()->gtOp1->TypeGet();
-
- // Get the location info for the register defined by the first operand.
- LocationInfoListNode& operandInfo = *(useList.Begin());
- assert(operandInfo.treeNode == tree->gtGetOp1());
-
- Interval* srcInterval = operandInfo.interval;
- srcInterval->registerType = regType(store->TypeGet());
-
- RefPosition* srcDefPosition = srcInterval->firstRefPosition;
- assert(srcDefPosition != nullptr);
- assert(srcDefPosition->refType == RefTypeDef);
- assert(srcDefPosition->treeNode == store->gtOp1);
-
- srcDefPosition->registerAssignment = allRegs(store->TypeGet());
- operandInfo.info.setSrcCandidates(this, allRegs(store->TypeGet()));
- }
- }
- else if (noAdd && produce == 0)
- {
- // Dead nodes may remain after tree rationalization, decomposition or lowering.
- // They should be marked as UnusedValue.
- // TODO-Cleanup: Identify and remove these dead nodes prior to register allocation.
- assert(!noAdd || (produce != 0));
- }
-
- Interval* prefSrcInterval = nullptr;
-
- // If this is a binary operator that will be encoded with 2 operand fields
- // (i.e. the target is read-modify-write), preference the dst to op1.
-
- bool hasDelayFreeSrc = info->hasDelayFreeSrc;
-
-#if defined(DEBUG) && defined(_TARGET_X86_)
- // On x86, `LSRA_LIMIT_CALLER` is too restrictive to allow the use of special put args: this stress mode
- // leaves only three registers allocatable--eax, ecx, and edx--of which the latter two are also used for the
- // first two integral arguments to a call. This can leave us with too few registers to successfully allocate in
- // situations like the following:
- //
- // t1026 = lclVar ref V52 tmp35 u:3 REG NA <l:$3a1, c:$98d>
- //
- // /--* t1026 ref
- // t1352 = * putarg_reg ref REG NA
- //
- // t342 = lclVar int V14 loc6 u:4 REG NA $50c
- //
- // t343 = const int 1 REG NA $41
- //
- // /--* t342 int
- // +--* t343 int
- // t344 = * + int REG NA $495
- //
- // t345 = lclVar int V04 arg4 u:2 REG NA $100
- //
- // /--* t344 int
- // +--* t345 int
- // t346 = * % int REG NA $496
- //
- // /--* t346 int
- // t1353 = * putarg_reg int REG NA
- //
- // t1354 = lclVar ref V52 tmp35 (last use) REG NA
- //
- // /--* t1354 ref
- // t1355 = * lea(b+0) byref REG NA
- //
- // Here, the first `putarg_reg` would normally be considered a special put arg, which would remove `ecx` from the
- // set of allocatable registers, leaving only `eax` and `edx`. The allocator will then fail to allocate a register
- // for the def of `t345` if arg4 is not a register candidate: the corresponding ref position will be constrained to
- // { `ecx`, `ebx`, `esi`, `edi` }, which `LSRA_LIMIT_CALLER` will further constrain to `ecx`, which will not be
- // available due to the special put arg.
- const bool supportsSpecialPutArg = getStressLimitRegs() != LSRA_LIMIT_CALLER;
-#else
- const bool supportsSpecialPutArg = true;
-#endif
-
- if (supportsSpecialPutArg && tree->OperGet() == GT_PUTARG_REG && isCandidateLocalRef(tree->gtGetOp1()) &&
- (tree->gtGetOp1()->gtFlags & GTF_VAR_DEATH) == 0)
- {
- // This is the case for a "pass-through" copy of a lclVar. In the case where it is a non-last-use,
- // we don't want the def of the copy to kill the lclVar register, if it is assigned the same register
- // (which is actually what we hope will happen).
- JITDUMP("Setting putarg_reg as a pass-through of a non-last use lclVar\n");
-
- // Get the register information for the first operand of the node.
- LocationInfoListNode* operandDef = useList.Begin();
- assert(operandDef->treeNode == tree->gtGetOp1());
-
- // Preference the destination to the interval of the first register defined by the first operand.
- Interval* srcInterval = operandDef->interval;
- assert(srcInterval->isLocalVar);
- prefSrcInterval = srcInterval;
- isSpecialPutArg = true;
- }
-
- RefPosition* internalRefs[MaxInternalRegisters];
-
-#ifdef DEBUG
- // If we are constraining the registers for allocation, we will modify all the RefPositions
- // we've built for this node after we've created them. In order to do that, we'll remember
- // the last RefPosition prior to those created for this node.
- RefPositionIterator refPositionMark = refPositions.backPosition();
-#endif // DEBUG
-
- // Make intervals for all the 'internal' register requirements for this node,
- // where internal means additional registers required temporarily.
- // Create a RefTypeDef RefPosition for each such interval.
- int internalCount = buildInternalRegisterDefsForNode(tree, info, internalRefs);
-
- // Make use RefPositions for all used values.
- int consumed = 0;
- for (LocationInfoListNode *listNode = useList.Begin(), *end = useList.End(); listNode != end;
- listNode = listNode->Next())
- {
- LocationInfo& locInfo = *static_cast<LocationInfo*>(listNode);
-
- // For tree temps, a use is always a last use and the end of the range;
- // this is set by default in newRefPosition
- GenTree* const useNode = locInfo.treeNode;
- assert(useNode != nullptr);
-
- Interval* srcInterval = locInfo.interval;
- TreeNodeInfo& useNodeInfo = locInfo.info;
- if (useNodeInfo.isTgtPref)
- {
- prefSrcInterval = srcInterval;
- }
-
- const bool delayRegFree = (hasDelayFreeSrc && useNodeInfo.isDelayFree);
-
- regMaskTP candidates = useNodeInfo.getSrcCandidates(this);
-#ifdef _TARGET_ARM_
- regMaskTP allCandidates = candidates;
-
- if (useNode->OperIsPutArgSplit() || useNode->OperIsMultiRegOp())
- {
- // Get the i-th candidate; the set bits in useCandidates must be in sequential order.
- candidates = genFindLowestReg(allCandidates);
- allCandidates &= ~candidates;
- }
-#endif // _TARGET_ARM_
-
- assert((candidates & allRegs(srcInterval->registerType)) != 0);
-
- // For non-localVar uses we record nothing, as nothing needs to be written back to the tree.
- GenTree* const refPosNode = srcInterval->isLocalVar ? useNode : nullptr;
- RefPosition* pos = newRefPosition(srcInterval, currentLoc, RefTypeUse, refPosNode, candidates, 0);
- if (delayRegFree)
- {
- pos->delayRegFree = true;
- }
-
- if (useNode->IsRegOptional())
- {
- pos->setAllocateIfProfitable(true);
- }
- consumed++;
-
- // Create additional use RefPositions for multi-reg nodes.
- for (int idx = 1; idx < locInfo.info.dstCount; idx++)
- {
- noway_assert(srcInterval->relatedInterval != nullptr);
- srcInterval = srcInterval->relatedInterval;
-#ifdef _TARGET_ARM_
- if (useNode->OperIsPutArgSplit() ||
- (compiler->opts.compUseSoftFP && (useNode->OperIsPutArgReg() || useNode->OperGet() == GT_BITCAST)))
- {
- // Get the first candidate; the set bits in useCandidates must be in sequential order.
- candidates = genFindLowestReg(allCandidates);
- allCandidates &= ~candidates;
- }
-#endif // _TARGET_ARM_
- RefPosition* pos = newRefPosition(srcInterval, currentLoc, RefTypeUse, refPosNode, candidates, idx);
- consumed++;
- }
- }
-
- assert(consumed == consume);
- if (consume != 0)
- {
- listNodePool.ReturnNodes(useList);
- }
-
- buildInternalRegisterUsesForNode(tree, info, internalRefs, internalCount);
-
- RegisterType registerType = getDefType(tree);
- regMaskTP candidates = info->getDstCandidates(this);
- regMaskTP useCandidates = info->getSrcCandidates(this);
-
-#ifdef DEBUG
- if (VERBOSE && produce)
- {
- printf("Def candidates ");
- dumpRegMask(candidates);
- printf(", Use candidates ");
- dumpRegMask(useCandidates);
- printf("\n");
- }
-#endif // DEBUG
-
-#if defined(_TARGET_AMD64_)
- // A multi-reg call node is the only node that can produce a multi-reg value.
- assert(produce <= 1 || (tree->IsMultiRegCall() && produce == MAX_RET_REG_COUNT));
-#endif // _TARGET_AMD64_
-
- // Add kill positions before adding def positions
- buildKillPositionsForNode(tree, currentLoc + 1);
-
-#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
- VARSET_TP liveLargeVectors(VarSetOps::UninitVal());
- if (enregisterLocalVars && (RBM_FLT_CALLEE_SAVED != RBM_NONE))
- {
- // Build RefPositions for saving any live large vectors.
- // This must be done after the kills, so that we know which large vectors are still live.
- VarSetOps::AssignNoCopy(compiler, liveLargeVectors, buildUpperVectorSaveRefPositions(tree, currentLoc + 1));
- }
-#endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE
-
- ReturnTypeDesc* retTypeDesc = nullptr;
- bool isMultiRegCall = tree->IsMultiRegCall();
- if (isMultiRegCall)
- {
- retTypeDesc = tree->AsCall()->GetReturnTypeDesc();
- assert((int)genCountBits(candidates) == produce);
- assert(candidates == retTypeDesc->GetABIReturnRegs());
- }
-
- // push defs
- LocationInfoList locationInfoList;
- LsraLocation defLocation = currentLoc + 1;
- Interval* interval = varDefInterval;
- // For nodes that define multiple registers, subsequent intervals will be linked using the 'relatedInterval' field.
- // Keep track of the previous interval allocated, for that purpose.
- Interval* prevInterval = nullptr;
- for (int i = 0; i < produce; i++)
- {
- regMaskTP currCandidates = candidates;
-
- // In case of multi-reg call node, registerType is given by
- // the type of ith position return register.
- if (isMultiRegCall)
- {
- registerType = retTypeDesc->GetReturnRegType((unsigned)i);
- currCandidates = genRegMask(retTypeDesc->GetABIReturnReg(i));
- useCandidates = allRegs(registerType);
- }
-
-#ifdef _TARGET_ARM_
- // If oper is GT_PUTARG_REG, the set bits in useCandidates must be in sequential order.
- if (tree->OperIsPutArgSplit() || tree->OperIsMultiRegOp())
- {
- // get i-th candidate
- currCandidates = genFindLowestReg(candidates);
- candidates &= ~currCandidates;
- }
-#endif // _TARGET_ARM_
-
- if (interval == nullptr)
- {
- // Make a new interval
- interval = newInterval(registerType);
- if (hasDelayFreeSrc || info->isInternalRegDelayFree)
- {
- interval->hasInterferingUses = true;
- }
- else if (tree->OperIsConst())
- {
- assert(!tree->IsReuseRegVal());
- interval->isConstant = true;
- }
-
- if ((currCandidates & useCandidates) != RBM_NONE)
- {
- interval->updateRegisterPreferences(currCandidates & useCandidates);
- }
-
- if (isSpecialPutArg)
- {
- interval->isSpecialPutArg = true;
- }
- }
- else
- {
- assert(registerTypesEquivalent(interval->registerType, registerType));
- }
-
- if (prefSrcInterval != nullptr)
- {
- interval->assignRelatedIntervalIfUnassigned(prefSrcInterval);
- }
-
- // for assignments, we want to create a refposition for the def
- // but not push it
- if (!noAdd)
- {
- if (i == 0)
- {
- locationInfo->interval = interval;
- prevInterval = interval;
- defList.Append(locationInfo);
- }
- else
- {
- // This is the 2nd or subsequent register defined by a multi-reg node.
- // Connect them using 'relatedInterval'.
- noway_assert(prevInterval != nullptr);
- prevInterval->relatedInterval = interval;
- prevInterval = interval;
- prevInterval->isMultiReg = true;
- interval->isMultiReg = true;
- }
- }
-
- RefPosition* pos = newRefPosition(interval, defLocation, defRefType, defNode, currCandidates, (unsigned)i);
- if (info->isLocalDefUse)
- {
- // This must be an unused value, OR it is a special node for which we allocate
- // a target register even though it produces no value.
- assert(defNode->IsUnusedValue() || (defNode->gtOper == GT_LOCKADD));
- pos->isLocalDefUse = true;
- pos->lastUse = true;
- }
- interval->updateRegisterPreferences(currCandidates);
- interval->updateRegisterPreferences(useCandidates);
- interval = nullptr;
- }
-
-#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
- // SaveDef position must be at the same location as Def position of call node.
- if (enregisterLocalVars)
- {
- buildUpperVectorRestoreRefPositions(tree, defLocation, liveLargeVectors);
- }
-#endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE
-
-#ifdef DEBUG
- // If we are constraining registers, modify all the RefPositions we've just built to specify the
- // minimum reg count required.
- if ((getStressLimitRegs() != LSRA_LIMIT_NONE) || (getSelectionHeuristics() != LSRA_SELECT_DEFAULT))
- {
- // The number of registers required for a tree node is the sum of
- // consume + produce + internalCount. This is the minimum
- // set of registers that needs to be ensured in candidate
- // set of ref positions created.
- unsigned minRegCount = consume + produce + info->internalIntCount + info->internalFloatCount;
-
- for (refPositionMark++; refPositionMark != refPositions.end(); refPositionMark++)
- {
- RefPosition* newRefPosition = &(*refPositionMark);
- unsigned minRegCountForRef = minRegCount;
- if (RefTypeIsUse(newRefPosition->refType) && newRefPosition->delayRegFree)
- {
- // If delayRegFree, then Use will interfere with the destination of the consuming node.
- // Therefore, we also need to add the kill set of the consuming node to minRegCount.
- //
- // For example consider the following IR on x86, where v01 and v02
- // are method args coming in ecx and edx respectively.
- // GT_DIV(v01, v02)
- //
- // For GT_DIV, the minRegCount will be 3 without adding the kill set of the GT_DIV node.
- //
- // Assume further JitStressRegs=2, which would constrain candidates to callee trashable
- // regs { eax, ecx, edx } on use positions of v01 and v02. LSRA allocates ecx for v01.
- // The use position of v02 cannot be allocated a reg since it is marked delay-reg free and
- // {eax,edx} are getting killed before the def of GT_DIV. For this reason, minRegCount for
- // the use position of v02 also needs to take into account the kill set of its consuming node.
- regMaskTP killMask = getKillSetForNode(tree);
- if (killMask != RBM_NONE)
- {
- minRegCountForRef += genCountBits(killMask);
- }
- }
- newRefPosition->minRegCandidateCount = minRegCountForRef;
- if (newRefPosition->IsActualRef() && doReverseCallerCallee())
- {
- Interval* interval = newRefPosition->getInterval();
- regMaskTP oldAssignment = newRefPosition->registerAssignment;
- regMaskTP calleeSaveMask = calleeSaveRegs(interval->registerType);
- newRefPosition->registerAssignment =
- getConstrainedRegMask(oldAssignment, calleeSaveMask, minRegCountForRef);
- if ((newRefPosition->registerAssignment != oldAssignment) && (newRefPosition->refType == RefTypeUse) &&
- !interval->isLocalVar)
- {
- checkConflictingDefUse(newRefPosition);
- }
- }
- }
- }
-#endif // DEBUG
- JITDUMP("\n");
-}
-
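The DEBUG-only stress constraint above boils down to a small computation. Below is a standalone sketch of that arithmetic; the types and names are illustrative stand-ins for the JIT's regMaskTP and genCountBits, not the actual implementation.

#include <cstdint>

// Stand-in for the JIT's register mask type (regMaskTP).
typedef uint64_t RegMaskSketch;

// Counts set bits, standing in for genCountBits.
static unsigned popCountSketch(RegMaskSketch mask)
{
    unsigned count = 0;
    while (mask != 0)
    {
        mask &= (mask - 1); // clear the lowest set bit
        count++;
    }
    return count;
}

// The minimum number of candidate registers a RefPosition must retain under
// stress: the sum of the node's uses, defs and internal temps, plus the
// consuming node's kill set for a delay-free use, since such a use stays live
// across the registers that the consuming node kills.
static unsigned minRegCandidateCountSketch(
    unsigned consume, unsigned produce, unsigned internalCount, bool isDelayFreeUse, RegMaskSketch killMask)
{
    unsigned minRegCount = consume + produce + internalCount;
    if (isDelayFreeUse && (killMask != 0))
    {
        minRegCount += popCountSketch(killMask);
    }
    return minRegCount;
}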
-// Make a RegRecord for each physical register.
-void LinearScan::buildPhysRegRecords()
-{
- RegisterType regType = IntRegisterType;
- for (regNumber reg = REG_FIRST; reg < ACTUAL_REG_COUNT; reg = REG_NEXT(reg))
- {
- RegRecord* curr = &physRegs[reg];
- curr->init(reg);
- }
-}
-
-BasicBlock* getNonEmptyBlock(BasicBlock* block)
-{
- while (block != nullptr && block->bbTreeList == nullptr)
- {
- BasicBlock* nextBlock = block->bbNext;
- // Note that here we use the version of NumSucc that does not take a compiler.
- // That way this doesn't have to take a compiler, or be an instance method, e.g. of LinearScan.
- // If we have an empty block, it must have jump type BBJ_NONE or BBJ_ALWAYS, in which
- // case we don't need the version that takes a compiler.
- assert(block->NumSucc() == 1 && ((block->bbJumpKind == BBJ_ALWAYS) || (block->bbJumpKind == BBJ_NONE)));
- // sometimes the first block is empty and ends with an uncond branch
- // assert( block->GetSucc(0) == nextBlock);
- block = nextBlock;
- }
- assert(block != nullptr && block->bbTreeList != nullptr);
- return block;
-}
-
-//------------------------------------------------------------------------
-// insertZeroInitRefPositions: Handle lclVars that are live-in to the first block
-//
-// Notes:
-// Prior to calling this method, 'currentLiveVars' must be set to the set of register
-// candidate variables that are liveIn to the first block.
-// For each register candidate that is live-in to the first block:
-// - If it is a GC ref, or if compInitMem is set, a ZeroInit RefPosition will be created.
-// - Otherwise, it will be marked as spilled, since it will not be assigned a register
-// on entry and will be loaded from memory on the undefined path.
-// Note that, when the compInitMem option is not set, we may encounter these on
-// paths that are protected by the same condition as an earlier def. However, since
-// we don't do the analysis to determine this - and couldn't rely on always identifying
-// such cases even if we tried - we must conservatively treat the undefined path as
-// being possible. This is a relatively rare case, so the introduced conservatism is
-// not expected to warrant the analysis required to determine the best placement of
-// an initialization.
-//
-void LinearScan::insertZeroInitRefPositions()
-{
- assert(enregisterLocalVars);
-#ifdef DEBUG
- VARSET_TP expectedLiveVars(VarSetOps::Intersection(compiler, registerCandidateVars, compiler->fgFirstBB->bbLiveIn));
- assert(VarSetOps::Equal(compiler, currentLiveVars, expectedLiveVars));
-#endif // DEBUG
-
- // insert defs for this, then a block boundary
-
- VarSetOps::Iter iter(compiler, currentLiveVars);
- unsigned varIndex = 0;
- while (iter.NextElem(&varIndex))
- {
- unsigned varNum = compiler->lvaTrackedToVarNum[varIndex];
- LclVarDsc* varDsc = compiler->lvaTable + varNum;
- if (!varDsc->lvIsParam && isCandidateVar(varDsc))
- {
- JITDUMP("V%02u was live in to first block:", varNum);
- Interval* interval = getIntervalForLocalVar(varIndex);
- if (compiler->info.compInitMem || varTypeIsGC(varDsc->TypeGet()))
- {
- JITDUMP(" creating ZeroInit\n");
- GenTree* firstNode = getNonEmptyBlock(compiler->fgFirstBB)->firstNode();
- RefPosition* pos =
- newRefPosition(interval, MinLocation, RefTypeZeroInit, firstNode, allRegs(interval->registerType));
- varDsc->lvMustInit = true;
- }
- else
- {
- setIntervalAsSpilled(interval);
- JITDUMP(" marking as spilled\n");
- }
- }
- }
-}
-
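Read as a predicate, the per-variable decision above is compact. A minimal sketch follows, with simplified stand-in fields rather than the JIT's LclVarDsc:

// Stand-in for the LclVarDsc fields that the decision consults.
struct CandidateVarSketch
{
    bool isParam; // parameters are defined on entry and are never zero-initialized here
    bool isGCRef; // GC references must not contain garbage on any path
};

// A live-in register candidate gets a ZeroInit RefPosition only when the
// undefined path must still see a well-defined value: either the frame is
// zero-initialized (compInitMem) or the variable holds a GC reference.
// Otherwise the interval is marked as spilled and reloaded at its first use.
static bool wantsZeroInitSketch(const CandidateVarSketch& var, bool compInitMem)
{
    return !var.isParam && (compInitMem || var.isGCRef);
}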
-#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
-// -----------------------------------------------------------------------
-// Sets the register state for an argument of type STRUCT for System V systems.
-// See Compiler::raUpdateRegStateForArg(RegState *regState, LclVarDsc *argDsc) in regalloc.cpp
- // for how the state for an argument is updated for unix non-structs and Windows AMD64 structs.
-void LinearScan::unixAmd64UpdateRegStateForArg(LclVarDsc* argDsc)
-{
- assert(varTypeIsStruct(argDsc));
- RegState* intRegState = &compiler->codeGen->intRegState;
- RegState* floatRegState = &compiler->codeGen->floatRegState;
-
- if ((argDsc->lvArgReg != REG_STK) && (argDsc->lvArgReg != REG_NA))
- {
- if (genRegMask(argDsc->lvArgReg) & (RBM_ALLFLOAT))
- {
- assert(genRegMask(argDsc->lvArgReg) & (RBM_FLTARG_REGS));
- floatRegState->rsCalleeRegArgMaskLiveIn |= genRegMask(argDsc->lvArgReg);
- }
- else
- {
- assert(genRegMask(argDsc->lvArgReg) & (RBM_ARG_REGS));
- intRegState->rsCalleeRegArgMaskLiveIn |= genRegMask(argDsc->lvArgReg);
- }
- }
-
- if ((argDsc->lvOtherArgReg != REG_STK) && (argDsc->lvOtherArgReg != REG_NA))
- {
- if (genRegMask(argDsc->lvOtherArgReg) & (RBM_ALLFLOAT))
- {
- assert(genRegMask(argDsc->lvOtherArgReg) & (RBM_FLTARG_REGS));
- floatRegState->rsCalleeRegArgMaskLiveIn |= genRegMask(argDsc->lvOtherArgReg);
- }
- else
- {
- assert(genRegMask(argDsc->lvOtherArgReg) & (RBM_ARG_REGS));
- intRegState->rsCalleeRegArgMaskLiveIn |= genRegMask(argDsc->lvOtherArgReg);
- }
- }
-}
-
-#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
-
-//------------------------------------------------------------------------
-// updateRegStateForArg: Updates rsCalleeRegArgMaskLiveIn for the appropriate
-// regState (either compiler->intRegState or compiler->floatRegState),
-// with the lvArgReg on "argDsc"
-//
-// Arguments:
-// argDsc - the argument for which the state is to be updated.
-//
-// Return Value: None
-//
-// Assumptions:
-// The argument is live on entry to the function
-// (or is untracked and therefore assumed live)
-//
-// Notes:
-// This relies on a method in regAlloc.cpp that is shared between LSRA
-// and regAlloc. It is further abstracted here because regState is updated
-// separately for tracked and untracked variables in LSRA.
-//
-void LinearScan::updateRegStateForArg(LclVarDsc* argDsc)
-{
-#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
- // For System V AMD64 calls the argDsc can have 2 registers (for structs).
- // Handle them here.
- if (varTypeIsStruct(argDsc))
- {
- unixAmd64UpdateRegStateForArg(argDsc);
- }
- else
-#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
- {
- RegState* intRegState = &compiler->codeGen->intRegState;
- RegState* floatRegState = &compiler->codeGen->floatRegState;
- // In the case of AMD64 we'll still use the floating point registers
- // to model the register usage for arguments on vararg calls, so
- // we will ignore the varargs condition to determine whether we use
- // XMM registers or not for setting up the call.
- bool isFloat = (isFloatRegType(argDsc->lvType)
-#ifndef _TARGET_AMD64_
- && !compiler->info.compIsVarArgs
-#endif
- && !compiler->opts.compUseSoftFP);
-
- if (argDsc->lvIsHfaRegArg())
- {
- isFloat = true;
- }
-
- if (isFloat)
- {
- JITDUMP("Float arg V%02u in reg %s\n", (argDsc - compiler->lvaTable), getRegName(argDsc->lvArgReg));
- compiler->raUpdateRegStateForArg(floatRegState, argDsc);
- }
- else
- {
- JITDUMP("Int arg V%02u in reg %s\n", (argDsc - compiler->lvaTable), getRegName(argDsc->lvArgReg));
-#if FEATURE_MULTIREG_ARGS
- if (argDsc->lvOtherArgReg != REG_NA)
- {
- JITDUMP("(second half) in reg %s\n", getRegName(argDsc->lvOtherArgReg));
- }
-#endif // FEATURE_MULTIREG_ARGS
- compiler->raUpdateRegStateForArg(intRegState, argDsc);
- }
- }
-}
-
//------------------------------------------------------------------------
// findPredBlockForLiveIn: Determine which block should be used for the register locations of the live-in variables.
//
@@ -4371,448 +2380,6 @@ BasicBlock* LinearScan::findPredBlockForLiveIn(BasicBlock* block,
return predBlock;
}
-void LinearScan::buildIntervals()
-{
- BasicBlock* block;
-
- JITDUMP("\nbuildIntervals ========\n");
-
- // Now build (empty) records for all of the physical registers
- buildPhysRegRecords();
-
-#ifdef DEBUG
- if (VERBOSE)
- {
- printf("\n-----------------\n");
- printf("LIVENESS:\n");
- printf("-----------------\n");
- foreach_block(compiler, block)
- {
- printf("BB%02u use def in out\n", block->bbNum);
- dumpConvertedVarSet(compiler, block->bbVarUse);
- printf("\n");
- dumpConvertedVarSet(compiler, block->bbVarDef);
- printf("\n");
- dumpConvertedVarSet(compiler, block->bbLiveIn);
- printf("\n");
- dumpConvertedVarSet(compiler, block->bbLiveOut);
- printf("\n");
- }
- }
-#endif // DEBUG
-
-#if DOUBLE_ALIGN
- // We will determine whether we should double align the frame during
- // identifyCandidates(), but we initially assume that we will not.
- doDoubleAlign = false;
-#endif
-
- identifyCandidates();
-
- // Figure out if we're going to use a frame pointer. We need to do this before building
- // the ref positions, because those objects will embed the frame register in various register masks
- // if the frame pointer is not reserved. If we decide to have a frame pointer, setFrameType() will
- // remove the frame pointer from the masks.
- setFrameType();
-
- DBEXEC(VERBOSE, TupleStyleDump(LSRA_DUMP_PRE));
-
- // second part:
- JITDUMP("\nbuildIntervals second part ========\n");
- currentLoc = 0;
- // TODO-Cleanup: This duplicates prior behavior where entry (ParamDef) RefPositions were
- // being assigned the bbNum of the last block traversed in the 2nd phase of Lowering.
- // Previously, the block sequencing was done for the (formerly separate) TreeNodeInfoInit pass,
- // and the curBBNum was left as the last block sequenced. This block was then used to set the
- // weight for the entry (ParamDef) RefPositions. It would be logical to set this to the
- // normalized entry weight (compiler->fgCalledCount), but that results in a net regression.
- if (!blockSequencingDone)
- {
- setBlockSequence();
- }
- curBBNum = blockSequence[bbSeqCount - 1]->bbNum;
-
- // Next, create ParamDef RefPositions for all the tracked parameters, in order of their varIndex.
-
- LclVarDsc* argDsc;
- unsigned int lclNum;
-
- RegState* intRegState = &compiler->codeGen->intRegState;
- RegState* floatRegState = &compiler->codeGen->floatRegState;
- intRegState->rsCalleeRegArgMaskLiveIn = RBM_NONE;
- floatRegState->rsCalleeRegArgMaskLiveIn = RBM_NONE;
-
- for (unsigned int varIndex = 0; varIndex < compiler->lvaTrackedCount; varIndex++)
- {
- lclNum = compiler->lvaTrackedToVarNum[varIndex];
- argDsc = &(compiler->lvaTable[lclNum]);
-
- if (!argDsc->lvIsParam)
- {
- continue;
- }
-
- // Only reserve a register if the argument is actually used.
- // Is it dead on entry? If compJmpOpUsed is true, then the arguments
- // have to be kept alive, so we have to consider it as live on entry.
- // Use lvRefCnt instead of checking bbLiveIn because if it's volatile we
- // won't have done dataflow on it, but it needs to be marked as live-in so
- // it will get saved in the prolog.
- if (!compiler->compJmpOpUsed && argDsc->lvRefCnt == 0 && !compiler->opts.compDbgCode)
- {
- continue;
- }
-
- if (argDsc->lvIsRegArg)
- {
- updateRegStateForArg(argDsc);
- }
-
- if (isCandidateVar(argDsc))
- {
- Interval* interval = getIntervalForLocalVar(varIndex);
- regMaskTP mask = allRegs(TypeGet(argDsc));
- if (argDsc->lvIsRegArg)
- {
- // Set this interval as currently assigned to that register
- regNumber inArgReg = argDsc->lvArgReg;
- assert(inArgReg < REG_COUNT);
- mask = genRegMask(inArgReg);
- assignPhysReg(inArgReg, interval);
- }
- RefPosition* pos = newRefPosition(interval, MinLocation, RefTypeParamDef, nullptr, mask);
- }
- else if (varTypeIsStruct(argDsc->lvType))
- {
- for (unsigned fieldVarNum = argDsc->lvFieldLclStart;
- fieldVarNum < argDsc->lvFieldLclStart + argDsc->lvFieldCnt; ++fieldVarNum)
- {
- LclVarDsc* fieldVarDsc = &(compiler->lvaTable[fieldVarNum]);
- if (fieldVarDsc->lvLRACandidate)
- {
- assert(fieldVarDsc->lvTracked);
- Interval* interval = getIntervalForLocalVar(fieldVarDsc->lvVarIndex);
- RefPosition* pos =
- newRefPosition(interval, MinLocation, RefTypeParamDef, nullptr, allRegs(TypeGet(fieldVarDsc)));
- }
- }
- }
- else
- {
- // We can overwrite the register (i.e. codegen saves it on entry)
- assert(argDsc->lvRefCnt == 0 || !argDsc->lvIsRegArg || argDsc->lvDoNotEnregister ||
- !argDsc->lvLRACandidate || (varTypeIsFloating(argDsc->TypeGet()) && compiler->opts.compDbgCode));
- }
- }
-
- // Now set up the reg state for the non-tracked args
- // (We do this here because we want to generate the ParamDef RefPositions in tracked
- // order, so that loop doesn't hit the non-tracked args)
-
- for (unsigned argNum = 0; argNum < compiler->info.compArgsCount; argNum++, argDsc++)
- {
- argDsc = &(compiler->lvaTable[argNum]);
-
- if (argDsc->lvPromotedStruct())
- {
- noway_assert(argDsc->lvFieldCnt == 1); // We only handle one field here
-
- unsigned fieldVarNum = argDsc->lvFieldLclStart;
- argDsc = &(compiler->lvaTable[fieldVarNum]);
- }
- noway_assert(argDsc->lvIsParam);
- if (!argDsc->lvTracked && argDsc->lvIsRegArg)
- {
- updateRegStateForArg(argDsc);
- }
- }
-
- // If there is a secret stub param, it is also live in
- if (compiler->info.compPublishStubParam)
- {
- intRegState->rsCalleeRegArgMaskLiveIn |= RBM_SECRET_STUB_PARAM;
- }
-
- LocationInfoListNodePool listNodePool(compiler, 8);
-
- BasicBlock* predBlock = nullptr;
- BasicBlock* prevBlock = nullptr;
-
- // Initialize currentLiveVars to the empty set. We will set it to the current
- // live-in at the entry to each block (this will include the incoming args on
- // the first block).
- VarSetOps::AssignNoCopy(compiler, currentLiveVars, VarSetOps::MakeEmpty(compiler));
-
- for (block = startBlockSequence(); block != nullptr; block = moveToNextBlock())
- {
- JITDUMP("\nNEW BLOCK BB%02u\n", block->bbNum);
-
- bool predBlockIsAllocated = false;
- predBlock = findPredBlockForLiveIn(block, prevBlock DEBUGARG(&predBlockIsAllocated));
- if (predBlock)
- {
- JITDUMP("\n\nSetting BB%02u as the predecessor for determining incoming variable registers of BB%02u\n",
- block->bbNum, predBlock->bbNum);
- assert(predBlock->bbNum <= bbNumMaxBeforeResolution);
- blockInfo[block->bbNum].predBBNum = predBlock->bbNum;
- }
-
- if (enregisterLocalVars)
- {
- VarSetOps::AssignNoCopy(compiler, currentLiveVars,
- VarSetOps::Intersection(compiler, registerCandidateVars, block->bbLiveIn));
-
- if (block == compiler->fgFirstBB)
- {
- insertZeroInitRefPositions();
- // The first real location is at 1; 0 is for the entry.
- currentLoc = 1;
- }
-
- // Any lclVars live-in to a block are resolution candidates.
- VarSetOps::UnionD(compiler, resolutionCandidateVars, currentLiveVars);
-
- // Determine if we need any DummyDefs.
- // We need DummyDefs for cases where "predBlock" isn't really a predecessor.
- // Note that it's possible to have uses of uninitialized variables, in which case even the first
- // block may require DummyDefs, which we are not currently adding - this means that these variables
- // will always be considered to be in memory on entry (and reloaded when the use is encountered).
- // TODO-CQ: Consider how best to tune this. Currently, if we create DummyDefs for uninitialized
- // variables (which may actually be initialized along the dynamically executed paths, but not
- // on all static paths), we wind up with excessive liveranges for some of these variables.
- VARSET_TP newLiveIn(VarSetOps::MakeCopy(compiler, currentLiveVars));
- if (predBlock)
- {
- // Compute set difference: newLiveIn = currentLiveVars - predBlock->bbLiveOut
- VarSetOps::DiffD(compiler, newLiveIn, predBlock->bbLiveOut);
- }
- bool needsDummyDefs = (!VarSetOps::IsEmpty(compiler, newLiveIn) && block != compiler->fgFirstBB);
-
- // Create dummy def RefPositions
-
- if (needsDummyDefs)
- {
- // If we are using locations from a predecessor, we should never require DummyDefs.
- assert(!predBlockIsAllocated);
-
- JITDUMP("Creating dummy definitions\n");
- VarSetOps::Iter iter(compiler, newLiveIn);
- unsigned varIndex = 0;
- while (iter.NextElem(&varIndex))
- {
- unsigned varNum = compiler->lvaTrackedToVarNum[varIndex];
- LclVarDsc* varDsc = compiler->lvaTable + varNum;
- // Add a dummyDef for any candidate vars that are in the "newLiveIn" set.
- // If this is the entry block, don't add any incoming parameters (they're handled with ParamDefs).
- if (isCandidateVar(varDsc) && (predBlock != nullptr || !varDsc->lvIsParam))
- {
- Interval* interval = getIntervalForLocalVar(varIndex);
- RefPosition* pos = newRefPosition(interval, currentLoc, RefTypeDummyDef, nullptr,
- allRegs(interval->registerType));
- }
- }
- JITDUMP("Finished creating dummy definitions\n\n");
- }
- }
-
- // Add a dummy RefPosition to mark the block boundary.
- // Note that we do this AFTER adding the exposed uses above, because the
- // register positions for those exposed uses need to be recorded at
- // this point.
-
- RefPosition* pos = newRefPosition((Interval*)nullptr, currentLoc, RefTypeBB, nullptr, RBM_NONE);
- currentLoc += 2;
- JITDUMP("\n");
-
- LIR::Range& blockRange = LIR::AsRange(block);
- for (GenTree* node : blockRange.NonPhiNodes())
- {
- // We increment the number position of each tree node by 2 so that a tree that implicitly does a
- // dual definition of temps (the long case) has an idle spot for the second definition, rather than
- // requiring messy adjustments to the numbering if we incremented by only one.
- CLANG_FORMAT_COMMENT_ANCHOR;
-
-#ifdef DEBUG
- node->gtSeqNum = currentLoc;
- // In DEBUG, we want to set the gtRegTag to GT_REGTAG_REG, so that subsequent dumps will show the
- // register value.
- // Although this looks like a no-op it sets the tag.
- node->gtRegNum = node->gtRegNum;
-#endif
-
- buildRefPositionsForNode(node, block, listNodePool, currentLoc);
-
-#ifdef DEBUG
- if (currentLoc > maxNodeLocation)
- {
- maxNodeLocation = currentLoc;
- }
-#endif // DEBUG
- currentLoc += 2;
- }
-
- // Note: the visited set is cleared in LinearScan::doLinearScan()
- markBlockVisited(block);
- assert(defList.IsEmpty());
-
- if (enregisterLocalVars)
- {
- // Insert exposed uses for a lclVar that is live-out of 'block' but not live-in to the
- // next block, or any unvisited successors.
- // This will address lclVars that are live on a backedge, as well as those that are kept
- // live at a GT_JMP.
- //
- // Blocks ending with "jmp method" are marked as BBJ_HAS_JMP,
- // and jmp call is represented using GT_JMP node which is a leaf node.
- // Liveness phase keeps all the arguments of the method live till the end of
- // block by adding them to liveout set of the block containing GT_JMP.
- //
- // The target of a GT_JMP implicitly uses all the current method arguments, however
- // there are no actual references to them. This can cause LSRA to assert, because
- // the variables are live but it sees no references. In order to correctly model the
- // liveness of these arguments, we add dummy exposed uses, in the same manner as for
- // backward branches. This will happen automatically via expUseSet.
- //
- // Note that a block ending with GT_JMP has no successors and hence the variables
- // for which dummy use ref positions are added are arguments of the method.
-
- VARSET_TP expUseSet(VarSetOps::MakeCopy(compiler, block->bbLiveOut));
- VarSetOps::IntersectionD(compiler, expUseSet, registerCandidateVars);
- BasicBlock* nextBlock = getNextBlock();
- if (nextBlock != nullptr)
- {
- VarSetOps::DiffD(compiler, expUseSet, nextBlock->bbLiveIn);
- }
- for (BasicBlock* succ : block->GetAllSuccs(compiler))
- {
- if (VarSetOps::IsEmpty(compiler, expUseSet))
- {
- break;
- }
-
- if (isBlockVisited(succ))
- {
- continue;
- }
- VarSetOps::DiffD(compiler, expUseSet, succ->bbLiveIn);
- }
-
- if (!VarSetOps::IsEmpty(compiler, expUseSet))
- {
- JITDUMP("Exposed uses:");
- VarSetOps::Iter iter(compiler, expUseSet);
- unsigned varIndex = 0;
- while (iter.NextElem(&varIndex))
- {
- unsigned varNum = compiler->lvaTrackedToVarNum[varIndex];
- LclVarDsc* varDsc = compiler->lvaTable + varNum;
- assert(isCandidateVar(varDsc));
- Interval* interval = getIntervalForLocalVar(varIndex);
- RefPosition* pos =
- newRefPosition(interval, currentLoc, RefTypeExpUse, nullptr, allRegs(interval->registerType));
- JITDUMP(" V%02u", varNum);
- }
- JITDUMP("\n");
- }
-
- // Clear the "last use" flag on any vars that are live-out from this block.
- {
- VarSetOps::Iter iter(compiler, block->bbLiveOut);
- unsigned varIndex = 0;
- while (iter.NextElem(&varIndex))
- {
- unsigned varNum = compiler->lvaTrackedToVarNum[varIndex];
- LclVarDsc* const varDsc = &compiler->lvaTable[varNum];
- if (isCandidateVar(varDsc))
- {
- RefPosition* const lastRP = getIntervalForLocalVar(varIndex)->lastRefPosition;
- if ((lastRP != nullptr) && (lastRP->bbNum == block->bbNum))
- {
- lastRP->lastUse = false;
- }
- }
- }
- }
-
-#ifdef DEBUG
- checkLastUses(block);
-
- if (VERBOSE)
- {
- printf("use: ");
- dumpConvertedVarSet(compiler, block->bbVarUse);
- printf("\ndef: ");
- dumpConvertedVarSet(compiler, block->bbVarDef);
- printf("\n");
- }
-#endif // DEBUG
- }
-
- prevBlock = block;
- }
-
- if (enregisterLocalVars)
- {
- if (compiler->lvaKeepAliveAndReportThis())
- {
- // If we need to KeepAliveAndReportThis, add a dummy exposed use of it at the end
- unsigned keepAliveVarNum = compiler->info.compThisArg;
- assert(compiler->info.compIsStatic == false);
- LclVarDsc* varDsc = compiler->lvaTable + keepAliveVarNum;
- if (isCandidateVar(varDsc))
- {
- JITDUMP("Adding exposed use of this, for lvaKeepAliveAndReportThis\n");
- Interval* interval = getIntervalForLocalVar(varDsc->lvVarIndex);
- RefPosition* pos =
- newRefPosition(interval, currentLoc, RefTypeExpUse, nullptr, allRegs(interval->registerType));
- }
- }
-
-#ifdef DEBUG
- if (getLsraExtendLifeTimes())
- {
- LclVarDsc* varDsc;
- for (lclNum = 0, varDsc = compiler->lvaTable; lclNum < compiler->lvaCount; lclNum++, varDsc++)
- {
- if (varDsc->lvLRACandidate)
- {
- JITDUMP("Adding exposed use of V%02u for LsraExtendLifetimes\n", lclNum);
- Interval* interval = getIntervalForLocalVar(varDsc->lvVarIndex);
- RefPosition* pos =
- newRefPosition(interval, currentLoc, RefTypeExpUse, nullptr, allRegs(interval->registerType));
- }
- }
- }
-#endif // DEBUG
- }
-
- // If the last block has successors, create a RefTypeBB to record
- // what's live
-
- if (prevBlock->NumSucc(compiler) > 0)
- {
- RefPosition* pos = newRefPosition((Interval*)nullptr, currentLoc, RefTypeBB, nullptr, RBM_NONE);
- }
-
-#ifdef DEBUG
- // Make sure we don't have any blocks that were not visited
- foreach_block(compiler, block)
- {
- assert(isBlockVisited(block));
- }
-
- if (VERBOSE)
- {
- lsraDumpIntervals("BEFORE VALIDATING INTERVALS");
- dumpRefPositions("BEFORE VALIDATING INTERVALS");
- validateIntervals();
- }
-#endif // DEBUG
-}
-
#ifdef DEBUG
void LinearScan::dumpVarRefPositions(const char* title)
{
@@ -4838,46 +2405,6 @@ void LinearScan::dumpVarRefPositions(const char* title)
}
}
-void LinearScan::validateIntervals()
-{
- if (enregisterLocalVars)
- {
- for (unsigned i = 0; i < compiler->lvaTrackedCount; i++)
- {
- if (!compiler->lvaTable[compiler->lvaTrackedToVarNum[i]].lvLRACandidate)
- {
- continue;
- }
- Interval* interval = getIntervalForLocalVar(i);
-
- bool defined = false;
- printf("-----------------\n");
- for (RefPosition* ref = interval->firstRefPosition; ref != nullptr; ref = ref->nextRefPosition)
- {
- ref->dump();
- RefType refType = ref->refType;
- if (!defined && RefTypeIsUse(refType))
- {
- if (compiler->info.compMethodName != nullptr)
- {
- printf("%s: ", compiler->info.compMethodName);
- }
- printf("LocalVar V%02u: undefined use at %u\n", interval->varNum, ref->nodeLocation);
- }
- // Note that there can be multiple last uses if they are on disjoint paths,
- // so we can't really check the lastUse flag
- if (ref->lastUse)
- {
- defined = false;
- }
- if (RefTypeIsDef(refType))
- {
- defined = true;
- }
- }
- }
- }
-}
#endif // DEBUG
// Set the default rpFrameType based upon codeGen->isFramePointerRequired()
@@ -10847,206 +8374,6 @@ void LinearScan::resolveEdge(BasicBlock* fromBlock,
}
}
-//------------------------------------------------------------------------
-// GetIndirInfo: Get the source registers for an indirection that might be contained.
-//
-// Arguments:
- // indirTree - The indirection of interest
-//
-// Return Value:
-// The number of source registers used by the *parent* of this node.
-//
-// Notes:
-// Adds the defining node for each register to the useList.
-//
-int LinearScan::GetIndirInfo(GenTreeIndir* indirTree)
-{
- GenTree* const addr = indirTree->gtOp1;
- if (!addr->isContained())
- {
- appendLocationInfoToList(addr);
- return 1;
- }
- if (!addr->OperIs(GT_LEA))
- {
- return 0;
- }
-
- GenTreeAddrMode* const addrMode = addr->AsAddrMode();
-
- unsigned srcCount = 0;
- if ((addrMode->Base() != nullptr) && !addrMode->Base()->isContained())
- {
- appendLocationInfoToList(addrMode->Base());
- srcCount++;
- }
- if ((addrMode->Index() != nullptr) && !addrMode->Index()->isContained())
- {
- appendLocationInfoToList(addrMode->Index());
- srcCount++;
- }
- return srcCount;
-}
-
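For orientation, the counting rule above can be tabulated by address shape. The sketch below uses stand-in types rather than GenTree and is illustrative only:

// Stand-in for the address shapes that GetIndirInfo distinguishes.
struct AddrShapeSketch
{
    bool isContained; // a non-contained address is itself one register source
    bool isLea;       // only a contained GT_LEA contributes component sources
    bool hasBase;     // base present and not contained
    bool hasIndex;    // index present and not contained
};

// Register sources consumed by the parent indirection:
//   IND(reg)                     -> 1 (the address itself)
//   IND(LEA(base, index, scale)) -> 2 (base and index)
//   IND(LEA(base + offset))      -> 1 (base only)
//   IND(contained non-LEA)       -> 0
static int indirSourceCountSketch(const AddrShapeSketch& addr)
{
    if (!addr.isContained)
    {
        return 1;
    }
    if (!addr.isLea)
    {
        return 0;
    }
    return (addr.hasBase ? 1 : 0) + (addr.hasIndex ? 1 : 0);
}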
-//------------------------------------------------------------------------
-// GetOperandInfo: Get the source registers for an operand that might be contained.
-//
-// Arguments:
- // node - The node of interest
-//
-// Return Value:
-// The number of source registers used by the *parent* of this node.
-//
-// Notes:
- // Adds the defining node for each register to the useList member.
-//
-int LinearScan::GetOperandInfo(GenTree* node)
-{
- if (!node->isContained())
- {
- appendLocationInfoToList(node);
- return 1;
- }
-
-#if !defined(_TARGET_64BIT_)
- if (node->OperIs(GT_LONG))
- {
- return appendBinaryLocationInfoToList(node->AsOp());
- }
-#endif // !defined(_TARGET_64BIT_)
- if (node->OperIsIndir())
- {
- const unsigned srcCount = GetIndirInfo(node->AsIndir());
- return srcCount;
- }
-
- return 0;
-}
-
-//------------------------------------------------------------------------
-// GetOperandInfo: Get the source registers for an operand that might be contained.
-//
-// Arguments:
-// node - The node of interest
-// useList - The list of uses for the node that we're currently processing
-//
-// Return Value:
-// The number of source registers used by the *parent* of this node.
-//
-// Notes:
-// Adds the defining node for each register to the useList.
-//
-int LinearScan::GetOperandInfo(GenTree* node, LocationInfoListNode** pFirstInfo)
-{
- LocationInfoListNode* prevLast = useList.Last();
- int srcCount = GetOperandInfo(node);
- if (prevLast == nullptr)
- {
- *pFirstInfo = useList.Begin();
- }
- else
- {
- *pFirstInfo = prevLast->Next();
- }
- return srcCount;
-}
-
-void TreeNodeInfo::Initialize(LinearScan* lsra, GenTree* node)
-{
- _dstCount = 0;
- _srcCount = 0;
- _internalIntCount = 0;
- _internalFloatCount = 0;
-
- isLocalDefUse = false;
- isDelayFree = false;
- hasDelayFreeSrc = false;
- isTgtPref = false;
- isInternalRegDelayFree = false;
-
- regMaskTP dstCandidates;
-
- // if there is a reg indicated on the tree node, use that for dstCandidates
- // the exception is the NOP, which sometimes shows up around late args.
- // TODO-Cleanup: get rid of those NOPs.
- if (node->gtRegNum == REG_STK)
- {
- dstCandidates = RBM_NONE;
- }
- else if (node->gtRegNum == REG_NA || node->gtOper == GT_NOP)
- {
-#ifdef ARM_SOFTFP
- if (node->OperGet() == GT_PUTARG_REG)
- {
- dstCandidates = lsra->allRegs(TYP_INT);
- }
- else
-#endif
- {
- dstCandidates = lsra->allRegs(node->TypeGet());
- }
- }
- else
- {
- dstCandidates = genRegMask(node->gtRegNum);
- }
-
- setDstCandidates(lsra, dstCandidates);
- srcCandsIndex = dstCandsIndex;
-
- setInternalCandidates(lsra, lsra->allRegs(TYP_INT));
-
-#ifdef DEBUG
- isInitialized = true;
-#endif
-
- assert(IsValid(lsra));
-}
-
-regMaskTP TreeNodeInfo::getSrcCandidates(LinearScan* lsra)
-{
- return lsra->GetRegMaskForIndex(srcCandsIndex);
-}
-
-void TreeNodeInfo::setSrcCandidates(LinearScan* lsra, regMaskTP mask)
-{
- LinearScan::RegMaskIndex i = lsra->GetIndexForRegMask(mask);
- assert(FitsIn<unsigned char>(i));
- srcCandsIndex = (unsigned char)i;
-}
-
-regMaskTP TreeNodeInfo::getDstCandidates(LinearScan* lsra)
-{
- return lsra->GetRegMaskForIndex(dstCandsIndex);
-}
-
-void TreeNodeInfo::setDstCandidates(LinearScan* lsra, regMaskTP mask)
-{
- LinearScan::RegMaskIndex i = lsra->GetIndexForRegMask(mask);
- assert(FitsIn<unsigned char>(i));
- dstCandsIndex = (unsigned char)i;
-}
-
-regMaskTP TreeNodeInfo::getInternalCandidates(LinearScan* lsra)
-{
- return lsra->GetRegMaskForIndex(internalCandsIndex);
-}
-
-void TreeNodeInfo::setInternalCandidates(LinearScan* lsra, regMaskTP mask)
-{
- LinearScan::RegMaskIndex i = lsra->GetIndexForRegMask(mask);
- assert(FitsIn<unsigned char>(i));
- internalCandsIndex = (unsigned char)i;
-}
-
-void TreeNodeInfo::addInternalCandidates(LinearScan* lsra, regMaskTP mask)
-{
- LinearScan::RegMaskIndex i = lsra->GetIndexForRegMask(lsra->GetRegMaskForIndex(internalCandsIndex) | mask);
- assert(FitsIn<unsigned char>(i));
- internalCandsIndex = (unsigned char)i;
-}
-
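The accessors above store each candidate set as a one-byte index rather than a full register mask, which is why every setter asserts FitsIn<unsigned char>. A minimal sketch of such an interning table follows; it is illustrative, not the JIT's GetIndexForRegMask/GetRegMaskForIndex implementation:

#include <cassert>
#include <cstdint>
#include <vector>

typedef uint64_t RegMaskSketch;

// Interns full-width register masks so that each TreeNodeInfo field only
// needs to hold a compact one-byte index; duplicate masks share a slot.
class RegMaskTableSketch
{
    std::vector<RegMaskSketch> m_masks;

public:
    uint8_t GetIndexForMask(RegMaskSketch mask)
    {
        for (size_t i = 0; i < m_masks.size(); i++)
        {
            if (m_masks[i] == mask)
            {
                return static_cast<uint8_t>(i);
            }
        }
        m_masks.push_back(mask);
        assert(m_masks.size() <= 256); // indices must fit in an unsigned char
        return static_cast<uint8_t>(m_masks.size() - 1);
    }

    RegMaskSketch GetMaskForIndex(uint8_t index) const
    {
        assert(index < m_masks.size());
        return m_masks[index];
    }
};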
#if TRACK_LSRA_STATS
// ----------------------------------------------------------
// updateLsraStat: Increment LSRA stat counter.
@@ -11616,18 +8943,7 @@ void LinearScan::lsraDispNode(GenTree* tree, LsraTupleDumpMode mode, bool hasDes
}
//------------------------------------------------------------------------
-// ComputeOperandDstCount: computes the number of registers defined by a
-// node.
-//
-// For most nodes, this is simple:
-// - Nodes that do not produce values (e.g. stores and other void-typed
-// nodes) and nodes that immediately use the registers they define
-// produce no registers
-// - Nodes that are marked as defining N registers define N registers.
-//
-// For contained nodes, however, things are more complicated: for purposes
-// of bookkeeping, a contained node is treated as producing the transitive
-// closure of the registers produced by its sources.
+// DumpOperandDefs: dumps the registers defined by a node.
//
// Arguments:
// operand - The operand whose defined registers are to be dumped.
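The bookkeeping rule described by the removed comment, where a contained node is treated as producing the transitive closure of the registers produced by its sources, is naturally recursive. A sketch with a stand-in node type (not GenTree):

// Stand-in node: just enough structure to express the counting rule.
struct NodeSketch
{
    bool        isContained;   // contained nodes produce no registers themselves
    int         registerCount; // registers this node is marked as defining
    NodeSketch* operands[4];
    int         operandCount;
};

// Registers that 'operand' contributes to its consumer: its own defs when it
// is not contained, otherwise the transitive closure over its own operands.
static int operandDstCountSketch(const NodeSketch* operand)
{
    if (!operand->isContained)
    {
        return operand->registerCount;
    }
    int count = 0;
    for (int i = 0; i < operand->operandCount; i++)
    {
        count += operandDstCountSketch(operand->operands[i]);
    }
    return count;
}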
diff --git a/src/jit/lsra.h b/src/jit/lsra.h
index a4981417cf..1814a55c01 100644
--- a/src/jit/lsra.h
+++ b/src/jit/lsra.h
@@ -30,15 +30,60 @@ const unsigned int MaxLocation = UINT_MAX;
const unsigned int MaxInternalRegisters = 8;
const unsigned int RegisterTypeCount = 2;
+/*****************************************************************************
+* Register types
+*****************************************************************************/
typedef var_types RegisterType;
#define IntRegisterType TYP_INT
#define FloatRegisterType TYP_FLOAT
+//------------------------------------------------------------------------
+// regType: Return the RegisterType to use for a given type
+//
+// Arguments:
+// type - the type of interest
+//
+template <class T>
+RegisterType regType(T type)
+{
+#ifdef FEATURE_SIMD
+ if (varTypeIsSIMD(type))
+ {
+ return FloatRegisterType;
+ }
+#endif // FEATURE_SIMD
+ return varTypeIsFloating(TypeGet(type)) ? FloatRegisterType : IntRegisterType;
+}
+
+//------------------------------------------------------------------------
+// useFloatReg: Check if the given var_type should be allocated to a FloatRegisterType
+//
+inline bool useFloatReg(var_types type)
+{
+ return (regType(type) == FloatRegisterType);
+}
+
+//------------------------------------------------------------------------
+// registerTypesEquivalent: Check to see if two RegisterTypes are equivalent
+//
+inline bool registerTypesEquivalent(RegisterType a, RegisterType b)
+{
+ return varTypeIsIntegralOrI(a) == varTypeIsIntegralOrI(b);
+}
+
+//------------------------------------------------------------------------
+// calleeSaveRegs: Get the set of callee-save registers of the given RegisterType
+//
inline regMaskTP calleeSaveRegs(RegisterType rt)
{
return varTypeIsIntegralOrI(rt) ? RBM_INT_CALLEE_SAVED : RBM_FLT_CALLEE_SAVED;
}
+//------------------------------------------------------------------------
+// LocationInfo: Captures the necessary information for a node that is "in-flight"
+// during `buildIntervals` (i.e. its definition has been encountered,
+// but not its use).
+//
struct LocationInfo
{
Interval* interval;
@@ -87,7 +132,7 @@ public:
// node during `buildIntervals`.
//
// This list of 'LocationInfoListNode's contains the source nodes consumed by
-// a node, and is created by 'TreeNodeInfoInit'.
+// a node, and is created by 'BuildNode'.
//
class LocationInfoList final
{
@@ -228,10 +273,10 @@ public:
}
//------------------------------------------------------------------------
- // GetTreeNodeInfo - retrieve the TreeNodeInfo for the given node
+ // removeListNode - retrieve the TreeNodeInfo for the given node
//
// Notes:
- // The TreeNodeInfoInit methods use this helper to retrieve the TreeNodeInfo for child nodes
+ // The BuildNode methods use this helper to retrieve the TreeNodeInfo for child nodes
// from the useList being constructed. Note that, if the user knows the order of the operands,
// it is expected that they should just retrieve them directly.
@@ -260,7 +305,7 @@ public:
}
prevListNode = listNode;
}
- assert(!"GetTreeNodeInfo didn't find the node");
+ assert(!"removeListNode didn't find the node");
unreached();
}
@@ -268,7 +313,7 @@ public:
// GetTreeNodeInfo - retrieve the TreeNodeInfo for the given node
//
// Notes:
- // The TreeNodeInfoInit methods use this helper to retrieve the TreeNodeInfo for child nodes
+ // The Build methods use this helper to retrieve the TreeNodeInfo for child nodes
// from the useList being constructed. Note that, if the user knows the order of the operands,
// it is expected that they should just retrieve them directly.
@@ -300,6 +345,35 @@ public:
}
};
+//------------------------------------------------------------------------
+// LocationInfoListNodePool: manages a pool of `LocationInfoListNode`
+// values to decrease overall memory usage
+// during `buildIntervals`.
+//
+// `buildIntervals` involves creating a list of location info values per
+// node that either directly produces a set of registers or that is a
+// contained node with register-producing sources. However, these lists
+// are short-lived: they are destroyed once the use of the corresponding
+// node is processed. As such, there is typically only a small number of
+// `LocationInfoListNode` values in use at any given time. Pooling these
+// values avoids otherwise frequent allocations.
+class LocationInfoListNodePool final
+{
+ LocationInfoListNode* m_freeList;
+ Compiler* m_compiler;
+ static const unsigned defaultPreallocation = 8;
+
+public:
+ // Creates a pool of `LocationInfoListNode` values.
+ LocationInfoListNodePool(Compiler* compiler, unsigned preallocate = defaultPreallocation);
+
+ // Fetches an unused node from the pool.
+ LocationInfoListNode* GetNode(LsraLocation l, Interval* i, GenTree* t, unsigned regIdx = 0);
+
+ // Returns a list of nodes to the pool.
+ void ReturnNodes(LocationInfoList& list);
+};
+
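The pool's implementation lives in lsrabuild.cpp and is not shown in this diff; the shape of a free-list pool like this is roughly the following. This is a sketch under that assumption, with a stand-in payload and plain new rather than the compiler's allocator:

// Stand-in list node; the real node carries a location, interval, tree node
// and register index.
struct PoolNodeSketch
{
    PoolNodeSketch* next    = nullptr;
    int             payload = 0;
};

class NodePoolSketch
{
    PoolNodeSketch* m_freeList = nullptr;

public:
    // Fetch a node, allocating only when the free list is empty.
    PoolNodeSketch* GetNode()
    {
        if (m_freeList == nullptr)
        {
            return new PoolNodeSketch();
        }
        PoolNodeSketch* node = m_freeList;
        m_freeList           = node->next;
        return node;
    }

    // Return an entire list in O(1) by splicing it onto the free list.
    void ReturnNodes(PoolNodeSketch* head, PoolNodeSketch* tail)
    {
        tail->next = m_freeList;
        m_freeList = head;
    }
};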
struct LsraBlockInfo
{
// bbNum of the predecessor to use for the register location of live-in variables.
@@ -623,6 +697,11 @@ public:
regMaskTP GetRegMaskForIndex(RegMaskIndex index);
void RemoveRegisterFromMasks(regNumber reg);
+ static bool isSingleRegister(regMaskTP regMask)
+ {
+ return (genExactlyOneBit(regMask));
+ }
+
#ifdef DEBUG
void dspRegisterMaskTable();
#endif // DEBUG
@@ -954,6 +1033,8 @@ private:
#ifdef DEBUG
void checkLastUses(BasicBlock* block);
+ static int ComputeOperandDstCount(GenTree* operand);
+ static int ComputeAvailableSrcCount(GenTree* node);
#endif // DEBUG
void setFrameType();
@@ -993,10 +1074,7 @@ private:
void resolveConflictingDefAndUse(Interval* interval, RefPosition* defRefPosition);
- void buildRefPositionsForNode(GenTree* tree,
- BasicBlock* block,
- LocationInfoListNodePool& listNodePool,
- LsraLocation loc);
+ void buildRefPositionsForNode(GenTree* tree, BasicBlock* block, LsraLocation loc);
#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
VARSET_VALRET_TP buildUpperVectorSaveRefPositions(GenTree* tree, LsraLocation currentLoc);
@@ -1053,7 +1131,11 @@ private:
void freeRegister(RegRecord* physRegRecord);
void freeRegisters(regMaskTP regsToFree);
- var_types getDefType(GenTree* tree);
+ // Get the type that this tree defines.
+ var_types getDefType(GenTree* tree)
+ {
+ return tree->TypeGet();
+ }
RefPosition* defineNewInternalTemp(GenTree* tree, RegisterType regType, regMaskTP regMask);
@@ -1479,19 +1561,27 @@ private:
#endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE
//-----------------------------------------------------------------------
- // TreeNodeInfo methods
+ // Build methods
//-----------------------------------------------------------------------
+ // The listNodePool is used to maintain the TreeNodeInfo for nodes that are "in flight"
+ // i.e. whose consuming node has not yet been handled.
+ LocationInfoListNodePool listNodePool;
+
// The defList is used for the transient TreeNodeInfo that is computed by
- // the TreeNodeInfoInit methods, and used in building RefPositions.
+ // the Build methods, and used in building RefPositions.
// When Def RefPositions are built for a node, their NodeInfo is placed
// in the defList. As the consuming node is handled, it moves the NodeInfo
// into an ordered useList corresponding to the uses for that node.
LocationInfoList defList;
- // The useList is constructed for each node by the TreeNodeInfoInit methods.
+
+ // The useList is constructed for each node by the Build methods.
// It contains the TreeNodeInfo for its operands, in their order of use.
LocationInfoList useList;
+ // During the build phase, this is the NodeInfo for the current node.
+ TreeNodeInfo* currentNodeInfo;
+
// Remove the LocationInfoListNode for the given node from the defList, and put it into the useList.
// The node must not be contained, and must have been processed by buildRefPositionsForNode().
void appendLocationInfoToList(GenTree* node)
@@ -1545,34 +1635,34 @@ private:
}
// This is the main entry point for computing the TreeNodeInfo for a node.
- void TreeNodeInfoInit(GenTree* stmt, TreeNodeInfo* info);
+ void BuildNode(GenTree* stmt);
- void TreeNodeInfoInitCheckByteable(GenTree* tree, TreeNodeInfo* info);
+ void BuildCheckByteable(GenTree* tree);
bool CheckAndSetDelayFree(GenTree* delayUseSrc);
- void TreeNodeInfoInitSimple(GenTree* tree, TreeNodeInfo* info);
+ void BuildSimple(GenTree* tree);
int GetOperandInfo(GenTree* node);
int GetOperandInfo(GenTree* node, LocationInfoListNode** pFirstInfo);
int GetIndirInfo(GenTreeIndir* indirTree);
- void HandleFloatVarArgs(GenTreeCall* call, TreeNodeInfo* info, GenTree* argNode, bool* callHasFloatRegArgs);
+ void HandleFloatVarArgs(GenTreeCall* call, GenTree* argNode, bool* callHasFloatRegArgs);
- void TreeNodeInfoInitStoreLoc(GenTree* tree, TreeNodeInfo* info);
- void TreeNodeInfoInitReturn(GenTree* tree, TreeNodeInfo* info);
+ void BuildStoreLoc(GenTree* tree);
+ void BuildReturn(GenTree* tree);
// This method, unlike the others, returns the number of sources, since it may be called when
// 'tree' is contained.
- int TreeNodeInfoInitShiftRotate(GenTree* tree, TreeNodeInfo* info);
- void TreeNodeInfoInitPutArgReg(GenTreeUnOp* node, TreeNodeInfo* info);
- void TreeNodeInfoInitCall(GenTreeCall* call, TreeNodeInfo* info);
- void TreeNodeInfoInitCmp(GenTree* tree, TreeNodeInfo* info);
- void TreeNodeInfoInitStructArg(GenTree* structArg, TreeNodeInfo* info);
- void TreeNodeInfoInitBlockStore(GenTreeBlk* blkNode, TreeNodeInfo* info);
- void TreeNodeInfoInitModDiv(GenTree* tree, TreeNodeInfo* info);
- void TreeNodeInfoInitIntrinsic(GenTree* tree, TreeNodeInfo* info);
- void TreeNodeInfoInitStoreLoc(GenTreeLclVarCommon* tree, TreeNodeInfo* info);
- void TreeNodeInfoInitIndir(GenTreeIndir* indirTree, TreeNodeInfo* info);
- void TreeNodeInfoInitGCWriteBarrier(GenTree* tree, TreeNodeInfo* info);
- void TreeNodeInfoInitCast(GenTree* tree, TreeNodeInfo* info);
+ int BuildShiftRotate(GenTree* tree);
+ void BuildPutArgReg(GenTreeUnOp* node);
+ void BuildCall(GenTreeCall* call);
+ void BuildCmp(GenTree* tree);
+ void BuildStructArg(GenTree* structArg);
+ void BuildBlockStore(GenTreeBlk* blkNode);
+ void BuildModDiv(GenTree* tree);
+ void BuildIntrinsic(GenTree* tree);
+ void BuildStoreLoc(GenTreeLclVarCommon* tree);
+ void BuildIndir(GenTreeIndir* indirTree);
+ void BuildGCWriteBarrier(GenTree* tree);
+ void BuildCast(GenTree* tree);
#ifdef _TARGET_X86_
bool ExcludeNonByteableRegisters(GenTree* tree);
@@ -1581,23 +1671,23 @@ private:
#if defined(_TARGET_XARCH_)
// returns true if the tree can use the read-modify-write memory instruction form
bool isRMWRegOper(GenTree* tree);
- void TreeNodeInfoInitMul(GenTree* tree, TreeNodeInfo* info);
+ void BuildMul(GenTree* tree);
void SetContainsAVXFlags(bool isFloatingPointType = true, unsigned sizeOfSIMDVector = 0);
#endif // defined(_TARGET_XARCH_)
#ifdef FEATURE_SIMD
- void TreeNodeInfoInitSIMD(GenTreeSIMD* tree, TreeNodeInfo* info);
+ void BuildSIMD(GenTreeSIMD* tree);
#endif // FEATURE_SIMD
#ifdef FEATURE_HW_INTRINSICS
- void TreeNodeInfoInitHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, TreeNodeInfo* info);
+ void BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree);
#endif // FEATURE_HW_INTRINSICS
- void TreeNodeInfoInitPutArgStk(GenTreePutArgStk* argNode, TreeNodeInfo* info);
+ void BuildPutArgStk(GenTreePutArgStk* argNode);
#ifdef _TARGET_ARM_
- void TreeNodeInfoInitPutArgSplit(GenTreePutArgSplit* tree, TreeNodeInfo* info);
+ void BuildPutArgSplit(GenTreePutArgSplit* tree);
#endif
- void TreeNodeInfoInitLclHeap(GenTree* tree, TreeNodeInfo* info);
+ void BuildLclHeap(GenTree* tree);
};
/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
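Taken together, the renamed Build methods shuttle operand info between the defList and useList declared above: a node's defs are appended to the defList, and its consumer later moves them into its own useList. A schematic of that hand-off with a stand-in list keyed by node id (not the JIT's LocationInfoList):

struct UseInfoSketch
{
    int            nodeId; // stands in for the defining GenTree
    UseInfoSketch* next = nullptr;
};

struct InfoListSketch
{
    UseInfoSketch* head = nullptr;

    // Append to the tail, as buildRefPositionsForNode does for a node's defs.
    void Append(UseInfoSketch* info)
    {
        UseInfoSketch** tail = &head;
        while (*tail != nullptr)
        {
            tail = &(*tail)->next;
        }
        info->next = nullptr;
        *tail      = info;
    }

    // Unlink and return the entry for 'nodeId'; this mirrors how a consuming
    // node's Build method moves an operand's def out of the defList and into
    // its own useList.
    UseInfoSketch* Remove(int nodeId)
    {
        for (UseInfoSketch** link = &head; *link != nullptr; link = &(*link)->next)
        {
            if ((*link)->nodeId == nodeId)
            {
                UseInfoSketch* found = *link;
                *link                = found->next;
                return found;
            }
        }
        return nullptr; // an operand is always built before its consumer
    }
};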
diff --git a/src/jit/lsraarm.cpp b/src/jit/lsraarm.cpp
index 6e0cf22d40..fb3df19139 100644
--- a/src/jit/lsraarm.cpp
+++ b/src/jit/lsraarm.cpp
@@ -29,86 +29,9 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
#include "lower.h"
#include "lsra.h"
-//------------------------------------------------------------------------
-// TreeNodeInfoInitReturn: Set the NodeInfo for a GT_RETURN.
-//
-// Arguments:
-// tree - The node of interest
-//
-// Return Value:
-// None.
-//
-void LinearScan::TreeNodeInfoInitReturn(GenTree* tree, TreeNodeInfo* info)
-{
- GenTree* op1 = tree->gtGetOp1();
-
- assert(info->dstCount == 0);
- if (tree->TypeGet() == TYP_LONG)
- {
- assert((op1->OperGet() == GT_LONG) && op1->isContained());
- GenTree* loVal = op1->gtGetOp1();
- GenTree* hiVal = op1->gtGetOp2();
- info->srcCount = 2;
- LocationInfoListNode* loValInfo = getLocationInfo(loVal);
- LocationInfoListNode* hiValInfo = getLocationInfo(hiVal);
- loValInfo->info.setSrcCandidates(this, RBM_LNGRET_LO);
- hiValInfo->info.setSrcCandidates(this, RBM_LNGRET_HI);
- useList.Append(loValInfo);
- useList.Append(hiValInfo);
- }
- else if ((tree->TypeGet() != TYP_VOID) && !op1->isContained())
- {
- regMaskTP useCandidates = RBM_NONE;
-
- info->srcCount = ((tree->TypeGet() == TYP_VOID) || op1->isContained()) ? 0 : 1;
-
- if (varTypeIsStruct(tree))
- {
- // op1 has to be either an lclvar or a multi-reg returning call
- if (op1->OperGet() != GT_LCL_VAR)
- {
- noway_assert(op1->IsMultiRegCall());
-
- ReturnTypeDesc* retTypeDesc = op1->AsCall()->GetReturnTypeDesc();
- info->srcCount = retTypeDesc->GetReturnRegCount();
- useCandidates = retTypeDesc->GetABIReturnRegs();
- }
- }
- else
- {
- // Non-struct type return - determine useCandidates
- switch (tree->TypeGet())
- {
- case TYP_VOID:
- useCandidates = RBM_NONE;
- break;
- case TYP_FLOAT:
- useCandidates = RBM_FLOATRET;
- break;
- case TYP_DOUBLE:
- // We ONLY want the valid double register in the RBM_DOUBLERET mask.
- useCandidates = (RBM_DOUBLERET & RBM_ALLDOUBLE);
- break;
- case TYP_LONG:
- useCandidates = RBM_LNGRET;
- break;
- default:
- useCandidates = RBM_INTRET;
- break;
- }
- }
-
- LocationInfoListNode* locationInfo = getLocationInfo(op1);
- if (useCandidates != RBM_NONE)
- {
- locationInfo->info.setSrcCandidates(this, useCandidates);
- }
- useList.Append(locationInfo);
- }
-}
-
-void LinearScan::TreeNodeInfoInitLclHeap(GenTree* tree, TreeNodeInfo* info)
+void LinearScan::BuildLclHeap(GenTree* tree)
{
+ TreeNodeInfo* info = currentNodeInfo;
assert(info->dstCount == 1);
// Need a variable number of temp regs (see genLclHeap() in codegenarm.cpp):
@@ -190,7 +113,7 @@ void LinearScan::TreeNodeInfoInitLclHeap(GenTree* tree, TreeNodeInfo* info)
}
//------------------------------------------------------------------------
-// TreeNodeInfoInit: Set the register requirements for RA.
+// BuildNode: Set the register requirements for RA.
//
// Notes:
// Takes care of annotating the register requirements
@@ -205,10 +128,11 @@ void LinearScan::TreeNodeInfoInitLclHeap(GenTree* tree, TreeNodeInfo* info)
// requirements needed by LSRA to build the Interval Table (source,
// destination and internal [temp] register counts).
//
-void LinearScan::TreeNodeInfoInit(GenTree* tree, TreeNodeInfo* info)
+void LinearScan::BuildNode(GenTree* tree)
{
- unsigned kind = tree->OperKind();
- RegisterType registerType = TypeGet(tree);
+ TreeNodeInfo* info = currentNodeInfo;
+ unsigned kind = tree->OperKind();
+ RegisterType registerType = TypeGet(tree);
if (tree->isContained())
{
@@ -238,7 +162,7 @@ void LinearScan::TreeNodeInfoInit(GenTree* tree, TreeNodeInfo* info)
case GT_STORE_LCL_FLD:
case GT_STORE_LCL_VAR:
- TreeNodeInfoInitStoreLoc(tree->AsLclVarCommon(), info);
+ BuildStoreLoc(tree->AsLclVarCommon());
break;
case GT_NOP:
@@ -273,7 +197,7 @@ void LinearScan::TreeNodeInfoInit(GenTree* tree, TreeNodeInfo* info)
assert(info->dstCount == 1);
break;
default:
- NYI_ARM("LinearScan::TreeNodeInfoInit for GT_INTRINSIC");
+ NYI_ARM("LinearScan::Build for GT_INTRINSIC");
break;
}
}
@@ -492,7 +416,7 @@ void LinearScan::TreeNodeInfoInit(GenTree* tree, TreeNodeInfo* info)
break;
case GT_RETURN:
- TreeNodeInfoInitReturn(tree, info);
+ BuildReturn(tree);
break;
case GT_RETFILT:
@@ -630,7 +554,7 @@ void LinearScan::TreeNodeInfoInit(GenTree* tree, TreeNodeInfo* info)
case GT_ROR:
case GT_LSH_HI:
case GT_RSH_LO:
- TreeNodeInfoInitShiftRotate(tree, info);
+ BuildShiftRotate(tree);
break;
case GT_EQ:
@@ -640,7 +564,7 @@ void LinearScan::TreeNodeInfoInit(GenTree* tree, TreeNodeInfo* info)
case GT_GE:
case GT_GT:
case GT_CMP:
- TreeNodeInfoInitCmp(tree, info);
+ BuildCmp(tree);
break;
case GT_CKFINITE:
@@ -651,7 +575,7 @@ void LinearScan::TreeNodeInfoInit(GenTree* tree, TreeNodeInfo* info)
break;
case GT_CALL:
- TreeNodeInfoInitCall(tree->AsCall(), info);
+ BuildCall(tree->AsCall());
break;
case GT_ADDR:
@@ -668,7 +592,7 @@ void LinearScan::TreeNodeInfoInit(GenTree* tree, TreeNodeInfo* info)
case GT_STORE_BLK:
case GT_STORE_OBJ:
case GT_STORE_DYN_BLK:
- TreeNodeInfoInitBlockStore(tree->AsBlk(), info);
+ BuildBlockStore(tree->AsBlk());
break;
case GT_INIT_VAL:
@@ -677,7 +601,7 @@ void LinearScan::TreeNodeInfoInit(GenTree* tree, TreeNodeInfo* info)
break;
case GT_LCLHEAP:
- TreeNodeInfoInitLclHeap(tree, info);
+ BuildLclHeap(tree);
break;
case GT_STOREIND:
@@ -688,11 +612,11 @@ void LinearScan::TreeNodeInfoInit(GenTree* tree, TreeNodeInfo* info)
if (compiler->codeGen->gcInfo.gcIsWriteBarrierAsgNode(tree))
{
info->srcCount = 2;
- TreeNodeInfoInitGCWriteBarrier(tree, info);
+ BuildGCWriteBarrier(tree);
break;
}
- TreeNodeInfoInitIndir(tree->AsIndir(), info);
+ BuildIndir(tree->AsIndir());
// No contained source on ARM.
assert(!src->isContained());
info->srcCount++;
@@ -712,7 +636,7 @@ void LinearScan::TreeNodeInfoInit(GenTree* tree, TreeNodeInfo* info)
case GT_IND:
assert(info->dstCount == 1);
info->srcCount = 1;
- TreeNodeInfoInitIndir(tree->AsIndir(), info);
+ BuildIndir(tree->AsIndir());
break;
case GT_CATCH_ARG:
@@ -751,15 +675,15 @@ void LinearScan::TreeNodeInfoInit(GenTree* tree, TreeNodeInfo* info)
break;
case GT_PUTARG_SPLIT:
- TreeNodeInfoInitPutArgSplit(tree->AsPutArgSplit(), info);
+ BuildPutArgSplit(tree->AsPutArgSplit());
break;
case GT_PUTARG_STK:
- TreeNodeInfoInitPutArgStk(tree->AsPutArgStk(), info);
+ BuildPutArgStk(tree->AsPutArgStk());
break;
case GT_PUTARG_REG:
- TreeNodeInfoInitPutArgReg(tree->AsUnOp(), info);
+ BuildPutArgReg(tree->AsUnOp());
break;
case GT_BITCAST:
@@ -792,7 +716,7 @@ void LinearScan::TreeNodeInfoInit(GenTree* tree, TreeNodeInfo* info)
GenTree::OpName(tree->OperGet()));
NYIRAW(message);
#else
- NYI_ARM("TreeNodeInfoInit default case");
+ NYI_ARM("BuildNode default case");
#endif
case GT_LCL_FLD:
case GT_LCL_FLD_ADDR:
@@ -808,19 +732,7 @@ void LinearScan::TreeNodeInfoInit(GenTree* tree, TreeNodeInfo* info)
case GT_SETCC:
case GT_MEMORYBARRIER:
case GT_OBJ:
- assert(info->dstCount == (tree->IsValue() ? 1 : 0));
- if (kind & (GTK_CONST | GTK_LEAF))
- {
- info->srcCount = 0;
- }
- else if (kind & (GTK_SMPOP))
- {
- info->srcCount = appendBinaryLocationInfoToList(tree->AsOp());
- }
- else
- {
- unreached();
- }
+ BuildSimple(tree);
break;
case GT_INDEX_ADDR:
diff --git a/src/jit/lsraarm64.cpp b/src/jit/lsraarm64.cpp
index 2bfcfa2651..990f39def0 100644
--- a/src/jit/lsraarm64.cpp
+++ b/src/jit/lsraarm64.cpp
@@ -29,7 +29,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
#include "lower.h"
//------------------------------------------------------------------------
-// TreeNodeInfoInit: Set the register requirements for RA.
+// BuildNode: Set the register requirements for RA.
//
// Notes:
// Takes care of annotating the register requirements
@@ -44,10 +44,11 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
// requirements needed by LSRA to build the Interval Table (source,
// destination and internal [temp] register counts).
//
-void LinearScan::TreeNodeInfoInit(GenTree* tree, TreeNodeInfo* info)
+void LinearScan::BuildNode(GenTree* tree)
{
- unsigned kind = tree->OperKind();
- RegisterType registerType = TypeGet(tree);
+ TreeNodeInfo* info = currentNodeInfo;
+ unsigned kind = tree->OperKind();
+ RegisterType registerType = TypeGet(tree);
if (tree->isContained())
{
@@ -76,25 +77,14 @@ void LinearScan::TreeNodeInfoInit(GenTree* tree, TreeNodeInfo* info)
GenTree* op2;
default:
- if (kind & (GTK_CONST | GTK_LEAF))
- {
- info->srcCount = 0;
- }
- else if (kind & (GTK_SMPOP))
- {
- info->srcCount = appendBinaryLocationInfoToList(tree->AsOp());
- }
- else
- {
- unreached();
- }
+ BuildSimple(tree);
break;
case GT_STORE_LCL_FLD:
case GT_STORE_LCL_VAR:
info->srcCount = 1;
assert(info->dstCount == 0);
- TreeNodeInfoInitStoreLoc(tree->AsLclVarCommon(), info);
+ BuildStoreLoc(tree->AsLclVarCommon());
break;
case GT_LIST:
@@ -136,7 +126,7 @@ void LinearScan::TreeNodeInfoInit(GenTree* tree, TreeNodeInfo* info)
break;
case GT_RETURN:
- TreeNodeInfoInitReturn(tree, info);
+ BuildReturn(tree);
break;
case GT_RETFILT:
@@ -281,13 +271,13 @@ void LinearScan::TreeNodeInfoInit(GenTree* tree, TreeNodeInfo* info)
#ifdef FEATURE_SIMD
case GT_SIMD:
- TreeNodeInfoInitSIMD(tree->AsSIMD(), info);
+ BuildSIMD(tree->AsSIMD());
break;
#endif // FEATURE_SIMD
#ifdef FEATURE_HW_INTRINSICS
case GT_HWIntrinsic:
- TreeNodeInfoInitHWIntrinsic(tree->AsHWIntrinsic(), info);
+ BuildHWIntrinsic(tree->AsHWIntrinsic());
break;
#endif // FEATURE_HW_INTRINSICS
@@ -350,7 +340,7 @@ void LinearScan::TreeNodeInfoInit(GenTree* tree, TreeNodeInfo* info)
case GT_RSH:
case GT_RSZ:
case GT_ROR:
- TreeNodeInfoInitShiftRotate(tree, info);
+ BuildShiftRotate(tree);
break;
case GT_EQ:
@@ -362,7 +352,7 @@ void LinearScan::TreeNodeInfoInit(GenTree* tree, TreeNodeInfo* info)
case GT_TEST_EQ:
case GT_TEST_NE:
case GT_JCMP:
- TreeNodeInfoInitCmp(tree, info);
+ BuildCmp(tree);
break;
case GT_CKFINITE:
@@ -429,15 +419,15 @@ void LinearScan::TreeNodeInfoInit(GenTree* tree, TreeNodeInfo* info)
break;
case GT_PUTARG_STK:
- TreeNodeInfoInitPutArgStk(tree->AsPutArgStk(), info);
+ BuildPutArgStk(tree->AsPutArgStk());
break;
case GT_PUTARG_REG:
- TreeNodeInfoInitPutArgReg(tree->AsUnOp(), info);
+ BuildPutArgReg(tree->AsUnOp());
break;
case GT_CALL:
- TreeNodeInfoInitCall(tree->AsCall(), info);
+ BuildCall(tree->AsCall());
break;
case GT_ADDR:
@@ -461,7 +451,7 @@ void LinearScan::TreeNodeInfoInit(GenTree* tree, TreeNodeInfo* info)
case GT_STORE_BLK:
case GT_STORE_OBJ:
case GT_STORE_DYN_BLK:
- TreeNodeInfoInitBlockStore(tree->AsBlk(), info);
+ BuildBlockStore(tree->AsBlk());
break;
case GT_INIT_VAL:
@@ -669,11 +659,11 @@ void LinearScan::TreeNodeInfoInit(GenTree* tree, TreeNodeInfo* info)
if (compiler->codeGen->gcInfo.gcIsWriteBarrierAsgNode(tree))
{
info->srcCount = 2;
- TreeNodeInfoInitGCWriteBarrier(tree, info);
+ BuildGCWriteBarrier(tree);
break;
}
- TreeNodeInfoInitIndir(tree->AsIndir(), info);
+ BuildIndir(tree->AsIndir());
if (!tree->gtGetOp2()->isContained())
{
appendLocationInfoToList(tree->gtGetOp2());
@@ -693,7 +683,7 @@ void LinearScan::TreeNodeInfoInit(GenTree* tree, TreeNodeInfo* info)
case GT_IND:
assert(info->dstCount == 1);
- TreeNodeInfoInitIndir(tree->AsIndir(), info);
+ BuildIndir(tree->AsIndir());
break;
case GT_CATCH_ARG:
@@ -732,72 +722,9 @@ void LinearScan::TreeNodeInfoInit(GenTree* tree, TreeNodeInfo* info)
assert(info->dstCount == tree->GetRegisterDstCount());
}
-//------------------------------------------------------------------------
-// TreeNodeInfoInitReturn: Set the NodeInfo for a GT_RETURN.
-//
-// Arguments:
-// tree - The node of interest
-//
-// Return Value:
-// None.
-//
-void LinearScan::TreeNodeInfoInitReturn(GenTree* tree, TreeNodeInfo* info)
-{
- GenTree* op1 = tree->gtGetOp1();
- regMaskTP useCandidates = RBM_NONE;
-
- info->srcCount = ((tree->TypeGet() == TYP_VOID) || op1->isContained()) ? 0 : 1;
- assert(info->dstCount == 0);
-
- if ((tree->TypeGet() != TYP_VOID) && !op1->isContained())
- {
- if (varTypeIsStruct(tree))
- {
- // op1 has to be either an lclvar or a multi-reg returning call
- if (op1->OperGet() != GT_LCL_VAR)
- {
- noway_assert(op1->IsMultiRegCall());
-
- ReturnTypeDesc* retTypeDesc = op1->AsCall()->GetReturnTypeDesc();
- info->srcCount = retTypeDesc->GetReturnRegCount();
- useCandidates = retTypeDesc->GetABIReturnRegs();
- }
- }
- else
- {
- // Non-struct type return - determine useCandidates
- switch (tree->TypeGet())
- {
- case TYP_VOID:
- useCandidates = RBM_NONE;
- break;
- case TYP_FLOAT:
- useCandidates = RBM_FLOATRET;
- break;
- case TYP_DOUBLE:
- useCandidates = RBM_DOUBLERET;
- break;
- case TYP_LONG:
- useCandidates = RBM_LNGRET;
- break;
- default:
- useCandidates = RBM_INTRET;
- break;
- }
- }
-
- LocationInfoListNode* locationInfo = getLocationInfo(op1);
- if (useCandidates != RBM_NONE)
- {
- locationInfo->info.setSrcCandidates(this, useCandidates);
- }
- useList.Append(locationInfo);
- }
-}
-
#ifdef FEATURE_SIMD
//------------------------------------------------------------------------
-// TreeNodeInfoInitSIMD: Set the NodeInfo for a GT_SIMD tree.
+// BuildSIMD: Set the NodeInfo for a GT_SIMD tree.
//
// Arguments:
// tree - The GT_SIMD node of interest
@@ -805,8 +732,9 @@ void LinearScan::TreeNodeInfoInitReturn(GenTree* tree, TreeNodeInfo* info)
// Return Value:
// None.
-void LinearScan::TreeNodeInfoInitSIMD(GenTreeSIMD* simdTree, TreeNodeInfo* info)
+void LinearScan::BuildSIMD(GenTreeSIMD* simdTree)
{
+ TreeNodeInfo* info = currentNodeInfo;
// Only SIMDIntrinsicInit can be contained
if (simdTree->isContained())
{
@@ -983,7 +911,7 @@ void LinearScan::TreeNodeInfoInitSIMD(GenTreeSIMD* simdTree, TreeNodeInfo* info)
#ifdef FEATURE_HW_INTRINSICS
//------------------------------------------------------------------------
-// TreeNodeInfoInitHWIntrinsic: Set the NodeInfo for a GT_HWIntrinsic tree.
+// BuildHWIntrinsic: Set the NodeInfo for a GT_HWIntrinsic tree.
//
// Arguments:
// tree - The GT_HWIntrinsic node of interest
@@ -991,8 +919,9 @@ void LinearScan::TreeNodeInfoInitSIMD(GenTreeSIMD* simdTree, TreeNodeInfo* info)
// Return Value:
// None.
-void LinearScan::TreeNodeInfoInitHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, TreeNodeInfo* info)
+void LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree)
{
+ TreeNodeInfo* info = currentNodeInfo;
NamedIntrinsic intrinsicID = intrinsicTree->gtHWIntrinsicId;
info->srcCount += GetOperandInfo(intrinsicTree->gtOp.gtOp1);
if (intrinsicTree->gtGetOp2IfPresent() != nullptr)
diff --git a/src/jit/lsraarmarch.cpp b/src/jit/lsraarmarch.cpp
index 189a37697a..0bdc13ef42 100644
--- a/src/jit/lsraarmarch.cpp
+++ b/src/jit/lsraarmarch.cpp
@@ -29,142 +29,15 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
#include "lsra.h"
//------------------------------------------------------------------------
-// TreeNodeInfoInitStoreLoc: Set register requirements for a store of a lclVar
-//
-// Arguments:
-// storeLoc - the local store (GT_STORE_LCL_FLD or GT_STORE_LCL_VAR)
-//
-// Notes:
-// This involves:
-// - Setting the appropriate candidates for a store of a multi-reg call return value.
-// - Handling of contained immediates.
-//
-void LinearScan::TreeNodeInfoInitStoreLoc(GenTreeLclVarCommon* storeLoc, TreeNodeInfo* info)
-{
- GenTree* op1 = storeLoc->gtGetOp1();
-
- assert(info->dstCount == 0);
- if (op1->IsMultiRegCall())
- {
- // This is the case of var = call where call is returning
- // a value in multiple return registers.
- // Must be a store lclvar.
- assert(storeLoc->OperGet() == GT_STORE_LCL_VAR);
-
- // srcCount = number of registers in which the value is returned by call
- GenTreeCall* call = op1->AsCall();
- ReturnTypeDesc* retTypeDesc = call->GetReturnTypeDesc();
- unsigned regCount = retTypeDesc->GetReturnRegCount();
- info->srcCount = regCount;
-
- // Call node srcCandidates = Bitwise-OR(allregs(GetReturnRegType(i))) for all i=0..RetRegCount-1
- regMaskTP srcCandidates = allMultiRegCallNodeRegs(call);
- LocationInfoListNode* locInfo = getLocationInfo(op1);
- locInfo->info.setSrcCandidates(this, srcCandidates);
- useList.Append(locInfo);
- }
-#ifdef _TARGET_ARM_
- else if (varTypeIsLong(op1))
- {
- // The only possible operands for a GT_STORE_LCL_VAR are a multireg call node, which we have
- // handled above, or a GT_LONG node.
- assert(!op1->OperIs(GT_LONG) || op1->isContained());
- info->srcCount = 2;
- // TODO: Currently, GetOperandInfo always returns 1 for any non-contained node.
- // Consider enhancing it to handle multi-reg nodes.
- (void)GetOperandInfo(op1);
- }
-#endif // _TARGET_ARM_
- else if (op1->isContained())
- {
- info->srcCount = 0;
- }
- else
- {
- info->srcCount = 1;
- appendLocationInfoToList(op1);
- }
-
-#ifdef FEATURE_SIMD
- if (varTypeIsSIMD(storeLoc))
- {
- if (storeLoc->TypeGet() == TYP_SIMD12)
- {
- // Need an additional register to extract upper 4 bytes of Vector3.
- info->internalIntCount = 1;
- }
- }
-#endif // FEATURE_SIMD
-}
-
-//------------------------------------------------------------------------
-// TreeNodeInfoInitCmp: Lower a GT comparison node.
-//
-// Arguments:
-// tree - the node to lower
-//
-// Return Value:
-// None.
-//
-void LinearScan::TreeNodeInfoInitCmp(GenTree* tree, TreeNodeInfo* info)
-{
- info->srcCount = appendBinaryLocationInfoToList(tree->AsOp());
-
- assert((info->dstCount == 1) || (tree->TypeGet() == TYP_VOID));
- info->srcCount = tree->gtOp.gtOp2->isContained() ? 1 : 2;
-}
-
-void LinearScan::TreeNodeInfoInitGCWriteBarrier(GenTree* tree, TreeNodeInfo* info)
-{
- GenTree* dst = tree;
- GenTree* addr = tree->gtOp.gtOp1;
- GenTree* src = tree->gtOp.gtOp2;
- LocationInfoListNode* addrInfo = getLocationInfo(addr);
- LocationInfoListNode* srcInfo = getLocationInfo(src);
-
- // In the case where we are doing a helper assignment, even if the dst
- // is an indir through an lea, we need to actually instantiate the
- // lea in a register
- assert(!addr->isContained() && !src->isContained());
- useList.Append(addrInfo);
- useList.Append(srcInfo);
- info->srcCount = 2;
- assert(info->dstCount == 0);
-
-#if NOGC_WRITE_BARRIERS
- NYI_ARM("NOGC_WRITE_BARRIERS");
-
- // For the NOGC JIT Helper calls
- //
- // the 'addr' goes into x14 (REG_WRITE_BARRIER_DST_BYREF)
- // the 'src' goes into x15 (REG_WRITE_BARRIER)
- //
- addrInfo->info.setSrcCandidates(this, RBM_WRITE_BARRIER_DST_BYREF);
- srcInfo->info.setSrcCandidates(this, RBM_WRITE_BARRIER);
-#else
- // For the standard JIT Helper calls
- // op1 goes into REG_ARG_0 and
- // op2 goes into REG_ARG_1
- //
- addrInfo->info.setSrcCandidates(this, RBM_ARG_0);
- srcInfo->info.setSrcCandidates(this, RBM_ARG_1);
-#endif // NOGC_WRITE_BARRIERS
-
- // Both src and dst must reside in a register, which they should since we haven't set
- // either of them as contained.
- assert(addrInfo->info.dstCount == 1);
- assert(srcInfo->info.dstCount == 1);
-}
-
-//------------------------------------------------------------------------
-// TreeNodeInfoInitIndir: Specify register requirements for address expression
+// BuildIndir: Specify register requirements for address expression
// of an indirection operation.
//
// Arguments:
// indirTree - GT_IND, GT_STOREIND or block gentree node
//
-void LinearScan::TreeNodeInfoInitIndir(GenTreeIndir* indirTree, TreeNodeInfo* info)
+void LinearScan::BuildIndir(GenTreeIndir* indirTree)
{
+ TreeNodeInfo* info = currentNodeInfo;
// If this is the rhs of a block copy (i.e. non-enregisterable struct),
// it has no register requirements.
if (indirTree->TypeGet() == TYP_STRUCT)
@@ -239,7 +112,7 @@ void LinearScan::TreeNodeInfoInitIndir(GenTreeIndir* indirTree, TreeNodeInfo* in
}
//------------------------------------------------------------------------
-// TreeNodeInfoInitShiftRotate: Set the NodeInfo for a shift or rotate.
+// BuildShiftRotate: Set the NodeInfo for a shift or rotate.
//
// Arguments:
// tree - The node of interest
@@ -247,10 +120,11 @@ void LinearScan::TreeNodeInfoInitIndir(GenTreeIndir* indirTree, TreeNodeInfo* in
// Return Value:
// None.
//
-int LinearScan::TreeNodeInfoInitShiftRotate(GenTree* tree, TreeNodeInfo* info)
+int LinearScan::BuildShiftRotate(GenTree* tree)
{
- GenTree* source = tree->gtOp.gtOp1;
- GenTree* shiftBy = tree->gtOp.gtOp2;
+ TreeNodeInfo* info = currentNodeInfo;
+ GenTree* source = tree->gtOp.gtOp1;
+ GenTree* shiftBy = tree->gtOp.gtOp2;
assert(info->dstCount == 1);
if (!shiftBy->isContained())
{
@@ -291,84 +165,7 @@ int LinearScan::TreeNodeInfoInitShiftRotate(GenTree* tree, TreeNodeInfo* info)
}
//------------------------------------------------------------------------
-// TreeNodeInfoInitPutArgReg: Set the NodeInfo for a PUTARG_REG.
-//
-// Arguments:
-// node - The PUTARG_REG node.
-// argReg - The register in which to pass the argument.
-// info - The info for the node's using call.
-// isVarArgs - True if the call uses a varargs calling convention.
-// callHasFloatRegArgs - Set to true if this PUTARG_REG uses an FP register.
-//
-// Return Value:
-// None.
-//
-void LinearScan::TreeNodeInfoInitPutArgReg(GenTreeUnOp* node, TreeNodeInfo* info)
-{
- assert(node != nullptr);
- assert(node->OperIsPutArgReg());
- info->srcCount = 1;
- regNumber argReg = node->gtRegNum;
- assert(argReg != REG_NA);
-
- // Set the register requirements for the node.
- regMaskTP argMask = genRegMask(argReg);
-
-#ifdef _TARGET_ARM_
- // If type of node is `long` then it is actually `double`.
- // The actual `long` types must have been transformed as a field list with two fields.
- if (node->TypeGet() == TYP_LONG)
- {
- info->srcCount++;
- info->dstCount = info->srcCount;
- assert(genRegArgNext(argReg) == REG_NEXT(argReg));
- argMask |= genRegMask(REG_NEXT(argReg));
- }
-#endif // _TARGET_ARM_
- info->setDstCandidates(this, argMask);
- info->setSrcCandidates(this, argMask);
-
- // To avoid redundant moves, have the argument operand computed in the
- // register in which the argument is passed to the call.
- LocationInfoListNode* op1Info = getLocationInfo(node->gtOp.gtOp1);
- op1Info->info.setSrcCandidates(this, info->getSrcCandidates(this));
- op1Info->info.isDelayFree = true;
- useList.Append(op1Info);
-}
-
-//------------------------------------------------------------------------
-// HandleFloatVarArgs: Handle additional register requirements for a varargs call
-//
-// Arguments:
-// call - The call node of interest
-// argNode - The current argument
-//
-// Return Value:
-// None.
-//
-// Notes:
-// In the case of a varargs call, the ABI dictates that if we have floating point args,
-// we must pass the enregistered arguments in both the integer and floating point registers.
-// Since the integer register is not associated with the arg node, we will reserve it as
-// an internal register on the call so that it is not used during the evaluation of the call node
-// (e.g. for the target).
-void LinearScan::HandleFloatVarArgs(GenTreeCall* call, TreeNodeInfo* info, GenTree* argNode, bool* callHasFloatRegArgs)
-{
-#if FEATURE_VARARG
- if (call->IsVarargs() && varTypeIsFloating(argNode))
- {
- *callHasFloatRegArgs = true;
-
- regNumber argReg = argNode->gtRegNum;
- regNumber targetReg = compiler->getCallArgIntRegister(argReg);
- info->setInternalIntCount(info->internalIntCount + 1);
- info->addInternalCandidates(this, genRegMask(targetReg));
- }
-#endif // FEATURE_VARARG
-}
-
-//------------------------------------------------------------------------
-// TreeNodeInfoInitCall: Set the NodeInfo for a call.
+// BuildCall: Set the NodeInfo for a call.
//
// Arguments:
// call - The call node of interest
@@ -376,8 +173,9 @@ void LinearScan::HandleFloatVarArgs(GenTreeCall* call, TreeNodeInfo* info, GenTr
// Return Value:
// None.
//
-void LinearScan::TreeNodeInfoInitCall(GenTreeCall* call, TreeNodeInfo* info)
+void LinearScan::BuildCall(GenTreeCall* call)
{
+ TreeNodeInfo* info = currentNodeInfo;
bool hasMultiRegRetVal = false;
ReturnTypeDesc* retTypeDesc = nullptr;
@@ -478,7 +276,7 @@ void LinearScan::TreeNodeInfoInitCall(GenTreeCall* call, TreeNodeInfo* info)
GenTree* argNode = list->Current();
#ifdef DEBUG
- // During TreeNodeInfoInit, we only use the ArgTabEntry for validation,
+ // During Build, we only use the ArgTabEntry for validation,
// as getting it is rather expensive.
fgArgTabEntry* curArgTabEntry = compiler->gtArgEntryByNode(call, argNode);
regNumber argReg = curArgTabEntry->regNum;
@@ -551,7 +349,7 @@ void LinearScan::TreeNodeInfoInitCall(GenTreeCall* call, TreeNodeInfo* info)
{
assert(argNode->OperIs(GT_PUTARG_REG));
assert(argNode->gtRegNum == argReg);
- HandleFloatVarArgs(call, info, argNode, &callHasFloatRegArgs);
+ HandleFloatVarArgs(call, argNode, &callHasFloatRegArgs);
#ifdef _TARGET_ARM_
// The `double` types have been transformed to `long` on armel,
// while the actual long types have been decomposed.
@@ -636,7 +434,7 @@ void LinearScan::TreeNodeInfoInitCall(GenTreeCall* call, TreeNodeInfo* info)
}
//------------------------------------------------------------------------
-// TreeNodeInfoInitPutArgStk: Set the NodeInfo for a GT_PUTARG_STK node
+// BuildPutArgStk: Set the NodeInfo for a GT_PUTARG_STK node
//
// Arguments:
// argNode - a GT_PUTARG_STK node
@@ -647,8 +445,9 @@ void LinearScan::TreeNodeInfoInitCall(GenTreeCall* call, TreeNodeInfo* info)
// Notes:
// Set the child node(s) to be contained when we have a multireg arg
//
-void LinearScan::TreeNodeInfoInitPutArgStk(GenTreePutArgStk* argNode, TreeNodeInfo* info)
+void LinearScan::BuildPutArgStk(GenTreePutArgStk* argNode)
{
+ TreeNodeInfo* info = currentNodeInfo;
assert(argNode->gtOper == GT_PUTARG_STK);
GenTree* putArgChild = argNode->gtOp.gtOp1;
@@ -717,7 +516,7 @@ void LinearScan::TreeNodeInfoInitPutArgStk(GenTreePutArgStk* argNode, TreeNodeIn
#ifdef _TARGET_ARM_
//------------------------------------------------------------------------
-// TreeNodeInfoInitPutArgSplit: Set the NodeInfo for a GT_PUTARG_SPLIT node
+// BuildPutArgSplit: Set the NodeInfo for a GT_PUTARG_SPLIT node
//
// Arguments:
// argNode - a GT_PUTARG_SPLIT node
@@ -728,8 +527,9 @@ void LinearScan::TreeNodeInfoInitPutArgStk(GenTreePutArgStk* argNode, TreeNodeIn
// Notes:
// Set the child node(s) to be contained
//
-void LinearScan::TreeNodeInfoInitPutArgSplit(GenTreePutArgSplit* argNode, TreeNodeInfo* info)
+void LinearScan::BuildPutArgSplit(GenTreePutArgSplit* argNode)
{
+ TreeNodeInfo* info = currentNodeInfo;
assert(argNode->gtOper == GT_PUTARG_SPLIT);
GenTree* putArgChild = argNode->gtOp.gtOp1;
@@ -807,7 +607,7 @@ void LinearScan::TreeNodeInfoInitPutArgSplit(GenTreePutArgSplit* argNode, TreeNo
#endif // _TARGET_ARM_
//------------------------------------------------------------------------
-// TreeNodeInfoInitBlockStore: Set the NodeInfo for a block store.
+// BuildBlockStore: Set the NodeInfo for a block store.
//
// Arguments:
// blkNode - The block store node of interest
@@ -815,11 +615,12 @@ void LinearScan::TreeNodeInfoInitPutArgSplit(GenTreePutArgSplit* argNode, TreeNo
// Return Value:
// None.
//
-void LinearScan::TreeNodeInfoInitBlockStore(GenTreeBlk* blkNode, TreeNodeInfo* info)
+void LinearScan::BuildBlockStore(GenTreeBlk* blkNode)
{
- GenTree* dstAddr = blkNode->Addr();
- unsigned size = blkNode->gtBlkSize;
- GenTree* source = blkNode->Data();
+ TreeNodeInfo* info = currentNodeInfo;
+ GenTree* dstAddr = blkNode->Addr();
+ unsigned size = blkNode->gtBlkSize;
+ GenTree* source = blkNode->Data();
LocationInfoListNode* dstAddrInfo = nullptr;
LocationInfoListNode* sourceInfo = nullptr;
diff --git a/src/jit/lsrabuild.cpp b/src/jit/lsrabuild.cpp
new file mode 100644
index 0000000000..0d3cccb6b3
--- /dev/null
+++ b/src/jit/lsrabuild.cpp
@@ -0,0 +1,3205 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX Interval and RefPosition Building XX
+XX XX
+XX This contains the logic for constructing Intervals and RefPositions that XX
+XX is common across architectures. See lsra{arch}.cpp for the architecture- XX
+XX specific methods for building. XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+#include "jitpch.h"
+#ifdef _MSC_VER
+#pragma hdrstop
+#endif
+
+#ifndef LEGACY_BACKEND // This file is ONLY used for the RyuJIT backend that uses the linear scan register allocator
+
+#include "lsra.h"
+
+//------------------------------------------------------------------------
+// LocationInfoListNodePool::LocationInfoListNodePool:
+// Creates a pool of `LocationInfoListNode` values.
+//
+// Arguments:
+// compiler - The compiler context.
+// preallocate - The number of nodes to preallocate.
+//
+LocationInfoListNodePool::LocationInfoListNodePool(Compiler* compiler, unsigned preallocate) : m_compiler(compiler)
+{
+ if (preallocate > 0)
+ {
+ size_t preallocateSize = sizeof(LocationInfoListNode) * preallocate;
+ LocationInfoListNode* preallocatedNodes =
+ reinterpret_cast<LocationInfoListNode*>(compiler->compGetMem(preallocateSize, CMK_LSRA));
+
+ LocationInfoListNode* head = preallocatedNodes;
+ head->m_next = nullptr;
+
+ for (unsigned i = 1; i < preallocate; i++)
+ {
+ LocationInfoListNode* node = &preallocatedNodes[i];
+ node->m_next = head;
+ head = node;
+ }
+
+ m_freeList = head;
+ }
+}
+
+//------------------------------------------------------------------------
+// LocationInfoListNodePool::GetNode: Fetches an unused node from the
+// pool.
+//
+// Arguments:
+// l - The `LsraLocation` for the `LocationInfo` value.
+// i - The interval for the `LocationInfo` value.
+// t - The IR node for the `LocationInfo` value.
+// regIdx - The register index for the `LocationInfo` value.
+//
+// Returns:
+// A pooled or newly-allocated `LocationInfoListNode`, depending on the
+// contents of the pool.
+LocationInfoListNode* LocationInfoListNodePool::GetNode(LsraLocation l, Interval* i, GenTree* t, unsigned regIdx)
+{
+ LocationInfoListNode* head = m_freeList;
+ if (head == nullptr)
+ {
+ head = reinterpret_cast<LocationInfoListNode*>(m_compiler->compGetMem(sizeof(LocationInfoListNode)));
+ }
+ else
+ {
+ m_freeList = head->m_next;
+ }
+
+ head->loc = l;
+ head->interval = i;
+ head->treeNode = t;
+ head->m_next = nullptr;
+
+ return head;
+}
+
+//------------------------------------------------------------------------
+// LocationInfoListNodePool::ReturnNodes: Returns a list of nodes to the node
+// pool and clears the given list.
+//
+// Arguments:
+// list - The list to return.
+//
+void LocationInfoListNodePool::ReturnNodes(LocationInfoList& list)
+{
+ assert(list.m_head != nullptr);
+ assert(list.m_tail != nullptr);
+
+ LocationInfoListNode* head = m_freeList;
+ list.m_tail->m_next = head;
+ m_freeList = list.m_head;
+
+ list.m_head = nullptr;
+ list.m_tail = nullptr;
+}
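
The pool above is a plain singly linked free list over arena memory: GetNode pops a node or allocates a fresh one, and ReturnNodes splices an entire list back in constant time. Below is a minimal standalone sketch of the same pattern, with hypothetical Node/NodePool types standing in for LocationInfoListNode and its pool (the real pool draws memory from the compiler's arena via compGetMem and never frees nodes individually):

```cpp
#include <cassert>

// Hypothetical stand-ins for LocationInfoListNode and LocationInfoListNodePool.
struct Node
{
    int   payload = 0;
    Node* next    = nullptr;
};

class NodePool
{
    Node* m_freeList = nullptr;

public:
    // Pop a node off the free list, or allocate when the pool is empty.
    Node* GetNode(int payload)
    {
        Node* head = m_freeList;
        if (head == nullptr)
        {
            head = new Node(); // the real pool allocates from the arena instead
        }
        else
        {
            m_freeList = head->next;
        }
        head->payload = payload;
        head->next    = nullptr;
        return head;
    }

    // Splice a whole list (head..tail) back onto the free list in O(1).
    void ReturnNodes(Node* head, Node* tail)
    {
        assert((head != nullptr) && (tail != nullptr));
        tail->next = m_freeList;
        m_freeList = head;
    }
};

int main()
{
    NodePool pool;
    Node*    a = pool.GetNode(1);
    Node*    b = pool.GetNode(2);
    a->next    = b;
    pool.ReturnNodes(a, b);
    Node* c = pool.GetNode(3); // reuses 'a', the head of the returned list
    assert(c == a);
    return 0;
}
```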
+
+//------------------------------------------------------------------------
+// newInterval: Create a new Interval of the given RegisterType.
+//
+// Arguments:
+// theRegisterType - The type of Interval to create.
+//
+// TODO-Cleanup: Consider adding an overload that takes a varDsc, and can appropriately
+// set such fields as isStructField
+//
+Interval* LinearScan::newInterval(RegisterType theRegisterType)
+{
+ intervals.emplace_back(theRegisterType, allRegs(theRegisterType));
+ Interval* newInt = &intervals.back();
+
+#ifdef DEBUG
+ newInt->intervalIndex = static_cast<unsigned>(intervals.size() - 1);
+#endif // DEBUG
+
+ DBEXEC(VERBOSE, newInt->dump());
+ return newInt;
+}
+
+//------------------------------------------------------------------------
+// newRefPositionRaw: Create a new RefPosition
+//
+// Arguments:
+// nodeLocation - The location of the reference.
+// treeNode - The GenTree of the reference.
+// refType - The type of reference
+//
+// Notes:
+// This is used to create RefPositions for both RegRecords and Intervals,
+// so it does only the common initialization.
+//
+RefPosition* LinearScan::newRefPositionRaw(LsraLocation nodeLocation, GenTree* treeNode, RefType refType)
+{
+ refPositions.emplace_back(curBBNum, nodeLocation, treeNode, refType);
+ RefPosition* newRP = &refPositions.back();
+#ifdef DEBUG
+ newRP->rpNum = static_cast<unsigned>(refPositions.size() - 1);
+#endif // DEBUG
+ return newRP;
+}
+
+//------------------------------------------------------------------------
+// resolveConflictingDefAndUse: Resolve the situation where we have conflicting def and use
+// register requirements on a single-def, single-use interval.
+//
+// Arguments:
+// defRefPosition - The interval definition
+// useRefPosition - The (sole) interval use
+//
+// Return Value:
+// None.
+//
+// Assumptions:
+// The two RefPositions are for the same interval, which is a tree-temp.
+//
+// Notes:
+// We require some special handling for the case where the use is a "delayRegFree" case of a fixedReg.
+// In that case, if we change the registerAssignment on the useRefPosition, we will lose the fact that,
+// even if we assign a different register (and rely on codegen to do the copy), that fixedReg also needs
+// to remain busy until the Def register has been allocated. In that case, we don't allow Case 1 or Case 4
+// below.
+// Here are the cases we consider (in this order):
+// 1. If The defRefPosition specifies a single register, and there are no conflicting
+// FixedReg uses of it between the def and use, we use that register, and the code generator
+// will insert the copy. Note that it cannot be in use because there is a FixedRegRef for the def.
+// 2. If the useRefPosition specifies a single register, and it is not in use, and there are no
+// conflicting FixedReg uses of it between the def and use, we use that register, and the code generator
+// will insert the copy.
+// 3. If the defRefPosition specifies a single register (but there are conflicts, as determined
+// in 1.), and there are no conflicts with the useRefPosition register (if it's a single register),
+// we set the register requirements on the defRefPosition to the use registers, and the
+// code generator will insert a copy on the def. We can't rely on the code generator to put a copy
+// on the use if it has multiple possible candidates, as it won't know which one has been allocated.
+// 4. If the useRefPosition specifies a single register, and there are no conflicts with the register
+// on the defRefPosition, we leave the register requirements on the defRefPosition as-is, and set
+// the useRefPosition to the def registers, for similar reasons to case #3.
+// 5. If both the defRefPosition and the useRefPosition specify single registers, but both have conflicts,
+// We set the candidates on defRefPosition to be all regs of the appropriate type, and since they are
+// single registers, codegen can insert the copy.
+// 6. Finally, if the RefPositions specify disjoint subsets of the registers (or the use is fixed but
+// has a conflict), we must insert a copy. The copy will be inserted before the use if the
+// use is not fixed (in the fixed case, the code generator will insert the use).
+//
+// TODO-CQ: We get bad register allocation in case #3 in the situation where no register is
+// available for the lifetime. We end up allocating a register that must be spilled, and it probably
+// won't be the register that is actually defined by the target instruction. So, we have to copy it
+// and THEN spill it. In this case, we should be using the def requirement. But we need to change
+// the interface to this method a bit to make that work (e.g. returning a candidate set to use, but
+// leaving the registerAssignment as-is on the def, so that if we find that we need to spill anyway
+// we can use the fixed-reg on the def.)
+//
+
+void LinearScan::resolveConflictingDefAndUse(Interval* interval, RefPosition* defRefPosition)
+{
+ assert(!interval->isLocalVar);
+
+ RefPosition* useRefPosition = defRefPosition->nextRefPosition;
+ regMaskTP defRegAssignment = defRefPosition->registerAssignment;
+ regMaskTP useRegAssignment = useRefPosition->registerAssignment;
+ RegRecord* defRegRecord = nullptr;
+ RegRecord* useRegRecord = nullptr;
+ regNumber defReg = REG_NA;
+ regNumber useReg = REG_NA;
+ bool defRegConflict = false;
+ bool useRegConflict = false;
+
+ // If the useRefPosition is a "delayRegFree", we can't change the registerAssignment
+ // on it, or we will fail to ensure that the fixedReg is busy at the time the target
+ // (of the node that uses this interval) is allocated.
+ bool canChangeUseAssignment = !useRefPosition->isFixedRegRef || !useRefPosition->delayRegFree;
+
+ INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_DEFUSE_CONFLICT));
+ if (!canChangeUseAssignment)
+ {
+ INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_DEFUSE_FIXED_DELAY_USE));
+ }
+ if (defRefPosition->isFixedRegRef)
+ {
+ defReg = defRefPosition->assignedReg();
+ defRegRecord = getRegisterRecord(defReg);
+ if (canChangeUseAssignment)
+ {
+ RefPosition* currFixedRegRefPosition = defRegRecord->recentRefPosition;
+ assert(currFixedRegRefPosition != nullptr &&
+ currFixedRegRefPosition->nodeLocation == defRefPosition->nodeLocation);
+
+ if (currFixedRegRefPosition->nextRefPosition == nullptr ||
+ currFixedRegRefPosition->nextRefPosition->nodeLocation > useRefPosition->getRefEndLocation())
+ {
+ // This is case #1. Use the defRegAssignment
+ INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_DEFUSE_CASE1));
+ useRefPosition->registerAssignment = defRegAssignment;
+ return;
+ }
+ else
+ {
+ defRegConflict = true;
+ }
+ }
+ }
+ if (useRefPosition->isFixedRegRef)
+ {
+ useReg = useRefPosition->assignedReg();
+ useRegRecord = getRegisterRecord(useReg);
+ RefPosition* currFixedRegRefPosition = useRegRecord->recentRefPosition;
+
+ // We know that useRefPosition is a fixed use, so the nextRefPosition must not be null.
+ RefPosition* nextFixedRegRefPosition = useRegRecord->getNextRefPosition();
+ assert(nextFixedRegRefPosition != nullptr &&
+ nextFixedRegRefPosition->nodeLocation <= useRefPosition->nodeLocation);
+
+ // First, check to see if there are any conflicting FixedReg references between the def and use.
+ if (nextFixedRegRefPosition->nodeLocation == useRefPosition->nodeLocation)
+ {
+ // OK, no conflicting FixedReg references.
+ // Now, check to see whether it is currently in use.
+ if (useRegRecord->assignedInterval != nullptr)
+ {
+ RefPosition* possiblyConflictingRef = useRegRecord->assignedInterval->recentRefPosition;
+ LsraLocation possiblyConflictingRefLocation = possiblyConflictingRef->getRefEndLocation();
+ if (possiblyConflictingRefLocation >= defRefPosition->nodeLocation)
+ {
+ useRegConflict = true;
+ }
+ }
+ if (!useRegConflict)
+ {
+ // This is case #2. Use the useRegAssignment
+ INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_DEFUSE_CASE2));
+ defRefPosition->registerAssignment = useRegAssignment;
+ return;
+ }
+ }
+ else
+ {
+ useRegConflict = true;
+ }
+ }
+ if (defRegRecord != nullptr && !useRegConflict)
+ {
+ // This is case #3.
+ INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_DEFUSE_CASE3));
+ defRefPosition->registerAssignment = useRegAssignment;
+ return;
+ }
+ if (useRegRecord != nullptr && !defRegConflict && canChangeUseAssignment)
+ {
+ // This is case #4.
+ INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_DEFUSE_CASE4));
+ useRefPosition->registerAssignment = defRegAssignment;
+ return;
+ }
+ if (defRegRecord != nullptr && useRegRecord != nullptr)
+ {
+ // This is case #5.
+ INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_DEFUSE_CASE5));
+ RegisterType regType = interval->registerType;
+ assert((getRegisterType(interval, defRefPosition) == regType) &&
+ (getRegisterType(interval, useRefPosition) == regType));
+ regMaskTP candidates = allRegs(regType);
+ defRefPosition->registerAssignment = candidates;
+ return;
+ }
+ INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_DEFUSE_CASE6));
+ return;
+}
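
For illustration only, the six cases in the comment above reduce to a small decision function. The booleans below are hypothetical stand-ins for the checks the method actually performs against the def/use RefPositions and their RegRecords:

```cpp
#include <cstdio>

// Which candidate set gets rewritten, per the six cases described above.
enum class Resolution
{
    UseGetsDefRegs, // cases 1 and 4: rewrite the use's candidates
    DefGetsUseRegs, // cases 2 and 3: rewrite the def's candidates
    DefGetsAllRegs, // case 5: widen the def so codegen can insert the copy
    InsertCopy      // case 6: disjoint sets; a copy is required
};

Resolution resolve(bool defFixed, bool defConflict, bool useFixed, bool useConflict, bool canChangeUse)
{
    if (defFixed && !defConflict && canChangeUse)
        return Resolution::UseGetsDefRegs; // case 1
    if (useFixed && !useConflict)
        return Resolution::DefGetsUseRegs; // case 2
    if (defFixed && !useConflict)
        return Resolution::DefGetsUseRegs; // case 3
    if (useFixed && !defConflict && canChangeUse)
        return Resolution::UseGetsDefRegs; // case 4
    if (defFixed && useFixed)
        return Resolution::DefGetsAllRegs; // case 5
    return Resolution::InsertCopy;         // case 6
}

int main()
{
    // A delayRegFree fixed use (canChangeUse == false) with a conflicting
    // fixed def falls through to case 5.
    Resolution r = resolve(true, true, true, true, false);
    printf("resolution = %d\n", static_cast<int>(r));
    return 0;
}
```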
+
+//------------------------------------------------------------------------
+// applyCalleeSaveHeuristics: Set register preferences for an interval based on the given RefPosition
+//
+// Arguments:
+// rp - The RefPosition of interest
+//
+// Notes:
+// This is slightly more general than its name implies, and updates preferences not just
+// for callee-save registers.
+//
+void LinearScan::applyCalleeSaveHeuristics(RefPosition* rp)
+{
+#ifdef _TARGET_AMD64_
+ if (compiler->opts.compDbgEnC)
+ {
+ // We only use RSI and RDI for EnC code, so we don't want to favor callee-save regs.
+ return;
+ }
+#endif // _TARGET_AMD64_
+
+ Interval* theInterval = rp->getInterval();
+
+#ifdef DEBUG
+ if (!doReverseCallerCallee())
+#endif // DEBUG
+ {
+ // Set preferences so that this register set will be preferred for earlier refs
+ theInterval->updateRegisterPreferences(rp->registerAssignment);
+ }
+}
+
+//------------------------------------------------------------------------
+// checkConflictingDefUse: Ensure that we have consistent def/use on SDSU temps.
+//
+// Arguments:
+// useRP - The use RefPosition of a tree temp (SDSU Interval)
+//
+// Notes:
+// There are a couple of cases where this may over-constrain allocation:
+// 1. In the case of a non-commutative rmw def (in which the rmw source must be delay-free), or
+// 2. In the case where the defining node requires a temp distinct from the target (also a
+// delay-free case).
+// In those cases, if we propagate a single-register restriction from the consumer to the producer
+// the delayed uses will not see a fixed reference in the PhysReg at that position, and may
+// incorrectly allocate that register.
+// TODO-CQ: This means that we may often require a copy at the use of this node's result.
+// This case could be moved to BuildRefPositionsForNode, at the point where the def RefPosition is
+// created, causing a RefTypeFixedRef to be added at that location. This, however, results in
+// more PhysReg RefPositions (a throughput impact), and a large number of diffs that require
+// further analysis to determine benefit.
+// See Issue #11274.
+//
+void LinearScan::checkConflictingDefUse(RefPosition* useRP)
+{
+ assert(useRP->refType == RefTypeUse);
+ Interval* theInterval = useRP->getInterval();
+ assert(!theInterval->isLocalVar);
+
+ RefPosition* defRP = theInterval->firstRefPosition;
+
+ // All defs must have a valid treeNode, but we check it below to be conservative.
+ assert(defRP->treeNode != nullptr);
+ regMaskTP prevAssignment = defRP->registerAssignment;
+ regMaskTP newAssignment = (prevAssignment & useRP->registerAssignment);
+ if (newAssignment != RBM_NONE)
+ {
+ if (!isSingleRegister(newAssignment) || !theInterval->hasInterferingUses)
+ {
+ defRP->registerAssignment = newAssignment;
+ }
+ }
+ else
+ {
+ theInterval->hasConflictingDefUse = true;
+ }
+}
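
The narrowing rule above is a mask intersection with one guard. A sketch with hypothetical 8-register masks:

```cpp
#include <cstdint>
#include <cstdio>

int main()
{
    uint32_t defCandidates = 0x0F; // def may go in r0..r3
    uint32_t useCandidates = 0x06; // use may go in r1..r2

    uint32_t intersection = defCandidates & useCandidates;
    // Single-register test: nonzero, and clearing the lowest bit leaves nothing.
    bool singleReg = (intersection != 0) && ((intersection & (intersection - 1)) == 0);

    if (intersection == 0)
    {
        // Conflicting def/use: leave both sets alone and flag the interval.
        printf("conflicting def/use\n");
    }
    else if (!singleReg /* || !hasInterferingUses (omitted in this sketch) */)
    {
        defCandidates = intersection;
        printf("def narrowed to 0x%x\n", (unsigned)defCandidates);
    }
    return 0;
}
```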
+
+//------------------------------------------------------------------------
+// associateRefPosWithInterval: Update the Interval based on the given RefPosition.
+//
+// Arguments:
+// rp - The RefPosition of interest
+//
+// Notes:
+// This is called at the time when 'rp' has just been created, so it becomes
+// the nextRefPosition of the recentRefPosition, and both the recentRefPosition
+// and lastRefPosition of its referent.
+//
+void LinearScan::associateRefPosWithInterval(RefPosition* rp)
+{
+ Referenceable* theReferent = rp->referent;
+
+ if (theReferent != nullptr)
+ {
+ // All RefPositions except the dummy ones at the beginning of blocks
+
+ if (rp->isIntervalRef())
+ {
+ Interval* theInterval = rp->getInterval();
+
+ applyCalleeSaveHeuristics(rp);
+
+ if (theInterval->isLocalVar)
+ {
+ if (RefTypeIsUse(rp->refType))
+ {
+ RefPosition* const prevRP = theInterval->recentRefPosition;
+ if ((prevRP != nullptr) && (prevRP->bbNum == rp->bbNum))
+ {
+ prevRP->lastUse = false;
+ }
+ }
+
+ rp->lastUse = (rp->refType != RefTypeExpUse) && (rp->refType != RefTypeParamDef) &&
+ (rp->refType != RefTypeZeroInit) && !extendLifetimes();
+ }
+ else if (rp->refType == RefTypeUse)
+ {
+ checkConflictingDefUse(rp);
+ rp->lastUse = true;
+ }
+ }
+
+ RefPosition* prevRP = theReferent->recentRefPosition;
+ if (prevRP != nullptr)
+ {
+ prevRP->nextRefPosition = rp;
+ }
+ else
+ {
+ theReferent->firstRefPosition = rp;
+ }
+ theReferent->recentRefPosition = rp;
+ theReferent->lastRefPosition = rp;
+ }
+ else
+ {
+ assert((rp->refType == RefTypeBB) || (rp->refType == RefTypeKillGCRefs));
+ }
+}
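
The chaining above maintains, per referent, a first and most-recent reference plus a forward link on each reference. A self-contained sketch with stand-in types (Ref/Referent are hypothetical simplifications of RefPosition/Referenceable):

```cpp
#include <cassert>

struct Ref
{
    Ref* next = nullptr;
};

struct Referent
{
    Ref* first  = nullptr;
    Ref* recent = nullptr;

    // Append a new reference: link it after the most recent one, or make it
    // the first reference if none exists yet.
    void append(Ref* r)
    {
        if (recent != nullptr)
        {
            recent->next = r;
        }
        else
        {
            first = r;
        }
        recent = r;
    }
};

int main()
{
    Referent interval;
    Ref a, b;
    interval.append(&a);
    interval.append(&b);
    assert(interval.first == &a && interval.recent == &b && a.next == &b);
    return 0;
}
```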
+
+//---------------------------------------------------------------------------
+// newRefPosition: allocate and initialize a new RefPosition.
+//
+// Arguments:
+// reg - reg number that identifies RegRecord to be associated
+// with this RefPosition
+// theLocation - LSRA location of RefPosition
+// theRefType - RefPosition type
+// theTreeNode - GenTree node for which this RefPosition is created
+// mask - Set of valid registers for this RefPosition
+//
+// Return Value:
+// a new RefPosition
+//
+RefPosition* LinearScan::newRefPosition(
+ regNumber reg, LsraLocation theLocation, RefType theRefType, GenTree* theTreeNode, regMaskTP mask)
+{
+ RefPosition* newRP = newRefPositionRaw(theLocation, theTreeNode, theRefType);
+
+ newRP->setReg(getRegisterRecord(reg));
+ newRP->registerAssignment = mask;
+
+ newRP->setMultiRegIdx(0);
+ newRP->setAllocateIfProfitable(false);
+
+ associateRefPosWithInterval(newRP);
+
+ DBEXEC(VERBOSE, newRP->dump());
+ return newRP;
+}
+
+//---------------------------------------------------------------------------
+// newRefPosition: allocate and initialize a new RefPosition.
+//
+// Arguments:
+// theInterval - interval to which RefPosition is associated with.
+// theLocation - LSRA location of RefPosition
+// theRefType - RefPosition type
+// theTreeNode - GenTree node for which this RefPosition is created
+// mask - Set of valid registers for this RefPosition
+// multiRegIdx - register position if this RefPosition corresponds to a
+// multi-reg call node.
+//
+// Return Value:
+// a new RefPosition
+//
+RefPosition* LinearScan::newRefPosition(Interval* theInterval,
+ LsraLocation theLocation,
+ RefType theRefType,
+ GenTree* theTreeNode,
+ regMaskTP mask,
+ unsigned multiRegIdx /* = 0 */)
+{
+#ifdef DEBUG
+ if (theInterval != nullptr && regType(theInterval->registerType) == FloatRegisterType)
+ {
+ // In the case we're using floating point registers we must make sure
+ // this flag was set previously in the compiler since this will mandate
+ // whether LSRA will take into consideration FP reg killsets.
+ assert(compiler->compFloatingPointUsed || ((mask & RBM_FLT_CALLEE_SAVED) == 0));
+ }
+#endif // DEBUG
+
+ // If this reference is constrained to a single register (and it's not a dummy
+ // or Kill reftype already), add a RefTypeFixedReg at this location so that its
+ // availability can be more accurately determined
+
+ bool isFixedRegister = isSingleRegister(mask);
+ bool insertFixedRef = false;
+ if (isFixedRegister)
+ {
+ // Insert a RefTypeFixedReg for any normal def or use (not ParamDef or BB)
+ if (theRefType == RefTypeUse || theRefType == RefTypeDef)
+ {
+ insertFixedRef = true;
+ }
+ }
+
+ if (insertFixedRef)
+ {
+ regNumber physicalReg = genRegNumFromMask(mask);
+ RefPosition* pos = newRefPosition(physicalReg, theLocation, RefTypeFixedReg, nullptr, mask);
+ assert(theInterval != nullptr);
+ assert((allRegs(theInterval->registerType) & mask) != 0);
+ }
+
+ RefPosition* newRP = newRefPositionRaw(theLocation, theTreeNode, theRefType);
+
+ newRP->setInterval(theInterval);
+
+ // Spill info
+ newRP->isFixedRegRef = isFixedRegister;
+
+#ifndef _TARGET_AMD64_
+ // We don't need this for AMD because the PInvoke method epilog code is explicit
+ // at register allocation time.
+ if (theInterval != nullptr && theInterval->isLocalVar && compiler->info.compCallUnmanaged &&
+ theInterval->varNum == compiler->genReturnLocal)
+ {
+ mask &= ~(RBM_PINVOKE_TCB | RBM_PINVOKE_FRAME);
+ noway_assert(mask != RBM_NONE);
+ }
+#endif // !_TARGET_AMD64_
+ newRP->registerAssignment = mask;
+
+ newRP->setMultiRegIdx(multiRegIdx);
+ newRP->setAllocateIfProfitable(false);
+
+ associateRefPosWithInterval(newRP);
+
+ DBEXEC(VERBOSE, newRP->dump());
+ return newRP;
+}
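
The `isSingleRegister(mask)` test that drives the RefTypeFixedReg insertion above is the usual single-bit check: nonzero, and clearing the lowest set bit leaves nothing. A sketch on hypothetical masks (isSingleRegisterMask is an illustrative name, not the JIT's):

```cpp
#include <cstdint>
#include <cstdio>

static bool isSingleRegisterMask(uint64_t mask)
{
    // Exactly one bit set: mask is nonzero and mask & (mask - 1) clears it.
    return (mask != 0) && ((mask & (mask - 1)) == 0);
}

int main()
{
    printf("%d\n", isSingleRegisterMask(0x8)); // 1: one candidate, insert a fixed ref
    printf("%d\n", isSingleRegisterMask(0xC)); // 0: two candidates
    printf("%d\n", isSingleRegisterMask(0x0)); // 0: no candidates
    return 0;
}
```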
+
+//------------------------------------------------------------------------
+// IsContainableMemoryOp: Checks whether this is a memory op that can be contained.
+//
+// Arguments:
+// node - the node of interest.
+//
+// Return value:
+// True if this will definitely be a memory reference that could be contained.
+//
+// Notes:
+// This differs from the isMemoryOp() method on GenTree because it checks for
+// the case of a doNotEnregister local. This won't include locals that
+// for some other reason do not become register candidates, nor those that get
+// spilled.
+// Also, because we usually call this before we redo dataflow, any new lclVars
+// introduced after the last dataflow analysis will not yet be marked lvTracked,
+// so we don't use that.
+//
+bool LinearScan::isContainableMemoryOp(GenTree* node)
+{
+ if (node->isMemoryOp())
+ {
+ return true;
+ }
+ if (node->IsLocal())
+ {
+ if (!enregisterLocalVars)
+ {
+ return true;
+ }
+ LclVarDsc* varDsc = &compiler->lvaTable[node->AsLclVar()->gtLclNum];
+ return varDsc->lvDoNotEnregister;
+ }
+ return false;
+}
+
+//------------------------------------------------------------------------
+// addRefsForPhysRegMask: Adds RefPositions of the given type for all the registers in 'mask'.
+//
+// Arguments:
+// mask - the mask (set) of registers.
+// currentLoc - the location at which they should be added
+// refType - the type of refposition
+// isLastUse - true IFF this is a last use of the register
+//
+void LinearScan::addRefsForPhysRegMask(regMaskTP mask, LsraLocation currentLoc, RefType refType, bool isLastUse)
+{
+ for (regNumber reg = REG_FIRST; mask; reg = REG_NEXT(reg), mask >>= 1)
+ {
+ if (mask & 1)
+ {
+ // This assumes that these are all "special" RefTypes that
+ // don't need to be recorded on the tree (hence treeNode is nullptr)
+ RefPosition* pos = newRefPosition(reg, currentLoc, refType, nullptr,
+ genRegMask(reg)); // This MUST occupy the physical register (obviously)
+
+ if (isLastUse)
+ {
+ pos->lastUse = true;
+ }
+ }
+ }
+}
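
The loop above walks the kill mask by shifting it right once per register number and acting whenever the low bit is set. The same walk on a hypothetical mask, with plain ints standing in for regNumber:

```cpp
#include <cstdint>
#include <cstdio>

int main()
{
    // Hypothetical kill set: registers 0, 2, and 5.
    uint64_t killMask = (1ull << 0) | (1ull << 2) | (1ull << 5);

    for (int reg = 0; killMask != 0; ++reg, killMask >>= 1)
    {
        if (killMask & 1)
        {
            // The real loop creates a RefTypeKill RefPosition here.
            printf("kill RefPosition for reg %d\n", reg);
        }
    }
    return 0;
}
```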
+
+//------------------------------------------------------------------------
+// getKillSetForStoreInd: Determine the liveness kill set for a GT_STOREIND node.
+// If the GT_STOREIND will generate a write barrier, determine the specific kill
+// set required by the case-specific, platform-specific write barrier. If no
+// write barrier is required, the kill set will be RBM_NONE.
+//
+// Arguments:
+// tree - the GT_STOREIND node
+//
+// Return Value: a register mask of the registers killed
+//
+regMaskTP LinearScan::getKillSetForStoreInd(GenTreeStoreInd* tree)
+{
+ assert(tree->OperIs(GT_STOREIND));
+
+ regMaskTP killMask = RBM_NONE;
+
+ GenTree* data = tree->Data();
+
+ GCInfo::WriteBarrierForm writeBarrierForm = compiler->codeGen->gcInfo.gcIsWriteBarrierCandidate(tree, data);
+ if (writeBarrierForm != GCInfo::WBF_NoBarrier)
+ {
+ if (compiler->codeGen->genUseOptimizedWriteBarriers(writeBarrierForm))
+ {
+ // We can't determine the exact helper to be used at this point, because it depends on
+ // the allocated register for the `data` operand. However, all the (x86) optimized
+ // helpers have the same kill set: EDX.
+ killMask = RBM_CALLEE_TRASH_NOGC;
+ }
+ else
+ {
+ // Figure out which helper we're going to use, and then get the kill set for that helper.
+ CorInfoHelpFunc helper =
+ compiler->codeGen->genWriteBarrierHelperForWriteBarrierForm(tree, writeBarrierForm);
+ killMask = compiler->compHelperCallKillSet(helper);
+ }
+ }
+
+ return killMask;
+}
+
+//------------------------------------------------------------------------
+// getKillSetForNode: Return the registers killed by the given tree node.
+//
+// Arguments:
+// compiler - the compiler context to use
+// tree - the tree for which the kill set is needed.
+//
+// Return Value: a register mask of the registers killed
+//
+regMaskTP LinearScan::getKillSetForNode(GenTree* tree)
+{
+ regMaskTP killMask = RBM_NONE;
+ switch (tree->OperGet())
+ {
+#ifdef _TARGET_XARCH_
+ case GT_MUL:
+ // We use the 128-bit multiply when performing an overflow checking unsigned multiply
+ //
+ if (((tree->gtFlags & GTF_UNSIGNED) != 0) && tree->gtOverflowEx())
+ {
+ // Both RAX and RDX are killed by the operation
+ killMask = RBM_RAX | RBM_RDX;
+ }
+ break;
+
+ case GT_MULHI:
+#if defined(_TARGET_X86_) && !defined(LEGACY_BACKEND)
+ case GT_MUL_LONG:
+#endif
+ killMask = RBM_RAX | RBM_RDX;
+ break;
+
+ case GT_MOD:
+ case GT_DIV:
+ case GT_UMOD:
+ case GT_UDIV:
+ if (!varTypeIsFloating(tree->TypeGet()))
+ {
+ // Both RAX and RDX are killed by the operation
+ killMask = RBM_RAX | RBM_RDX;
+ }
+ break;
+#endif // _TARGET_XARCH_
+
+ case GT_STORE_OBJ:
+ if (tree->OperIsCopyBlkOp())
+ {
+ assert(tree->AsObj()->gtGcPtrCount != 0);
+ killMask = compiler->compHelperCallKillSet(CORINFO_HELP_ASSIGN_BYREF);
+ break;
+ }
+ __fallthrough;
+
+ case GT_STORE_BLK:
+ case GT_STORE_DYN_BLK:
+ {
+ GenTreeBlk* blkNode = tree->AsBlk();
+ bool isCopyBlk = varTypeIsStruct(blkNode->Data());
+ switch (blkNode->gtBlkOpKind)
+ {
+ case GenTreeBlk::BlkOpKindHelper:
+ if (isCopyBlk)
+ {
+ killMask = compiler->compHelperCallKillSet(CORINFO_HELP_MEMCPY);
+ }
+ else
+ {
+ killMask = compiler->compHelperCallKillSet(CORINFO_HELP_MEMSET);
+ }
+ break;
+
+#ifdef _TARGET_XARCH_
+ case GenTreeBlk::BlkOpKindRepInstr:
+ if (isCopyBlk)
+ {
+ // rep movs kills RCX, RDI and RSI
+ killMask = RBM_RCX | RBM_RDI | RBM_RSI;
+ }
+ else
+ {
+ // rep stos kills RCX and RDI.
+ // (Note that the Data() node, if not constant, will be assigned to
+                    // RCX, but it's fine that this kills it, as the value is not available
+ // after this node in any case.)
+ killMask = RBM_RDI | RBM_RCX;
+ }
+ break;
+#else
+ case GenTreeBlk::BlkOpKindRepInstr:
+#endif
+ case GenTreeBlk::BlkOpKindUnroll:
+ case GenTreeBlk::BlkOpKindInvalid:
+ // for these 'gtBlkOpKind' kinds, we leave 'killMask' = RBM_NONE
+ break;
+ }
+ }
+ break;
+
+ case GT_RETURNTRAP:
+ killMask = compiler->compHelperCallKillSet(CORINFO_HELP_STOP_FOR_GC);
+ break;
+ case GT_CALL:
+#ifdef _TARGET_X86_
+ if (compiler->compFloatingPointUsed)
+ {
+ if (tree->TypeGet() == TYP_DOUBLE)
+ {
+ needDoubleTmpForFPCall = true;
+ }
+ else if (tree->TypeGet() == TYP_FLOAT)
+ {
+ needFloatTmpForFPCall = true;
+ }
+ }
+#endif // _TARGET_X86_
+#if defined(_TARGET_X86_) || defined(_TARGET_ARM_)
+ if (tree->IsHelperCall())
+ {
+ GenTreeCall* call = tree->AsCall();
+ CorInfoHelpFunc helpFunc = compiler->eeGetHelperNum(call->gtCallMethHnd);
+ killMask = compiler->compHelperCallKillSet(helpFunc);
+ }
+ else
+#endif // defined(_TARGET_X86_) || defined(_TARGET_ARM_)
+ {
+ // if there is no FP used, we can ignore the FP kills
+ if (compiler->compFloatingPointUsed)
+ {
+ killMask = RBM_CALLEE_TRASH;
+ }
+ else
+ {
+ killMask = RBM_INT_CALLEE_TRASH;
+ }
+#ifdef _TARGET_ARM_
+ if (tree->AsCall()->IsVirtualStub())
+ {
+ killMask |= compiler->virtualStubParamInfo->GetRegMask();
+ }
+#else // !_TARGET_ARM_
+ // Verify that the special virtual stub call registers are in the kill mask.
+ // We don't just add them unconditionally to the killMask because for most architectures
+ // they are already in the RBM_CALLEE_TRASH set,
+ // and we don't want to introduce extra checks and calls in this hot function.
+ assert(!tree->AsCall()->IsVirtualStub() || ((killMask & compiler->virtualStubParamInfo->GetRegMask()) ==
+ compiler->virtualStubParamInfo->GetRegMask()));
+#endif
+ }
+ break;
+ case GT_STOREIND:
+ killMask = getKillSetForStoreInd(tree->AsStoreInd());
+ break;
+
+#if defined(PROFILING_SUPPORTED)
+ // If this method requires profiler ELT hook then mark these nodes as killing
+ // callee trash registers (excluding RAX and XMM0). The reason for this is that
+ // profiler callback would trash these registers. See vm\amd64\asmhelpers.asm for
+ // more details.
+ case GT_RETURN:
+ if (compiler->compIsProfilerHookNeeded())
+ {
+ killMask = compiler->compHelperCallKillSet(CORINFO_HELP_PROF_FCN_LEAVE);
+ }
+ break;
+
+ case GT_PROF_HOOK:
+ if (compiler->compIsProfilerHookNeeded())
+ {
+ killMask = compiler->compHelperCallKillSet(CORINFO_HELP_PROF_FCN_TAILCALL);
+ }
+ break;
+#endif // PROFILING_SUPPORTED
+
+ default:
+ // for all other 'tree->OperGet()' kinds, leave 'killMask' = RBM_NONE
+ break;
+ }
+ return killMask;
+}
+
+//------------------------------------------------------------------------
+// buildKillPositionsForNode:
+// Given some tree node add refpositions for all the registers this node kills
+//
+// Arguments:
+// tree - the tree for which kill positions should be generated
+// currentLoc - the location at which the kills should be added
+//
+// Return Value:
+// true - kills were inserted
+// false - no kills were inserted
+//
+// Notes:
+// The return value is needed because if we have any kills, we need to make sure that
+// all defs are located AFTER the kills. On the other hand, if there aren't kills,
+// the multiple defs for a regPair are in different locations.
+// If we generate any kills, we will mark all currentLiveVars as being preferenced
+// to avoid the killed registers. This is somewhat conservative.
+
+bool LinearScan::buildKillPositionsForNode(GenTree* tree, LsraLocation currentLoc)
+{
+ regMaskTP killMask = getKillSetForNode(tree);
+ bool isCallKill = ((killMask == RBM_INT_CALLEE_TRASH) || (killMask == RBM_CALLEE_TRASH));
+ if (killMask != RBM_NONE)
+ {
+ // The killMask identifies a set of registers that will be used during codegen.
+ // Mark these as modified here, so when we do final frame layout, we'll know about
+ // all these registers. This is especially important if killMask contains
+ // callee-saved registers, which affect the frame size since we need to save/restore them.
+        // In the case where we have a copyBlk with GC pointers, we may need to call the
+        // CORINFO_HELP_ASSIGN_BYREF helper, which kills callee-saved RSI and RDI. If
+        // LSRA doesn't assign RSI/RDI, they wouldn't get marked as modified until codegen,
+ // which is too late.
+ compiler->codeGen->regSet.rsSetRegsModified(killMask DEBUGARG(true));
+
+ addRefsForPhysRegMask(killMask, currentLoc, RefTypeKill, true);
+
+ // TODO-CQ: It appears to be valuable for both fp and int registers to avoid killing the callee
+        // save regs on infrequently executed paths. However, it results in a large number of asmDiffs,
+        // many of which appear to be regressions (because there is more spill on the infrequent path),
+ // but are not really because the frequent path becomes smaller. Validating these diffs will need
+ // to be done before making this change.
+ // if (!blockSequence[curBBSeqNum]->isRunRarely())
+ if (enregisterLocalVars)
+ {
+ VarSetOps::Iter iter(compiler, currentLiveVars);
+ unsigned varIndex = 0;
+ while (iter.NextElem(&varIndex))
+ {
+ unsigned varNum = compiler->lvaTrackedToVarNum[varIndex];
+ LclVarDsc* varDsc = compiler->lvaTable + varNum;
+#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
+ if (varTypeNeedsPartialCalleeSave(varDsc->lvType))
+ {
+ if (!VarSetOps::IsMember(compiler, largeVectorCalleeSaveCandidateVars, varIndex))
+ {
+ continue;
+ }
+ }
+ else
+#endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE
+ if (varTypeIsFloating(varDsc) &&
+ !VarSetOps::IsMember(compiler, fpCalleeSaveCandidateVars, varIndex))
+ {
+ continue;
+ }
+ Interval* interval = getIntervalForLocalVar(varIndex);
+ if (isCallKill)
+ {
+ interval->preferCalleeSave = true;
+ }
+ regMaskTP newPreferences = allRegs(interval->registerType) & (~killMask);
+
+ if (newPreferences != RBM_NONE)
+ {
+ interval->updateRegisterPreferences(newPreferences);
+ }
+ else
+ {
+ // If there are no callee-saved registers, the call could kill all the registers.
+                    // This is a valid state, so in that case the assert should not trigger. The RA will spill in order to
+ // free a register later.
+ assert(compiler->opts.compDbgEnC || (calleeSaveRegs(varDsc->lvType)) == RBM_NONE);
+ }
+ }
+ }
+
+ if (compiler->killGCRefs(tree))
+ {
+ RefPosition* pos = newRefPosition((Interval*)nullptr, currentLoc, RefTypeKillGCRefs, tree,
+ (allRegs(TYP_REF) & ~RBM_ARG_REGS));
+ }
+ return true;
+ }
+
+ return false;
+}
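
The preference update at the heart of the loop above is `allRegs(type) & ~killMask`: steer each live variable away from the killed registers when any register survives, and otherwise accept that a spill may be needed. In isolation, with hypothetical masks:

```cpp
#include <cstdint>
#include <cstdio>

int main()
{
    uint32_t allRegs  = 0xFF; // r0..r7
    uint32_t killMask = 0x0F; // a call trashes r0..r3

    uint32_t newPreferences = allRegs & ~killMask;
    if (newPreferences != 0)
    {
        printf("prefer regs 0x%x across the kill\n", (unsigned)newPreferences); // r4..r7
    }
    else
    {
        // Everything is killed; the allocator will spill to free a register later.
        printf("no surviving preference\n");
    }
    return 0;
}
```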
+
+//----------------------------------------------------------------------------
+// defineNewInternalTemp: Defines a ref position for an internal temp.
+//
+// Arguments:
+// tree - Gentree node requiring an internal register
+// regType - Register type
+// regMask - register mask of candidates for temp
+//
+RefPosition* LinearScan::defineNewInternalTemp(GenTree* tree, RegisterType regType, regMaskTP regMask)
+{
+ Interval* current = newInterval(regType);
+ current->isInternal = true;
+ return newRefPosition(current, currentLoc, RefTypeDef, tree, regMask, 0);
+}
+
+//------------------------------------------------------------------------
+// buildInternalRegisterDefsForNode - build Def positions for internal
+// registers required for tree node.
+//
+// Arguments:
+// tree - Gentree node that needs internal registers
+// temps - in-out array that is populated with the RefPositions
+// created for the Defs of the internal registers
+//
+// Returns:
+// The total number of Def positions created for the internal registers of the tree node.
+int LinearScan::buildInternalRegisterDefsForNode(GenTree* tree, TreeNodeInfo* info, RefPosition* temps[])
+{
+ int count;
+ int internalIntCount = info->internalIntCount;
+ regMaskTP internalCands = info->getInternalCandidates(this);
+
+ // If the number of internal integer registers required is the same as the number of candidate integer registers in
+ // the candidate set, then they must be handled as fixed registers.
+ // (E.g. for the integer registers that floating point arguments must be copied into for a varargs call.)
+ bool fixedRegs = false;
+ regMaskTP internalIntCandidates = (internalCands & allRegs(TYP_INT));
+ if (((int)genCountBits(internalIntCandidates)) == internalIntCount)
+ {
+ fixedRegs = true;
+ }
+
+ for (count = 0; count < internalIntCount; count++)
+ {
+ regMaskTP internalIntCands = (internalCands & allRegs(TYP_INT));
+ if (fixedRegs)
+ {
+ internalIntCands = genFindLowestBit(internalIntCands);
+ internalCands &= ~internalIntCands;
+ }
+ temps[count] = defineNewInternalTemp(tree, IntRegisterType, internalIntCands);
+ }
+
+ int internalFloatCount = info->internalFloatCount;
+ for (int i = 0; i < internalFloatCount; i++)
+ {
+ regMaskTP internalFPCands = (internalCands & internalFloatRegCandidates());
+ temps[count++] = defineNewInternalTemp(tree, FloatRegisterType, internalFPCands);
+ }
+
+ assert(count < MaxInternalRegisters);
+ assert(count == (internalIntCount + internalFloatCount));
+ return count;
+}
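+
+// A worked example of the fixedRegs case above (hypothetical varargs call on x64):
+// if info->internalIntCount == 2 and the internal candidates are exactly {RCX, RDX},
+// genCountBits matches the count, so each temp Def is pinned to a single register
+// in ascending mask order:
+//
+//   temps[0] -> RefTypeDef with candidates RBM_RCX
+//   temps[1] -> RefTypeDef with candidates RBM_RDX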
+
+//------------------------------------------------------------------------
+// buildInternalRegisterUsesForNode - adds Use positions for internal
+// registers required by a tree node.
+//
+// Arguments:
+// tree - Gentree node that needs internal registers
+// defs - array containing the Def RefPositions created for the
+// internal registers.
+// total - Total number of Def positions in 'defs' array.
+//
+// Returns:
+// Void.
+void LinearScan::buildInternalRegisterUsesForNode(GenTree* tree, TreeNodeInfo* info, RefPosition* defs[], int total)
+{
+ assert(total < MaxInternalRegisters);
+
+ // defs[] has been populated by buildInternalRegisterDefsForNode
+ // now just add uses to the defs previously added.
+ for (int i = 0; i < total; i++)
+ {
+ RefPosition* prevRefPosition = defs[i];
+ assert(prevRefPosition != nullptr);
+ regMaskTP mask = prevRefPosition->registerAssignment;
+ if (prevRefPosition->isPhysRegRef)
+ {
+ newRefPosition(defs[i]->getReg()->regNum, currentLoc, RefTypeUse, tree, mask);
+ }
+ else
+ {
+ RefPosition* newest = newRefPosition(defs[i]->getInterval(), currentLoc, RefTypeUse, tree, mask, 0);
+
+ if (info->isInternalRegDelayFree)
+ {
+ newest->delayRegFree = true;
+ }
+ }
+ }
+}
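+
+// A minimal sketch of the def/use pairing (hypothetical single int temp):
+//
+//   RefPosition* temps[MaxInternalRegisters];
+//   int n = buildInternalRegisterDefsForNode(tree, info, temps); // RefTypeDef at currentLoc
+//   // ... build the Use RefPositions for the node's sources ...
+//   buildInternalRegisterUsesForNode(tree, info, temps, n);      // RefTypeUse at currentLoc
+//
+// The temp is thus live across the node's sources; if isInternalRegDelayFree is
+// set, its Use is marked delayRegFree so the temp's register cannot also be
+// assigned to the node's def.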
+
+#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
+//------------------------------------------------------------------------
+// buildUpperVectorSaveRefPositions - Create special RefPositions for saving
+// the upper half of a set of large vectors.
+//
+// Arguments:
+// tree - The current node being handled
+// currentLoc - The location of the current node
+//
+// Return Value: Returns the set of lclVars that are killed by this node, and therefore
+// require RefTypeUpperVectorSaveDef RefPositions.
+//
+// Notes: The returned set is used by buildUpperVectorRestoreRefPositions.
+//
+VARSET_VALRET_TP
+LinearScan::buildUpperVectorSaveRefPositions(GenTree* tree, LsraLocation currentLoc)
+{
+ assert(enregisterLocalVars);
+ VARSET_TP liveLargeVectors(VarSetOps::MakeEmpty(compiler));
+ regMaskTP fpCalleeKillSet = RBM_NONE;
+ if (!VarSetOps::IsEmpty(compiler, largeVectorVars))
+ {
+ // We actually need to find any calls that kill the upper-half of the callee-save vector registers.
+ // But we will use as a proxy any node that kills floating point registers.
+ // (Note that some calls are masquerading as other nodes at this point so we can't just check for calls.)
+ fpCalleeKillSet = getKillSetForNode(tree);
+ if ((fpCalleeKillSet & RBM_FLT_CALLEE_TRASH) != RBM_NONE)
+ {
+ VarSetOps::AssignNoCopy(compiler, liveLargeVectors,
+ VarSetOps::Intersection(compiler, currentLiveVars, largeVectorVars));
+ VarSetOps::Iter iter(compiler, liveLargeVectors);
+ unsigned varIndex = 0;
+ while (iter.NextElem(&varIndex))
+ {
+ Interval* varInterval = getIntervalForLocalVar(varIndex);
+ Interval* tempInterval = newInterval(varInterval->registerType);
+ tempInterval->isInternal = true;
+ RefPosition* pos =
+ newRefPosition(tempInterval, currentLoc, RefTypeUpperVectorSaveDef, tree, RBM_FLT_CALLEE_SAVED);
+ // We are going to save the existing relatedInterval of varInterval on tempInterval, so that we can set
+ // the tempInterval as the relatedInterval of varInterval, so that we can build the corresponding
+ // RefTypeUpperVectorSaveUse RefPosition. We will then restore the relatedInterval onto varInterval,
+ // and set varInterval as the relatedInterval of tempInterval.
+ tempInterval->relatedInterval = varInterval->relatedInterval;
+ varInterval->relatedInterval = tempInterval;
+ }
+ }
+ }
+ return liveLargeVectors;
+}
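+
+// After the save loop above, the interval linkage for each live large vector is
+// (with R being varInterval's original relatedInterval, possibly nullptr):
+//
+//   varInterval->relatedInterval  == tempInterval
+//   tempInterval->relatedInterval == R
+//
+// buildUpperVectorRestoreRefPositions relies on this to find tempInterval again.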
+
+//------------------------------------------------------------------------
+// buildUpperVectorRestoreRefPositions - Create special RefPositions for restoring
+// the upper half of a set of large vectors.
+//
+// Arguments:
+// tree - The current node being handled
+// currentLoc - The location of the current node
+// liveLargeVectors - The set of lclVars needing restores (returned by buildUpperVectorSaveRefPositions)
+//
+void LinearScan::buildUpperVectorRestoreRefPositions(GenTree* tree,
+ LsraLocation currentLoc,
+ VARSET_VALARG_TP liveLargeVectors)
+{
+ assert(enregisterLocalVars);
+ if (!VarSetOps::IsEmpty(compiler, liveLargeVectors))
+ {
+ VarSetOps::Iter iter(compiler, liveLargeVectors);
+ unsigned varIndex = 0;
+ while (iter.NextElem(&varIndex))
+ {
+ Interval* varInterval = getIntervalForLocalVar(varIndex);
+ Interval* tempInterval = varInterval->relatedInterval;
+ assert(tempInterval->isInternal == true);
+ RefPosition* pos =
+ newRefPosition(tempInterval, currentLoc, RefTypeUpperVectorSaveUse, tree, RBM_FLT_CALLEE_SAVED);
+ // Restore the relatedInterval onto varInterval, and set varInterval as the relatedInterval
+ // of tempInterval.
+ varInterval->relatedInterval = tempInterval->relatedInterval;
+ tempInterval->relatedInterval = varInterval;
+ }
+ }
+}
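+
+// After the restore loop above, the linkage from the save is unwound:
+//
+//   varInterval->relatedInterval  == R            // restored to its original value
+//   tempInterval->relatedInterval == varInterval  // temp now points back at the lclVar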
+#endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE
+
+#ifdef DEBUG
+//------------------------------------------------------------------------
+// ComputeOperandDstCount: computes the number of registers defined by a
+// node.
+//
+// For most nodes, this is simple:
+// - Nodes that do not produce values (e.g. stores and other void-typed
+// nodes) and nodes that immediately use the registers they define
+// produce no registers
+// - Nodes that are marked as defining N registers define N registers.
+//
+// For contained nodes, however, things are more complicated: for purposes
+// of bookkeeping, a contained node is treated as producing the transitive
+// closure of the registers produced by its sources.
+//
+// Arguments:
+// operand - The operand for which to compute a register count.
+//
+// Returns:
+// The number of registers defined by `operand`.
+//
+// static
+int LinearScan::ComputeOperandDstCount(GenTree* operand)
+{
+ // GT_ARGPLACE is the only non-LIR node that is currently in the trees at this stage, though
+ // note that it is not in the linear order. It seems best to check for !IsLIR() rather than
+ // GT_ARGPLACE directly, since it's that characteristic that makes it irrelevant for this method.
+ if (!operand->IsLIR())
+ {
+ return 0;
+ }
+ if (operand->isContained())
+ {
+ int dstCount = 0;
+ for (GenTree* op : operand->Operands())
+ {
+ dstCount += ComputeOperandDstCount(op);
+ }
+
+ return dstCount;
+ }
+ if (operand->IsUnusedValue())
+ {
+ // Operands that define an unused value do not produce any registers.
+ return 0;
+ }
+ if (operand->IsValue())
+ {
+ // Operands that are values and are not contained consume all of their operands
+ // and produce one or more registers.
+ return operand->GetRegisterDstCount();
+ }
+ else
+ {
+ // This must be one of the operand types that are neither contained nor produce a value.
+ // Stores and void-typed operands may be encountered when processing call nodes, which contain
+ // pointers to argument setup stores.
+ assert(operand->OperIsStore() || operand->OperIsBlkOp() || operand->OperIsPutArgStk() ||
+ operand->OperIsCompare() || operand->OperIs(GT_CMP) || operand->IsSIMDEqualityOrInequality() ||
+ operand->TypeGet() == TYP_VOID);
+ return 0;
+ }
+}
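+
+// Worked example (hypothetical trees): a non-contained IND that produces one
+// register yields a count of 1. For a contained IND whose contained LEA address
+// uses two register-producing sources t1 (base) and t2 (index), the IND is
+// treated as transitively producing both:
+//
+//   ComputeOperandDstCount(IND(LEA(t1, t2))) == 2   // t1 and t2, via the closure
+//
+// so ComputeAvailableSrcCount of the consuming node counts both t1 and t2.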
+
+//------------------------------------------------------------------------
+// ComputeAvailableSrcCount: computes the number of registers available as
+// sources for a node.
+//
+// This is simply the sum of the number of registers produced by each
+// operand to the node.
+//
+// Arguments:
+// node - The node for which to compute a source count.
+//
+// Return Value:
+// The number of registers available as sources for `node`.
+//
+// static
+int LinearScan::ComputeAvailableSrcCount(GenTree* node)
+{
+ int numSources = 0;
+ for (GenTree* operand : node->Operands())
+ {
+ numSources += ComputeOperandDstCount(operand);
+ }
+
+ return numSources;
+}
+#endif // DEBUG
+
+//------------------------------------------------------------------------
+// buildRefPositionsForNode: The main entry point for building the RefPositions
+// and "tree temp" Intervals for a given node.
+//
+// Arguments:
+// tree - The node for which we are building RefPositions
+// block - The BasicBlock in which the node resides
+// currentLoc - The LsraLocation of the given node
+//
+void LinearScan::buildRefPositionsForNode(GenTree* tree, BasicBlock* block, LsraLocation currentLoc)
+{
+#ifdef _TARGET_ARM_
+ assert(!isRegPairType(tree->TypeGet()));
+#endif // _TARGET_ARM_
+
+ // The LIR traversal doesn't visit GT_LIST or GT_ARGPLACE nodes.
+ // GT_CLS_VAR nodes should have been eliminated by rationalizer.
+ assert(tree->OperGet() != GT_ARGPLACE);
+ assert(tree->OperGet() != GT_LIST);
+ assert(tree->OperGet() != GT_CLS_VAR);
+
+ // The LIR traversal visits only the first node in a GT_FIELD_LIST.
+ assert((tree->OperGet() != GT_FIELD_LIST) || tree->AsFieldList()->IsFieldListHead());
+
+ // The set of internal temporary registers used by this node are stored in the
+ // gtRsvdRegs register mask. Clear it out.
+ tree->gtRsvdRegs = RBM_NONE;
+
+#ifdef DEBUG
+ if (VERBOSE)
+ {
+ dumpDefList();
+ compiler->gtDispTree(tree, nullptr, nullptr, true);
+ }
+#endif // DEBUG
+
+ // If the node produces a value that will be consumed by a parent node, its TreeNodeInfo will
+ // be allocated in the LocationInfoListNode. Otherwise, we'll just use a local value that will
+ // be thrown away when we're done.
+ LocationInfoListNode* locationInfo = nullptr;
+ TreeNodeInfo tempInfo;
+ TreeNodeInfo* info = nullptr;
+ int consume = 0;
+ int produce = 0;
+ if (!tree->isContained())
+ {
+ if (tree->IsValue())
+ {
+ locationInfo = listNodePool.GetNode(currentLoc, nullptr, tree);
+ currentNodeInfo = &locationInfo->info;
+ }
+ else
+ {
+ currentNodeInfo = &tempInfo;
+ }
+ info = currentNodeInfo;
+ info->Initialize(this, tree);
+ BuildNode(tree);
+ assert(info->IsValid(this));
+ consume = info->srcCount;
+ produce = info->dstCount;
+#ifdef DEBUG
+ if (VERBOSE)
+ {
+ printf(" +");
+ info->dump(this);
+ tree->dumpLIRFlags();
+ printf("\n");
+ }
+#endif // DEBUG
+ }
+
+#ifdef DEBUG
+ if (VERBOSE)
+ {
+ if (tree->isContained())
+ {
+ JITDUMP("Contained\n");
+ }
+ else if (tree->OperIs(GT_LCL_VAR, GT_LCL_FLD) && tree->IsUnusedValue())
+ {
+ JITDUMP("Unused\n");
+ }
+ else
+ {
+ JITDUMP(" consume=%d produce=%d\n", consume, produce);
+ }
+ }
+#endif // DEBUG
+
+ assert(((consume == 0) && (produce == 0)) || (ComputeAvailableSrcCount(tree) == consume));
+
+ if (tree->OperIs(GT_LCL_VAR, GT_LCL_FLD))
+ {
+ LclVarDsc* const varDsc = &compiler->lvaTable[tree->AsLclVarCommon()->gtLclNum];
+ if (isCandidateVar(varDsc))
+ {
+ assert(consume == 0);
+
+ // We handle tracked variables differently from non-tracked ones. If it is tracked,
+ // we simply add a use or def of the tracked variable. Otherwise, for a use we need
+ // to actually add the appropriate references for loading or storing the variable.
+ //
+ // It won't actually get used or defined until the appropriate ancestor tree node
+ // is processed, unless this is marked "isLocalDefUse" because it is a stack-based argument
+ // to a call
+
+ assert(varDsc->lvTracked);
+ unsigned varIndex = varDsc->lvVarIndex;
+
+ // We have only approximate last-use information at this point. This is because the
+ // execution order doesn't actually reflect the true order in which the localVars
+ // are referenced - but the order of the RefPositions will, so we recompute it after
+ // RefPositions are built.
+ // Use the old value for setting currentLiveVars - note that we do this with the
+ // not-quite-correct setting of lastUse. However, this is OK because
+ // 1) this is only for preferencing, which doesn't require strict correctness, and
+ // 2) the cases where these out-of-order uses occur should not overlap a kill.
+ // TODO-Throughput: clean this up once we have the execution order correct. At that point
+ // we can update currentLiveVars at the same place that we create the RefPosition.
+ if ((tree->gtFlags & GTF_VAR_DEATH) != 0)
+ {
+ VarSetOps::RemoveElemD(compiler, currentLiveVars, varIndex);
+ }
+
+ if (!tree->IsUnusedValue() && !tree->isContained())
+ {
+ assert(produce != 0);
+
+ locationInfo->interval = getIntervalForLocalVar(varIndex);
+ defList.Append(locationInfo);
+ }
+ return;
+ }
+ }
+ if (tree->isContained())
+ {
+ return;
+ }
+
+ // Handle the case of local variable assignment
+ Interval* varDefInterval = nullptr;
+
+ GenTree* defNode = tree;
+
+ // noAdd means the node creates a def, but for purposes of map
+ // management we do not add it, because the data is not flowing up
+ // the tree.
+
+ bool noAdd = info->isLocalDefUse;
+ RefPosition* prevPos = nullptr;
+
+ bool isSpecialPutArg = false;
+
+ assert(!tree->OperIsAssignment());
+ if (tree->OperIsLocalStore())
+ {
+ GenTreeLclVarCommon* const store = tree->AsLclVarCommon();
+ assert((consume > 1) || (regType(store->gtOp1->TypeGet()) == regType(store->TypeGet())));
+
+ LclVarDsc* varDsc = &compiler->lvaTable[store->gtLclNum];
+ if (isCandidateVar(varDsc))
+ {
+ // We always push the tracked lclVar intervals
+ assert(varDsc->lvTracked);
+ unsigned varIndex = varDsc->lvVarIndex;
+ varDefInterval = getIntervalForLocalVar(varIndex);
+ assert((store->gtFlags & GTF_VAR_DEF) != 0);
+ defNode = tree;
+ if (produce == 0)
+ {
+ produce = 1;
+ noAdd = true;
+ }
+
+ assert(consume <= MAX_RET_REG_COUNT);
+ if (consume == 1)
+ {
+ // Get the location info for the register defined by the first operand.
+ LocationInfoListNode& operandInfo = *(useList.Begin());
+ assert(operandInfo.treeNode == tree->gtGetOp1());
+
+ Interval* srcInterval = operandInfo.interval;
+ if (srcInterval->relatedInterval == nullptr)
+ {
+ // Preference the source to the dest, unless this is a non-last-use localVar.
+ // Note that the last-use info is not correct, but it is a better approximation than preferencing
+ // the source to the dest, if the source's lifetime extends beyond the dest.
+ if (!srcInterval->isLocalVar || (operandInfo.treeNode->gtFlags & GTF_VAR_DEATH) != 0)
+ {
+ srcInterval->assignRelatedInterval(varDefInterval);
+ }
+ }
+ else if (!srcInterval->isLocalVar)
+ {
+ // Preference the source to dest, if src is not a local var.
+ srcInterval->assignRelatedInterval(varDefInterval);
+ }
+ }
+
+ if ((tree->gtFlags & GTF_VAR_DEATH) == 0)
+ {
+ VarSetOps::AddElemD(compiler, currentLiveVars, varIndex);
+ }
+ }
+ else if (store->gtOp1->OperIs(GT_BITCAST))
+ {
+ store->gtType = store->gtOp1->gtType = store->gtOp1->AsUnOp()->gtOp1->TypeGet();
+
+ // Get the location info for the register defined by the first operand.
+ LocationInfoListNode& operandInfo = *(useList.Begin());
+ assert(operandInfo.treeNode == tree->gtGetOp1());
+
+ Interval* srcInterval = operandInfo.interval;
+ srcInterval->registerType = regType(store->TypeGet());
+
+ RefPosition* srcDefPosition = srcInterval->firstRefPosition;
+ assert(srcDefPosition != nullptr);
+ assert(srcDefPosition->refType == RefTypeDef);
+ assert(srcDefPosition->treeNode == store->gtOp1);
+
+ srcDefPosition->registerAssignment = allRegs(store->TypeGet());
+ operandInfo.info.setSrcCandidates(this, allRegs(store->TypeGet()));
+ }
+ }
+ else if (noAdd && produce == 0)
+ {
+ // Dead nodes may remain after tree rationalization, decomposition or lowering.
+ // They should be marked as UnusedValue.
+ // TODO-Cleanup: Identify and remove these dead nodes prior to register allocation.
+ assert(!noAdd || (produce != 0));
+ }
+
+ Interval* prefSrcInterval = nullptr;
+
+ // If this is a binary operator that will be encoded with 2 operand fields
+ // (i.e. the target is read-modify-write), preference the dst to op1.
+
+ bool hasDelayFreeSrc = info->hasDelayFreeSrc;
+
+#if defined(DEBUG) && defined(_TARGET_X86_)
+ // On x86, `LSRA_LIMIT_CALLER` is too restrictive to allow the use of special put args: this stress mode
+ // leaves only three registers allocatable--eax, ecx, and edx--of which the latter two are also used for the
+ // first two integral arguments to a call. This can leave us with too few registers to successfully allocate in
+ // situations like the following:
+ //
+ // t1026 = lclVar ref V52 tmp35 u:3 REG NA <l:$3a1, c:$98d>
+ //
+ // /--* t1026 ref
+ // t1352 = * putarg_reg ref REG NA
+ //
+ // t342 = lclVar int V14 loc6 u:4 REG NA $50c
+ //
+ // t343 = const int 1 REG NA $41
+ //
+ // /--* t342 int
+ // +--* t343 int
+ // t344 = * + int REG NA $495
+ //
+ // t345 = lclVar int V04 arg4 u:2 REG NA $100
+ //
+ // /--* t344 int
+ // +--* t345 int
+ // t346 = * % int REG NA $496
+ //
+ // /--* t346 int
+ // t1353 = * putarg_reg int REG NA
+ //
+ // t1354 = lclVar ref V52 tmp35 (last use) REG NA
+ //
+ // /--* t1354 ref
+ // t1355 = * lea(b+0) byref REG NA
+ //
+ // Here, the first `putarg_reg` would normally be considered a special put arg, which would remove `ecx` from the
+ // set of allocatable registers, leaving only `eax` and `edx`. The allocator will then fail to allocate a register
+ // for the def of `t345` if arg4 is not a register candidate: the corresponding ref position will be constrained to
+ // { `ecx`, `ebx`, `esi`, `edi` }, which `LSRA_LIMIT_CALLER` will further constrain to `ecx`, which will not be
+ // available due to the special put arg.
+ const bool supportsSpecialPutArg = getStressLimitRegs() != LSRA_LIMIT_CALLER;
+#else
+ const bool supportsSpecialPutArg = true;
+#endif
+
+ if (supportsSpecialPutArg && tree->OperGet() == GT_PUTARG_REG && isCandidateLocalRef(tree->gtGetOp1()) &&
+ (tree->gtGetOp1()->gtFlags & GTF_VAR_DEATH) == 0)
+ {
+ // This is the case for a "pass-through" copy of a lclVar. In the case where it is a non-last-use,
+ // we don't want the def of the copy to kill the lclVar register, if it is assigned the same register
+ // (which is actually what we hope will happen).
+ JITDUMP("Setting putarg_reg as a pass-through of a non-last use lclVar\n");
+
+ // Get the register information for the first operand of the node.
+ LocationInfoListNode* operandDef = useList.Begin();
+ assert(operandDef->treeNode == tree->gtGetOp1());
+
+ // Preference the destination to the interval of the first register defined by the first operand.
+ Interval* srcInterval = operandDef->interval;
+ assert(srcInterval->isLocalVar);
+ prefSrcInterval = srcInterval;
+ isSpecialPutArg = true;
+ }
+
+ RefPosition* internalRefs[MaxInternalRegisters];
+
+#ifdef DEBUG
+ // If we are constraining the registers for allocation, we will modify all the RefPositions
+ // we've built for this node after we've created them. In order to do that, we'll remember
+ // the last RefPosition prior to those created for this node.
+ RefPositionIterator refPositionMark = refPositions.backPosition();
+#endif // DEBUG
+
+ // Make intervals for all the 'internal' register requirements for this node,
+ // where internal means additional registers required temporarily.
+ // Create a RefTypeDef RefPosition for each such interval.
+ int internalCount = buildInternalRegisterDefsForNode(tree, info, internalRefs);
+
+ // Make use RefPositions for all used values.
+ int consumed = 0;
+ for (LocationInfoListNode *listNode = useList.Begin(), *end = useList.End(); listNode != end;
+ listNode = listNode->Next())
+ {
+ LocationInfo& locInfo = *static_cast<LocationInfo*>(listNode);
+
+ // For tree temps, a use is always a last use and the end of the range;
+ // this is set by default in newRefPosition
+ GenTree* const useNode = locInfo.treeNode;
+ assert(useNode != nullptr);
+
+ Interval* srcInterval = locInfo.interval;
+ TreeNodeInfo& useNodeInfo = locInfo.info;
+ if (useNodeInfo.isTgtPref)
+ {
+ prefSrcInterval = srcInterval;
+ }
+
+ const bool delayRegFree = (hasDelayFreeSrc && useNodeInfo.isDelayFree);
+
+ regMaskTP candidates = useNodeInfo.getSrcCandidates(this);
+#ifdef _TARGET_ARM_
+ regMaskTP allCandidates = candidates;
+
+ if (useNode->OperIsPutArgSplit() || useNode->OperIsMultiRegOp())
+ {
+ // get i-th candidate, set bits in useCandidates must be in sequential order.
+ candidates = genFindLowestReg(allCandidates);
+ allCandidates &= ~candidates;
+ }
+#endif // _TARGET_ARM_
+
+ assert((candidates & allRegs(srcInterval->registerType)) != 0);
+
+ // For non-localVar uses we record nothing, as nothing needs to be written back to the tree.
+ GenTree* const refPosNode = srcInterval->isLocalVar ? useNode : nullptr;
+ RefPosition* pos = newRefPosition(srcInterval, currentLoc, RefTypeUse, refPosNode, candidates, 0);
+ if (delayRegFree)
+ {
+ pos->delayRegFree = true;
+ }
+
+ if (useNode->IsRegOptional())
+ {
+ pos->setAllocateIfProfitable(true);
+ }
+ consumed++;
+
+ // Create additional use RefPositions for multi-reg nodes.
+ for (int idx = 1; idx < locInfo.info.dstCount; idx++)
+ {
+ noway_assert(srcInterval->relatedInterval != nullptr);
+ srcInterval = srcInterval->relatedInterval;
+#ifdef _TARGET_ARM_
+ if (useNode->OperIsPutArgSplit() ||
+ (compiler->opts.compUseSoftFP && (useNode->OperIsPutArgReg() || useNode->OperGet() == GT_BITCAST)))
+ {
+ // get first candidate, set bits in useCandidates must be in sequential order.
+ candidates = genFindLowestReg(allCandidates);
+ allCandidates &= ~candidates;
+ }
+#endif // _TARGET_ARM_
+ RefPosition* pos = newRefPosition(srcInterval, currentLoc, RefTypeUse, refPosNode, candidates, idx);
+ consumed++;
+ }
+ }
+
+ assert(consumed == consume);
+ if (consume != 0)
+ {
+ listNodePool.ReturnNodes(useList);
+ }
+
+ buildInternalRegisterUsesForNode(tree, info, internalRefs, internalCount);
+
+ RegisterType registerType = getDefType(tree);
+ regMaskTP candidates = info->getDstCandidates(this);
+ regMaskTP useCandidates = info->getSrcCandidates(this);
+
+#ifdef DEBUG
+ if (VERBOSE && produce)
+ {
+ printf("Def candidates ");
+ dumpRegMask(candidates);
+ printf(", Use candidates ");
+ dumpRegMask(useCandidates);
+ printf("\n");
+ }
+#endif // DEBUG
+
+#if defined(_TARGET_AMD64_)
+ // A multi-reg call node is the only node that could produce a multi-reg value
+ assert(produce <= 1 || (tree->IsMultiRegCall() && produce == MAX_RET_REG_COUNT));
+#endif // _TARGET_AMD64_
+
+ // Add kill positions before adding def positions
+ buildKillPositionsForNode(tree, currentLoc + 1);
+
+#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
+ VARSET_TP liveLargeVectors(VarSetOps::UninitVal());
+ if (enregisterLocalVars && (RBM_FLT_CALLEE_SAVED != RBM_NONE))
+ {
+ // Build RefPositions for saving any live large vectors.
+ // This must be done after the kills, so that we know which large vectors are still live.
+ VarSetOps::AssignNoCopy(compiler, liveLargeVectors, buildUpperVectorSaveRefPositions(tree, currentLoc + 1));
+ }
+#endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE
+
+ ReturnTypeDesc* retTypeDesc = nullptr;
+ bool isMultiRegCall = tree->IsMultiRegCall();
+ if (isMultiRegCall)
+ {
+ retTypeDesc = tree->AsCall()->GetReturnTypeDesc();
+ assert((int)genCountBits(candidates) == produce);
+ assert(candidates == retTypeDesc->GetABIReturnRegs());
+ }
+
+ // push defs
+ LocationInfoList locationInfoList;
+ LsraLocation defLocation = currentLoc + 1;
+ Interval* interval = varDefInterval;
+ // For nodes that define multiple registers, subsequent intervals will be linked using the 'relatedInterval' field.
+ // Keep track of the previous interval allocated, for that purpose.
+ Interval* prevInterval = nullptr;
+ for (int i = 0; i < produce; i++)
+ {
+ regMaskTP currCandidates = candidates;
+
+ // In case of multi-reg call node, registerType is given by
+ // the type of ith position return register.
+ if (isMultiRegCall)
+ {
+ registerType = retTypeDesc->GetReturnRegType((unsigned)i);
+ currCandidates = genRegMask(retTypeDesc->GetABIReturnReg(i));
+ useCandidates = allRegs(registerType);
+ }
+
+#ifdef _TARGET_ARM_
+ // If oper is GT_PUTARG_SPLIT or a multi-reg op, the set bits in the candidates must be in sequential order.
+ if (tree->OperIsPutArgSplit() || tree->OperIsMultiRegOp())
+ {
+ // get i-th candidate
+ currCandidates = genFindLowestReg(candidates);
+ candidates &= ~currCandidates;
+ }
+#endif // _TARGET_ARM_
+
+ if (interval == nullptr)
+ {
+ // Make a new interval
+ interval = newInterval(registerType);
+ if (hasDelayFreeSrc || info->isInternalRegDelayFree)
+ {
+ interval->hasInterferingUses = true;
+ }
+ else if (tree->OperIsConst())
+ {
+ assert(!tree->IsReuseRegVal());
+ interval->isConstant = true;
+ }
+
+ if ((currCandidates & useCandidates) != RBM_NONE)
+ {
+ interval->updateRegisterPreferences(currCandidates & useCandidates);
+ }
+
+ if (isSpecialPutArg)
+ {
+ interval->isSpecialPutArg = true;
+ }
+ }
+ else
+ {
+ assert(registerTypesEquivalent(interval->registerType, registerType));
+ }
+
+ if (prefSrcInterval != nullptr)
+ {
+ interval->assignRelatedIntervalIfUnassigned(prefSrcInterval);
+ }
+
+ // for assignments, we want to create a refposition for the def
+ // but not push it
+ if (!noAdd)
+ {
+ if (i == 0)
+ {
+ locationInfo->interval = interval;
+ prevInterval = interval;
+ defList.Append(locationInfo);
+ }
+ else
+ {
+ // This is the 2nd or subsequent register defined by a multi-reg node.
+ // Connect them using 'relatedInterval'.
+ noway_assert(prevInterval != nullptr);
+ prevInterval->relatedInterval = interval;
+ prevInterval = interval;
+ prevInterval->isMultiReg = true;
+ interval->isMultiReg = true;
+ }
+ }
+
+ RefPosition* pos = newRefPosition(interval, defLocation, RefTypeDef, defNode, currCandidates, (unsigned)i);
+ if (info->isLocalDefUse)
+ {
+ // This must be an unused value, OR it is a special node for which we allocate
+ // a target register even though it produces no value.
+ assert(defNode->IsUnusedValue() || (defNode->gtOper == GT_LOCKADD));
+ pos->isLocalDefUse = true;
+ pos->lastUse = true;
+ }
+ interval->updateRegisterPreferences(currCandidates);
+ interval->updateRegisterPreferences(useCandidates);
+ interval = nullptr;
+ }
+
+#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
+ // SaveDef position must be at the same location as Def position of call node.
+ if (enregisterLocalVars)
+ {
+ buildUpperVectorRestoreRefPositions(tree, defLocation, liveLargeVectors);
+ }
+#endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE
+
+#ifdef DEBUG
+ // If we are constraining registers, modify all the RefPositions we've just built to specify the
+ // minimum reg count required.
+ if ((getStressLimitRegs() != LSRA_LIMIT_NONE) || (getSelectionHeuristics() != LSRA_SELECT_DEFAULT))
+ {
+ // The number of registers required for a tree node is the sum of
+ // consume + produce + internalCount. This is the minimum
+ // set of registers that needs to be ensured in candidate
+ // set of ref positions created.
+ unsigned minRegCount = consume + produce + info->internalIntCount + info->internalFloatCount;
+
+ for (refPositionMark++; refPositionMark != refPositions.end(); refPositionMark++)
+ {
+ RefPosition* newRefPosition = &(*refPositionMark);
+ unsigned minRegCountForRef = minRegCount;
+ if (RefTypeIsUse(newRefPosition->refType) && newRefPosition->delayRegFree)
+ {
+ // If delayRegFree, then Use will interfere with the destination of the consuming node.
+ // Therefore, we also need add the kill set of the consuming node to minRegCount.
+ //
+ // For example consider the following IR on x86, where v01 and v02
+ // are method args coming in ecx and edx respectively.
+ // GT_DIV(v01, v02)
+ //
+ // For GT_DIV, the minRegCount will be 3 without adding kill set of GT_DIV node.
+ //
+ // Assume further JitStressRegs=2, which would constrain candidates to callee trashable
+ // regs { eax, ecx, edx } on use positions of v01 and v02. LSRA allocates ecx for v01.
+ // The use position of v02 cannot be allocated a reg since it is marked delay-reg free and
+ // {eax,edx} are getting killed before the def of GT_DIV. For this reason, minRegCount for
+ // the use position of v02 also needs to take into account the kill set of its consuming node.
+ regMaskTP killMask = getKillSetForNode(tree);
+ if (killMask != RBM_NONE)
+ {
+ minRegCountForRef += genCountBits(killMask);
+ }
+ }
+ newRefPosition->minRegCandidateCount = minRegCountForRef;
+ if (newRefPosition->IsActualRef() && doReverseCallerCallee())
+ {
+ Interval* interval = newRefPosition->getInterval();
+ regMaskTP oldAssignment = newRefPosition->registerAssignment;
+ regMaskTP calleeSaveMask = calleeSaveRegs(interval->registerType);
+ newRefPosition->registerAssignment =
+ getConstrainedRegMask(oldAssignment, calleeSaveMask, minRegCountForRef);
+ if ((newRefPosition->registerAssignment != oldAssignment) && (newRefPosition->refType == RefTypeUse) &&
+ !interval->isLocalVar)
+ {
+ checkConflictingDefUse(newRefPosition);
+ }
+ }
+ }
+ }
+#endif // DEBUG
+ JITDUMP("\n");
+}
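+
+// A minimal timeline sketch for a node at location L (hypothetical values): the
+// Use RefPositions for its sources are built at L, any kills at L + 1, and the
+// Def(s) at defLocation == L + 1. For a call at L == 20 that defines one register:
+//
+//   20: RefTypeUse (each source)
+//   21: RefTypeKill (callee-trash registers), then RefTypeDef (the result)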
+
+//------------------------------------------------------------------------
+// buildPhysRegRecords: Make an interval for each physical register
+//
+void LinearScan::buildPhysRegRecords()
+{
+ RegisterType regType = IntRegisterType;
+ for (regNumber reg = REG_FIRST; reg < ACTUAL_REG_COUNT; reg = REG_NEXT(reg))
+ {
+ RegRecord* curr = &physRegs[reg];
+ curr->init(reg);
+ }
+}
+
+//------------------------------------------------------------------------
+// getNonEmptyBlock: Return the first non-empty block starting with 'block'
+//
+// Arguments:
+// block - the BasicBlock from which we start looking
+//
+// Return Value:
+// The first non-empty BasicBlock we find.
+//
+BasicBlock* getNonEmptyBlock(BasicBlock* block)
+{
+ while (block != nullptr && block->bbTreeList == nullptr)
+ {
+ BasicBlock* nextBlock = block->bbNext;
+ // Note that here we use the version of NumSucc that does not take a compiler.
+ // That way this doesn't have to take a compiler, or be an instance method, e.g. of LinearScan.
+ // If we have an empty block, it must have jump type BBJ_NONE or BBJ_ALWAYS, in which
+ // case we don't need the version that takes a compiler.
+ assert(block->NumSucc() == 1 && ((block->bbJumpKind == BBJ_ALWAYS) || (block->bbJumpKind == BBJ_NONE)));
+ // sometimes the first block is empty and ends with an uncond branch
+ // assert( block->GetSucc(0) == nextBlock);
+ block = nextBlock;
+ }
+ assert(block != nullptr && block->bbTreeList != nullptr);
+ return block;
+}
+
+//------------------------------------------------------------------------
+// insertZeroInitRefPositions: Handle lclVars that are live-in to the first block
+//
+// Notes:
+// Prior to calling this method, 'currentLiveVars' must be set to the set of register
+// candidate variables that are liveIn to the first block.
+// For each register candidate that is live-in to the first block:
+// - If it is a GC ref, or if compInitMem is set, a ZeroInit RefPosition will be created.
+// - Otherwise, it will be marked as spilled, since it will not be assigned a register
+// on entry and will be loaded from memory on the undefined path.
+// Note that, when the compInitMem option is not set, we may encounter these on
+// paths that are protected by the same condition as an earlier def. However, since
+// we don't do the analysis to determine this - and couldn't rely on always identifying
+// such cases even if we tried - we must conservatively treat the undefined path as
+// being possible. This is a relatively rare case, so the introduced conservatism is
+// not expected to warrant the analysis required to determine the best placement of
+// an initialization.
+//
+void LinearScan::insertZeroInitRefPositions()
+{
+ assert(enregisterLocalVars);
+#ifdef DEBUG
+ VARSET_TP expectedLiveVars(VarSetOps::Intersection(compiler, registerCandidateVars, compiler->fgFirstBB->bbLiveIn));
+ assert(VarSetOps::Equal(compiler, currentLiveVars, expectedLiveVars));
+#endif // DEBUG
+
+ // insert defs for this, then a block boundary
+
+ VarSetOps::Iter iter(compiler, currentLiveVars);
+ unsigned varIndex = 0;
+ while (iter.NextElem(&varIndex))
+ {
+ unsigned varNum = compiler->lvaTrackedToVarNum[varIndex];
+ LclVarDsc* varDsc = compiler->lvaTable + varNum;
+ if (!varDsc->lvIsParam && isCandidateVar(varDsc))
+ {
+ JITDUMP("V%02u was live in to first block:", varNum);
+ Interval* interval = getIntervalForLocalVar(varIndex);
+ if (compiler->info.compInitMem || varTypeIsGC(varDsc->TypeGet()))
+ {
+ JITDUMP(" creating ZeroInit\n");
+ GenTree* firstNode = getNonEmptyBlock(compiler->fgFirstBB)->firstNode();
+ RefPosition* pos =
+ newRefPosition(interval, MinLocation, RefTypeZeroInit, firstNode, allRegs(interval->registerType));
+ varDsc->lvMustInit = true;
+ }
+ else
+ {
+ setIntervalAsSpilled(interval);
+ JITDUMP(" marking as spilled\n");
+ }
+ }
+ }
+}
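+
+// Illustrative outcomes (hypothetical lclVars): a GC ref V03 live into the first
+// block gets a RefTypeZeroInit RefPosition at MinLocation and lvMustInit is set;
+// a non-GC int V07, when compInitMem is not set, is instead marked spilled, so it
+// is loaded from memory at its first (possibly undefined) use.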
+
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+//------------------------------------------------------------------------
+// unixAmd64UpdateRegStateForArg: Sets the register state for an argument of type STRUCT for System V systems.
+//
+// Arguments:
+// argDsc - the LclVarDsc for the argument of interest
+//
+// Notes:
+// See Compiler::raUpdateRegStateForArg(RegState *regState, LclVarDsc *argDsc) in regalloc.cpp
+// for how state for argument is updated for unix non-structs and Windows AMD64 structs.
+//
+void LinearScan::unixAmd64UpdateRegStateForArg(LclVarDsc* argDsc)
+{
+ assert(varTypeIsStruct(argDsc));
+ RegState* intRegState = &compiler->codeGen->intRegState;
+ RegState* floatRegState = &compiler->codeGen->floatRegState;
+
+ if ((argDsc->lvArgReg != REG_STK) && (argDsc->lvArgReg != REG_NA))
+ {
+ if (genRegMask(argDsc->lvArgReg) & (RBM_ALLFLOAT))
+ {
+ assert(genRegMask(argDsc->lvArgReg) & (RBM_FLTARG_REGS));
+ floatRegState->rsCalleeRegArgMaskLiveIn |= genRegMask(argDsc->lvArgReg);
+ }
+ else
+ {
+ assert(genRegMask(argDsc->lvArgReg) & (RBM_ARG_REGS));
+ intRegState->rsCalleeRegArgMaskLiveIn |= genRegMask(argDsc->lvArgReg);
+ }
+ }
+
+ if ((argDsc->lvOtherArgReg != REG_STK) && (argDsc->lvOtherArgReg != REG_NA))
+ {
+ if (genRegMask(argDsc->lvOtherArgReg) & (RBM_ALLFLOAT))
+ {
+ assert(genRegMask(argDsc->lvOtherArgReg) & (RBM_FLTARG_REGS));
+ floatRegState->rsCalleeRegArgMaskLiveIn |= genRegMask(argDsc->lvOtherArgReg);
+ }
+ else
+ {
+ assert(genRegMask(argDsc->lvOtherArgReg) & (RBM_ARG_REGS));
+ intRegState->rsCalleeRegArgMaskLiveIn |= genRegMask(argDsc->lvOtherArgReg);
+ }
+ }
+}
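+
+// Illustrative case (hypothetical layout): a 16-byte struct classified as
+// {INTEGER, SSE} may arrive with lvArgReg == REG_RDI and lvOtherArgReg == REG_XMM0;
+// the code above then adds RDI to intRegState->rsCalleeRegArgMaskLiveIn and XMM0
+// to floatRegState->rsCalleeRegArgMaskLiveIn.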
+
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+
+//------------------------------------------------------------------------
+// updateRegStateForArg: Updates rsCalleeRegArgMaskLiveIn for the appropriate
+// regState (either compiler->intRegState or compiler->floatRegState),
+// with the lvArgReg on "argDsc"
+//
+// Arguments:
+// argDsc - the argument for which the state is to be updated.
+//
+// Return Value: None
+//
+// Assumptions:
+// The argument is live on entry to the function
+// (or is untracked and therefore assumed live)
+//
+// Notes:
+// This relies on a method in regAlloc.cpp that is shared between LSRA
+// and regAlloc. It is further abstracted here because regState is updated
+// separately for tracked and untracked variables in LSRA.
+//
+void LinearScan::updateRegStateForArg(LclVarDsc* argDsc)
+{
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ // For System V AMD64 calls the argDsc can have 2 registers (for structs.)
+ // Handle them here.
+ if (varTypeIsStruct(argDsc))
+ {
+ unixAmd64UpdateRegStateForArg(argDsc);
+ }
+ else
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ {
+ RegState* intRegState = &compiler->codeGen->intRegState;
+ RegState* floatRegState = &compiler->codeGen->floatRegState;
+ // In the case of AMD64 we'll still use the floating point registers
+ // to model the register usage for arguments on vararg calls, so
+ // we will ignore the varargs condition to determine whether we use
+ // XMM registers or not for setting up the call.
+ bool isFloat = (isFloatRegType(argDsc->lvType)
+#ifndef _TARGET_AMD64_
+ && !compiler->info.compIsVarArgs
+#endif
+ && !compiler->opts.compUseSoftFP);
+
+ if (argDsc->lvIsHfaRegArg())
+ {
+ isFloat = true;
+ }
+
+ if (isFloat)
+ {
+ JITDUMP("Float arg V%02u in reg %s\n", (argDsc - compiler->lvaTable), getRegName(argDsc->lvArgReg));
+ compiler->raUpdateRegStateForArg(floatRegState, argDsc);
+ }
+ else
+ {
+ JITDUMP("Int arg V%02u in reg %s\n", (argDsc - compiler->lvaTable), getRegName(argDsc->lvArgReg));
+#if FEATURE_MULTIREG_ARGS
+ if (argDsc->lvOtherArgReg != REG_NA)
+ {
+ JITDUMP("(second half) in reg %s\n", getRegName(argDsc->lvOtherArgReg));
+ }
+#endif // FEATURE_MULTIREG_ARGS
+ compiler->raUpdateRegStateForArg(intRegState, argDsc);
+ }
+ }
+}
+
+//------------------------------------------------------------------------
+// buildIntervals: The main entry point for building the data structures over
+// which we will do register allocation.
+//
+void LinearScan::buildIntervals()
+{
+ BasicBlock* block;
+
+ JITDUMP("\nbuildIntervals ========\n");
+
+ // Build (empty) records for all of the physical registers
+ buildPhysRegRecords();
+
+#ifdef DEBUG
+ if (VERBOSE)
+ {
+ printf("\n-----------------\n");
+ printf("LIVENESS:\n");
+ printf("-----------------\n");
+ foreach_block(compiler, block)
+ {
+ printf("BB%02u use def in out\n", block->bbNum);
+ dumpConvertedVarSet(compiler, block->bbVarUse);
+ printf("\n");
+ dumpConvertedVarSet(compiler, block->bbVarDef);
+ printf("\n");
+ dumpConvertedVarSet(compiler, block->bbLiveIn);
+ printf("\n");
+ dumpConvertedVarSet(compiler, block->bbLiveOut);
+ printf("\n");
+ }
+ }
+#endif // DEBUG
+
+#if DOUBLE_ALIGN
+ // We will determine whether we should double align the frame during
+ // identifyCandidates(), but we initially assume that we will not.
+ doDoubleAlign = false;
+#endif
+
+ identifyCandidates();
+
+ // Figure out if we're going to use a frame pointer. We need to do this before building
+ // the ref positions, because those objects will embed the frame register in various register masks
+ // if the frame pointer is not reserved. If we decide to have a frame pointer, setFrameType() will
+ // remove the frame pointer from the masks.
+ setFrameType();
+
+ DBEXEC(VERBOSE, TupleStyleDump(LSRA_DUMP_PRE));
+
+ // second part:
+ JITDUMP("\nbuildIntervals second part ========\n");
+ currentLoc = 0;
+ // TODO-Cleanup: This duplicates prior behavior where entry (ParamDef) RefPositions were
+ // being assigned the bbNum of the last block traversed in the 2nd phase of Lowering.
+ // Previously, the block sequencing was done for the (formerly separate) Build pass,
+ // and the curBBNum was left as the last block sequenced. This block was then used to set the
+ // weight for the entry (ParamDef) RefPositions. It would be logical to set this to the
+ // normalized entry weight (compiler->fgCalledCount), but that results in a net regression.
+ if (!blockSequencingDone)
+ {
+ setBlockSequence();
+ }
+ curBBNum = blockSequence[bbSeqCount - 1]->bbNum;
+
+ // Next, create ParamDef RefPositions for all the tracked parameters, in order of their varIndex.
+ // Assign these RefPositions to the (nonexistent) BB0.
+ curBBNum = 0;
+
+ LclVarDsc* argDsc;
+ unsigned int lclNum;
+
+ RegState* intRegState = &compiler->codeGen->intRegState;
+ RegState* floatRegState = &compiler->codeGen->floatRegState;
+ intRegState->rsCalleeRegArgMaskLiveIn = RBM_NONE;
+ floatRegState->rsCalleeRegArgMaskLiveIn = RBM_NONE;
+
+ for (unsigned int varIndex = 0; varIndex < compiler->lvaTrackedCount; varIndex++)
+ {
+ lclNum = compiler->lvaTrackedToVarNum[varIndex];
+ argDsc = &(compiler->lvaTable[lclNum]);
+
+ if (!argDsc->lvIsParam)
+ {
+ continue;
+ }
+
+ // Only reserve a register if the argument is actually used.
+ // Is it dead on entry? If compJmpOpUsed is true, then the arguments
+ // have to be kept alive, so we have to consider it as live on entry.
+ // Use lvRefCnt instead of checking bbLiveIn because if it's volatile we
+ // won't have done dataflow on it, but it needs to be marked as live-in so
+ // it will get saved in the prolog.
+ if (!compiler->compJmpOpUsed && argDsc->lvRefCnt == 0 && !compiler->opts.compDbgCode)
+ {
+ continue;
+ }
+
+ if (argDsc->lvIsRegArg)
+ {
+ updateRegStateForArg(argDsc);
+ }
+
+ if (isCandidateVar(argDsc))
+ {
+ Interval* interval = getIntervalForLocalVar(varIndex);
+ regMaskTP mask = allRegs(TypeGet(argDsc));
+ if (argDsc->lvIsRegArg)
+ {
+ // Set this interval as currently assigned to that register
+ regNumber inArgReg = argDsc->lvArgReg;
+ assert(inArgReg < REG_COUNT);
+ mask = genRegMask(inArgReg);
+ assignPhysReg(inArgReg, interval);
+ }
+ RefPosition* pos = newRefPosition(interval, MinLocation, RefTypeParamDef, nullptr, mask);
+ }
+ else if (varTypeIsStruct(argDsc->lvType))
+ {
+ for (unsigned fieldVarNum = argDsc->lvFieldLclStart;
+ fieldVarNum < argDsc->lvFieldLclStart + argDsc->lvFieldCnt; ++fieldVarNum)
+ {
+ LclVarDsc* fieldVarDsc = &(compiler->lvaTable[fieldVarNum]);
+ if (fieldVarDsc->lvLRACandidate)
+ {
+ assert(fieldVarDsc->lvTracked);
+ Interval* interval = getIntervalForLocalVar(fieldVarDsc->lvVarIndex);
+ RefPosition* pos =
+ newRefPosition(interval, MinLocation, RefTypeParamDef, nullptr, allRegs(TypeGet(fieldVarDsc)));
+ }
+ }
+ }
+ else
+ {
+ // We can overwrite the register (i.e. codegen saves it on entry)
+ assert(argDsc->lvRefCnt == 0 || !argDsc->lvIsRegArg || argDsc->lvDoNotEnregister ||
+ !argDsc->lvLRACandidate || (varTypeIsFloating(argDsc->TypeGet()) && compiler->opts.compDbgCode));
+ }
+ }
+
+ // Now set up the reg state for the non-tracked args
+ // (We do this here because we want to generate the ParamDef RefPositions in tracked
+ // order, so that loop doesn't hit the non-tracked args)
+
+ for (unsigned argNum = 0; argNum < compiler->info.compArgsCount; argNum++, argDsc++)
+ {
+ argDsc = &(compiler->lvaTable[argNum]);
+
+ if (argDsc->lvPromotedStruct())
+ {
+ noway_assert(argDsc->lvFieldCnt == 1); // We only handle one field here
+
+ unsigned fieldVarNum = argDsc->lvFieldLclStart;
+ argDsc = &(compiler->lvaTable[fieldVarNum]);
+ }
+ noway_assert(argDsc->lvIsParam);
+ if (!argDsc->lvTracked && argDsc->lvIsRegArg)
+ {
+ updateRegStateForArg(argDsc);
+ }
+ }
+
+ // If there is a secret stub param, it is also live in
+ if (compiler->info.compPublishStubParam)
+ {
+ intRegState->rsCalleeRegArgMaskLiveIn |= RBM_SECRET_STUB_PARAM;
+ }
+
+ BasicBlock* predBlock = nullptr;
+ BasicBlock* prevBlock = nullptr;
+
+ // Initialize currentLiveVars to the empty set. We will set it to the current
+ // live-in at the entry to each block (this will include the incoming args on
+ // the first block).
+ VarSetOps::AssignNoCopy(compiler, currentLiveVars, VarSetOps::MakeEmpty(compiler));
+
+ for (block = startBlockSequence(); block != nullptr; block = moveToNextBlock())
+ {
+ JITDUMP("\nNEW BLOCK BB%02u\n", block->bbNum);
+
+ bool predBlockIsAllocated = false;
+ predBlock = findPredBlockForLiveIn(block, prevBlock DEBUGARG(&predBlockIsAllocated));
+ if (predBlock)
+ {
+ JITDUMP("\n\nSetting BB%02u as the predecessor for determining incoming variable registers of BB%02u\n",
+ block->bbNum, predBlock->bbNum);
+ assert(predBlock->bbNum <= bbNumMaxBeforeResolution);
+ blockInfo[block->bbNum].predBBNum = predBlock->bbNum;
+ }
+
+ if (enregisterLocalVars)
+ {
+ VarSetOps::AssignNoCopy(compiler, currentLiveVars,
+ VarSetOps::Intersection(compiler, registerCandidateVars, block->bbLiveIn));
+
+ if (block == compiler->fgFirstBB)
+ {
+ insertZeroInitRefPositions();
+ // The first real location is at 1; 0 is for the entry.
+ currentLoc = 1;
+ }
+
+ // Any lclVars live-in to a block are resolution candidates.
+ VarSetOps::UnionD(compiler, resolutionCandidateVars, currentLiveVars);
+
+ // Determine if we need any DummyDefs.
+ // We need DummyDefs for cases where "predBlock" isn't really a predecessor.
+ // Note that it's possible to have uses of unitialized variables, in which case even the first
+ // block may require DummyDefs, which we are not currently adding - this means that these variables
+ // will always be considered to be in memory on entry (and reloaded when the use is encountered).
+ // TODO-CQ: Consider how best to tune this. Currently, if we create DummyDefs for uninitialized
+ // variables (which may actually be initialized along the dynamically executed paths, but not
+ // on all static paths), we wind up with excessive liveranges for some of these variables.
+ VARSET_TP newLiveIn(VarSetOps::MakeCopy(compiler, currentLiveVars));
+ if (predBlock)
+ {
+ // Compute set difference: newLiveIn = currentLiveVars - predBlock->bbLiveOut
+ VarSetOps::DiffD(compiler, newLiveIn, predBlock->bbLiveOut);
+ }
+ bool needsDummyDefs = (!VarSetOps::IsEmpty(compiler, newLiveIn) && block != compiler->fgFirstBB);
+
+ // Create dummy def RefPositions
+
+ if (needsDummyDefs)
+ {
+ // If we are using locations from a predecessor, we should never require DummyDefs.
+ assert(!predBlockIsAllocated);
+
+ JITDUMP("Creating dummy definitions\n");
+ VarSetOps::Iter iter(compiler, newLiveIn);
+ unsigned varIndex = 0;
+ while (iter.NextElem(&varIndex))
+ {
+ unsigned varNum = compiler->lvaTrackedToVarNum[varIndex];
+ LclVarDsc* varDsc = compiler->lvaTable + varNum;
+ // Add a dummyDef for any candidate vars that are in the "newLiveIn" set.
+ // If this is the entry block, don't add any incoming parameters (they're handled with ParamDefs).
+ if (isCandidateVar(varDsc) && (predBlock != nullptr || !varDsc->lvIsParam))
+ {
+ Interval* interval = getIntervalForLocalVar(varIndex);
+ RefPosition* pos = newRefPosition(interval, currentLoc, RefTypeDummyDef, nullptr,
+ allRegs(interval->registerType));
+ }
+ }
+ JITDUMP("Finished creating dummy definitions\n\n");
+ }
+ }
+
+ // Add a dummy RefPosition to mark the block boundary.
+ // Note that we do this AFTER adding the exposed uses above, because the
+ // register positions for those exposed uses need to be recorded at
+ // this point.
+
+ RefPosition* pos = newRefPosition((Interval*)nullptr, currentLoc, RefTypeBB, nullptr, RBM_NONE);
+ currentLoc += 2;
+ JITDUMP("\n");
+
+ LIR::Range& blockRange = LIR::AsRange(block);
+ for (GenTree* node : blockRange.NonPhiNodes())
+ {
+ // We increment the location of each tree node by 2 to simplify the logic for the case of
+ // a tree that implicitly does a dual-definition of temps (the long case). In this case it is
+ // easier to already have an idle spot to handle a dual-def than to make messy adjustments
+ // if we only incremented the location by one.
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef DEBUG
+ node->gtSeqNum = currentLoc;
+ // In DEBUG, we want to set the gtRegTag to GT_REGTAG_REG, so that subsequent dumps will show the
+ // register value.
+ // Although this looks like a no-op, it sets the tag.
+ node->gtRegNum = node->gtRegNum;
+#endif
+
+ buildRefPositionsForNode(node, block, currentLoc);
+
+#ifdef DEBUG
+ if (currentLoc > maxNodeLocation)
+ {
+ maxNodeLocation = currentLoc;
+ }
+#endif // DEBUG
+ currentLoc += 2;
+ }
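+
+ // Illustrative numbering (hypothetical block): if the RefTypeBB for this block
+ // is at location 10, the first node is at 12 (its uses at 12, kills/defs at 13),
+ // the second node at 14, and so on; the gap of 2 leaves the odd locations free
+ // for the dual-def case described above.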
+
+ // Note: the visited set is cleared in LinearScan::doLinearScan()
+ markBlockVisited(block);
+ assert(defList.IsEmpty());
+
+ if (enregisterLocalVars)
+ {
+ // Insert exposed uses for a lclVar that is live-out of 'block' but not live-in to the
+ // next block, or any unvisited successors.
+ // This will address lclVars that are live on a backedge, as well as those that are kept
+ // live at a GT_JMP.
+ //
+ // Blocks ending with "jmp method" are marked as BBJ_HAS_JMP,
+ // and jmp call is represented using GT_JMP node which is a leaf node.
+ // Liveness phase keeps all the arguments of the method live till the end of
+ // block by adding them to liveout set of the block containing GT_JMP.
+ //
+ // The target of a GT_JMP implicitly uses all the current method arguments, however
+ // there are no actual references to them. This can cause LSRA to assert, because
+ // the variables are live but it sees no references. In order to correctly model the
+ // liveness of these arguments, we add dummy exposed uses, in the same manner as for
+ // backward branches. This will happen automatically via expUseSet.
+ //
+ // Note that a block ending with GT_JMP has no successors and hence the variables
+ // for which dummy use ref positions are added are arguments of the method.
+
+ VARSET_TP expUseSet(VarSetOps::MakeCopy(compiler, block->bbLiveOut));
+ VarSetOps::IntersectionD(compiler, expUseSet, registerCandidateVars);
+ BasicBlock* nextBlock = getNextBlock();
+ if (nextBlock != nullptr)
+ {
+ VarSetOps::DiffD(compiler, expUseSet, nextBlock->bbLiveIn);
+ }
+ for (BasicBlock* succ : block->GetAllSuccs(compiler))
+ {
+ if (VarSetOps::IsEmpty(compiler, expUseSet))
+ {
+ break;
+ }
+
+ if (isBlockVisited(succ))
+ {
+ continue;
+ }
+ VarSetOps::DiffD(compiler, expUseSet, succ->bbLiveIn);
+ }
+
+ if (!VarSetOps::IsEmpty(compiler, expUseSet))
+ {
+ JITDUMP("Exposed uses:");
+ VarSetOps::Iter iter(compiler, expUseSet);
+ unsigned varIndex = 0;
+ while (iter.NextElem(&varIndex))
+ {
+ unsigned varNum = compiler->lvaTrackedToVarNum[varIndex];
+ LclVarDsc* varDsc = compiler->lvaTable + varNum;
+ assert(isCandidateVar(varDsc));
+ Interval* interval = getIntervalForLocalVar(varIndex);
+ RefPosition* pos =
+ newRefPosition(interval, currentLoc, RefTypeExpUse, nullptr, allRegs(interval->registerType));
+ JITDUMP(" V%02u", varNum);
+ }
+ JITDUMP("\n");
+ }
+
+ // Clear the "last use" flag on any vars that are live-out from this block.
+ {
+ VarSetOps::Iter iter(compiler, block->bbLiveOut);
+ unsigned varIndex = 0;
+ while (iter.NextElem(&varIndex))
+ {
+ unsigned varNum = compiler->lvaTrackedToVarNum[varIndex];
+ LclVarDsc* const varDsc = &compiler->lvaTable[varNum];
+ if (isCandidateVar(varDsc))
+ {
+ RefPosition* const lastRP = getIntervalForLocalVar(varIndex)->lastRefPosition;
+ if ((lastRP != nullptr) && (lastRP->bbNum == block->bbNum))
+ {
+ lastRP->lastUse = false;
+ }
+ }
+ }
+ }
+
+#ifdef DEBUG
+ checkLastUses(block);
+
+ if (VERBOSE)
+ {
+ printf("use: ");
+ dumpConvertedVarSet(compiler, block->bbVarUse);
+ printf("\ndef: ");
+ dumpConvertedVarSet(compiler, block->bbVarDef);
+ printf("\n");
+ }
+#endif // DEBUG
+ }
+
+ prevBlock = block;
+ }
+
+ if (enregisterLocalVars)
+ {
+ if (compiler->lvaKeepAliveAndReportThis())
+ {
+ // If we need to KeepAliveAndReportThis, add a dummy exposed use of it at the end
+ unsigned keepAliveVarNum = compiler->info.compThisArg;
+ assert(compiler->info.compIsStatic == false);
+ LclVarDsc* varDsc = compiler->lvaTable + keepAliveVarNum;
+ if (isCandidateVar(varDsc))
+ {
+ JITDUMP("Adding exposed use of this, for lvaKeepAliveAndReportThis\n");
+ Interval* interval = getIntervalForLocalVar(varDsc->lvVarIndex);
+ RefPosition* pos =
+ newRefPosition(interval, currentLoc, RefTypeExpUse, nullptr, allRegs(interval->registerType));
+ }
+ }
+
+#ifdef DEBUG
+ if (getLsraExtendLifeTimes())
+ {
+ LclVarDsc* varDsc;
+ for (lclNum = 0, varDsc = compiler->lvaTable; lclNum < compiler->lvaCount; lclNum++, varDsc++)
+ {
+ if (varDsc->lvLRACandidate)
+ {
+ JITDUMP("Adding exposed use of V%02u for LsraExtendLifetimes\n", lclNum);
+ Interval* interval = getIntervalForLocalVar(varDsc->lvVarIndex);
+ RefPosition* pos =
+ newRefPosition(interval, currentLoc, RefTypeExpUse, nullptr, allRegs(interval->registerType));
+ }
+ }
+ }
+#endif // DEBUG
+ }
+
+ // If the last block has successors, create a RefTypeBB to record
+ // what's live
+
+ if (prevBlock->NumSucc(compiler) > 0)
+ {
+ RefPosition* pos = newRefPosition((Interval*)nullptr, currentLoc, RefTypeBB, nullptr, RBM_NONE);
+ }
+
+#ifdef DEBUG
+ // Make sure we don't have any blocks that were not visited
+ foreach_block(compiler, block)
+ {
+ assert(isBlockVisited(block));
+ }
+
+ if (VERBOSE)
+ {
+ lsraDumpIntervals("BEFORE VALIDATING INTERVALS");
+ dumpRefPositions("BEFORE VALIDATING INTERVALS");
+ validateIntervals();
+ }
+#endif // DEBUG
+}
+
+#ifdef DEBUG
+//------------------------------------------------------------------------
+// validateIntervals: A DEBUG-only method that checks that the lclVar RefPositions
+// do not reflect uses of undefined values
+//
+// Notes: If an undefined use is encountered, it merely prints a message.
+//
+// TODO-Cleanup: This should probably assert, or at least print the message only
+// when doing a JITDUMP.
+//
+void LinearScan::validateIntervals()
+{
+ if (enregisterLocalVars)
+ {
+ for (unsigned i = 0; i < compiler->lvaTrackedCount; i++)
+ {
+ if (!compiler->lvaTable[compiler->lvaTrackedToVarNum[i]].lvLRACandidate)
+ {
+ continue;
+ }
+ Interval* interval = getIntervalForLocalVar(i);
+
+ bool defined = false;
+ printf("-----------------\n");
+ for (RefPosition* ref = interval->firstRefPosition; ref != nullptr; ref = ref->nextRefPosition)
+ {
+ ref->dump();
+ RefType refType = ref->refType;
+ if (!defined && RefTypeIsUse(refType))
+ {
+ if (compiler->info.compMethodName != nullptr)
+ {
+ printf("%s: ", compiler->info.compMethodName);
+ }
+ printf("LocalVar V%02u: undefined use at %u\n", interval->varNum, ref->nodeLocation);
+ }
+ // Note that there can be multiple last uses if they are on disjoint paths,
+ // so we can't really check the lastUse flag
+ if (ref->lastUse)
+ {
+ defined = false;
+ }
+ if (RefTypeIsDef(refType))
+ {
+ defined = true;
+ }
+ }
+ }
+ }
+}
+#endif // DEBUG
+
+//------------------------------------------------------------------------
+// GetIndirInfo: Get the source registers for an indirection that might be contained.
+//
+// Arguments:
+// node - The node of interest
+//
+// Return Value:
+// The number of source registers used by the *parent* of this node.
+//
+// Notes:
+// Adds the defining node for each register to the useList.
+//
+int LinearScan::GetIndirInfo(GenTreeIndir* indirTree)
+{
+ GenTree* const addr = indirTree->gtOp1;
+ if (!addr->isContained())
+ {
+ appendLocationInfoToList(addr);
+ return 1;
+ }
+ if (!addr->OperIs(GT_LEA))
+ {
+ return 0;
+ }
+
+ GenTreeAddrMode* const addrMode = addr->AsAddrMode();
+
+ unsigned srcCount = 0;
+ if ((addrMode->Base() != nullptr) && !addrMode->Base()->isContained())
+ {
+ appendLocationInfoToList(addrMode->Base());
+ srcCount++;
+ }
+ if ((addrMode->Index() != nullptr) && !addrMode->Index()->isContained())
+ {
+ appendLocationInfoToList(addrMode->Index());
+ srcCount++;
+ }
+ return srcCount;
+}
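+
+// Worked example (hypothetical address modes): for IND(addr) with a non-contained
+// addr, this appends the def of addr and returns 1. For a contained
+// LEA(base, index) with both components in registers, it appends the defs of base
+// and index and returns 2; a contained constant address contributes no sources.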
+
+//------------------------------------------------------------------------
+// GetOperandInfo: Get the source registers for an operand that might be contained.
+//
+// Arguments:
+// node - The node of interest
+//
+// Return Value:
+// The number of source registers used by the *parent* of this node.
+//
+// Notes:
+// Adds the defining node for each register to the given useList.
+//
+int LinearScan::GetOperandInfo(GenTree* node)
+{
+ if (!node->isContained())
+ {
+ appendLocationInfoToList(node);
+ return 1;
+ }
+
+#if !defined(_TARGET_64BIT_)
+ if (node->OperIs(GT_LONG))
+ {
+ return appendBinaryLocationInfoToList(node->AsOp());
+ }
+#endif // !defined(_TARGET_64BIT_)
+ if (node->OperIsIndir())
+ {
+ const unsigned srcCount = GetIndirInfo(node->AsIndir());
+ return srcCount;
+ }
+
+ return 0;
+}
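+
+// Worked example (hypothetical operands): a non-contained operand contributes its
+// own def (1 source). On a 32-bit target, a contained GT_LONG contributes the defs
+// of its lo/hi halves (2 sources) via appendBinaryLocationInfoToList; a contained
+// indirection contributes its address components via GetIndirInfo; any other
+// contained operand (e.g. a contained constant) contributes none.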
+
+//------------------------------------------------------------------------
+// GetOperandInfo: Get the source registers for an operand that might be contained.
+//
+// Arguments:
+// node - The node of interest
+// pFirstInfo - [out] set to the first LocationInfoListNode appended for this operand
+//
+// Return Value:
+// The number of source registers used by the *parent* of this node.
+//
+// Notes:
+// Adds the defining node for each register to the useList.
+//
+int LinearScan::GetOperandInfo(GenTree* node, LocationInfoListNode** pFirstInfo)
+{
+ LocationInfoListNode* prevLast = useList.Last();
+ int srcCount = GetOperandInfo(node);
+ if (prevLast == nullptr)
+ {
+ *pFirstInfo = useList.Begin();
+ }
+ else
+ {
+ *pFirstInfo = prevLast->Next();
+ }
+ return srcCount;
+}
+
+void TreeNodeInfo::Initialize(LinearScan* lsra, GenTree* node)
+{
+ _dstCount = 0;
+ _srcCount = 0;
+ _internalIntCount = 0;
+ _internalFloatCount = 0;
+
+ isLocalDefUse = false;
+ isDelayFree = false;
+ hasDelayFreeSrc = false;
+ isTgtPref = false;
+ isInternalRegDelayFree = false;
+
+ regMaskTP dstCandidates;
+
+ // If there is a reg indicated on the tree node, use that for dstCandidates.
+ // The exception is the NOP, which sometimes shows up around late args.
+ // TODO-Cleanup: get rid of those NOPs.
+ if (node->gtRegNum == REG_STK)
+ {
+ dstCandidates = RBM_NONE;
+ }
+ else if (node->gtRegNum == REG_NA || node->gtOper == GT_NOP)
+ {
+#ifdef ARM_SOFTFP
+ if (node->OperGet() == GT_PUTARG_REG)
+ {
+ dstCandidates = lsra->allRegs(TYP_INT);
+ }
+ else
+#endif
+ {
+ dstCandidates = lsra->allRegs(node->TypeGet());
+ }
+ }
+ else
+ {
+ dstCandidates = genRegMask(node->gtRegNum);
+ }
+
+ setDstCandidates(lsra, dstCandidates);
+ srcCandsIndex = dstCandsIndex;
+
+ setInternalCandidates(lsra, lsra->allRegs(TYP_INT));
+
+#ifdef DEBUG
+ isInitialized = true;
+#endif
+
+ assert(IsValid(lsra));
+}
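+
+// Example of the defaulting above (hedged): a TYP_DOUBLE node with
+// gtRegNum == REG_NA gets all float registers as dst candidates; a node that
+// lowering has already pinned to a register (e.g. a PUTARG_REG with
+// gtRegNum == REG_ARG_0) is narrowed to exactly that register; and REG_STK
+// means the node needs no register at all (RBM_NONE).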
+
+//------------------------------------------------------------------------
+// getSrcCandidates: Get the source candidates (candidates for the consumer
+// of the node) from the TreeNodeInfo
+//
+// Arguments:
+// lsra - the LinearScan object
+//
+// Return Value:
+// The set of registers (as a register mask) that are candidates for the
+// consumer of the node
+//
+// Notes:
+// The LinearScan object maintains the mapping from the indices kept in the
+// TreeNodeInfo to the actual register masks.
+//
+regMaskTP TreeNodeInfo::getSrcCandidates(LinearScan* lsra)
+{
+ return lsra->GetRegMaskForIndex(srcCandsIndex);
+}
+
+//------------------------------------------------------------------------
+// setSrcCandidates: Set the source candidates (candidates for the consumer
+// of the node) on the TreeNodeInfo
+//
+// Arguments:
+// lsra - the LinearScan object
+//
+// Notes: see getSrcCandidates
+//
+void TreeNodeInfo::setSrcCandidates(LinearScan* lsra, regMaskTP mask)
+{
+ LinearScan::RegMaskIndex i = lsra->GetIndexForRegMask(mask);
+ assert(FitsIn<unsigned char>(i));
+ srcCandsIndex = (unsigned char)i;
+}
+
+//------------------------------------------------------------------------
+// getDstCandidates: Get the dest candidates (candidates for the definition
+// of the node) from the TreeNodeInfo
+//
+// Arguments:
+// lsra - the LinearScan object
+//
+// Return Value:
+// The set of registers (as a register mask) that are candidates for the
+// node itself
+//
+// Notes: see getSrcCandidates
+//
+regMaskTP TreeNodeInfo::getDstCandidates(LinearScan* lsra)
+{
+ return lsra->GetRegMaskForIndex(dstCandsIndex);
+}
+
+//------------------------------------------------------------------------
+// setDstCandidates: Set the dest candidates (candidates for the definition
+// of the node) on the TreeNodeInfo
+//
+// Arguments:
+// lsra - the LinearScan object
+//
+// Notes: see getSrcCandidates
+//
+void TreeNodeInfo::setDstCandidates(LinearScan* lsra, regMaskTP mask)
+{
+ LinearScan::RegMaskIndex i = lsra->GetIndexForRegMask(mask);
+ assert(FitsIn<unsigned char>(i));
+ dstCandsIndex = (unsigned char)i;
+}
+
+//------------------------------------------------------------------------
+// getInternalCandidates: Get the internal candidates (candidates for the internal
+// temporary registers used by a node) from the TreeNodeInfo
+//
+// Arguments:
+// lsra - the LinearScan object
+//
+// Return Value:
+// The set of registers (as a register mask) that are candidates for the
+// internal temporary registers.
+//
+// Notes: see getSrcCandidates
+//
+regMaskTP TreeNodeInfo::getInternalCandidates(LinearScan* lsra)
+{
+ return lsra->GetRegMaskForIndex(internalCandsIndex);
+}
+
+//------------------------------------------------------------------------
+// setInternalCandidates: Set the internal candidates (candidates for the internal
+// temporary registers used by a node) on the TreeNodeInfo
+//
+// Arguments:
+// lsra - the LinearScan object
+//
+// Notes: see getSrcCandidates
+//
+void TreeNodeInfo::setInternalCandidates(LinearScan* lsra, regMaskTP mask)
+{
+ LinearScan::RegMaskIndex i = lsra->GetIndexForRegMask(mask);
+ assert(FitsIn<unsigned char>(i));
+ internalCandsIndex = (unsigned char)i;
+}
+
+//------------------------------------------------------------------------
+// addInternalCandidates: Add internal candidates (candidates for the internal
+// temporary registers used by a node) on the TreeNodeInfo
+//
+// Arguments:
+// lsra - the LinearScan object
+//
+// Notes: see getSrcCandidates
+//
+void TreeNodeInfo::addInternalCandidates(LinearScan* lsra, regMaskTP mask)
+{
+ LinearScan::RegMaskIndex i = lsra->GetIndexForRegMask(lsra->GetRegMaskForIndex(internalCandsIndex) | mask);
+ assert(FitsIn<unsigned char>(i));
+ internalCandsIndex = (unsigned char)i;
+}
+
+//------------------------------------------------------------------------
+// BuildStoreLoc: Set register requirements for a store of a lclVar
+//
+// Arguments:
+// storeLoc - the local store (GT_STORE_LCL_FLD or GT_STORE_LCL_VAR)
+//
+// Notes:
+// This involves:
+// - Setting the appropriate candidates for a store of a multi-reg call return value.
+// - Handling of contained immediates.
+// - Requesting an internal register for SIMD12 stores.
+//
+void LinearScan::BuildStoreLoc(GenTreeLclVarCommon* storeLoc)
+{
+ TreeNodeInfo* info = currentNodeInfo;
+ GenTree* op1 = storeLoc->gtGetOp1();
+
+ assert(info->dstCount == 0);
+
+ if (op1->IsMultiRegCall())
+ {
+ // This is the case of var = call where call is returning
+ // a value in multiple return registers.
+ // Must be a store lclvar.
+ assert(storeLoc->OperGet() == GT_STORE_LCL_VAR);
+
+ // srcCount = number of registers in which the value is returned by call
+ GenTreeCall* call = op1->AsCall();
+ ReturnTypeDesc* retTypeDesc = call->GetReturnTypeDesc();
+ unsigned regCount = retTypeDesc->GetReturnRegCount();
+ info->srcCount = regCount;
+
+ // Call node srcCandidates = Bitwise-OR(allregs(GetReturnRegType(i))) for all i=0..RetRegCount-1
+ regMaskTP srcCandidates = allMultiRegCallNodeRegs(call);
+ LocationInfoListNode* locInfo = getLocationInfo(op1);
+ locInfo->info.setSrcCandidates(this, srcCandidates);
+ useList.Append(locInfo);
+ }
+#ifndef _TARGET_64BIT_
+ else if (varTypeIsLong(op1))
+ {
+ if (op1->OperIs(GT_MUL_LONG))
+ {
+#ifdef _TARGET_X86_
+ // This is actually a bug. A GT_MUL_LONG produces two registers, but is modeled as only producing
+ // eax (and killing edx). This only works because it always occurs as var = GT_MUL_LONG (ensured by
+ // DecomposeMul), and therefore edx won't be reused before the store.
+ // TODO-X86-Cleanup: GT_MUL_LONG should be a multireg node on x86, just as on ARM.
+ info->srcCount = 1;
+#else
+ info->srcCount = 2;
+#endif
+ appendLocationInfoToList(op1);
+ }
+ else
+ {
+ assert(op1->OperIs(GT_LONG));
+ assert(op1->isContained() && !op1->gtOp.gtOp1->isContained() && !op1->gtOp.gtOp2->isContained());
+ info->srcCount = appendBinaryLocationInfoToList(op1->AsOp());
+ assert(info->srcCount == 2);
+ }
+ }
+#endif // !_TARGET_64BIT_
+ else if (op1->isContained())
+ {
+ info->srcCount = 0;
+ }
+ else
+ {
+ info->srcCount = 1;
+ appendLocationInfoToList(op1);
+ }
+
+#ifdef FEATURE_SIMD
+ if (varTypeIsSIMD(storeLoc))
+ {
+ if (!op1->isContained() && (storeLoc->TypeGet() == TYP_SIMD12))
+ {
+// Need an additional register to extract upper 4 bytes of Vector3.
+#ifdef _TARGET_XARCH_
+ info->internalFloatCount = 1;
+ info->setInternalCandidates(this, allSIMDRegs());
+#elif defined(_TARGET_ARM64_)
+ info->internalIntCount = 1;
+#else
+#error "Unknown target architecture for STORE_LCL_VAR of SIMD12"
+#endif
+ }
+ }
+#endif // FEATURE_SIMD
+}
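+
+// Sketch of the SIMD12 case above (assumed codegen shape): a TYP_SIMD12
+// (Vector3) store is done as an 8-byte move plus a 4-byte move, so the third
+// float element must first be extracted from the source register into a
+// temporary; that temporary is the internal float register requested on
+// xarch, while arm64 uses an internal integer register instead.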
+
+//------------------------------------------------------------------------
+// BuildSimple: Sets the srcCount for trees that require no special handling
+//              based on the tree node type.
+//
+// Arguments:
+// tree - The node of interest
+//
+// Return Value:
+// None.
+//
+void LinearScan::BuildSimple(GenTree* tree)
+{
+ TreeNodeInfo* info = currentNodeInfo;
+ unsigned kind = tree->OperKind();
+ assert(info->dstCount == (tree->IsValue() ? 1 : 0));
+ if (kind & (GTK_CONST | GTK_LEAF))
+ {
+ info->srcCount = 0;
+ }
+ else if (kind & (GTK_SMPOP))
+ {
+ info->srcCount = appendBinaryLocationInfoToList(tree->AsOp());
+ }
+ else
+ {
+ unreached();
+ }
+}
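+
+// Examples of the classification above: a GT_CNS_INT or GT_LCL_VAR leaf
+// consumes no registers (srcCount = 0), while a simple binary op such as
+// GT_ADD gets its srcCount from appendBinaryLocationInfoToList, which
+// appends a use for each register source of the two operands and returns
+// that count.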
+
+//------------------------------------------------------------------------
+// BuildReturn: Set the NodeInfo for a GT_RETURN.
+//
+// Arguments:
+// tree - The node of interest
+//
+// Return Value:
+// None.
+//
+void LinearScan::BuildReturn(GenTree* tree)
+{
+ TreeNodeInfo* info = currentNodeInfo;
+ assert(info->dstCount == 0);
+ GenTree* op1 = tree->gtGetOp1();
+
+#if !defined(_TARGET_64BIT_)
+ if (tree->TypeGet() == TYP_LONG)
+ {
+ assert((op1->OperGet() == GT_LONG) && op1->isContained());
+ GenTree* loVal = op1->gtGetOp1();
+ GenTree* hiVal = op1->gtGetOp2();
+ info->srcCount = 2;
+ LocationInfoListNode* loValInfo = getLocationInfo(loVal);
+ LocationInfoListNode* hiValInfo = getLocationInfo(hiVal);
+ loValInfo->info.setSrcCandidates(this, RBM_LNGRET_LO);
+ hiValInfo->info.setSrcCandidates(this, RBM_LNGRET_HI);
+ useList.Append(loValInfo);
+ useList.Append(hiValInfo);
+ }
+ else
+#endif // !defined(_TARGET_64BIT_)
+ if ((tree->TypeGet() != TYP_VOID) && !op1->isContained())
+ {
+ regMaskTP useCandidates = RBM_NONE;
+
+ info->srcCount = 1;
+
+#if FEATURE_MULTIREG_RET
+ if (varTypeIsStruct(tree))
+ {
+ // op1 has to be either an lclvar or a multi-reg returning call
+ if (op1->OperGet() != GT_LCL_VAR)
+ {
+ noway_assert(op1->IsMultiRegCall());
+
+ ReturnTypeDesc* retTypeDesc = op1->AsCall()->GetReturnTypeDesc();
+ info->srcCount = retTypeDesc->GetReturnRegCount();
+ useCandidates = retTypeDesc->GetABIReturnRegs();
+ }
+ }
+ else
+#endif // FEATURE_MULTIREG_RET
+ {
+ // Non-struct type return - determine useCandidates
+ switch (tree->TypeGet())
+ {
+ case TYP_VOID:
+ useCandidates = RBM_NONE;
+ break;
+ case TYP_FLOAT:
+ useCandidates = RBM_FLOATRET;
+ break;
+ case TYP_DOUBLE:
+ // We ONLY want the valid double register in the RBM_DOUBLERET mask.
+ useCandidates = (RBM_DOUBLERET & RBM_ALLDOUBLE);
+ break;
+ case TYP_LONG:
+ useCandidates = RBM_LNGRET;
+ break;
+ default:
+ useCandidates = RBM_INTRET;
+ break;
+ }
+ }
+
+ LocationInfoListNode* locationInfo = getLocationInfo(op1);
+ if (useCandidates != RBM_NONE)
+ {
+ locationInfo->info.setSrcCandidates(this, useCandidates);
+ }
+ useList.Append(locationInfo);
+ }
+}
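+
+// Example of the multi-reg path above (unix-amd64 SysV ABI, given as an
+// assumption): a 16-byte struct of two longs returned from a call comes back
+// in RAX:RDX, so GetReturnRegCount() is 2 and GetABIReturnRegs() yields
+// {RAX,RDX}; srcCount becomes 2 and the call's use candidates are restricted
+// to those registers.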
+
+//------------------------------------------------------------------------
+// BuildPutArgReg: Set the NodeInfo for a PUTARG_REG.
+//
+// Arguments:
+// node - The PUTARG_REG node.
+//
+// Return Value:
+// None.
+//
+void LinearScan::BuildPutArgReg(GenTreeUnOp* node)
+{
+ TreeNodeInfo* info = currentNodeInfo;
+ assert(node != nullptr);
+ assert(node->OperIsPutArgReg());
+ info->srcCount = 1;
+ regNumber argReg = node->gtRegNum;
+ assert(argReg != REG_NA);
+
+ // Set the register requirements for the node.
+ regMaskTP argMask = genRegMask(argReg);
+
+#ifdef _TARGET_ARM_
+    // If the node's type is `long`, the value is actually a `double` passed in a
+    // register pair: genuine `long` args will have been transformed into a field
+    // list with two fields by this point.
+ if (node->TypeGet() == TYP_LONG)
+ {
+ info->srcCount++;
+ info->dstCount = info->srcCount;
+ assert(genRegArgNext(argReg) == REG_NEXT(argReg));
+ argMask |= genRegMask(REG_NEXT(argReg));
+ }
+#endif // _TARGET_ARM_
+ info->setDstCandidates(this, argMask);
+ info->setSrcCandidates(this, argMask);
+
+ // To avoid redundant moves, have the argument operand computed in the
+ // register in which the argument is passed to the call.
+ LocationInfoListNode* op1Info = getLocationInfo(node->gtOp.gtOp1);
+ op1Info->info.setSrcCandidates(this, info->getSrcCandidates(this));
+ op1Info->info.isDelayFree = true;
+ useList.Append(op1Info);
+}
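+
+// Example of the ARM register-pair case above (hedged): a TYP_LONG value
+// passed starting at r0 has genRegArgNext(r0) == r1, so argMask becomes
+// {r0,r1} and the node both consumes and defines the pair; on other targets
+// the single arg register serves as both the src and dst candidate.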
+
+//------------------------------------------------------------------------
+// HandleFloatVarArgs: Handle additional register requirements for a varargs call
+//
+// Arguments:
+// call - The call node of interest
+// argNode - The current argument
+//
+// Return Value:
+// None.
+//
+// Notes:
+// In the case of a varargs call, the ABI dictates that if we have floating point args,
+// we must pass the enregistered arguments in both the integer and floating point registers.
+// Since the integer register is not associated with the arg node, we will reserve it as
+// an internal register on the call so that it is not used during the evaluation of the call node
+// (e.g. for the target).
+void LinearScan::HandleFloatVarArgs(GenTreeCall* call, GenTree* argNode, bool* callHasFloatRegArgs)
+{
+#if FEATURE_VARARG
+ TreeNodeInfo* info = currentNodeInfo;
+ if (call->IsVarargs() && varTypeIsFloating(argNode))
+ {
+ *callHasFloatRegArgs = true;
+
+ regNumber argReg = argNode->gtRegNum;
+ regNumber targetReg = compiler->getCallArgIntRegister(argReg);
+ info->setInternalIntCount(info->internalIntCount + 1);
+ info->addInternalCandidates(this, genRegMask(targetReg));
+ }
+#endif // FEATURE_VARARG
+}
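+
+// Concrete case of the varargs rule above (win-x64 ABI, stated as an
+// assumption): a double passed in XMM1 must also be made available in RDX
+// for a varargs callee, so getCallArgIntRegister(REG_XMM1) returns REG_RDX
+// and that register is reserved as an internal register on the call.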
+
+//------------------------------------------------------------------------
+// BuildGCWriteBarrier: Handle additional register requirements for a GC write barrier
+//
+// Arguments:
+// tree - The STORE_IND for which a write barrier is required
+//
+void LinearScan::BuildGCWriteBarrier(GenTree* tree)
+{
+ TreeNodeInfo* info = currentNodeInfo;
+ GenTree* dst = tree;
+ GenTree* addr = tree->gtOp.gtOp1;
+ GenTree* src = tree->gtOp.gtOp2;
+ LocationInfoListNode* addrInfo = getLocationInfo(addr);
+ LocationInfoListNode* srcInfo = getLocationInfo(src);
+
+ // In the case where we are doing a helper assignment, even if the dst
+ // is an indir through an lea, we need to actually instantiate the
+    // lea in a register.
+ assert(!addr->isContained() && !src->isContained());
+ useList.Append(addrInfo);
+ useList.Append(srcInfo);
+ info->srcCount = 2;
+ assert(info->dstCount == 0);
+ bool customSourceRegs = false;
+
+#if NOGC_WRITE_BARRIERS
+
+#if defined(_TARGET_ARM64_)
+ // For the NOGC JIT Helper calls
+ //
+ // the 'addr' goes into x14 (REG_WRITE_BARRIER_DST_BYREF)
+ // the 'src' goes into x15 (REG_WRITE_BARRIER)
+ //
+ addrInfo->info.setSrcCandidates(this, RBM_WRITE_BARRIER_DST_BYREF);
+ srcInfo->info.setSrcCandidates(this, RBM_WRITE_BARRIER);
+ customSourceRegs = true;
+
+#elif defined(_TARGET_X86_)
+
+ bool useOptimizedWriteBarrierHelper = compiler->codeGen->genUseOptimizedWriteBarriers(tree, src);
+
+ if (useOptimizedWriteBarrierHelper)
+ {
+ // Special write barrier:
+ // op1 (addr) goes into REG_WRITE_BARRIER (rdx) and
+ // op2 (src) goes into any int register.
+ addrInfo->info.setSrcCandidates(this, RBM_WRITE_BARRIER);
+ srcInfo->info.setSrcCandidates(this, RBM_WRITE_BARRIER_SRC);
+ customSourceRegs = true;
+ }
+#else // !defined(_TARGET_X86_) && !defined(_TARGET_ARM64_)
+#error "NOGC_WRITE_BARRIERS is not supported"
+#endif // !defined(_TARGET_X86_) && !defined(_TARGET_ARM64_)
+
+#endif // NOGC_WRITE_BARRIERS
+
+ if (!customSourceRegs)
+ {
+ // For the standard JIT Helper calls:
+ // op1 (addr) goes into REG_ARG_0 and
+ // op2 (src) goes into REG_ARG_1
+ addrInfo->info.setSrcCandidates(this, RBM_ARG_0);
+ srcInfo->info.setSrcCandidates(this, RBM_ARG_1);
+ }
+
+ // Both src and dst must reside in a register, which they should since we haven't set
+ // either of them as contained.
+ assert(addrInfo->info.dstCount == 1);
+ assert(srcInfo->info.dstCount == 1);
+}
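+
+// Illustrative case of the standard helper path above: for a store such as
+// 'obj.field = ref' the write barrier helper receives the field address in
+// REG_ARG_0 and the reference in REG_ARG_1, which is why even an address
+// mode feeding the STOREIND must be materialized into a register here.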
+
+//------------------------------------------------------------------------
+// BuildCmp: Set the register requirements for a compare.
+//
+// Arguments:
+// tree - The node of interest
+//
+// Return Value:
+// None.
+//
+void LinearScan::BuildCmp(GenTree* tree)
+{
+ TreeNodeInfo* info = currentNodeInfo;
+ assert(tree->OperIsCompare() || tree->OperIs(GT_CMP) || tree->OperIs(GT_JCMP));
+
+ info->srcCount = 0;
+ assert((info->dstCount == 1) || (tree->TypeGet() == TYP_VOID));
+
+#ifdef _TARGET_X86_
+ // If the compare is used by a jump, we just need to set the condition codes. If not, then we need
+ // to store the result into the low byte of a register, which requires the dst be a byteable register.
+    // We always set the dst candidates, though, because if this compare is consumed by a jump, they
+ // won't be used. We might be able to use GTF_RELOP_JMP_USED to determine this case, but it's not clear
+ // that flag is maintained until this location (especially for decomposed long compares).
+ info->setDstCandidates(this, RBM_BYTE_REGS);
+#endif // _TARGET_X86_
+
+ info->srcCount = appendBinaryLocationInfoToList(tree->AsOp());
+}
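+
+// Rationale for RBM_BYTE_REGS above: on x86 a materialized compare result is
+// produced with setcc, which writes an 8-bit register, and only eax/ebx/ecx/edx
+// have byte-addressable forms (al/bl/cl/dl) in 32-bit mode; a compare consumed
+// by a jump simply ignores the dst candidates.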
+
+#endif // !LEGACY_BACKEND
diff --git a/src/jit/lsraxarch.cpp b/src/jit/lsraxarch.cpp
index b376788a7a..3fe91765b3 100644
--- a/src/jit/lsraxarch.cpp
+++ b/src/jit/lsraxarch.cpp
@@ -29,75 +29,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
#include "lower.h"
//------------------------------------------------------------------------
-// TreeNodeInfoInitStoreLoc: Set register requirements for a store of a lclVar
-//
-// Arguments:
-// storeLoc - the local store (GT_STORE_LCL_FLD or GT_STORE_LCL_VAR)
-//
-// Notes:
-// This involves:
-// - Setting the appropriate candidates for a store of a multi-reg call return value.
-// - Requesting an internal register for SIMD12 stores.
-//
-void LinearScan::TreeNodeInfoInitStoreLoc(GenTreeLclVarCommon* storeLoc, TreeNodeInfo* info)
-{
- assert(info->dstCount == 0);
- GenTree* op1 = storeLoc->gtGetOp1();
-
-#ifdef _TARGET_X86_
- if (op1->OperGet() == GT_LONG)
- {
- assert(op1->isContained() && !op1->gtOp.gtOp1->isContained() && !op1->gtOp.gtOp2->isContained());
- info->srcCount = appendBinaryLocationInfoToList(op1->AsOp());
- assert(info->srcCount == 2);
- }
- else
-#endif // _TARGET_X86_
- if (op1->isContained())
- {
- info->srcCount = 0;
- }
- else if (op1->IsMultiRegCall())
- {
- // This is the case of var = call where call is returning
- // a value in multiple return registers.
- // Must be a store lclvar.
- assert(storeLoc->OperGet() == GT_STORE_LCL_VAR);
-
- // srcCount = number of registers in which the value is returned by call
- GenTreeCall* call = op1->AsCall();
- ReturnTypeDesc* retTypeDesc = call->GetReturnTypeDesc();
- unsigned regCount = retTypeDesc->GetReturnRegCount();
- info->srcCount = regCount;
-
- // Call node srcCandidates = Bitwise-OR(allregs(GetReturnRegType(i))) for all i=0..RetRegCount-1
- regMaskTP srcCandidates = allMultiRegCallNodeRegs(call);
- LocationInfoListNode* locInfo = getLocationInfo(op1);
- locInfo->info.setSrcCandidates(this, srcCandidates);
- useList.Append(locInfo);
- }
- else
- {
- info->srcCount = 1;
- appendLocationInfoToList(op1);
- }
-
-#ifdef FEATURE_SIMD
- if (varTypeIsSIMD(storeLoc))
- {
- if (!op1->IsCnsIntOrI() && (storeLoc->TypeGet() == TYP_SIMD12))
- {
- // Need an additional register to extract upper 4 bytes of Vector3.
- info->internalFloatCount = 1;
- info->setInternalCandidates(this, allSIMDRegs());
- }
- return;
- }
-#endif // FEATURE_SIMD
-}
-
-//------------------------------------------------------------------------
-// TreeNodeInfoInit: Set register requirements for a node
+// BuildNode: Set register requirements for a node
//
// Arguments:
// treeNode - the node of interest
@@ -111,16 +43,11 @@ void LinearScan::TreeNodeInfoInitStoreLoc(GenTreeLclVarCommon* storeLoc, TreeNod
// requirements needed by LSRA to build the Interval Table (source,
// destination and internal [temp] register counts).
//
-void LinearScan::TreeNodeInfoInit(GenTree* tree, TreeNodeInfo* info)
+void LinearScan::BuildNode(GenTree* tree)
{
- if (tree->isContained())
- {
- info->dstCount = 0;
- assert(info->srcCount == 0);
- return;
- }
+ TreeNodeInfo* info = currentNodeInfo;
+ assert(!tree->isContained());
- // Set the default dstCount. This may be modified below.
if (tree->IsValue())
{
info->dstCount = 1;
@@ -139,7 +66,7 @@ void LinearScan::TreeNodeInfoInit(GenTree* tree, TreeNodeInfo* info)
switch (tree->OperGet())
{
default:
- TreeNodeInfoInitSimple(tree, info);
+ BuildSimple(tree);
break;
case GT_LCL_VAR:
@@ -181,7 +108,7 @@ void LinearScan::TreeNodeInfoInit(GenTree* tree, TreeNodeInfo* info)
case GT_STORE_LCL_FLD:
case GT_STORE_LCL_VAR:
- TreeNodeInfoInitStoreLoc(tree->AsLclVarCommon(), info);
+ BuildStoreLoc(tree->AsLclVarCommon());
break;
case GT_LIST:
@@ -225,7 +152,7 @@ void LinearScan::TreeNodeInfoInit(GenTree* tree, TreeNodeInfo* info)
break;
case GT_RETURN:
- TreeNodeInfoInitReturn(tree, info);
+ BuildReturn(tree);
break;
case GT_RETFILT:
@@ -341,7 +268,7 @@ void LinearScan::TreeNodeInfoInit(GenTree* tree, TreeNodeInfo* info)
case GT_DIV:
case GT_UMOD:
case GT_UDIV:
- TreeNodeInfoInitModDiv(tree->AsOp(), info);
+ BuildModDiv(tree->AsOp());
break;
case GT_MUL:
@@ -349,27 +276,27 @@ void LinearScan::TreeNodeInfoInit(GenTree* tree, TreeNodeInfo* info)
#if defined(_TARGET_X86_) && !defined(LEGACY_BACKEND)
case GT_MUL_LONG:
#endif
- TreeNodeInfoInitMul(tree->AsOp(), info);
+ BuildMul(tree->AsOp());
break;
case GT_INTRINSIC:
- TreeNodeInfoInitIntrinsic(tree->AsOp(), info);
+ BuildIntrinsic(tree->AsOp());
break;
#ifdef FEATURE_SIMD
case GT_SIMD:
- TreeNodeInfoInitSIMD(tree->AsSIMD(), info);
+ BuildSIMD(tree->AsSIMD());
break;
#endif // FEATURE_SIMD
#ifdef FEATURE_HW_INTRINSICS
case GT_HWIntrinsic:
- TreeNodeInfoInitHWIntrinsic(tree->AsHWIntrinsic(), info);
+ BuildHWIntrinsic(tree->AsHWIntrinsic());
break;
#endif // FEATURE_HW_INTRINSICS
case GT_CAST:
- TreeNodeInfoInitCast(tree, info);
+ BuildCast(tree);
break;
case GT_BITCAST:
@@ -421,7 +348,7 @@ void LinearScan::TreeNodeInfoInit(GenTree* tree, TreeNodeInfo* info)
case GT_LSH_HI:
case GT_RSH_LO:
#endif
- (void)TreeNodeInfoInitShiftRotate(tree, info);
+ (void)BuildShiftRotate(tree);
break;
case GT_EQ:
@@ -433,7 +360,7 @@ void LinearScan::TreeNodeInfoInit(GenTree* tree, TreeNodeInfo* info)
case GT_TEST_EQ:
case GT_TEST_NE:
case GT_CMP:
- TreeNodeInfoInitCmp(tree, info);
+ BuildCmp(tree);
break;
case GT_CKFINITE:
@@ -469,11 +396,11 @@ void LinearScan::TreeNodeInfoInit(GenTree* tree, TreeNodeInfo* info)
break;
case GT_PUTARG_REG:
- TreeNodeInfoInitPutArgReg(tree->AsUnOp(), info);
+ BuildPutArgReg(tree->AsUnOp());
break;
case GT_CALL:
- TreeNodeInfoInitCall(tree->AsCall(), info);
+ BuildCall(tree->AsCall());
break;
case GT_ADDR:
@@ -499,14 +426,14 @@ void LinearScan::TreeNodeInfoInit(GenTree* tree, TreeNodeInfo* info)
#ifdef FEATURE_PUT_STRUCT_ARG_STK
case GT_PUTARG_STK:
- TreeNodeInfoInitPutArgStk(tree->AsPutArgStk(), info);
+ BuildPutArgStk(tree->AsPutArgStk());
break;
#endif // FEATURE_PUT_STRUCT_ARG_STK
case GT_STORE_BLK:
case GT_STORE_OBJ:
case GT_STORE_DYN_BLK:
- TreeNodeInfoInitBlockStore(tree->AsBlk(), info);
+ BuildBlockStore(tree->AsBlk());
break;
case GT_INIT_VAL:
@@ -515,7 +442,7 @@ void LinearScan::TreeNodeInfoInit(GenTree* tree, TreeNodeInfo* info)
break;
case GT_LCLHEAP:
- TreeNodeInfoInitLclHeap(tree, info);
+ BuildLclHeap(tree);
break;
case GT_ARR_BOUNDS_CHECK:
@@ -591,10 +518,10 @@ void LinearScan::TreeNodeInfoInit(GenTree* tree, TreeNodeInfo* info)
case GT_STOREIND:
if (compiler->codeGen->gcInfo.gcIsWriteBarrierAsgNode(tree))
{
- TreeNodeInfoInitGCWriteBarrier(tree, info);
+ BuildGCWriteBarrier(tree);
break;
}
- TreeNodeInfoInitIndir(tree->AsIndir(), info);
+ BuildIndir(tree->AsIndir());
break;
case GT_NULLCHECK:
@@ -604,7 +531,7 @@ void LinearScan::TreeNodeInfoInit(GenTree* tree, TreeNodeInfo* info)
break;
case GT_IND:
- TreeNodeInfoInitIndir(tree->AsIndir(), info);
+ BuildIndir(tree->AsIndir());
assert(info->dstCount == 1);
break;
@@ -734,7 +661,7 @@ void LinearScan::TreeNodeInfoInit(GenTree* tree, TreeNodeInfo* info)
}
}
- TreeNodeInfoInitCheckByteable(tree, info);
+ BuildCheckByteable(tree);
// We need to be sure that we've set info->srcCount and info->dstCount appropriately
assert((info->dstCount < 2) || (tree->IsMultiRegCall() && info->dstCount == MAX_RET_REG_COUNT));
@@ -790,7 +717,7 @@ bool LinearScan::CheckAndSetDelayFree(GenTree* delayUseSrc)
}
//------------------------------------------------------------------------
-// TreeNodeInfoInitCheckByteable: Check the tree to see if "byte-able" registers are
+// BuildCheckByteable: Check the tree to see if "byte-able" registers are
// required, and set the tree node info accordingly.
//
// Arguments:
@@ -799,9 +726,10 @@ bool LinearScan::CheckAndSetDelayFree(GenTree* delayUseSrc)
// Return Value:
// None.
//
-void LinearScan::TreeNodeInfoInitCheckByteable(GenTree* tree, TreeNodeInfo* info)
+void LinearScan::BuildCheckByteable(GenTree* tree)
{
#ifdef _TARGET_X86_
+ TreeNodeInfo* info = currentNodeInfo;
// Exclude RBM_NON_BYTE_REGS from dst candidates of tree node and src candidates of operands
// if the tree node is a byte type.
//
@@ -893,39 +821,7 @@ bool LinearScan::isRMWRegOper(GenTree* tree)
}
//------------------------------------------------------------------------
-// TreeNodeInfoInitSimple: Sets the srcCount for all the trees
-// without special handling based on the tree node type.
-//
-// Arguments:
-// tree - The node of interest
-//
-// Return Value:
-// None.
-//
-void LinearScan::TreeNodeInfoInitSimple(GenTree* tree, TreeNodeInfo* info)
-{
- if (tree->isContained())
- {
- info->srcCount = 0;
- return;
- }
- unsigned kind = tree->OperKind();
- if (kind & (GTK_CONST | GTK_LEAF))
- {
- info->srcCount = 0;
- }
- else if (kind & (GTK_SMPOP))
- {
- info->srcCount = appendBinaryLocationInfoToList(tree->AsOp());
- }
- else
- {
- unreached();
- }
-}
-
-//------------------------------------------------------------------------
-// TreeNodeInfoInitReturn: Set the NodeInfo for a GT_RETURN.
+// BuildShiftRotate: Set the NodeInfo for a shift or rotate.
//
// Arguments:
// tree - The node of interest
@@ -933,90 +829,9 @@ void LinearScan::TreeNodeInfoInitSimple(GenTree* tree, TreeNodeInfo* info)
// Return Value:
// None.
//
-void LinearScan::TreeNodeInfoInitReturn(GenTree* tree, TreeNodeInfo* info)
-{
- assert(info->dstCount == 0);
- GenTree* op1 = tree->gtGetOp1();
-
-#if !defined(_TARGET_64BIT_)
- if (tree->TypeGet() == TYP_LONG)
- {
- assert((op1->OperGet() == GT_LONG) && op1->isContained());
- GenTree* loVal = op1->gtGetOp1();
- GenTree* hiVal = op1->gtGetOp2();
- info->srcCount = 2;
- LocationInfoListNode* loValInfo = getLocationInfo(loVal);
- LocationInfoListNode* hiValInfo = getLocationInfo(hiVal);
- loValInfo->info.setSrcCandidates(this, RBM_LNGRET_LO);
- hiValInfo->info.setSrcCandidates(this, RBM_LNGRET_HI);
- useList.Append(loValInfo);
- useList.Append(hiValInfo);
- }
- else
-#endif // !defined(_TARGET_64BIT_)
- if ((tree->TypeGet() != TYP_VOID) && !op1->isContained())
- {
- regMaskTP useCandidates = RBM_NONE;
-
- info->srcCount = 1;
-
-#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
- if (varTypeIsStruct(tree))
- {
- if (op1->OperGet() != GT_LCL_VAR)
- {
- noway_assert(op1->IsMultiRegCall());
-
- ReturnTypeDesc* retTypeDesc = op1->AsCall()->GetReturnTypeDesc();
- info->srcCount = retTypeDesc->GetReturnRegCount();
- useCandidates = retTypeDesc->GetABIReturnRegs();
- }
- }
- else
-#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
- {
- // Non-struct type return - determine useCandidates
- switch (tree->TypeGet())
- {
- case TYP_VOID:
- unreached();
- case TYP_FLOAT:
- useCandidates = RBM_FLOATRET;
- break;
- case TYP_DOUBLE:
- useCandidates = RBM_DOUBLERET;
- break;
-#if defined(_TARGET_64BIT_)
- case TYP_LONG:
- useCandidates = RBM_LNGRET;
- break;
-#endif // defined(_TARGET_64BIT_)
- default:
- useCandidates = RBM_INTRET;
- break;
- }
- }
-
- LocationInfoListNode* locationInfo = getLocationInfo(op1);
- if (useCandidates != RBM_NONE)
- {
- locationInfo->info.setSrcCandidates(this, useCandidates);
- }
- useList.Append(locationInfo);
- }
-}
-
-//------------------------------------------------------------------------
-// TreeNodeInfoInitShiftRotate: Set the NodeInfo for a shift or rotate.
-//
-// Arguments:
-// tree - The node of interest
-//
-// Return Value:
-// None.
-//
-int LinearScan::TreeNodeInfoInitShiftRotate(GenTree* tree, TreeNodeInfo* info)
+int LinearScan::BuildShiftRotate(GenTree* tree)
{
+ TreeNodeInfo* info = currentNodeInfo;
// For shift operations, we need that the number
// of bits moved gets stored in CL in case
// the number of bits to shift is not a constant.
@@ -1044,16 +859,17 @@ int LinearScan::TreeNodeInfoInitShiftRotate(GenTree* tree, TreeNodeInfo* info)
}
}
-// Note that Rotate Left/Right instructions don't set ZF and SF flags.
-//
-// If the operand being shifted is 32-bits then upper three bits are masked
-// by hardware to get actual shift count. Similarly for 64-bit operands
-// shift count is narrowed to [0..63]. If the resulting shift count is zero,
-// then shift operation won't modify flags.
-//
-// TODO-CQ-XARCH: We can optimize generating 'test' instruction for GT_EQ/NE(shift, 0)
-// if the shift count is known to be non-zero and in the range depending on the
-// operand size.
+ // Note that Rotate Left/Right instructions don't set ZF and SF flags.
+ //
+ // If the operand being shifted is 32-bits then upper three bits are masked
+ // by hardware to get actual shift count. Similarly for 64-bit operands
+ // shift count is narrowed to [0..63]. If the resulting shift count is zero,
+ // then shift operation won't modify flags.
+ //
+ // TODO-CQ-XARCH: We can optimize generating 'test' instruction for GT_EQ/NE(shift, 0)
+ // if the shift count is known to be non-zero and in the range depending on the
+ // operand size.
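+    // Example: on x86, 'shl eax, 32' masks the count to 32 & 31 == 0, leaving
+    // both EAX and the flags unchanged, so flags from the shift cannot be relied on.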
+ CLANG_FORMAT_COMMENT_ANCHOR;
#ifdef _TARGET_X86_
// The first operand of a GT_LSH_HI and GT_RSH_LO oper is a GT_LONG so that
@@ -1096,72 +912,7 @@ int LinearScan::TreeNodeInfoInitShiftRotate(GenTree* tree, TreeNodeInfo* info)
}
//------------------------------------------------------------------------
-// TreeNodeInfoInitPutArgReg: Set the NodeInfo for a PUTARG_REG.
-//
-// Arguments:
-// node - The PUTARG_REG node.
-// argReg - The register in which to pass the argument.
-// info - The info for the node's using call.
-// isVarArgs - True if the call uses a varargs calling convention.
-// callHasFloatRegArgs - Set to true if this PUTARG_REG uses an FP register.
-//
-// Return Value:
-// None.
-//
-void LinearScan::TreeNodeInfoInitPutArgReg(GenTreeUnOp* node, TreeNodeInfo* info)
-{
- assert(node != nullptr);
- assert(node->OperIsPutArgReg());
- info->srcCount = 1;
- regNumber argReg = node->gtRegNum;
- assert(argReg != REG_NA);
-
- // Set the register requirements for the node.
- const regMaskTP argMask = genRegMask(argReg);
- info->setDstCandidates(this, argMask);
- info->setSrcCandidates(this, argMask);
-
- // To avoid redundant moves, have the argument operand computed in the
- // register in which the argument is passed to the call.
- LocationInfoListNode* op1Info = getLocationInfo(node->gtOp.gtOp1);
- op1Info->info.setSrcCandidates(this, info->getSrcCandidates(this));
- op1Info->info.isDelayFree = true;
- useList.Append(op1Info);
-}
-
-//------------------------------------------------------------------------
-// HandleFloatVarArgs: Handle additional register requirements for a varargs call
-//
-// Arguments:
-// call - The call node of interest
-// argNode - The current argument
-//
-// Return Value:
-// None.
-//
-// Notes:
-// In the case of a varargs call, the ABI dictates that if we have floating point args,
-// we must pass the enregistered arguments in both the integer and floating point registers.
-// Since the integer register is not associated with the arg node, we will reserve it as
-// an internal register on the call so that it is not used during the evaluation of the call node
-// (e.g. for the target).
-void LinearScan::HandleFloatVarArgs(GenTreeCall* call, TreeNodeInfo* info, GenTree* argNode, bool* callHasFloatRegArgs)
-{
-#if FEATURE_VARARG
- if (call->IsVarargs() && varTypeIsFloating(argNode))
- {
- *callHasFloatRegArgs = true;
-
- regNumber argReg = argNode->gtRegNum;
- regNumber targetReg = compiler->getCallArgIntRegister(argReg);
- info->setInternalIntCount(info->internalIntCount + 1);
- info->addInternalCandidates(this, genRegMask(targetReg));
- }
-#endif // FEATURE_VARARG
-}
-
-//------------------------------------------------------------------------
-// TreeNodeInfoInitCall: Set the NodeInfo for a call.
+// BuildCall: Set the NodeInfo for a call.
//
// Arguments:
// call - The call node of interest
@@ -1169,8 +920,9 @@ void LinearScan::HandleFloatVarArgs(GenTreeCall* call, TreeNodeInfo* info, GenTr
// Return Value:
// None.
//
-void LinearScan::TreeNodeInfoInitCall(GenTreeCall* call, TreeNodeInfo* info)
+void LinearScan::BuildCall(GenTreeCall* call)
{
+ TreeNodeInfo* info = currentNodeInfo;
bool hasMultiRegRetVal = false;
ReturnTypeDesc* retTypeDesc = nullptr;
@@ -1276,7 +1028,7 @@ void LinearScan::TreeNodeInfoInitCall(GenTreeCall* call, TreeNodeInfo* info)
if (argNode->OperIsPutArgReg())
{
info->srcCount++;
- HandleFloatVarArgs(call, info, argNode, &callHasFloatRegArgs);
+ HandleFloatVarArgs(call, argNode, &callHasFloatRegArgs);
appendLocationInfoToList(argNode);
}
#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
@@ -1286,7 +1038,7 @@ void LinearScan::TreeNodeInfoInitCall(GenTreeCall* call, TreeNodeInfo* info)
{
assert(entry->Current()->OperIsPutArgReg());
info->srcCount++;
- HandleFloatVarArgs(call, info, argNode, &callHasFloatRegArgs);
+ HandleFloatVarArgs(call, argNode, &callHasFloatRegArgs);
appendLocationInfoToList(entry->Current());
}
}
@@ -1416,7 +1168,7 @@ void LinearScan::TreeNodeInfoInitCall(GenTreeCall* call, TreeNodeInfo* info)
}
//------------------------------------------------------------------------
-// TreeNodeInfoInitBlockStore: Set the NodeInfo for a block store.
+// BuildBlockStore: Set the NodeInfo for a block store.
//
// Arguments:
// blkNode - The block store node of interest
@@ -1424,11 +1176,12 @@ void LinearScan::TreeNodeInfoInitCall(GenTreeCall* call, TreeNodeInfo* info)
// Return Value:
// None.
//
-void LinearScan::TreeNodeInfoInitBlockStore(GenTreeBlk* blkNode, TreeNodeInfo* info)
+void LinearScan::BuildBlockStore(GenTreeBlk* blkNode)
{
- GenTree* dstAddr = blkNode->Addr();
- unsigned size = blkNode->gtBlkSize;
- GenTree* source = blkNode->Data();
+ TreeNodeInfo* info = currentNodeInfo;
+ GenTree* dstAddr = blkNode->Addr();
+ unsigned size = blkNode->gtBlkSize;
+ GenTree* source = blkNode->Data();
LocationInfoListNode* dstAddrInfo = nullptr;
LocationInfoListNode* sourceInfo = nullptr;
@@ -1663,7 +1416,7 @@ void LinearScan::TreeNodeInfoInitBlockStore(GenTreeBlk* blkNode, TreeNodeInfo* i
#ifdef FEATURE_PUT_STRUCT_ARG_STK
//------------------------------------------------------------------------
-// TreeNodeInfoInitPutArgStk: Set the NodeInfo for a GT_PUTARG_STK.
+// BuildPutArgStk: Set the NodeInfo for a GT_PUTARG_STK.
//
// Arguments:
// tree - The node of interest
@@ -1671,9 +1424,10 @@ void LinearScan::TreeNodeInfoInitBlockStore(GenTreeBlk* blkNode, TreeNodeInfo* i
// Return Value:
// None.
//
-void LinearScan::TreeNodeInfoInitPutArgStk(GenTreePutArgStk* putArgStk, TreeNodeInfo* info)
+void LinearScan::BuildPutArgStk(GenTreePutArgStk* putArgStk)
{
- info->srcCount = 0;
+ TreeNodeInfo* info = currentNodeInfo;
+ info->srcCount = 0;
assert(info->dstCount == 0);
if (putArgStk->gtOp1->gtOper == GT_FIELD_LIST)
@@ -1774,7 +1528,7 @@ void LinearScan::TreeNodeInfoInitPutArgStk(GenTreePutArgStk* putArgStk, TreeNode
if (type != TYP_STRUCT)
{
- TreeNodeInfoInitSimple(putArgStk, info);
+ BuildSimple(putArgStk);
return;
}
@@ -1838,7 +1592,7 @@ void LinearScan::TreeNodeInfoInitPutArgStk(GenTreePutArgStk* putArgStk, TreeNode
#endif // FEATURE_PUT_STRUCT_ARG_STK
//------------------------------------------------------------------------
-// TreeNodeInfoInitLclHeap: Set the NodeInfo for a GT_LCLHEAP.
+// BuildLclHeap: Set the NodeInfo for a GT_LCLHEAP.
//
// Arguments:
// tree - The node of interest
@@ -1846,9 +1600,10 @@ void LinearScan::TreeNodeInfoInitPutArgStk(GenTreePutArgStk* putArgStk, TreeNode
// Return Value:
// None.
//
-void LinearScan::TreeNodeInfoInitLclHeap(GenTree* tree, TreeNodeInfo* info)
+void LinearScan::BuildLclHeap(GenTree* tree)
{
- info->srcCount = 1;
+ TreeNodeInfo* info = currentNodeInfo;
+ info->srcCount = 1;
assert(info->dstCount == 1);
// Need a variable number of temp regs (see genLclHeap() in codegenamd64.cpp):
@@ -1932,7 +1687,7 @@ void LinearScan::TreeNodeInfoInitLclHeap(GenTree* tree, TreeNodeInfo* info)
}
//------------------------------------------------------------------------
-// TreeNodeInfoInitModDiv: Set the NodeInfo for GT_MOD/GT_DIV/GT_UMOD/GT_UDIV.
+// BuildModDiv: Set the NodeInfo for GT_MOD/GT_DIV/GT_UMOD/GT_UDIV.
//
// Arguments:
// tree - The node of interest
@@ -1940,10 +1695,11 @@ void LinearScan::TreeNodeInfoInitLclHeap(GenTree* tree, TreeNodeInfo* info)
// Return Value:
// None.
//
-void LinearScan::TreeNodeInfoInitModDiv(GenTree* tree, TreeNodeInfo* info)
+void LinearScan::BuildModDiv(GenTree* tree)
{
- GenTree* op1 = tree->gtGetOp1();
- GenTree* op2 = tree->gtGetOp2();
+ TreeNodeInfo* info = currentNodeInfo;
+ GenTree* op1 = tree->gtGetOp1();
+ GenTree* op2 = tree->gtGetOp2();
assert(info->dstCount == 1);
@@ -2013,7 +1769,7 @@ void LinearScan::TreeNodeInfoInitModDiv(GenTree* tree, TreeNodeInfo* info)
}
//------------------------------------------------------------------------
-// TreeNodeInfoInitIntrinsic: Set the NodeInfo for a GT_INTRINSIC.
+// BuildIntrinsic: Set the NodeInfo for a GT_INTRINSIC.
//
// Arguments:
// tree - The node of interest
@@ -2021,8 +1777,9 @@ void LinearScan::TreeNodeInfoInitModDiv(GenTree* tree, TreeNodeInfo* info)
// Return Value:
// None.
//
-void LinearScan::TreeNodeInfoInitIntrinsic(GenTree* tree, TreeNodeInfo* info)
+void LinearScan::BuildIntrinsic(GenTree* tree)
{
+ TreeNodeInfo* info = currentNodeInfo;
// Both operand and its result must be of floating point type.
GenTree* op1 = tree->gtGetOp1();
assert(varTypeIsFloating(op1));
@@ -2081,7 +1838,7 @@ void LinearScan::TreeNodeInfoInitIntrinsic(GenTree* tree, TreeNodeInfo* info)
#ifdef FEATURE_SIMD
//------------------------------------------------------------------------
-// TreeNodeInfoInitSIMD: Set the NodeInfo for a GT_SIMD tree.
+// BuildSIMD: Set the NodeInfo for a GT_SIMD tree.
//
// Arguments:
// tree - The GT_SIMD node of interest
@@ -2089,8 +1846,9 @@ void LinearScan::TreeNodeInfoInitIntrinsic(GenTree* tree, TreeNodeInfo* info)
// Return Value:
// None.
-void LinearScan::TreeNodeInfoInitSIMD(GenTreeSIMD* simdTree, TreeNodeInfo* info)
+void LinearScan::BuildSIMD(GenTreeSIMD* simdTree)
{
+ TreeNodeInfo* info = currentNodeInfo;
// Only SIMDIntrinsicInit can be contained. Other than that,
// only SIMDIntrinsicOpEquality and SIMDIntrinsicOpInEquality can have 0 dstCount.
if (simdTree->isContained())
@@ -2481,7 +2239,7 @@ void LinearScan::TreeNodeInfoInitSIMD(GenTreeSIMD* simdTree, TreeNodeInfo* info)
#ifdef FEATURE_HW_INTRINSICS
//------------------------------------------------------------------------
-// TreeNodeInfoInitHWIntrinsic: Set the NodeInfo for a GT_HWIntrinsic tree.
+// BuildHWIntrinsic: Set the NodeInfo for a GT_HWIntrinsic tree.
//
// Arguments:
// tree - The GT_HWIntrinsic node of interest
@@ -2489,8 +2247,9 @@ void LinearScan::TreeNodeInfoInitSIMD(GenTreeSIMD* simdTree, TreeNodeInfo* info)
// Return Value:
// None.
-void LinearScan::TreeNodeInfoInitHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, TreeNodeInfo* info)
+void LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree)
{
+ TreeNodeInfo* info = currentNodeInfo;
NamedIntrinsic intrinsicID = intrinsicTree->gtHWIntrinsicId;
InstructionSet isa = Compiler::isaOfHWIntrinsic(intrinsicID);
if (isa == InstructionSet_AVX || isa == InstructionSet_AVX2)
@@ -2601,7 +2360,7 @@ void LinearScan::TreeNodeInfoInitHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree,
#endif
//------------------------------------------------------------------------
-// TreeNodeInfoInitCast: Set the NodeInfo for a GT_CAST.
+// BuildCast: Set the NodeInfo for a GT_CAST.
//
// Arguments:
// tree - The node of interest
@@ -2609,8 +2368,9 @@ void LinearScan::TreeNodeInfoInitHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree,
// Return Value:
// None.
//
-void LinearScan::TreeNodeInfoInitCast(GenTree* tree, TreeNodeInfo* info)
+void LinearScan::BuildCast(GenTree* tree)
{
+ TreeNodeInfo* info = currentNodeInfo;
// TODO-XArch-CQ: Int-To-Int conversions - castOp cannot be a memory op and must have an assigned register.
// see CodeGen::genIntToIntCast()
@@ -2642,77 +2402,15 @@ void LinearScan::TreeNodeInfoInitCast(GenTree* tree, TreeNodeInfo* info)
}
}
-//------------------------------------------------------------------------
-// TreeNodeInfoInitGCWriteBarrier: Set the NodeInfo for a GT_STOREIND requiring a write barrier.
-//
-// Arguments:
-// tree - The node of interest
-//
-// Return Value:
-// None.
-//
-void LinearScan::TreeNodeInfoInitGCWriteBarrier(GenTree* tree, TreeNodeInfo* info)
-{
- assert(tree->OperGet() == GT_STOREIND);
-
- GenTreeStoreInd* dst = tree->AsStoreInd();
- GenTree* addr = dst->Addr();
- GenTree* src = dst->Data();
- LocationInfoListNode* addrInfo = getLocationInfo(addr);
- LocationInfoListNode* srcInfo = getLocationInfo(src);
-
- // In the case where we are doing a helper assignment, we need to actually instantiate the
- // address in a register.
- assert(!addr->isContained() && !src->isContained());
- useList.Append(addrInfo);
- useList.Append(srcInfo);
- info->srcCount = 2;
- assert(info->dstCount == 0);
-
- bool useOptimizedWriteBarrierHelper = compiler->codeGen->genUseOptimizedWriteBarriers(tree, src);
-
-#if NOGC_WRITE_BARRIERS
-
-#if defined(_TARGET_X86_)
-
- if (useOptimizedWriteBarrierHelper)
- {
- // Special write barrier:
- // op1 (addr) goes into REG_WRITE_BARRIER (rdx) and
- // op2 (src) goes into any int register.
- addrInfo->info.setSrcCandidates(this, RBM_WRITE_BARRIER);
- srcInfo->info.setSrcCandidates(this, RBM_WRITE_BARRIER_SRC);
- }
-
-#else // !defined(_TARGET_X86_)
-#error "NOGC_WRITE_BARRIERS is not supported"
-#endif // !defined(_TARGET_X86_)
-
-#endif // NOGC_WRITE_BARRIERS
-
- if (!useOptimizedWriteBarrierHelper)
- {
- // For the standard JIT Helper calls:
- // op1 (addr) goes into REG_ARG_0 and
- // op2 (src) goes into REG_ARG_1
- addrInfo->info.setSrcCandidates(this, RBM_ARG_0);
- srcInfo->info.setSrcCandidates(this, RBM_ARG_1);
- }
-
- // Both src and dst must reside in a register, which they should since we haven't set
- // either of them as contained.
- assert(addrInfo->info.dstCount == 1);
- assert(srcInfo->info.dstCount == 1);
-}
-
//-----------------------------------------------------------------------------------------
-// TreeNodeInfoInitIndir: Specify register requirements for address expression of an indirection operation.
+// BuildIndir: Specify register requirements for address expression of an indirection operation.
//
// Arguments:
// indirTree - GT_IND or GT_STOREIND gentree node
//
-void LinearScan::TreeNodeInfoInitIndir(GenTreeIndir* indirTree, TreeNodeInfo* info)
+void LinearScan::BuildIndir(GenTreeIndir* indirTree)
{
+ TreeNodeInfo* info = currentNodeInfo;
// If this is the rhs of a block copy (i.e. non-enregisterable struct),
// it has no register requirements.
if (indirTree->TypeGet() == TYP_STRUCT)
@@ -2733,7 +2431,7 @@ void LinearScan::TreeNodeInfoInitIndir(GenTreeIndir* indirTree, TreeNodeInfo* in
if (source->OperIsShiftOrRotate())
{
- info->srcCount += TreeNodeInfoInitShiftRotate(source, info);
+ info->srcCount += BuildShiftRotate(source);
}
else
{
@@ -2817,40 +2515,7 @@ void LinearScan::TreeNodeInfoInitIndir(GenTreeIndir* indirTree, TreeNodeInfo* in
}
//------------------------------------------------------------------------
-// TreeNodeInfoInitCmp: Set the register requirements for a compare.
-//
-// Arguments:
-// tree - The node of interest
-//
-// Return Value:
-// None.
-//
-void LinearScan::TreeNodeInfoInitCmp(GenTree* tree, TreeNodeInfo* info)
-{
- assert(tree->OperIsCompare() || tree->OperIs(GT_CMP));
-
- info->srcCount = 0;
- assert((info->dstCount == 1) || (tree->TypeGet() == TYP_VOID));
-
-#ifdef _TARGET_X86_
- // If the compare is used by a jump, we just need to set the condition codes. If not, then we need
- // to store the result into the low byte of a register, which requires the dst be a byteable register.
- // We always set the dst candidates, though, because if this is compare is consumed by a jump, they
- // won't be used. We might be able to use GTF_RELOP_JMP_USED to determine this case, but it's not clear
- // that flag is maintained until this location (especially for decomposed long compares).
- info->setDstCandidates(this, RBM_BYTE_REGS);
-#endif // _TARGET_X86_
-
- GenTree* op1 = tree->gtOp.gtOp1;
- GenTree* op2 = tree->gtOp.gtOp2;
- var_types op1Type = op1->TypeGet();
- var_types op2Type = op2->TypeGet();
-
- info->srcCount = appendBinaryLocationInfoToList(tree->AsOp());
-}
-
-//------------------------------------------------------------------------
-// TreeNodeInfoInitMul: Set the NodeInfo for a multiply.
+// BuildMul: Set the NodeInfo for a multiply.
//
// Arguments:
// tree - The node of interest
@@ -2858,8 +2523,9 @@ void LinearScan::TreeNodeInfoInitCmp(GenTree* tree, TreeNodeInfo* info)
// Return Value:
// None.
//
-void LinearScan::TreeNodeInfoInitMul(GenTree* tree, TreeNodeInfo* info)
+void LinearScan::BuildMul(GenTree* tree)
{
+ TreeNodeInfo* info = currentNodeInfo;
#if defined(_TARGET_X86_)
assert(tree->OperIs(GT_MUL, GT_MULHI, GT_MUL_LONG));
#else