diff options
author | Carol Eidt <carol.eidt@microsoft.com> | 2018-01-28 16:20:51 -0800 |
---|---|---|
committer | Carol Eidt <carol.eidt@microsoft.com> | 2018-02-01 14:12:14 -0800 |
commit | 6b0f3f88535968aaeddcf3da950fe5988ed3d459 (patch) | |
tree | 2cdd92831047cd1a7a75ce14cb6b4f8a47e1e281 /src | |
parent | 01c93b958e67a3e02d00be287cb2e15e7e9ea3e3 (diff) | |
download | coreclr-6b0f3f88535968aaeddcf3da950fe5988ed3d459.tar.gz coreclr-6b0f3f88535968aaeddcf3da950fe5988ed3d459.tar.bz2 coreclr-6b0f3f88535968aaeddcf3da950fe5988ed3d459.zip |
Refactor RefPosition and Interval Building
Move code for building `RefPosition`s and `Interval`s out of lsra.cpp into lsrabuild.cpp. Also, move common code from lsraarm*.cpp and lsraxarch.cpp to lsrabuild.cpp.
Maintain the `ListNodePool` on the `LinearScan` object to be used by all the building methods.
Rename `TreeNodeInfoInit` methods to `Build`, to more accurately reflect the next round of changes where they will build the `RefPosition`s directly.
Diffstat (limited to 'src')
-rw-r--r-- | src/jit/CMakeLists.txt | 1 | ||||
-rw-r--r-- | src/jit/compiler.hpp | 33 | ||||
-rw-r--r-- | src/jit/lowerxarch.cpp | 4 | ||||
-rw-r--r-- | src/jit/lsra.cpp | 2688 | ||||
-rw-r--r-- | src/jit/lsra.h | 164 | ||||
-rw-r--r-- | src/jit/lsraarm.cpp | 134 | ||||
-rw-r--r-- | src/jit/lsraarm64.cpp | 121 | ||||
-rw-r--r-- | src/jit/lsraarmarch.cpp | 249 | ||||
-rw-r--r-- | src/jit/lsrabuild.cpp | 3205 | ||||
-rw-r--r-- | src/jit/lsraxarch.cpp | 504 |
10 files changed, 3522 insertions, 3581 deletions
diff --git a/src/jit/CMakeLists.txt b/src/jit/CMakeLists.txt index 74380e66d4..f4626ec562 100644 --- a/src/jit/CMakeLists.txt +++ b/src/jit/CMakeLists.txt @@ -53,6 +53,7 @@ set( JIT_SOURCES loopcloning.cpp lower.cpp lsra.cpp + lsrabuild.cpp morph.cpp objectalloc.cpp optcse.cpp diff --git a/src/jit/compiler.hpp b/src/jit/compiler.hpp index 5a2a2342e8..f905e73587 100644 --- a/src/jit/compiler.hpp +++ b/src/jit/compiler.hpp @@ -150,9 +150,9 @@ unsigned __int64 genFindHighestBit(unsigned __int64 mask) #endif // 0 /***************************************************************************** - * - * Return true if the given 64-bit value has exactly zero or one bits set. - */ +* +* Return true if the given 64-bit value has exactly zero or one bits set. +*/ template <typename T> inline BOOL genMaxOneBit(T value) @@ -161,9 +161,9 @@ inline BOOL genMaxOneBit(T value) } /***************************************************************************** - * - * Return true if the given 32-bit value has exactly zero or one bits set. - */ +* +* Return true if the given 32-bit value has exactly zero or one bits set. +*/ inline BOOL genMaxOneBit(unsigned value) { @@ -171,6 +171,27 @@ inline BOOL genMaxOneBit(unsigned value) } /***************************************************************************** +* +* Return true if the given 64-bit value has exactly one bit set. +*/ + +template <typename T> +inline BOOL genExactlyOneBit(T value) +{ + return ((value != 0) && genMaxOneBit(value)); +} + +/***************************************************************************** +* +* Return true if the given 32-bit value has exactly zero or one bits set. +*/ + +inline BOOL genExactlyOneBit(unsigned value) +{ + return ((value != 0) && genMaxOneBit(value)); +} + +/***************************************************************************** * * Given a value that has exactly one bit set, return the position of that * bit, in other words return the logarithm in base 2 of the given value. diff --git a/src/jit/lowerxarch.cpp b/src/jit/lowerxarch.cpp index 70782f19fc..d212d86163 100644 --- a/src/jit/lowerxarch.cpp +++ b/src/jit/lowerxarch.cpp @@ -1500,8 +1500,8 @@ void Lowering::ContainCheckIndir(GenTreeIndir* node) // // Workaround: // Note that LowerVirtualStubCall() sets addr->gtRegNum to VirtualStubParam.reg and Lowering::doPhase() - // sets destination candidates on such nodes and resets addr->gtRegNum to REG_NA before calling - // TreeNodeInfoInit(). Ideally we should set a flag on addr nodes that shouldn't be marked as contained + // sets destination candidates on such nodes and resets addr->gtRegNum to REG_NA. + // Ideally we should set a flag on addr nodes that shouldn't be marked as contained // (in LowerVirtualStubCall()), but we don't have any GTF_* flags left for that purpose. As a workaround // an explicit check is made here. // diff --git a/src/jit/lsra.cpp b/src/jit/lsra.cpp index 641a9a1d11..07ce5503cb 100644 --- a/src/jit/lsra.cpp +++ b/src/jit/lsra.cpp @@ -333,36 +333,6 @@ regMaskTP LinearScan::internalFloatRegCandidates() } /***************************************************************************** - * Register types - *****************************************************************************/ -template <class T> -RegisterType regType(T type) -{ -#ifdef FEATURE_SIMD - if (varTypeIsSIMD(type)) - { - return FloatRegisterType; - } -#endif // FEATURE_SIMD - return varTypeIsFloating(TypeGet(type)) ? 
FloatRegisterType : IntRegisterType; -} - -bool useFloatReg(var_types type) -{ - return (regType(type) == FloatRegisterType); -} - -bool registerTypesEquivalent(RegisterType a, RegisterType b) -{ - return varTypeIsIntegralOrI(a) == varTypeIsIntegralOrI(b); -} - -bool isSingleRegister(regMaskTP regMask) -{ - return (regMask != RBM_NONE && genMaxOneBit(regMask)); -} - -/***************************************************************************** * Inline functions for RegRecord *****************************************************************************/ @@ -470,198 +440,6 @@ regMaskTP LinearScan::stressLimitRegs(RefPosition* refPosition, regMaskTP mask) } #endif // DEBUG -// TODO-Cleanup: Consider adding an overload that takes a varDsc, and can appropriately -// set such fields as isStructField - -Interval* LinearScan::newInterval(RegisterType theRegisterType) -{ - intervals.emplace_back(theRegisterType, allRegs(theRegisterType)); - Interval* newInt = &intervals.back(); - -#ifdef DEBUG - newInt->intervalIndex = static_cast<unsigned>(intervals.size() - 1); -#endif // DEBUG - - DBEXEC(VERBOSE, newInt->dump()); - return newInt; -} - -RefPosition* LinearScan::newRefPositionRaw(LsraLocation nodeLocation, GenTree* treeNode, RefType refType) -{ - refPositions.emplace_back(curBBNum, nodeLocation, treeNode, refType); - RefPosition* newRP = &refPositions.back(); -#ifdef DEBUG - newRP->rpNum = static_cast<unsigned>(refPositions.size() - 1); -#endif // DEBUG - return newRP; -} - -//------------------------------------------------------------------------ -// resolveConflictingDefAndUse: Resolve the situation where we have conflicting def and use -// register requirements on a single-def, single-use interval. -// -// Arguments: -// defRefPosition - The interval definition -// useRefPosition - The (sole) interval use -// -// Return Value: -// None. -// -// Assumptions: -// The two RefPositions are for the same interval, which is a tree-temp. -// -// Notes: -// We require some special handling for the case where the use is a "delayRegFree" case of a fixedReg. -// In that case, if we change the registerAssignment on the useRefPosition, we will lose the fact that, -// even if we assign a different register (and rely on codegen to do the copy), that fixedReg also needs -// to remain busy until the Def register has been allocated. In that case, we don't allow Case 1 or Case 4 -// below. -// Here are the cases we consider (in this order): -// 1. If The defRefPosition specifies a single register, and there are no conflicting -// FixedReg uses of it between the def and use, we use that register, and the code generator -// will insert the copy. Note that it cannot be in use because there is a FixedRegRef for the def. -// 2. If the useRefPosition specifies a single register, and it is not in use, and there are no -// conflicting FixedReg uses of it between the def and use, we use that register, and the code generator -// will insert the copy. -// 3. If the defRefPosition specifies a single register (but there are conflicts, as determined -// in 1.), and there are no conflicts with the useRefPosition register (if it's a single register), -/// we set the register requirements on the defRefPosition to the use registers, and the -// code generator will insert a copy on the def. We can't rely on the code generator to put a copy -// on the use if it has multiple possible candidates, as it won't know which one has been allocated. -// 4. 
If the useRefPosition specifies a single register, and there are no conflicts with the register -// on the defRefPosition, we leave the register requirements on the defRefPosition as-is, and set -// the useRefPosition to the def registers, for similar reasons to case #3. -// 5. If both the defRefPosition and the useRefPosition specify single registers, but both have conflicts, -// We set the candiates on defRefPosition to be all regs of the appropriate type, and since they are -// single registers, codegen can insert the copy. -// 6. Finally, if the RefPositions specify disjoint subsets of the registers (or the use is fixed but -// has a conflict), we must insert a copy. The copy will be inserted before the use if the -// use is not fixed (in the fixed case, the code generator will insert the use). -// -// TODO-CQ: We get bad register allocation in case #3 in the situation where no register is -// available for the lifetime. We end up allocating a register that must be spilled, and it probably -// won't be the register that is actually defined by the target instruction. So, we have to copy it -// and THEN spill it. In this case, we should be using the def requirement. But we need to change -// the interface to this method a bit to make that work (e.g. returning a candidate set to use, but -// leaving the registerAssignment as-is on the def, so that if we find that we need to spill anyway -// we can use the fixed-reg on the def. -// - -void LinearScan::resolveConflictingDefAndUse(Interval* interval, RefPosition* defRefPosition) -{ - assert(!interval->isLocalVar); - - RefPosition* useRefPosition = defRefPosition->nextRefPosition; - regMaskTP defRegAssignment = defRefPosition->registerAssignment; - regMaskTP useRegAssignment = useRefPosition->registerAssignment; - RegRecord* defRegRecord = nullptr; - RegRecord* useRegRecord = nullptr; - regNumber defReg = REG_NA; - regNumber useReg = REG_NA; - bool defRegConflict = false; - bool useRegConflict = false; - - // If the useRefPosition is a "delayRegFree", we can't change the registerAssignment - // on it, or we will fail to ensure that the fixedReg is busy at the time the target - // (of the node that uses this interval) is allocated. - bool canChangeUseAssignment = !useRefPosition->isFixedRegRef || !useRefPosition->delayRegFree; - - INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_DEFUSE_CONFLICT)); - if (!canChangeUseAssignment) - { - INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_DEFUSE_FIXED_DELAY_USE)); - } - if (defRefPosition->isFixedRegRef) - { - defReg = defRefPosition->assignedReg(); - defRegRecord = getRegisterRecord(defReg); - if (canChangeUseAssignment) - { - RefPosition* currFixedRegRefPosition = defRegRecord->recentRefPosition; - assert(currFixedRegRefPosition != nullptr && - currFixedRegRefPosition->nodeLocation == defRefPosition->nodeLocation); - - if (currFixedRegRefPosition->nextRefPosition == nullptr || - currFixedRegRefPosition->nextRefPosition->nodeLocation > useRefPosition->getRefEndLocation()) - { - // This is case #1. Use the defRegAssignment - INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_DEFUSE_CASE1)); - useRefPosition->registerAssignment = defRegAssignment; - return; - } - else - { - defRegConflict = true; - } - } - } - if (useRefPosition->isFixedRegRef) - { - useReg = useRefPosition->assignedReg(); - useRegRecord = getRegisterRecord(useReg); - RefPosition* currFixedRegRefPosition = useRegRecord->recentRefPosition; - - // We know that useRefPosition is a fixed use, so the nextRefPosition must not be null. 
- RefPosition* nextFixedRegRefPosition = useRegRecord->getNextRefPosition(); - assert(nextFixedRegRefPosition != nullptr && - nextFixedRegRefPosition->nodeLocation <= useRefPosition->nodeLocation); - - // First, check to see if there are any conflicting FixedReg references between the def and use. - if (nextFixedRegRefPosition->nodeLocation == useRefPosition->nodeLocation) - { - // OK, no conflicting FixedReg references. - // Now, check to see whether it is currently in use. - if (useRegRecord->assignedInterval != nullptr) - { - RefPosition* possiblyConflictingRef = useRegRecord->assignedInterval->recentRefPosition; - LsraLocation possiblyConflictingRefLocation = possiblyConflictingRef->getRefEndLocation(); - if (possiblyConflictingRefLocation >= defRefPosition->nodeLocation) - { - useRegConflict = true; - } - } - if (!useRegConflict) - { - // This is case #2. Use the useRegAssignment - INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_DEFUSE_CASE2)); - defRefPosition->registerAssignment = useRegAssignment; - return; - } - } - else - { - useRegConflict = true; - } - } - if (defRegRecord != nullptr && !useRegConflict) - { - // This is case #3. - INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_DEFUSE_CASE3)); - defRefPosition->registerAssignment = useRegAssignment; - return; - } - if (useRegRecord != nullptr && !defRegConflict && canChangeUseAssignment) - { - // This is case #4. - INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_DEFUSE_CASE4)); - useRefPosition->registerAssignment = defRegAssignment; - return; - } - if (defRegRecord != nullptr && useRegRecord != nullptr) - { - // This is case #5. - INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_DEFUSE_CASE5)); - RegisterType regType = interval->registerType; - assert((getRegisterType(interval, defRefPosition) == regType) && - (getRegisterType(interval, useRefPosition) == regType)); - regMaskTP candidates = allRegs(regType); - defRefPosition->registerAssignment = candidates; - return; - } - INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_DEFUSE_CASE6)); - return; -} - //------------------------------------------------------------------------ // conflictingFixedRegReference: Determine whether the current RegRecord has a // fixed register use that conflicts with 'refPosition' @@ -708,242 +486,6 @@ bool RegRecord::conflictingFixedRegReference(RefPosition* refPosition) return false; } -void LinearScan::applyCalleeSaveHeuristics(RefPosition* rp) -{ -#ifdef _TARGET_AMD64_ - if (compiler->opts.compDbgEnC) - { - // We only use RSI and RDI for EnC code, so we don't want to favor callee-save regs. - return; - } -#endif // _TARGET_AMD64_ - - Interval* theInterval = rp->getInterval(); - -#ifdef DEBUG - if (!doReverseCallerCallee()) -#endif // DEBUG - { - // Set preferences so that this register set will be preferred for earlier refs - theInterval->updateRegisterPreferences(rp->registerAssignment); - } -} - -//------------------------------------------------------------------------ -// checkConflictingDefUse: Ensure that we have consistent def/use on SDSU temps. -// -// Arguments: -// useRP - The use RefPosition of a tree temp (SDSU Interval) -// -// Notes: -// There are a couple of cases where this may over-constrain allocation: -// 1. In the case of a non-commutative rmw def (in which the rmw source must be delay-free), or -// 2. In the case where the defining node requires a temp distinct from the target (also a -// delay-free case). 
-// In those cases, if we propagate a single-register restriction from the consumer to the producer -// the delayed uses will not see a fixed reference in the PhysReg at that position, and may -// incorrectly allocate that register. -// TODO-CQ: This means that we may often require a copy at the use of this node's result. -// This case could be moved to BuildRefPositionsForNode, at the point where the def RefPosition is -// created, causing a RefTypeFixedRef to be added at that location. This, however, results in -// more PhysReg RefPositions (a throughput impact), and a large number of diffs that require -// further analysis to determine benefit. -// See Issue #11274. -// -void LinearScan::checkConflictingDefUse(RefPosition* useRP) -{ - assert(useRP->refType == RefTypeUse); - Interval* theInterval = useRP->getInterval(); - assert(!theInterval->isLocalVar); - - RefPosition* defRP = theInterval->firstRefPosition; - - // All defs must have a valid treeNode, but we check it below to be conservative. - assert(defRP->treeNode != nullptr); - regMaskTP prevAssignment = defRP->registerAssignment; - regMaskTP newAssignment = (prevAssignment & useRP->registerAssignment); - if (newAssignment != RBM_NONE) - { - if (!isSingleRegister(newAssignment) || !theInterval->hasInterferingUses) - { - defRP->registerAssignment = newAssignment; - } - } - else - { - theInterval->hasConflictingDefUse = true; - } -} - -void LinearScan::associateRefPosWithInterval(RefPosition* rp) -{ - Referenceable* theReferent = rp->referent; - - if (theReferent != nullptr) - { - // All RefPositions except the dummy ones at the beginning of blocks - - if (rp->isIntervalRef()) - { - Interval* theInterval = rp->getInterval(); - - applyCalleeSaveHeuristics(rp); - - if (theInterval->isLocalVar) - { - if (RefTypeIsUse(rp->refType)) - { - RefPosition* const prevRP = theInterval->recentRefPosition; - if ((prevRP != nullptr) && (prevRP->bbNum == rp->bbNum)) - { - prevRP->lastUse = false; - } - } - - rp->lastUse = (rp->refType != RefTypeExpUse) && (rp->refType != RefTypeParamDef) && - (rp->refType != RefTypeZeroInit) && !extendLifetimes(); - } - else if (rp->refType == RefTypeUse) - { - checkConflictingDefUse(rp); - rp->lastUse = true; - } - } - - RefPosition* prevRP = theReferent->recentRefPosition; - if (prevRP != nullptr) - { - prevRP->nextRefPosition = rp; - } - else - { - theReferent->firstRefPosition = rp; - } - theReferent->recentRefPosition = rp; - theReferent->lastRefPosition = rp; - } - else - { - assert((rp->refType == RefTypeBB) || (rp->refType == RefTypeKillGCRefs)); - } -} - -//--------------------------------------------------------------------------- -// newRefPosition: allocate and initialize a new RefPosition. -// -// Arguments: -// reg - reg number that identifies RegRecord to be associated -// with this RefPosition -// theLocation - LSRA location of RefPosition -// theRefType - RefPosition type -// theTreeNode - GenTree node for which this RefPosition is created -// mask - Set of valid registers for this RefPosition -// multiRegIdx - register position if this RefPosition corresponds to a -// multi-reg call node. 
-// -// Return Value: -// a new RefPosition -// -RefPosition* LinearScan::newRefPosition( - regNumber reg, LsraLocation theLocation, RefType theRefType, GenTree* theTreeNode, regMaskTP mask) -{ - RefPosition* newRP = newRefPositionRaw(theLocation, theTreeNode, theRefType); - - newRP->setReg(getRegisterRecord(reg)); - newRP->registerAssignment = mask; - - newRP->setMultiRegIdx(0); - newRP->setAllocateIfProfitable(false); - - associateRefPosWithInterval(newRP); - - DBEXEC(VERBOSE, newRP->dump()); - return newRP; -} - -//--------------------------------------------------------------------------- -// newRefPosition: allocate and initialize a new RefPosition. -// -// Arguments: -// theInterval - interval to which RefPosition is associated with. -// theLocation - LSRA location of RefPosition -// theRefType - RefPosition type -// theTreeNode - GenTree node for which this RefPosition is created -// mask - Set of valid registers for this RefPosition -// multiRegIdx - register position if this RefPosition corresponds to a -// multi-reg call node. -// -// Return Value: -// a new RefPosition -// -RefPosition* LinearScan::newRefPosition(Interval* theInterval, - LsraLocation theLocation, - RefType theRefType, - GenTree* theTreeNode, - regMaskTP mask, - unsigned multiRegIdx /* = 0 */) -{ -#ifdef DEBUG - if (theInterval != nullptr && regType(theInterval->registerType) == FloatRegisterType) - { - // In the case we're using floating point registers we must make sure - // this flag was set previously in the compiler since this will mandate - // whether LSRA will take into consideration FP reg killsets. - assert(compiler->compFloatingPointUsed || ((mask & RBM_FLT_CALLEE_SAVED) == 0)); - } -#endif // DEBUG - - // If this reference is constrained to a single register (and it's not a dummy - // or Kill reftype already), add a RefTypeFixedReg at this location so that its - // availability can be more accurately determined - - bool isFixedRegister = isSingleRegister(mask); - bool insertFixedRef = false; - if (isFixedRegister) - { - // Insert a RefTypeFixedReg for any normal def or use (not ParamDef or BB) - if (theRefType == RefTypeUse || theRefType == RefTypeDef) - { - insertFixedRef = true; - } - } - - if (insertFixedRef) - { - regNumber physicalReg = genRegNumFromMask(mask); - RefPosition* pos = newRefPosition(physicalReg, theLocation, RefTypeFixedReg, nullptr, mask); - assert(theInterval != nullptr); - assert((allRegs(theInterval->registerType) & mask) != 0); - } - - RefPosition* newRP = newRefPositionRaw(theLocation, theTreeNode, theRefType); - - newRP->setInterval(theInterval); - - // Spill info - newRP->isFixedRegRef = isFixedRegister; - -#ifndef _TARGET_AMD64_ - // We don't need this for AMD because the PInvoke method epilog code is explicit - // at register allocation time. 
- if (theInterval != nullptr && theInterval->isLocalVar && compiler->info.compCallUnmanaged && - theInterval->varNum == compiler->genReturnLocal) - { - mask &= ~(RBM_PINVOKE_TCB | RBM_PINVOKE_FRAME); - noway_assert(mask != RBM_NONE); - } -#endif // !_TARGET_AMD64_ - newRP->registerAssignment = mask; - - newRP->setMultiRegIdx(multiRegIdx); - newRP->setAllocateIfProfitable(false); - - associateRefPosWithInterval(newRP); - - DBEXEC(VERBOSE, newRP->dump()); - return newRP; -} - /***************************************************************************** * Inline functions for Interval *****************************************************************************/ @@ -1122,6 +664,7 @@ LinearScan::LinearScan(Compiler* theCompiler) #endif // MEASURE_MEM_ALLOC , intervals(LinearScanMemoryAllocatorInterval(theCompiler)) , refPositions(LinearScanMemoryAllocatorRefPosition(theCompiler)) + , listNodePool(theCompiler) { #ifdef DEBUG maxNodeLocation = 0; @@ -1949,42 +1492,6 @@ void LinearScan::identifyCandidatesExceptionDataflow() } } -//------------------------------------------------------------------------ -// IsContainableMemoryOp: Checks whether this is a memory op that can be contained. -// -// Arguments: -// node - the node of interest. -// -// Return value: -// True if this will definitely be a memory reference that could be contained. -// -// Notes: -// This differs from the isMemoryOp() method on GenTree because it checks for -// the case of doNotEnregister local. This won't include locals that -// for some other reason do not become register candidates, nor those that get -// spilled. -// Also, because we usually call this before we redo dataflow, any new lclVars -// introduced after the last dataflow analysis will not yet be marked lvTracked, -// so we don't use that. -// -bool LinearScan::isContainableMemoryOp(GenTree* node) -{ - if (node->isMemoryOp()) - { - return true; - } - if (node->IsLocal()) - { - if (!enregisterLocalVars) - { - return true; - } - LclVarDsc* varDsc = &compiler->lvaTable[node->AsLclVar()->gtLclNum]; - return varDsc->lvDoNotEnregister; - } - return false; -} - bool LinearScan::isRegCandidate(LclVarDsc* varDsc) { // We shouldn't be called if opt settings do not permit register variables. 
@@ -2618,24 +2125,6 @@ VarToRegMap LinearScan::setInVarToRegMap(unsigned int bbNum, VarToRegMap srcVarT return inVarToRegMap; } -// given a tree node -RefType refTypeForLocalRefNode(GenTree* node) -{ - assert(node->IsLocal()); - - // We don't support updates - assert((node->gtFlags & GTF_VAR_USEASG) == 0); - - if (node->gtFlags & GTF_VAR_DEF) - { - return RefTypeDef; - } - else - { - return RefTypeUse; - } -} - //------------------------------------------------------------------------ // checkLastUses: Check correctness of last use flags // @@ -2776,1486 +2265,6 @@ void LinearScan::checkLastUses(BasicBlock* block) } #endif // DEBUG -void LinearScan::addRefsForPhysRegMask(regMaskTP mask, LsraLocation currentLoc, RefType refType, bool isLastUse) -{ - for (regNumber reg = REG_FIRST; mask; reg = REG_NEXT(reg), mask >>= 1) - { - if (mask & 1) - { - // This assumes that these are all "special" RefTypes that - // don't need to be recorded on the tree (hence treeNode is nullptr) - RefPosition* pos = newRefPosition(reg, currentLoc, refType, nullptr, - genRegMask(reg)); // This MUST occupy the physical register (obviously) - - if (isLastUse) - { - pos->lastUse = true; - } - } - } -} - -//------------------------------------------------------------------------ -// getKillSetForStoreInd: Determine the liveness kill set for a GT_STOREIND node. -// If the GT_STOREIND will generate a write barrier, determine the specific kill -// set required by the case-specific, platform-specific write barrier. If no -// write barrier is required, the kill set will be RBM_NONE. -// -// Arguments: -// tree - the GT_STOREIND node -// -// Return Value: a register mask of the registers killed -// -regMaskTP LinearScan::getKillSetForStoreInd(GenTreeStoreInd* tree) -{ - assert(tree->OperIs(GT_STOREIND)); - - regMaskTP killMask = RBM_NONE; - - GenTree* data = tree->Data(); - - GCInfo::WriteBarrierForm writeBarrierForm = compiler->codeGen->gcInfo.gcIsWriteBarrierCandidate(tree, data); - if (writeBarrierForm != GCInfo::WBF_NoBarrier) - { - if (compiler->codeGen->genUseOptimizedWriteBarriers(writeBarrierForm)) - { - // We can't determine the exact helper to be used at this point, because it depends on - // the allocated register for the `data` operand. However, all the (x86) optimized - // helpers have the same kill set: EDX. - killMask = RBM_CALLEE_TRASH_NOGC; - } - else - { - // Figure out which helper we're going to use, and then get the kill set for that helper. - CorInfoHelpFunc helper = - compiler->codeGen->genWriteBarrierHelperForWriteBarrierForm(tree, writeBarrierForm); - killMask = compiler->compHelperCallKillSet(helper); - } - } - - return killMask; -} - -//------------------------------------------------------------------------ -// getKillSetForNode: Return the registers killed by the given tree node. -// -// Arguments: -// compiler - the compiler context to use -// tree - the tree for which the kill set is needed. 
-// -// Return Value: a register mask of the registers killed -// -regMaskTP LinearScan::getKillSetForNode(GenTree* tree) -{ - regMaskTP killMask = RBM_NONE; - switch (tree->OperGet()) - { -#ifdef _TARGET_XARCH_ - case GT_MUL: - // We use the 128-bit multiply when performing an overflow checking unsigned multiply - // - if (((tree->gtFlags & GTF_UNSIGNED) != 0) && tree->gtOverflowEx()) - { - // Both RAX and RDX are killed by the operation - killMask = RBM_RAX | RBM_RDX; - } - break; - - case GT_MULHI: -#if defined(_TARGET_X86_) && !defined(LEGACY_BACKEND) - case GT_MUL_LONG: -#endif - killMask = RBM_RAX | RBM_RDX; - break; - - case GT_MOD: - case GT_DIV: - case GT_UMOD: - case GT_UDIV: - if (!varTypeIsFloating(tree->TypeGet())) - { - // Both RAX and RDX are killed by the operation - killMask = RBM_RAX | RBM_RDX; - } - break; -#endif // _TARGET_XARCH_ - - case GT_STORE_OBJ: - if (tree->OperIsCopyBlkOp()) - { - assert(tree->AsObj()->gtGcPtrCount != 0); - killMask = compiler->compHelperCallKillSet(CORINFO_HELP_ASSIGN_BYREF); - break; - } - __fallthrough; - - case GT_STORE_BLK: - case GT_STORE_DYN_BLK: - { - GenTreeBlk* blkNode = tree->AsBlk(); - bool isCopyBlk = varTypeIsStruct(blkNode->Data()); - switch (blkNode->gtBlkOpKind) - { - case GenTreeBlk::BlkOpKindHelper: - if (isCopyBlk) - { - killMask = compiler->compHelperCallKillSet(CORINFO_HELP_MEMCPY); - } - else - { - killMask = compiler->compHelperCallKillSet(CORINFO_HELP_MEMSET); - } - break; - -#ifdef _TARGET_XARCH_ - case GenTreeBlk::BlkOpKindRepInstr: - if (isCopyBlk) - { - // rep movs kills RCX, RDI and RSI - killMask = RBM_RCX | RBM_RDI | RBM_RSI; - } - else - { - // rep stos kills RCX and RDI. - // (Note that the Data() node, if not constant, will be assigned to - // RCX, but it's find that this kills it, as the value is not available - // after this node in any case.) - killMask = RBM_RDI | RBM_RCX; - } - break; -#else - case GenTreeBlk::BlkOpKindRepInstr: -#endif - case GenTreeBlk::BlkOpKindUnroll: - case GenTreeBlk::BlkOpKindInvalid: - // for these 'gtBlkOpKind' kinds, we leave 'killMask' = RBM_NONE - break; - } - } - break; - - case GT_RETURNTRAP: - killMask = compiler->compHelperCallKillSet(CORINFO_HELP_STOP_FOR_GC); - break; - case GT_CALL: -#ifdef _TARGET_X86_ - if (compiler->compFloatingPointUsed) - { - if (tree->TypeGet() == TYP_DOUBLE) - { - needDoubleTmpForFPCall = true; - } - else if (tree->TypeGet() == TYP_FLOAT) - { - needFloatTmpForFPCall = true; - } - } -#endif // _TARGET_X86_ -#if defined(_TARGET_X86_) || defined(_TARGET_ARM_) - if (tree->IsHelperCall()) - { - GenTreeCall* call = tree->AsCall(); - CorInfoHelpFunc helpFunc = compiler->eeGetHelperNum(call->gtCallMethHnd); - killMask = compiler->compHelperCallKillSet(helpFunc); - } - else -#endif // defined(_TARGET_X86_) || defined(_TARGET_ARM_) - { - // if there is no FP used, we can ignore the FP kills - if (compiler->compFloatingPointUsed) - { - killMask = RBM_CALLEE_TRASH; - } - else - { - killMask = RBM_INT_CALLEE_TRASH; - } -#ifdef _TARGET_ARM_ - if (tree->AsCall()->IsVirtualStub()) - { - killMask |= compiler->virtualStubParamInfo->GetRegMask(); - } -#else // !_TARGET_ARM_ - // Verify that the special virtual stub call registers are in the kill mask. - // We don't just add them unconditionally to the killMask because for most architectures - // they are already in the RBM_CALLEE_TRASH set, - // and we don't want to introduce extra checks and calls in this hot function. 
- assert(!tree->AsCall()->IsVirtualStub() || ((killMask & compiler->virtualStubParamInfo->GetRegMask()) == - compiler->virtualStubParamInfo->GetRegMask())); -#endif - } - break; - case GT_STOREIND: - killMask = getKillSetForStoreInd(tree->AsStoreInd()); - break; - -#if defined(PROFILING_SUPPORTED) - // If this method requires profiler ELT hook then mark these nodes as killing - // callee trash registers (excluding RAX and XMM0). The reason for this is that - // profiler callback would trash these registers. See vm\amd64\asmhelpers.asm for - // more details. - case GT_RETURN: - if (compiler->compIsProfilerHookNeeded()) - { - killMask = compiler->compHelperCallKillSet(CORINFO_HELP_PROF_FCN_LEAVE); - } - break; - - case GT_PROF_HOOK: - if (compiler->compIsProfilerHookNeeded()) - { - killMask = compiler->compHelperCallKillSet(CORINFO_HELP_PROF_FCN_TAILCALL); - } - break; -#endif // PROFILING_SUPPORTED - - default: - // for all other 'tree->OperGet()' kinds, leave 'killMask' = RBM_NONE - break; - } - return killMask; -} - -//------------------------------------------------------------------------ -// buildKillPositionsForNode: -// Given some tree node add refpositions for all the registers this node kills -// -// Arguments: -// tree - the tree for which kill positions should be generated -// currentLoc - the location at which the kills should be added -// -// Return Value: -// true - kills were inserted -// false - no kills were inserted -// -// Notes: -// The return value is needed because if we have any kills, we need to make sure that -// all defs are located AFTER the kills. On the other hand, if there aren't kills, -// the multiple defs for a regPair are in different locations. -// If we generate any kills, we will mark all currentLiveVars as being preferenced -// to avoid the killed registers. This is somewhat conservative. - -bool LinearScan::buildKillPositionsForNode(GenTree* tree, LsraLocation currentLoc) -{ - regMaskTP killMask = getKillSetForNode(tree); - bool isCallKill = ((killMask == RBM_INT_CALLEE_TRASH) || (killMask == RBM_CALLEE_TRASH)); - if (killMask != RBM_NONE) - { - // The killMask identifies a set of registers that will be used during codegen. - // Mark these as modified here, so when we do final frame layout, we'll know about - // all these registers. This is especially important if killMask contains - // callee-saved registers, which affect the frame size since we need to save/restore them. - // In the case where we have a copyBlk with GC pointers, can need to call the - // CORINFO_HELP_ASSIGN_BYREF helper, which kills callee-saved RSI and RDI, if - // LSRA doesn't assign RSI/RDI, they wouldn't get marked as modified until codegen, - // which is too late. - compiler->codeGen->regSet.rsSetRegsModified(killMask DEBUGARG(true)); - - addRefsForPhysRegMask(killMask, currentLoc, RefTypeKill, true); - - // TODO-CQ: It appears to be valuable for both fp and int registers to avoid killing the callee - // save regs on infrequently exectued paths. However, it results in a large number of asmDiffs, - // many of which appear to be regressions (because there is more spill on the infrequently path), - // but are not really because the frequent path becomes smaller. Validating these diffs will need - // to be done before making this change. 
- // if (!blockSequence[curBBSeqNum]->isRunRarely()) - if (enregisterLocalVars) - { - VarSetOps::Iter iter(compiler, currentLiveVars); - unsigned varIndex = 0; - while (iter.NextElem(&varIndex)) - { - unsigned varNum = compiler->lvaTrackedToVarNum[varIndex]; - LclVarDsc* varDsc = compiler->lvaTable + varNum; -#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE - if (varTypeNeedsPartialCalleeSave(varDsc->lvType)) - { - if (!VarSetOps::IsMember(compiler, largeVectorCalleeSaveCandidateVars, varIndex)) - { - continue; - } - } - else -#endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE - if (varTypeIsFloating(varDsc) && - !VarSetOps::IsMember(compiler, fpCalleeSaveCandidateVars, varIndex)) - { - continue; - } - Interval* interval = getIntervalForLocalVar(varIndex); - if (isCallKill) - { - interval->preferCalleeSave = true; - } - regMaskTP newPreferences = allRegs(interval->registerType) & (~killMask); - - if (newPreferences != RBM_NONE) - { - interval->updateRegisterPreferences(newPreferences); - } - else - { - // If there are no callee-saved registers, the call could kill all the registers. - // This is a valid state, so in that case assert should not trigger. The RA will spill in order to - // free a register later. - assert(compiler->opts.compDbgEnC || (calleeSaveRegs(varDsc->lvType)) == RBM_NONE); - } - } - } - - if (compiler->killGCRefs(tree)) - { - RefPosition* pos = newRefPosition((Interval*)nullptr, currentLoc, RefTypeKillGCRefs, tree, - (allRegs(TYP_REF) & ~RBM_ARG_REGS)); - } - return true; - } - - return false; -} - -//---------------------------------------------------------------------------- -// defineNewInternalTemp: Defines a ref position for an internal temp. -// -// Arguments: -// tree - Gentree node requiring an internal register -// regType - Register type -// currentLoc - Location of the temp Def position -// regMask - register mask of candidates for temp -// -RefPosition* LinearScan::defineNewInternalTemp(GenTree* tree, RegisterType regType, regMaskTP regMask) -{ - Interval* current = newInterval(regType); - current->isInternal = true; - return newRefPosition(current, currentLoc, RefTypeDef, tree, regMask, 0); -} - -//------------------------------------------------------------------------ -// buildInternalRegisterDefsForNode - build Def positions for internal -// registers required for tree node. -// -// Arguments: -// tree - Gentree node that needs internal registers -// temps - in-out array which is populated with ref positions -// created for Def of internal registers -// -// Returns: -// The total number of Def positions created for internal registers of tree no. -int LinearScan::buildInternalRegisterDefsForNode(GenTree* tree, TreeNodeInfo* info, RefPosition* temps[]) -{ - int count; - int internalIntCount = info->internalIntCount; - regMaskTP internalCands = info->getInternalCandidates(this); - - // If the number of internal integer registers required is the same as the number of candidate integer registers in - // the candidate set, then they must be handled as fixed registers. - // (E.g. for the integer registers that floating point arguments must be copied into for a varargs call.) 
- bool fixedRegs = false; - regMaskTP internalIntCandidates = (internalCands & allRegs(TYP_INT)); - if (((int)genCountBits(internalIntCandidates)) == internalIntCount) - { - fixedRegs = true; - } - - for (count = 0; count < internalIntCount; count++) - { - regMaskTP internalIntCands = (internalCands & allRegs(TYP_INT)); - if (fixedRegs) - { - internalIntCands = genFindLowestBit(internalIntCands); - internalCands &= ~internalIntCands; - } - temps[count] = defineNewInternalTemp(tree, IntRegisterType, internalIntCands); - } - - int internalFloatCount = info->internalFloatCount; - for (int i = 0; i < internalFloatCount; i++) - { - regMaskTP internalFPCands = (internalCands & internalFloatRegCandidates()); - temps[count++] = defineNewInternalTemp(tree, FloatRegisterType, internalFPCands); - } - - assert(count < MaxInternalRegisters); - assert(count == (internalIntCount + internalFloatCount)); - return count; -} - -//------------------------------------------------------------------------ -// buildInternalRegisterUsesForNode - adds Use positions for internal -// registers required for tree node. -// -// Arguments: -// tree - Gentree node that needs internal registers -// defs - int array containing Def positions of internal -// registers. -// total - Total number of Def positions in 'defs' array. -// -// Returns: -// Void. -void LinearScan::buildInternalRegisterUsesForNode(GenTree* tree, TreeNodeInfo* info, RefPosition* defs[], int total) -{ - assert(total < MaxInternalRegisters); - - // defs[] has been populated by buildInternalRegisterDefsForNode - // now just add uses to the defs previously added. - for (int i = 0; i < total; i++) - { - RefPosition* prevRefPosition = defs[i]; - assert(prevRefPosition != nullptr); - regMaskTP mask = prevRefPosition->registerAssignment; - if (prevRefPosition->isPhysRegRef) - { - newRefPosition(defs[i]->getReg()->regNum, currentLoc, RefTypeUse, tree, mask); - } - else - { - RefPosition* newest = newRefPosition(defs[i]->getInterval(), currentLoc, RefTypeUse, tree, mask, 0); - - if (info->isInternalRegDelayFree) - { - newest->delayRegFree = true; - } - } - } -} - -RegisterType LinearScan::getDefType(GenTree* tree) -{ - return tree->TypeGet(); -} - -//------------------------------------------------------------------------ -// LocationInfoListNodePool: manages a pool of `LocationInfoListNode` -// values to decrease overall memory usage -// during `buildIntervals`. -// -// `buildIntervals` involves creating a list of location info values per -// node that either directly produces a set of registers or that is a -// contained node with register-producing sources. However, these lists -// are short-lived: they are destroyed once the use of the corresponding -// node is processed. As such, there is typically only a small number of -// `LocationInfoListNode` values in use at any given time. Pooling these -// values avoids otherwise frequent allocations. -class LocationInfoListNodePool final -{ - LocationInfoListNode* m_freeList; - Compiler* m_compiler; - -public: - //------------------------------------------------------------------------ - // LocationInfoListNodePool::LocationInfoListNodePool: - // Creates a pool of `LocationInfoListNode` values. - // - // Arguments: - // compiler - The compiler context. - // preallocate - The number of nodes to preallocate. 
- // - LocationInfoListNodePool(Compiler* compiler, unsigned preallocate = 0) : m_compiler(compiler) - { - if (preallocate > 0) - { - size_t preallocateSize = sizeof(LocationInfoListNode) * preallocate; - LocationInfoListNode* preallocatedNodes = - reinterpret_cast<LocationInfoListNode*>(compiler->compGetMem(preallocateSize, CMK_LSRA)); - - LocationInfoListNode* head = preallocatedNodes; - head->m_next = nullptr; - - for (unsigned i = 1; i < preallocate; i++) - { - LocationInfoListNode* node = &preallocatedNodes[i]; - node->m_next = head; - head = node; - } - - m_freeList = head; - } - } - - //------------------------------------------------------------------------ - // LocationInfoListNodePool::GetNode: Fetches an unused node from the - // pool. - // - // Arguments: - // l - - The `LsraLocation` for the `LocationInfo` value. - // i - The interval for the `LocationInfo` value. - // t - The IR node for the `LocationInfo` value - // regIdx - The register index for the `LocationInfo` value. - // - // Returns: - // A pooled or newly-allocated `LocationInfoListNode`, depending on the - // contents of the pool. - LocationInfoListNode* GetNode(LsraLocation l, Interval* i, GenTree* t, unsigned regIdx = 0) - { - LocationInfoListNode* head = m_freeList; - if (head == nullptr) - { - head = reinterpret_cast<LocationInfoListNode*>(m_compiler->compGetMem(sizeof(LocationInfoListNode))); - } - else - { - m_freeList = head->m_next; - } - - head->loc = l; - head->interval = i; - head->treeNode = t; - head->m_next = nullptr; - - return head; - } - - //------------------------------------------------------------------------ - // LocationInfoListNodePool::ReturnNodes: Returns a list of nodes to the - // pool. - // - // Arguments: - // list - The list to return. - // - void ReturnNodes(LocationInfoList& list) - { - assert(list.m_head != nullptr); - assert(list.m_tail != nullptr); - - LocationInfoListNode* head = m_freeList; - list.m_tail->m_next = head; - m_freeList = list.m_head; - - list.m_head = nullptr; - list.m_tail = nullptr; - } -}; - -#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE -VARSET_VALRET_TP -LinearScan::buildUpperVectorSaveRefPositions(GenTree* tree, LsraLocation currentLoc) -{ - assert(enregisterLocalVars); - VARSET_TP liveLargeVectors(VarSetOps::MakeEmpty(compiler)); - regMaskTP fpCalleeKillSet = RBM_NONE; - if (!VarSetOps::IsEmpty(compiler, largeVectorVars)) - { - // We actually need to find any calls that kill the upper-half of the callee-save vector registers. - // But we will use as a proxy any node that kills floating point registers. - // (Note that some calls are masquerading as other nodes at this point so we can't just check for calls.) 
- fpCalleeKillSet = getKillSetForNode(tree); - if ((fpCalleeKillSet & RBM_FLT_CALLEE_TRASH) != RBM_NONE) - { - VarSetOps::AssignNoCopy(compiler, liveLargeVectors, - VarSetOps::Intersection(compiler, currentLiveVars, largeVectorVars)); - VarSetOps::Iter iter(compiler, liveLargeVectors); - unsigned varIndex = 0; - while (iter.NextElem(&varIndex)) - { - Interval* varInterval = getIntervalForLocalVar(varIndex); - Interval* tempInterval = newInterval(varInterval->registerType); - tempInterval->isInternal = true; - RefPosition* pos = - newRefPosition(tempInterval, currentLoc, RefTypeUpperVectorSaveDef, tree, RBM_FLT_CALLEE_SAVED); - // We are going to save the existing relatedInterval of varInterval on tempInterval, so that we can set - // the tempInterval as the relatedInterval of varInterval, so that we can build the corresponding - // RefTypeUpperVectorSaveUse RefPosition. We will then restore the relatedInterval onto varInterval, - // and set varInterval as the relatedInterval of tempInterval. - tempInterval->relatedInterval = varInterval->relatedInterval; - varInterval->relatedInterval = tempInterval; - } - } - } - return liveLargeVectors; -} - -void LinearScan::buildUpperVectorRestoreRefPositions(GenTree* tree, - LsraLocation currentLoc, - VARSET_VALARG_TP liveLargeVectors) -{ - assert(enregisterLocalVars); - if (!VarSetOps::IsEmpty(compiler, liveLargeVectors)) - { - VarSetOps::Iter iter(compiler, liveLargeVectors); - unsigned varIndex = 0; - while (iter.NextElem(&varIndex)) - { - Interval* varInterval = getIntervalForLocalVar(varIndex); - Interval* tempInterval = varInterval->relatedInterval; - assert(tempInterval->isInternal == true); - RefPosition* pos = - newRefPosition(tempInterval, currentLoc, RefTypeUpperVectorSaveUse, tree, RBM_FLT_CALLEE_SAVED); - // Restore the relatedInterval onto varInterval, and set varInterval as the relatedInterval - // of tempInterval. - varInterval->relatedInterval = tempInterval->relatedInterval; - tempInterval->relatedInterval = varInterval; - } - } -} -#endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE - -#ifdef DEBUG -//------------------------------------------------------------------------ -// ComputeOperandDstCount: computes the number of registers defined by a -// node. -// -// For most nodes, this is simple: -// - Nodes that do not produce values (e.g. stores and other void-typed -// nodes) and nodes that immediately use the registers they define -// produce no registers -// - Nodes that are marked as defining N registers define N registers. -// -// For contained nodes, however, things are more complicated: for purposes -// of bookkeeping, a contained node is treated as producing the transitive -// closure of the registers produced by its sources. -// -// Arguments: -// operand - The operand for which to compute a register count. -// -// Returns: -// The number of registers defined by `operand`. -// -static int ComputeOperandDstCount(GenTree* operand) -{ - // GT_ARGPLACE is the only non-LIR node that is currently in the trees at this stage, though - // note that it is not in the linear order. It seems best to check for !IsLIR() rather than - // GT_ARGPLACE directly, since it's that characteristic that makes it irrelevant for this method. - if (!operand->IsLIR()) - { - return 0; - } - if (operand->isContained()) - { - int dstCount = 0; - for (GenTree* op : operand->Operands()) - { - dstCount += ComputeOperandDstCount(op); - } - - return dstCount; - } - if (operand->IsUnusedValue()) - { - // Operands that define an unused value do not produce any registers. 
- return 0; - } - if (operand->IsValue()) - { - // Operands that are values and are not contained consume all of their operands - // and produce one or more registers. - return operand->GetRegisterDstCount(); - } - else - { - // This must be one of the operand types that are neither contained nor produce a value. - // Stores and void-typed operands may be encountered when processing call nodes, which contain - // pointers to argument setup stores. - assert(operand->OperIsStore() || operand->OperIsBlkOp() || operand->OperIsPutArgStk() || - operand->OperIsCompare() || operand->OperIs(GT_CMP) || operand->IsSIMDEqualityOrInequality() || - operand->TypeGet() == TYP_VOID); - return 0; - } -} - -//------------------------------------------------------------------------ -// ComputeAvailableSrcCount: computes the number of registers available as -// sources for a node. -// -// This is simply the sum of the number of registers produced by each -// operand to the node. -// -// Arguments: -// node - The node for which to compute a source count. -// -// Retures: -// The number of registers available as sources for `node`. -// -static int ComputeAvailableSrcCount(GenTree* node) -{ - int numSources = 0; - for (GenTree* operand : node->Operands()) - { - numSources += ComputeOperandDstCount(operand); - } - - return numSources; -} -#endif // DEBUG - -void LinearScan::buildRefPositionsForNode(GenTree* tree, - BasicBlock* block, - LocationInfoListNodePool& listNodePool, - LsraLocation currentLoc) -{ -#ifdef _TARGET_ARM_ - assert(!isRegPairType(tree->TypeGet())); -#endif // _TARGET_ARM_ - - // The LIR traversal doesn't visit GT_LIST or GT_ARGPLACE nodes. - // GT_CLS_VAR nodes should have been eliminated by rationalizer. - assert(tree->OperGet() != GT_ARGPLACE); - assert(tree->OperGet() != GT_LIST); - assert(tree->OperGet() != GT_CLS_VAR); - - // The LIR traversal visits only the first node in a GT_FIELD_LIST. - assert((tree->OperGet() != GT_FIELD_LIST) || tree->AsFieldList()->IsFieldListHead()); - - // The set of internal temporary registers used by this node are stored in the - // gtRsvdRegs register mask. Clear it out. - tree->gtRsvdRegs = RBM_NONE; - -#ifdef DEBUG - if (VERBOSE) - { - dumpDefList(); - compiler->gtDispTree(tree, nullptr, nullptr, true); - } -#endif // DEBUG - - // If the node produces a value that will be consumed by a parent node, its TreeNodeInfo will - // be allocated in the LocationInfoListNode. Otherwise, we'll just use a local value that will - // be thrown away when we're done. 
- LocationInfoListNode* locationInfo = nullptr; - TreeNodeInfo tempInfo; - TreeNodeInfo* info = nullptr; - if (!tree->isContained() && tree->IsValue()) - { - locationInfo = listNodePool.GetNode(currentLoc, nullptr, tree); - info = &locationInfo->info; - } - else - { - info = &tempInfo; - } - info->Initialize(this, tree); - TreeNodeInfoInit(tree, info); - -#ifdef DEBUG - if (VERBOSE) - { - printf(" +"); - info->dump(this); - tree->dumpLIRFlags(); - printf("\n"); - } -#endif // DEBUG - - assert(info->IsValid(this)); - int consume = info->srcCount; - int produce = info->dstCount; - -#ifdef DEBUG - if (VERBOSE) - { - if (tree->isContained()) - { - JITDUMP("Contained\n"); - } - else if (tree->OperIs(GT_LCL_VAR, GT_LCL_FLD) && info->isLocalDefUse) - { - JITDUMP("Unused\n"); - } - else - { - JITDUMP(" consume=%d produce=%d\n", consume, produce); - } - } -#endif // DEBUG - - assert(((consume == 0) && (produce == 0)) || (ComputeAvailableSrcCount(tree) == consume)); - - if (tree->OperIs(GT_LCL_VAR, GT_LCL_FLD)) - { - LclVarDsc* const varDsc = &compiler->lvaTable[tree->AsLclVarCommon()->gtLclNum]; - if (isCandidateVar(varDsc)) - { - assert(consume == 0); - - // We handle tracked variables differently from non-tracked ones. If it is tracked, - // we simply add a use or def of the tracked variable. Otherwise, for a use we need - // to actually add the appropriate references for loading or storing the variable. - // - // It won't actually get used or defined until the appropriate ancestor tree node - // is processed, unless this is marked "isLocalDefUse" because it is a stack-based argument - // to a call - - assert(varDsc->lvTracked); - unsigned varIndex = varDsc->lvVarIndex; - - // We have only approximate last-use information at this point. This is because the - // execution order doesn't actually reflect the true order in which the localVars - // are referenced - but the order of the RefPositions will, so we recompute it after - // RefPositions are built. - // Use the old value for setting currentLiveVars - note that we do this with the - // not-quite-correct setting of lastUse. However, this is OK because - // 1) this is only for preferencing, which doesn't require strict correctness, and - // 2) the cases where these out-of-order uses occur should not overlap a kill. - // TODO-Throughput: clean this up once we have the execution order correct. At that point - // we can update currentLiveVars at the same place that we create the RefPosition. 
- if ((tree->gtFlags & GTF_VAR_DEATH) != 0) - { - VarSetOps::RemoveElemD(compiler, currentLiveVars, varIndex); - } - - if (!info->isLocalDefUse && !tree->isContained()) - { - assert(produce != 0); - - locationInfo->interval = getIntervalForLocalVar(varIndex); - defList.Append(locationInfo); - } - return; - } - } - - // Handle the case of local variable assignment - Interval* varDefInterval = nullptr; - RefType defRefType = RefTypeDef; - - GenTree* defNode = tree; - - // noAdd means the node creates a def but for purposes of map - // management do not add it because data is not flowing up the - // tree - - bool noAdd = info->isLocalDefUse; - RefPosition* prevPos = nullptr; - - bool isSpecialPutArg = false; - - assert(!tree->OperIsAssignment()); - if (tree->OperIsLocalStore()) - { - GenTreeLclVarCommon* const store = tree->AsLclVarCommon(); - assert((consume > 1) || (regType(store->gtOp1->TypeGet()) == regType(store->TypeGet()))); - - LclVarDsc* varDsc = &compiler->lvaTable[store->gtLclNum]; - if (isCandidateVar(varDsc)) - { - // We always push the tracked lclVar intervals - assert(varDsc->lvTracked); - unsigned varIndex = varDsc->lvVarIndex; - varDefInterval = getIntervalForLocalVar(varIndex); - defRefType = refTypeForLocalRefNode(tree); - defNode = tree; - if (produce == 0) - { - produce = 1; - noAdd = true; - } - - assert(consume <= MAX_RET_REG_COUNT); - if (consume == 1) - { - // Get the location info for the register defined by the first operand. - LocationInfoListNode& operandInfo = *(useList.Begin()); - assert(operandInfo.treeNode == tree->gtGetOp1()); - - Interval* srcInterval = operandInfo.interval; - if (srcInterval->relatedInterval == nullptr) - { - // Preference the source to the dest, unless this is a non-last-use localVar. - // Note that the last-use info is not correct, but it is a better approximation than preferencing - // the source to the dest, if the source's lifetime extends beyond the dest. - if (!srcInterval->isLocalVar || (operandInfo.treeNode->gtFlags & GTF_VAR_DEATH) != 0) - { - srcInterval->assignRelatedInterval(varDefInterval); - } - } - else if (!srcInterval->isLocalVar) - { - // Preference the source to dest, if src is not a local var. - srcInterval->assignRelatedInterval(varDefInterval); - } - } - - if ((tree->gtFlags & GTF_VAR_DEATH) == 0) - { - VarSetOps::AddElemD(compiler, currentLiveVars, varIndex); - } - } - else if (store->gtOp1->OperIs(GT_BITCAST)) - { - store->gtType = store->gtOp1->gtType = store->gtOp1->AsUnOp()->gtOp1->TypeGet(); - - // Get the location info for the register defined by the first operand. - LocationInfoListNode& operandInfo = *(useList.Begin()); - assert(operandInfo.treeNode == tree->gtGetOp1()); - - Interval* srcInterval = operandInfo.interval; - srcInterval->registerType = regType(store->TypeGet()); - - RefPosition* srcDefPosition = srcInterval->firstRefPosition; - assert(srcDefPosition != nullptr); - assert(srcDefPosition->refType == RefTypeDef); - assert(srcDefPosition->treeNode == store->gtOp1); - - srcDefPosition->registerAssignment = allRegs(store->TypeGet()); - operandInfo.info.setSrcCandidates(this, allRegs(store->TypeGet())); - } - } - else if (noAdd && produce == 0) - { - // Dead nodes may remain after tree rationalization, decomposition or lowering. - // They should be marked as UnusedValue. - // TODO-Cleanup: Identify and remove these dead nodes prior to register allocation. 
- assert(!noAdd || (produce != 0)); - } - - Interval* prefSrcInterval = nullptr; - - // If this is a binary operator that will be encoded with 2 operand fields - // (i.e. the target is read-modify-write), preference the dst to op1. - - bool hasDelayFreeSrc = info->hasDelayFreeSrc; - -#if defined(DEBUG) && defined(_TARGET_X86_) - // On x86, `LSRA_LIMIT_CALLER` is too restrictive to allow the use of special put args: this stress mode - // leaves only three registers allocatable--eax, ecx, and edx--of which the latter two are also used for the - // first two integral arguments to a call. This can leave us with too few registers to succesfully allocate in - // situations like the following: - // - // t1026 = lclVar ref V52 tmp35 u:3 REG NA <l:$3a1, c:$98d> - // - // /--* t1026 ref - // t1352 = * putarg_reg ref REG NA - // - // t342 = lclVar int V14 loc6 u:4 REG NA $50c - // - // t343 = const int 1 REG NA $41 - // - // /--* t342 int - // +--* t343 int - // t344 = * + int REG NA $495 - // - // t345 = lclVar int V04 arg4 u:2 REG NA $100 - // - // /--* t344 int - // +--* t345 int - // t346 = * % int REG NA $496 - // - // /--* t346 int - // t1353 = * putarg_reg int REG NA - // - // t1354 = lclVar ref V52 tmp35 (last use) REG NA - // - // /--* t1354 ref - // t1355 = * lea(b+0) byref REG NA - // - // Here, the first `putarg_reg` would normally be considered a special put arg, which would remove `ecx` from the - // set of allocatable registers, leaving only `eax` and `edx`. The allocator will then fail to allocate a register - // for the def of `t345` if arg4 is not a register candidate: the corresponding ref position will be constrained to - // { `ecx`, `ebx`, `esi`, `edi` }, which `LSRA_LIMIT_CALLER` will further constrain to `ecx`, which will not be - // available due to the special put arg. - const bool supportsSpecialPutArg = getStressLimitRegs() != LSRA_LIMIT_CALLER; -#else - const bool supportsSpecialPutArg = true; -#endif - - if (supportsSpecialPutArg && tree->OperGet() == GT_PUTARG_REG && isCandidateLocalRef(tree->gtGetOp1()) && - (tree->gtGetOp1()->gtFlags & GTF_VAR_DEATH) == 0) - { - // This is the case for a "pass-through" copy of a lclVar. In the case where it is a non-last-use, - // we don't want the def of the copy to kill the lclVar register, if it is assigned the same register - // (which is actually what we hope will happen). - JITDUMP("Setting putarg_reg as a pass-through of a non-last use lclVar\n"); - - // Get the register information for the first operand of the node. - LocationInfoListNode* operandDef = useList.Begin(); - assert(operandDef->treeNode == tree->gtGetOp1()); - - // Preference the destination to the interval of the first register defined by the first operand. - Interval* srcInterval = operandDef->interval; - assert(srcInterval->isLocalVar); - prefSrcInterval = srcInterval; - isSpecialPutArg = true; - } - - RefPosition* internalRefs[MaxInternalRegisters]; - -#ifdef DEBUG - // If we are constraining the registers for allocation, we will modify all the RefPositions - // we've built for this node after we've created them. In order to do that, we'll remember - // the last RefPosition prior to those created for this node. - RefPositionIterator refPositionMark = refPositions.backPosition(); -#endif // DEBUG - - // Make intervals for all the 'internal' register requirements for this node, - // where internal means additional registers required temporarily. - // Create a RefTypeDef RefPosition for each such interval. 
- int internalCount = buildInternalRegisterDefsForNode(tree, info, internalRefs); - - // Make use RefPositions for all used values. - int consumed = 0; - for (LocationInfoListNode *listNode = useList.Begin(), *end = useList.End(); listNode != end; - listNode = listNode->Next()) - { - LocationInfo& locInfo = *static_cast<LocationInfo*>(listNode); - - // For tree temps, a use is always a last use and the end of the range; - // this is set by default in newRefPosition - GenTree* const useNode = locInfo.treeNode; - assert(useNode != nullptr); - - Interval* srcInterval = locInfo.interval; - TreeNodeInfo& useNodeInfo = locInfo.info; - if (useNodeInfo.isTgtPref) - { - prefSrcInterval = srcInterval; - } - - const bool delayRegFree = (hasDelayFreeSrc && useNodeInfo.isDelayFree); - - regMaskTP candidates = useNodeInfo.getSrcCandidates(this); -#ifdef _TARGET_ARM_ - regMaskTP allCandidates = candidates; - - if (useNode->OperIsPutArgSplit() || useNode->OperIsMultiRegOp()) - { - // get i-th candidate, set bits in useCandidates must be in sequential order. - candidates = genFindLowestReg(allCandidates); - allCandidates &= ~candidates; - } -#endif // _TARGET_ARM_ - - assert((candidates & allRegs(srcInterval->registerType)) != 0); - - // For non-localVar uses we record nothing, as nothing needs to be written back to the tree. - GenTree* const refPosNode = srcInterval->isLocalVar ? useNode : nullptr; - RefPosition* pos = newRefPosition(srcInterval, currentLoc, RefTypeUse, refPosNode, candidates, 0); - if (delayRegFree) - { - pos->delayRegFree = true; - } - - if (useNode->IsRegOptional()) - { - pos->setAllocateIfProfitable(true); - } - consumed++; - - // Create additional use RefPositions for multi-reg nodes. - for (int idx = 1; idx < locInfo.info.dstCount; idx++) - { - noway_assert(srcInterval->relatedInterval != nullptr); - srcInterval = srcInterval->relatedInterval; -#ifdef _TARGET_ARM_ - if (useNode->OperIsPutArgSplit() || - (compiler->opts.compUseSoftFP && (useNode->OperIsPutArgReg() || useNode->OperGet() == GT_BITCAST))) - { - // get first candidate, set bits in useCandidates must be in sequential order. - candidates = genFindLowestReg(allCandidates); - allCandidates &= ~candidates; - } -#endif // _TARGET_ARM_ - RefPosition* pos = newRefPosition(srcInterval, currentLoc, RefTypeUse, refPosNode, candidates, idx); - consumed++; - } - } - - assert(consumed == consume); - if (consume != 0) - { - listNodePool.ReturnNodes(useList); - } - - buildInternalRegisterUsesForNode(tree, info, internalRefs, internalCount); - - RegisterType registerType = getDefType(tree); - regMaskTP candidates = info->getDstCandidates(this); - regMaskTP useCandidates = info->getSrcCandidates(this); - -#ifdef DEBUG - if (VERBOSE && produce) - { - printf("Def candidates "); - dumpRegMask(candidates); - printf(", Use candidates "); - dumpRegMask(useCandidates); - printf("\n"); - } -#endif // DEBUG - -#if defined(_TARGET_AMD64_) - // Multi-reg call node is the only node that could produce multi-reg value - assert(produce <= 1 || (tree->IsMultiRegCall() && produce == MAX_RET_REG_COUNT)); -#endif // _TARGET_xxx_ - - // Add kill positions before adding def positions - buildKillPositionsForNode(tree, currentLoc + 1); - -#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE - VARSET_TP liveLargeVectors(VarSetOps::UninitVal()); - if (enregisterLocalVars && (RBM_FLT_CALLEE_SAVED != RBM_NONE)) - { - // Build RefPositions for saving any live large vectors. - // This must be done after the kills, so that we know which large vectors are still live. 
- VarSetOps::AssignNoCopy(compiler, liveLargeVectors, buildUpperVectorSaveRefPositions(tree, currentLoc + 1)); - } -#endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE - - ReturnTypeDesc* retTypeDesc = nullptr; - bool isMultiRegCall = tree->IsMultiRegCall(); - if (isMultiRegCall) - { - retTypeDesc = tree->AsCall()->GetReturnTypeDesc(); - assert((int)genCountBits(candidates) == produce); - assert(candidates == retTypeDesc->GetABIReturnRegs()); - } - - // push defs - LocationInfoList locationInfoList; - LsraLocation defLocation = currentLoc + 1; - Interval* interval = varDefInterval; - // For nodes that define multiple registers, subsequent intervals will be linked using the 'relatedInterval' field. - // Keep track of the previous interval allocated, for that purpose. - Interval* prevInterval = nullptr; - for (int i = 0; i < produce; i++) - { - regMaskTP currCandidates = candidates; - - // In case of multi-reg call node, registerType is given by - // the type of ith position return register. - if (isMultiRegCall) - { - registerType = retTypeDesc->GetReturnRegType((unsigned)i); - currCandidates = genRegMask(retTypeDesc->GetABIReturnReg(i)); - useCandidates = allRegs(registerType); - } - -#ifdef _TARGET_ARM_ - // If oper is GT_PUTARG_REG, set bits in useCandidates must be in sequential order. - if (tree->OperIsPutArgSplit() || tree->OperIsMultiRegOp()) - { - // get i-th candidate - currCandidates = genFindLowestReg(candidates); - candidates &= ~currCandidates; - } -#endif // _TARGET_ARM_ - - if (interval == nullptr) - { - // Make a new interval - interval = newInterval(registerType); - if (hasDelayFreeSrc || info->isInternalRegDelayFree) - { - interval->hasInterferingUses = true; - } - else if (tree->OperIsConst()) - { - assert(!tree->IsReuseRegVal()); - interval->isConstant = true; - } - - if ((currCandidates & useCandidates) != RBM_NONE) - { - interval->updateRegisterPreferences(currCandidates & useCandidates); - } - - if (isSpecialPutArg) - { - interval->isSpecialPutArg = true; - } - } - else - { - assert(registerTypesEquivalent(interval->registerType, registerType)); - } - - if (prefSrcInterval != nullptr) - { - interval->assignRelatedIntervalIfUnassigned(prefSrcInterval); - } - - // for assignments, we want to create a refposition for the def - // but not push it - if (!noAdd) - { - if (i == 0) - { - locationInfo->interval = interval; - prevInterval = interval; - defList.Append(locationInfo); - } - else - { - // This is the 2nd or subsequent register defined by a multi-reg node. - // Connect them using 'relatedInterval'. - noway_assert(prevInterval != nullptr); - prevInterval->relatedInterval = interval; - prevInterval = interval; - prevInterval->isMultiReg = true; - interval->isMultiReg = true; - } - } - - RefPosition* pos = newRefPosition(interval, defLocation, defRefType, defNode, currCandidates, (unsigned)i); - if (info->isLocalDefUse) - { - // This must be an unused value, OR it is a special node for which we allocate - // a target register even though it produces no value. - assert(defNode->IsUnusedValue() || (defNode->gtOper == GT_LOCKADD)); - pos->isLocalDefUse = true; - pos->lastUse = true; - } - interval->updateRegisterPreferences(currCandidates); - interval->updateRegisterPreferences(useCandidates); - interval = nullptr; - } - -#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE - // SaveDef position must be at the same location as Def position of call node. 
-    if (enregisterLocalVars)
-    {
-        buildUpperVectorRestoreRefPositions(tree, defLocation, liveLargeVectors);
-    }
-#endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE
-
-#ifdef DEBUG
-    // If we are constraining registers, modify all the RefPositions we've just built to specify the
-    // minimum reg count required.
-    if ((getStressLimitRegs() != LSRA_LIMIT_NONE) || (getSelectionHeuristics() != LSRA_SELECT_DEFAULT))
-    {
-        // The number of registers required for a tree node is the sum of
-        // consume + produce + internalCount. This is the minimum
-        // set of registers that needs to be ensured in the candidate
-        // set of the ref positions created.
-        unsigned minRegCount = consume + produce + info->internalIntCount + info->internalFloatCount;
-
-        for (refPositionMark++; refPositionMark != refPositions.end(); refPositionMark++)
-        {
-            RefPosition* newRefPosition    = &(*refPositionMark);
-            unsigned     minRegCountForRef = minRegCount;
-            if (RefTypeIsUse(newRefPosition->refType) && newRefPosition->delayRegFree)
-            {
-                // If delayRegFree, then Use will interfere with the destination of the consuming node.
-                // Therefore, we also need to add the kill set of the consuming node to minRegCount.
-                //
-                // For example consider the following IR on x86, where v01 and v02
-                // are method args coming in ecx and edx respectively.
-                //   GT_DIV(v01, v02)
-                //
-                // For GT_DIV, the minRegCount will be 3 without adding kill set of GT_DIV node.
-                //
-                // Assume further JitStressRegs=2, which would constrain candidates to callee trashable
-                // regs { eax, ecx, edx } on use positions of v01 and v02. LSRA allocates ecx for v01.
-                // The use position of v02 cannot be allocated a reg since it is marked delay-reg free and
-                // {eax,edx} are getting killed before the def of GT_DIV. For this reason, minRegCount for
-                // the use position of v02 also needs to take into account the kill set of its consuming node.
-                regMaskTP killMask = getKillSetForNode(tree);
-                if (killMask != RBM_NONE)
-                {
-                    minRegCountForRef += genCountBits(killMask);
-                }
-            }
-            newRefPosition->minRegCandidateCount = minRegCountForRef;
-            if (newRefPosition->IsActualRef() && doReverseCallerCallee())
-            {
-                Interval* interval       = newRefPosition->getInterval();
-                regMaskTP oldAssignment  = newRefPosition->registerAssignment;
-                regMaskTP calleeSaveMask = calleeSaveRegs(interval->registerType);
-                newRefPosition->registerAssignment =
-                    getConstrainedRegMask(oldAssignment, calleeSaveMask, minRegCountForRef);
-                if ((newRefPosition->registerAssignment != oldAssignment) && (newRefPosition->refType == RefTypeUse) &&
-                    !interval->isLocalVar)
-                {
-                    checkConflictingDefUse(newRefPosition);
-                }
-            }
-        }
-    }
-#endif // DEBUG
-    JITDUMP("\n");
-}
-
-// make an interval for each physical register
-void LinearScan::buildPhysRegRecords()
-{
-    RegisterType regType = IntRegisterType;
-    for (regNumber reg = REG_FIRST; reg < ACTUAL_REG_COUNT; reg = REG_NEXT(reg))
-    {
-        RegRecord* curr = &physRegs[reg];
-        curr->init(reg);
-    }
-}
-
-BasicBlock* getNonEmptyBlock(BasicBlock* block)
-{
-    while (block != nullptr && block->bbTreeList == nullptr)
-    {
-        BasicBlock* nextBlock = block->bbNext;
-        // Note that here we use the version of NumSucc that does not take a compiler.
-        // That way this doesn't have to take a compiler, or be an instance method, e.g. of LinearScan.
-        // If we have an empty block, it must have jump type BBJ_NONE or BBJ_ALWAYS, in which
-        // case we don't need the version that takes a compiler.
- assert(block->NumSucc() == 1 && ((block->bbJumpKind == BBJ_ALWAYS) || (block->bbJumpKind == BBJ_NONE))); - // sometimes the first block is empty and ends with an uncond branch - // assert( block->GetSucc(0) == nextBlock); - block = nextBlock; - } - assert(block != nullptr && block->bbTreeList != nullptr); - return block; -} - -//------------------------------------------------------------------------ -// insertZeroInitRefPositions: Handle lclVars that are live-in to the first block -// -// Notes: -// Prior to calling this method, 'currentLiveVars' must be set to the set of register -// candidate variables that are liveIn to the first block. -// For each register candidate that is live-in to the first block: -// - If it is a GC ref, or if compInitMem is set, a ZeroInit RefPosition will be created. -// - Otherwise, it will be marked as spilled, since it will not be assigned a register -// on entry and will be loaded from memory on the undefined path. -// Note that, when the compInitMem option is not set, we may encounter these on -// paths that are protected by the same condition as an earlier def. However, since -// we don't do the analysis to determine this - and couldn't rely on always identifying -// such cases even if we tried - we must conservatively treat the undefined path as -// being possible. This is a relatively rare case, so the introduced conservatism is -// not expected to warrant the analysis required to determine the best placement of -// an initialization. -// -void LinearScan::insertZeroInitRefPositions() -{ - assert(enregisterLocalVars); -#ifdef DEBUG - VARSET_TP expectedLiveVars(VarSetOps::Intersection(compiler, registerCandidateVars, compiler->fgFirstBB->bbLiveIn)); - assert(VarSetOps::Equal(compiler, currentLiveVars, expectedLiveVars)); -#endif // DEBUG - - // insert defs for this, then a block boundary - - VarSetOps::Iter iter(compiler, currentLiveVars); - unsigned varIndex = 0; - while (iter.NextElem(&varIndex)) - { - unsigned varNum = compiler->lvaTrackedToVarNum[varIndex]; - LclVarDsc* varDsc = compiler->lvaTable + varNum; - if (!varDsc->lvIsParam && isCandidateVar(varDsc)) - { - JITDUMP("V%02u was live in to first block:", varNum); - Interval* interval = getIntervalForLocalVar(varIndex); - if (compiler->info.compInitMem || varTypeIsGC(varDsc->TypeGet())) - { - JITDUMP(" creating ZeroInit\n"); - GenTree* firstNode = getNonEmptyBlock(compiler->fgFirstBB)->firstNode(); - RefPosition* pos = - newRefPosition(interval, MinLocation, RefTypeZeroInit, firstNode, allRegs(interval->registerType)); - varDsc->lvMustInit = true; - } - else - { - setIntervalAsSpilled(interval); - JITDUMP(" marking as spilled\n"); - } - } - } -} - -#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) -// ----------------------------------------------------------------------- -// Sets the register state for an argument of type STRUCT for System V systems. -// See Compiler::raUpdateRegStateForArg(RegState *regState, LclVarDsc *argDsc) in regalloc.cpp -// for how state for argument is updated for unix non-structs and Windows AMD64 structs. 
-void LinearScan::unixAmd64UpdateRegStateForArg(LclVarDsc* argDsc) -{ - assert(varTypeIsStruct(argDsc)); - RegState* intRegState = &compiler->codeGen->intRegState; - RegState* floatRegState = &compiler->codeGen->floatRegState; - - if ((argDsc->lvArgReg != REG_STK) && (argDsc->lvArgReg != REG_NA)) - { - if (genRegMask(argDsc->lvArgReg) & (RBM_ALLFLOAT)) - { - assert(genRegMask(argDsc->lvArgReg) & (RBM_FLTARG_REGS)); - floatRegState->rsCalleeRegArgMaskLiveIn |= genRegMask(argDsc->lvArgReg); - } - else - { - assert(genRegMask(argDsc->lvArgReg) & (RBM_ARG_REGS)); - intRegState->rsCalleeRegArgMaskLiveIn |= genRegMask(argDsc->lvArgReg); - } - } - - if ((argDsc->lvOtherArgReg != REG_STK) && (argDsc->lvOtherArgReg != REG_NA)) - { - if (genRegMask(argDsc->lvOtherArgReg) & (RBM_ALLFLOAT)) - { - assert(genRegMask(argDsc->lvOtherArgReg) & (RBM_FLTARG_REGS)); - floatRegState->rsCalleeRegArgMaskLiveIn |= genRegMask(argDsc->lvOtherArgReg); - } - else - { - assert(genRegMask(argDsc->lvOtherArgReg) & (RBM_ARG_REGS)); - intRegState->rsCalleeRegArgMaskLiveIn |= genRegMask(argDsc->lvOtherArgReg); - } - } -} - -#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) - -//------------------------------------------------------------------------ -// updateRegStateForArg: Updates rsCalleeRegArgMaskLiveIn for the appropriate -// regState (either compiler->intRegState or compiler->floatRegState), -// with the lvArgReg on "argDsc" -// -// Arguments: -// argDsc - the argument for which the state is to be updated. -// -// Return Value: None -// -// Assumptions: -// The argument is live on entry to the function -// (or is untracked and therefore assumed live) -// -// Notes: -// This relies on a method in regAlloc.cpp that is shared between LSRA -// and regAlloc. It is further abstracted here because regState is updated -// separately for tracked and untracked variables in LSRA. -// -void LinearScan::updateRegStateForArg(LclVarDsc* argDsc) -{ -#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) - // For System V AMD64 calls the argDsc can have 2 registers (for structs.) - // Handle them here. - if (varTypeIsStruct(argDsc)) - { - unixAmd64UpdateRegStateForArg(argDsc); - } - else -#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) - { - RegState* intRegState = &compiler->codeGen->intRegState; - RegState* floatRegState = &compiler->codeGen->floatRegState; - // In the case of AMD64 we'll still use the floating point registers - // to model the register usage for argument on vararg calls, so - // we will ignore the varargs condition to determine whether we use - // XMM registers or not for setting up the call. 
- bool isFloat = (isFloatRegType(argDsc->lvType) -#ifndef _TARGET_AMD64_ - && !compiler->info.compIsVarArgs -#endif - && !compiler->opts.compUseSoftFP); - - if (argDsc->lvIsHfaRegArg()) - { - isFloat = true; - } - - if (isFloat) - { - JITDUMP("Float arg V%02u in reg %s\n", (argDsc - compiler->lvaTable), getRegName(argDsc->lvArgReg)); - compiler->raUpdateRegStateForArg(floatRegState, argDsc); - } - else - { - JITDUMP("Int arg V%02u in reg %s\n", (argDsc - compiler->lvaTable), getRegName(argDsc->lvArgReg)); -#if FEATURE_MULTIREG_ARGS - if (argDsc->lvOtherArgReg != REG_NA) - { - JITDUMP("(second half) in reg %s\n", getRegName(argDsc->lvOtherArgReg)); - } -#endif // FEATURE_MULTIREG_ARGS - compiler->raUpdateRegStateForArg(intRegState, argDsc); - } - } -} - //------------------------------------------------------------------------ // findPredBlockForLiveIn: Determine which block should be used for the register locations of the live-in variables. // @@ -4371,448 +2380,6 @@ BasicBlock* LinearScan::findPredBlockForLiveIn(BasicBlock* block, return predBlock; } -void LinearScan::buildIntervals() -{ - BasicBlock* block; - - JITDUMP("\nbuildIntervals ========\n"); - - // Now build (empty) records for all of the physical registers - buildPhysRegRecords(); - -#ifdef DEBUG - if (VERBOSE) - { - printf("\n-----------------\n"); - printf("LIVENESS:\n"); - printf("-----------------\n"); - foreach_block(compiler, block) - { - printf("BB%02u use def in out\n", block->bbNum); - dumpConvertedVarSet(compiler, block->bbVarUse); - printf("\n"); - dumpConvertedVarSet(compiler, block->bbVarDef); - printf("\n"); - dumpConvertedVarSet(compiler, block->bbLiveIn); - printf("\n"); - dumpConvertedVarSet(compiler, block->bbLiveOut); - printf("\n"); - } - } -#endif // DEBUG - -#if DOUBLE_ALIGN - // We will determine whether we should double align the frame during - // identifyCandidates(), but we initially assume that we will not. - doDoubleAlign = false; -#endif - - identifyCandidates(); - - // Figure out if we're going to use a frame pointer. We need to do this before building - // the ref positions, because those objects will embed the frame register in various register masks - // if the frame pointer is not reserved. If we decide to have a frame pointer, setFrameType() will - // remove the frame pointer from the masks. - setFrameType(); - - DBEXEC(VERBOSE, TupleStyleDump(LSRA_DUMP_PRE)); - - // second part: - JITDUMP("\nbuildIntervals second part ========\n"); - currentLoc = 0; - // TODO-Cleanup: This duplicates prior behavior where entry (ParamDef) RefPositions were - // being assigned the bbNum of the last block traversed in the 2nd phase of Lowering. - // Previously, the block sequencing was done for the (formerly separate) TreeNodeInfoInit pass, - // and the curBBNum was left as the last block sequenced. This block was then used to set the - // weight for the entry (ParamDef) RefPositions. It would be logical to set this to the - // normalized entry weight (compiler->fgCalledCount), but that results in a net regression. - if (!blockSequencingDone) - { - setBlockSequence(); - } - curBBNum = blockSequence[bbSeqCount - 1]->bbNum; - - // Next, create ParamDef RefPositions for all the tracked parameters, in order of their varIndex. - // Assign these RefPositions to the (nonexistent) BB0. 
- curBBNum = 0; - - LclVarDsc* argDsc; - unsigned int lclNum; - - RegState* intRegState = &compiler->codeGen->intRegState; - RegState* floatRegState = &compiler->codeGen->floatRegState; - intRegState->rsCalleeRegArgMaskLiveIn = RBM_NONE; - floatRegState->rsCalleeRegArgMaskLiveIn = RBM_NONE; - - for (unsigned int varIndex = 0; varIndex < compiler->lvaTrackedCount; varIndex++) - { - lclNum = compiler->lvaTrackedToVarNum[varIndex]; - argDsc = &(compiler->lvaTable[lclNum]); - - if (!argDsc->lvIsParam) - { - continue; - } - - // Only reserve a register if the argument is actually used. - // Is it dead on entry? If compJmpOpUsed is true, then the arguments - // have to be kept alive, so we have to consider it as live on entry. - // Use lvRefCnt instead of checking bbLiveIn because if it's volatile we - // won't have done dataflow on it, but it needs to be marked as live-in so - // it will get saved in the prolog. - if (!compiler->compJmpOpUsed && argDsc->lvRefCnt == 0 && !compiler->opts.compDbgCode) - { - continue; - } - - if (argDsc->lvIsRegArg) - { - updateRegStateForArg(argDsc); - } - - if (isCandidateVar(argDsc)) - { - Interval* interval = getIntervalForLocalVar(varIndex); - regMaskTP mask = allRegs(TypeGet(argDsc)); - if (argDsc->lvIsRegArg) - { - // Set this interval as currently assigned to that register - regNumber inArgReg = argDsc->lvArgReg; - assert(inArgReg < REG_COUNT); - mask = genRegMask(inArgReg); - assignPhysReg(inArgReg, interval); - } - RefPosition* pos = newRefPosition(interval, MinLocation, RefTypeParamDef, nullptr, mask); - } - else if (varTypeIsStruct(argDsc->lvType)) - { - for (unsigned fieldVarNum = argDsc->lvFieldLclStart; - fieldVarNum < argDsc->lvFieldLclStart + argDsc->lvFieldCnt; ++fieldVarNum) - { - LclVarDsc* fieldVarDsc = &(compiler->lvaTable[fieldVarNum]); - if (fieldVarDsc->lvLRACandidate) - { - assert(fieldVarDsc->lvTracked); - Interval* interval = getIntervalForLocalVar(fieldVarDsc->lvVarIndex); - RefPosition* pos = - newRefPosition(interval, MinLocation, RefTypeParamDef, nullptr, allRegs(TypeGet(fieldVarDsc))); - } - } - } - else - { - // We can overwrite the register (i.e. codegen saves it on entry) - assert(argDsc->lvRefCnt == 0 || !argDsc->lvIsRegArg || argDsc->lvDoNotEnregister || - !argDsc->lvLRACandidate || (varTypeIsFloating(argDsc->TypeGet()) && compiler->opts.compDbgCode)); - } - } - - // Now set up the reg state for the non-tracked args - // (We do this here because we want to generate the ParamDef RefPositions in tracked - // order, so that loop doesn't hit the non-tracked args) - - for (unsigned argNum = 0; argNum < compiler->info.compArgsCount; argNum++, argDsc++) - { - argDsc = &(compiler->lvaTable[argNum]); - - if (argDsc->lvPromotedStruct()) - { - noway_assert(argDsc->lvFieldCnt == 1); // We only handle one field here - - unsigned fieldVarNum = argDsc->lvFieldLclStart; - argDsc = &(compiler->lvaTable[fieldVarNum]); - } - noway_assert(argDsc->lvIsParam); - if (!argDsc->lvTracked && argDsc->lvIsRegArg) - { - updateRegStateForArg(argDsc); - } - } - - // If there is a secret stub param, it is also live in - if (compiler->info.compPublishStubParam) - { - intRegState->rsCalleeRegArgMaskLiveIn |= RBM_SECRET_STUB_PARAM; - } - - LocationInfoListNodePool listNodePool(compiler, 8); - - BasicBlock* predBlock = nullptr; - BasicBlock* prevBlock = nullptr; - - // Initialize currentLiveVars to the empty set. We will set it to the current - // live-in at the entry to each block (this will include the incoming args on - // the first block). 
-    VarSetOps::AssignNoCopy(compiler, currentLiveVars, VarSetOps::MakeEmpty(compiler));
-
-    for (block = startBlockSequence(); block != nullptr; block = moveToNextBlock())
-    {
-        JITDUMP("\nNEW BLOCK BB%02u\n", block->bbNum);
-
-        bool predBlockIsAllocated = false;
-        predBlock                 = findPredBlockForLiveIn(block, prevBlock DEBUGARG(&predBlockIsAllocated));
-        if (predBlock)
-        {
-            JITDUMP("\n\nSetting BB%02u as the predecessor for determining incoming variable registers of BB%02u\n",
-                    block->bbNum, predBlock->bbNum);
-            assert(predBlock->bbNum <= bbNumMaxBeforeResolution);
-            blockInfo[block->bbNum].predBBNum = predBlock->bbNum;
-        }
-
-        if (enregisterLocalVars)
-        {
-            VarSetOps::AssignNoCopy(compiler, currentLiveVars,
-                                    VarSetOps::Intersection(compiler, registerCandidateVars, block->bbLiveIn));
-
-            if (block == compiler->fgFirstBB)
-            {
-                insertZeroInitRefPositions();
-                // The first real location is at 1; 0 is for the entry.
-                currentLoc = 1;
-            }
-
-            // Any lclVars live-in to a block are resolution candidates.
-            VarSetOps::UnionD(compiler, resolutionCandidateVars, currentLiveVars);
-
-            // Determine if we need any DummyDefs.
-            // We need DummyDefs for cases where "predBlock" isn't really a predecessor.
-            // Note that it's possible to have uses of uninitialized variables, in which case even the first
-            // block may require DummyDefs, which we are not currently adding - this means that these variables
-            // will always be considered to be in memory on entry (and reloaded when the use is encountered).
-            // TODO-CQ: Consider how best to tune this. Currently, if we create DummyDefs for uninitialized
-            // variables (which may actually be initialized along the dynamically executed paths, but not
-            // on all static paths), we wind up with excessive liveranges for some of these variables.
-            VARSET_TP newLiveIn(VarSetOps::MakeCopy(compiler, currentLiveVars));
-            if (predBlock)
-            {
-                // Compute set difference: newLiveIn = currentLiveVars - predBlock->bbLiveOut
-                VarSetOps::DiffD(compiler, newLiveIn, predBlock->bbLiveOut);
-            }
-            bool needsDummyDefs = (!VarSetOps::IsEmpty(compiler, newLiveIn) && block != compiler->fgFirstBB);
-
-            // Create dummy def RefPositions
-
-            if (needsDummyDefs)
-            {
-                // If we are using locations from a predecessor, we should never require DummyDefs.
-                assert(!predBlockIsAllocated);
-
-                JITDUMP("Creating dummy definitions\n");
-                VarSetOps::Iter iter(compiler, newLiveIn);
-                unsigned        varIndex = 0;
-                while (iter.NextElem(&varIndex))
-                {
-                    unsigned   varNum = compiler->lvaTrackedToVarNum[varIndex];
-                    LclVarDsc* varDsc = compiler->lvaTable + varNum;
-                    // Add a dummyDef for any candidate vars that are in the "newLiveIn" set.
-                    // If this is the entry block, don't add any incoming parameters (they're handled with ParamDefs).
-                    if (isCandidateVar(varDsc) && (predBlock != nullptr || !varDsc->lvIsParam))
-                    {
-                        Interval*    interval = getIntervalForLocalVar(varIndex);
-                        RefPosition* pos = newRefPosition(interval, currentLoc, RefTypeDummyDef, nullptr,
-                                                          allRegs(interval->registerType));
-                    }
-                }
-                JITDUMP("Finished creating dummy definitions\n\n");
-            }
-        }
-
-        // Add a dummy RefPosition to mark the block boundary.
-        // Note that we do this AFTER adding the exposed uses above, because the
-        // register positions for those exposed uses need to be recorded at
-        // this point.
-
-        RefPosition* pos = newRefPosition((Interval*)nullptr, currentLoc, RefTypeBB, nullptr, RBM_NONE);
-        currentLoc += 2;
-        JITDUMP("\n");
-
-        LIR::Range& blockRange = LIR::AsRange(block);
-        for (GenTree* node : blockRange.NonPhiNodes())
-        {
-            // We increment the number position of each tree node by 2 to simplify the logic when there's the case of
-            // a tree that implicitly does a dual-definition of temps (the long case). In this case it is easier to
-            // already have an idle spot to handle a dual-def instead of making some messy adjustments if we only
-            // increment the number position by one.
-            CLANG_FORMAT_COMMENT_ANCHOR;
-
-#ifdef DEBUG
-            node->gtSeqNum = currentLoc;
-            // In DEBUG, we want to set the gtRegTag to GT_REGTAG_REG, so that subsequent dumps will show the register
-            // value.
-            // Although this looks like a no-op it sets the tag.
-            node->gtRegNum = node->gtRegNum;
-#endif
-
-            buildRefPositionsForNode(node, block, listNodePool, currentLoc);
-
-#ifdef DEBUG
-            if (currentLoc > maxNodeLocation)
-            {
-                maxNodeLocation = currentLoc;
-            }
-#endif // DEBUG
-            currentLoc += 2;
-        }
-
-        // Note: the visited set is cleared in LinearScan::doLinearScan()
-        markBlockVisited(block);
-        assert(defList.IsEmpty());
-
-        if (enregisterLocalVars)
-        {
-            // Insert exposed uses for a lclVar that is live-out of 'block' but not live-in to the
-            // next block, or any unvisited successors.
-            // This will address lclVars that are live on a backedge, as well as those that are kept
-            // live at a GT_JMP.
-            //
-            // Blocks ending with "jmp method" are marked as BBJ_HAS_JMP,
-            // and jmp call is represented using GT_JMP node which is a leaf node.
-            // Liveness phase keeps all the arguments of the method live till the end of
-            // block by adding them to liveout set of the block containing GT_JMP.
-            //
-            // The target of a GT_JMP implicitly uses all the current method arguments, however
-            // there are no actual references to them. This can cause LSRA to assert, because
-            // the variables are live but it sees no references. In order to correctly model the
-            // liveness of these arguments, we add dummy exposed uses, in the same manner as for
-            // backward branches. This will happen automatically via expUseSet.
-            //
-            // Note that a block ending with GT_JMP has no successors and hence the variables
-            // for which dummy use ref positions are added are arguments of the method.
-
-            VARSET_TP expUseSet(VarSetOps::MakeCopy(compiler, block->bbLiveOut));
-            VarSetOps::IntersectionD(compiler, expUseSet, registerCandidateVars);
-            BasicBlock* nextBlock = getNextBlock();
-            if (nextBlock != nullptr)
-            {
-                VarSetOps::DiffD(compiler, expUseSet, nextBlock->bbLiveIn);
-            }
-            for (BasicBlock* succ : block->GetAllSuccs(compiler))
-            {
-                if (VarSetOps::IsEmpty(compiler, expUseSet))
-                {
-                    break;
-                }
-
-                if (isBlockVisited(succ))
-                {
-                    continue;
-                }
-                VarSetOps::DiffD(compiler, expUseSet, succ->bbLiveIn);
-            }
-
-            if (!VarSetOps::IsEmpty(compiler, expUseSet))
-            {
-                JITDUMP("Exposed uses:");
-                VarSetOps::Iter iter(compiler, expUseSet);
-                unsigned        varIndex = 0;
-                while (iter.NextElem(&varIndex))
-                {
-                    unsigned   varNum = compiler->lvaTrackedToVarNum[varIndex];
-                    LclVarDsc* varDsc = compiler->lvaTable + varNum;
-                    assert(isCandidateVar(varDsc));
-                    Interval*    interval = getIntervalForLocalVar(varIndex);
-                    RefPosition* pos =
-                        newRefPosition(interval, currentLoc, RefTypeExpUse, nullptr, allRegs(interval->registerType));
-                    JITDUMP(" V%02u", varNum);
-                }
-                JITDUMP("\n");
-            }
-
-            // Clear the "last use" flag on any vars that are live-out from this block.
- { - VarSetOps::Iter iter(compiler, block->bbLiveOut); - unsigned varIndex = 0; - while (iter.NextElem(&varIndex)) - { - unsigned varNum = compiler->lvaTrackedToVarNum[varIndex]; - LclVarDsc* const varDsc = &compiler->lvaTable[varNum]; - if (isCandidateVar(varDsc)) - { - RefPosition* const lastRP = getIntervalForLocalVar(varIndex)->lastRefPosition; - if ((lastRP != nullptr) && (lastRP->bbNum == block->bbNum)) - { - lastRP->lastUse = false; - } - } - } - } - -#ifdef DEBUG - checkLastUses(block); - - if (VERBOSE) - { - printf("use: "); - dumpConvertedVarSet(compiler, block->bbVarUse); - printf("\ndef: "); - dumpConvertedVarSet(compiler, block->bbVarDef); - printf("\n"); - } -#endif // DEBUG - } - - prevBlock = block; - } - - if (enregisterLocalVars) - { - if (compiler->lvaKeepAliveAndReportThis()) - { - // If we need to KeepAliveAndReportThis, add a dummy exposed use of it at the end - unsigned keepAliveVarNum = compiler->info.compThisArg; - assert(compiler->info.compIsStatic == false); - LclVarDsc* varDsc = compiler->lvaTable + keepAliveVarNum; - if (isCandidateVar(varDsc)) - { - JITDUMP("Adding exposed use of this, for lvaKeepAliveAndReportThis\n"); - Interval* interval = getIntervalForLocalVar(varDsc->lvVarIndex); - RefPosition* pos = - newRefPosition(interval, currentLoc, RefTypeExpUse, nullptr, allRegs(interval->registerType)); - } - } - -#ifdef DEBUG - if (getLsraExtendLifeTimes()) - { - LclVarDsc* varDsc; - for (lclNum = 0, varDsc = compiler->lvaTable; lclNum < compiler->lvaCount; lclNum++, varDsc++) - { - if (varDsc->lvLRACandidate) - { - JITDUMP("Adding exposed use of V%02u for LsraExtendLifetimes\n", lclNum); - Interval* interval = getIntervalForLocalVar(varDsc->lvVarIndex); - RefPosition* pos = - newRefPosition(interval, currentLoc, RefTypeExpUse, nullptr, allRegs(interval->registerType)); - } - } - } -#endif // DEBUG - } - - // If the last block has successors, create a RefTypeBB to record - // what's live - - if (prevBlock->NumSucc(compiler) > 0) - { - RefPosition* pos = newRefPosition((Interval*)nullptr, currentLoc, RefTypeBB, nullptr, RBM_NONE); - } - -#ifdef DEBUG - // Make sure we don't have any blocks that were not visited - foreach_block(compiler, block) - { - assert(isBlockVisited(block)); - } - - if (VERBOSE) - { - lsraDumpIntervals("BEFORE VALIDATING INTERVALS"); - dumpRefPositions("BEFORE VALIDATING INTERVALS"); - validateIntervals(); - } -#endif // DEBUG -} - #ifdef DEBUG void LinearScan::dumpVarRefPositions(const char* title) { @@ -4838,46 +2405,6 @@ void LinearScan::dumpVarRefPositions(const char* title) } } -void LinearScan::validateIntervals() -{ - if (enregisterLocalVars) - { - for (unsigned i = 0; i < compiler->lvaTrackedCount; i++) - { - if (!compiler->lvaTable[compiler->lvaTrackedToVarNum[i]].lvLRACandidate) - { - continue; - } - Interval* interval = getIntervalForLocalVar(i); - - bool defined = false; - printf("-----------------\n"); - for (RefPosition* ref = interval->firstRefPosition; ref != nullptr; ref = ref->nextRefPosition) - { - ref->dump(); - RefType refType = ref->refType; - if (!defined && RefTypeIsUse(refType)) - { - if (compiler->info.compMethodName != nullptr) - { - printf("%s: ", compiler->info.compMethodName); - } - printf("LocalVar V%02u: undefined use at %u\n", interval->varNum, ref->nodeLocation); - } - // Note that there can be multiple last uses if they are on disjoint paths, - // so we can't really check the lastUse flag - if (ref->lastUse) - { - defined = false; - } - if (RefTypeIsDef(refType)) - { - defined = true; - } - } - } - } -} 
#endif // DEBUG // Set the default rpFrameType based upon codeGen->isFramePointerRequired() @@ -10847,206 +8374,6 @@ void LinearScan::resolveEdge(BasicBlock* fromBlock, } } -//------------------------------------------------------------------------ -// GetIndirInfo: Get the source registers for an indirection that might be contained. -// -// Arguments: -// node - The node of interest -// -// Return Value: -// The number of source registers used by the *parent* of this node. -// -// Notes: -// Adds the defining node for each register to the useList. -// -int LinearScan::GetIndirInfo(GenTreeIndir* indirTree) -{ - GenTree* const addr = indirTree->gtOp1; - if (!addr->isContained()) - { - appendLocationInfoToList(addr); - return 1; - } - if (!addr->OperIs(GT_LEA)) - { - return 0; - } - - GenTreeAddrMode* const addrMode = addr->AsAddrMode(); - - unsigned srcCount = 0; - if ((addrMode->Base() != nullptr) && !addrMode->Base()->isContained()) - { - appendLocationInfoToList(addrMode->Base()); - srcCount++; - } - if ((addrMode->Index() != nullptr) && !addrMode->Index()->isContained()) - { - appendLocationInfoToList(addrMode->Index()); - srcCount++; - } - return srcCount; -} - -//------------------------------------------------------------------------ -// GetOperandInfo: Get the source registers for an operand that might be contained. -// -// Arguments: -// node - The node of interest -// useList - The list of uses for the node that we're currently processing -// -// Return Value: -// The number of source registers used by the *parent* of this node. -// -// Notes: -// Adds the defining node for each register to the given useList. -// -int LinearScan::GetOperandInfo(GenTree* node) -{ - if (!node->isContained()) - { - appendLocationInfoToList(node); - return 1; - } - -#if !defined(_TARGET_64BIT_) - if (node->OperIs(GT_LONG)) - { - return appendBinaryLocationInfoToList(node->AsOp()); - } -#endif // !defined(_TARGET_64BIT_) - if (node->OperIsIndir()) - { - const unsigned srcCount = GetIndirInfo(node->AsIndir()); - return srcCount; - } - - return 0; -} - -//------------------------------------------------------------------------ -// GetOperandInfo: Get the source registers for an operand that might be contained. -// -// Arguments: -// node - The node of interest -// useList - The list of uses for the node that we're currently processing -// -// Return Value: -// The number of source registers used by the *parent* of this node. -// -// Notes: -// Adds the defining node for each register to the useList. -// -int LinearScan::GetOperandInfo(GenTree* node, LocationInfoListNode** pFirstInfo) -{ - LocationInfoListNode* prevLast = useList.Last(); - int srcCount = GetOperandInfo(node); - if (prevLast == nullptr) - { - *pFirstInfo = useList.Begin(); - } - else - { - *pFirstInfo = prevLast->Next(); - } - return srcCount; -} - -void TreeNodeInfo::Initialize(LinearScan* lsra, GenTree* node) -{ - _dstCount = 0; - _srcCount = 0; - _internalIntCount = 0; - _internalFloatCount = 0; - - isLocalDefUse = false; - isDelayFree = false; - hasDelayFreeSrc = false; - isTgtPref = false; - isInternalRegDelayFree = false; - - regMaskTP dstCandidates; - - // if there is a reg indicated on the tree node, use that for dstCandidates - // the exception is the NOP, which sometimes show up around late args. - // TODO-Cleanup: get rid of those NOPs. 
- if (node->gtRegNum == REG_STK) - { - dstCandidates = RBM_NONE; - } - else if (node->gtRegNum == REG_NA || node->gtOper == GT_NOP) - { -#ifdef ARM_SOFTFP - if (node->OperGet() == GT_PUTARG_REG) - { - dstCandidates = lsra->allRegs(TYP_INT); - } - else -#endif - { - dstCandidates = lsra->allRegs(node->TypeGet()); - } - } - else - { - dstCandidates = genRegMask(node->gtRegNum); - } - - setDstCandidates(lsra, dstCandidates); - srcCandsIndex = dstCandsIndex; - - setInternalCandidates(lsra, lsra->allRegs(TYP_INT)); - -#ifdef DEBUG - isInitialized = true; -#endif - - assert(IsValid(lsra)); -} - -regMaskTP TreeNodeInfo::getSrcCandidates(LinearScan* lsra) -{ - return lsra->GetRegMaskForIndex(srcCandsIndex); -} - -void TreeNodeInfo::setSrcCandidates(LinearScan* lsra, regMaskTP mask) -{ - LinearScan::RegMaskIndex i = lsra->GetIndexForRegMask(mask); - assert(FitsIn<unsigned char>(i)); - srcCandsIndex = (unsigned char)i; -} - -regMaskTP TreeNodeInfo::getDstCandidates(LinearScan* lsra) -{ - return lsra->GetRegMaskForIndex(dstCandsIndex); -} - -void TreeNodeInfo::setDstCandidates(LinearScan* lsra, regMaskTP mask) -{ - LinearScan::RegMaskIndex i = lsra->GetIndexForRegMask(mask); - assert(FitsIn<unsigned char>(i)); - dstCandsIndex = (unsigned char)i; -} - -regMaskTP TreeNodeInfo::getInternalCandidates(LinearScan* lsra) -{ - return lsra->GetRegMaskForIndex(internalCandsIndex); -} - -void TreeNodeInfo::setInternalCandidates(LinearScan* lsra, regMaskTP mask) -{ - LinearScan::RegMaskIndex i = lsra->GetIndexForRegMask(mask); - assert(FitsIn<unsigned char>(i)); - internalCandsIndex = (unsigned char)i; -} - -void TreeNodeInfo::addInternalCandidates(LinearScan* lsra, regMaskTP mask) -{ - LinearScan::RegMaskIndex i = lsra->GetIndexForRegMask(lsra->GetRegMaskForIndex(internalCandsIndex) | mask); - assert(FitsIn<unsigned char>(i)); - internalCandsIndex = (unsigned char)i; -} - #if TRACK_LSRA_STATS // ---------------------------------------------------------- // updateLsraStat: Increment LSRA stat counter. @@ -11616,18 +8943,7 @@ void LinearScan::lsraDispNode(GenTree* tree, LsraTupleDumpMode mode, bool hasDes } //------------------------------------------------------------------------ -// ComputeOperandDstCount: computes the number of registers defined by a -// node. -// -// For most nodes, this is simple: -// - Nodes that do not produce values (e.g. stores and other void-typed -// nodes) and nodes that immediately use the registers they define -// produce no registers -// - Nodes that are marked as defining N registers define N registers. -// -// For contained nodes, however, things are more complicated: for purposes -// of bookkeeping, a contained node is treated as producing the transitive -// closure of the registers produced by its sources. +// DumpOperandDefs: dumps the registers defined by a node. // // Arguments: // operand - The operand for which to compute a register count. 
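The TreeNodeInfo accessors removed above do not store a full regMaskTP per candidate set; they intern each mask through GetIndexForRegMask/GetRegMaskForIndex and keep only a byte-sized index (hence the FitsIn<unsigned char> asserts). The following is a minimal illustrative sketch of that interning idea, not the JIT's actual implementation: the table type, its member names, and the std::vector storage are hypothetical stand-ins for LinearScan's real mask table.

    // Illustrative sketch only: interning register masks into small indices so
    // a per-node struct can hold several candidate sets in unsigned char fields.
    #include <cassert>
    #include <vector>

    typedef unsigned long long regMaskTP; // stand-in for the JIT's register mask type
    typedef int RegMaskIndex;

    class RegMaskTable
    {
        std::vector<regMaskTP> m_masks; // index -> mask; common masks end up first

    public:
        // Return the index for 'mask', appending it if it has not been seen before.
        RegMaskIndex GetIndexForRegMask(regMaskTP mask)
        {
            for (size_t i = 0; i < m_masks.size(); i++)
            {
                if (m_masks[i] == mask)
                {
                    return (RegMaskIndex)i;
                }
            }
            m_masks.push_back(mask);
            RegMaskIndex index = (RegMaskIndex)(m_masks.size() - 1);
            assert(index <= 0xFF); // mirrors the FitsIn<unsigned char> asserts above
            return index;
        }

        regMaskTP GetRegMaskForIndex(RegMaskIndex index)
        {
            return m_masks[(size_t)index];
        }
    };

In practice only a small number of distinct candidate masks occur per method, so the linear search stays cheap while TreeNodeInfo stays compact.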
diff --git a/src/jit/lsra.h b/src/jit/lsra.h
index a4981417cf..1814a55c01 100644
--- a/src/jit/lsra.h
+++ b/src/jit/lsra.h
@@ -30,15 +30,60 @@ const unsigned int MaxLocation = UINT_MAX;
 const unsigned int MaxInternalRegisters = 8;
 const unsigned int RegisterTypeCount = 2;
 
+/*****************************************************************************
+* Register types
+*****************************************************************************/
 typedef var_types RegisterType;
 #define IntRegisterType TYP_INT
 #define FloatRegisterType TYP_FLOAT
 
+//------------------------------------------------------------------------
+// regType: Return the RegisterType to use for a given type
+//
+// Arguments:
+//    type - the type of interest
+//
+template <class T>
+RegisterType regType(T type)
+{
+#ifdef FEATURE_SIMD
+    if (varTypeIsSIMD(type))
+    {
+        return FloatRegisterType;
+    }
+#endif // FEATURE_SIMD
+    return varTypeIsFloating(TypeGet(type)) ? FloatRegisterType : IntRegisterType;
+}
+
+//------------------------------------------------------------------------
+// useFloatReg: Check if the given var_type should be allocated to a FloatRegisterType
+//
+inline bool useFloatReg(var_types type)
+{
+    return (regType(type) == FloatRegisterType);
+}
+
+//------------------------------------------------------------------------
+// registerTypesEquivalent: Check to see if two RegisterTypes are equivalent
+//
+inline bool registerTypesEquivalent(RegisterType a, RegisterType b)
+{
+    return varTypeIsIntegralOrI(a) == varTypeIsIntegralOrI(b);
+}
+
+//------------------------------------------------------------------------
+// calleeSaveRegs: Get the set of callee-save registers of the given RegisterType
+//
 inline regMaskTP calleeSaveRegs(RegisterType rt)
 {
     return varTypeIsIntegralOrI(rt) ? RBM_INT_CALLEE_SAVED : RBM_FLT_CALLEE_SAVED;
 }
 
+//------------------------------------------------------------------------
+// LocationInfo: Captures the necessary information for a node that is "in-flight"
+//               during `buildIntervals` (i.e. its definition has been encountered,
+//               but not its use).
+//
 struct LocationInfo
 {
     Interval* interval;
@@ -87,7 +132,7 @@ public:
 // node during `buildIntervals`.
 //
 // This list of 'LocationInfoListNode's contains the source nodes consumed by
-// a node, and is created by 'TreeNodeInfoInit'.
+// a node, and is created by 'BuildNode'.
 //
 class LocationInfoList final
 {
@@ -228,10 +273,10 @@ public:
     }
 
     //------------------------------------------------------------------------
-    // GetTreeNodeInfo - retrieve the TreeNodeInfo for the given node
+    // removeListNode - retrieve the TreeNodeInfo for the given node
     //
     // Notes:
-    //     The TreeNodeInfoInit methods use this helper to retrieve the TreeNodeInfo for child nodes
+    //     The BuildNode methods use this helper to retrieve the TreeNodeInfo for child nodes
     //     from the useList being constructed. Note that, if the user knows the order of the operands,
     //     it is expected that they should just retrieve them directly.
 
@@ -260,7 +305,7 @@ public:
         }
         prevListNode = listNode;
     }
-    assert(!"GetTreeNodeInfo didn't find the node");
+    assert(!"removeListNode didn't find the node");
     unreached();
 }
 
@@ -268,7 +313,7 @@ public:
     // GetTreeNodeInfo - retrieve the TreeNodeInfo for the given node
     //
     // Notes:
-    //     The TreeNodeInfoInit methods use this helper to retrieve the TreeNodeInfo for child nodes
+    //     The Build methods use this helper to retrieve the TreeNodeInfo for child nodes
    //     from the useList being constructed.
Note that, if the user knows the order of the operands, // it is expected that they should just retrieve them directly. @@ -300,6 +345,35 @@ public: } }; +//------------------------------------------------------------------------ +// LocationInfoListNodePool: manages a pool of `LocationInfoListNode` +// values to decrease overall memory usage +// during `buildIntervals`. +// +// `buildIntervals` involves creating a list of location info values per +// node that either directly produces a set of registers or that is a +// contained node with register-producing sources. However, these lists +// are short-lived: they are destroyed once the use of the corresponding +// node is processed. As such, there is typically only a small number of +// `LocationInfoListNode` values in use at any given time. Pooling these +// values avoids otherwise frequent allocations. +class LocationInfoListNodePool final +{ + LocationInfoListNode* m_freeList; + Compiler* m_compiler; + static const unsigned defaultPreallocation = 8; + +public: + // Creates a pool of `LocationInfoListNode` values. + LocationInfoListNodePool(Compiler* compiler, unsigned preallocate = defaultPreallocation); + + // Fetches an unused node from the pool. + LocationInfoListNode* GetNode(LsraLocation l, Interval* i, GenTree* t, unsigned regIdx = 0); + + // Returns a list of nodes to the pool. + void ReturnNodes(LocationInfoList& list); +}; + struct LsraBlockInfo { // bbNum of the predecessor to use for the register location of live-in variables. @@ -623,6 +697,11 @@ public: regMaskTP GetRegMaskForIndex(RegMaskIndex index); void RemoveRegisterFromMasks(regNumber reg); + static bool isSingleRegister(regMaskTP regMask) + { + return (genExactlyOneBit(regMask)); + } + #ifdef DEBUG void dspRegisterMaskTable(); #endif // DEBUG @@ -954,6 +1033,8 @@ private: #ifdef DEBUG void checkLastUses(BasicBlock* block); + static int ComputeOperandDstCount(GenTree* operand); + static int ComputeAvailableSrcCount(GenTree* node); #endif // DEBUG void setFrameType(); @@ -993,10 +1074,7 @@ private: void resolveConflictingDefAndUse(Interval* interval, RefPosition* defRefPosition); - void buildRefPositionsForNode(GenTree* tree, - BasicBlock* block, - LocationInfoListNodePool& listNodePool, - LsraLocation loc); + void buildRefPositionsForNode(GenTree* tree, BasicBlock* block, LsraLocation loc); #if FEATURE_PARTIAL_SIMD_CALLEE_SAVE VARSET_VALRET_TP buildUpperVectorSaveRefPositions(GenTree* tree, LsraLocation currentLoc); @@ -1053,7 +1131,11 @@ private: void freeRegister(RegRecord* physRegRecord); void freeRegisters(regMaskTP regsToFree); - var_types getDefType(GenTree* tree); + // Get the type that this tree defines. + var_types getDefType(GenTree* tree) + { + return tree->TypeGet(); + } RefPosition* defineNewInternalTemp(GenTree* tree, RegisterType regType, regMaskTP regMask); @@ -1479,19 +1561,27 @@ private: #endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE //----------------------------------------------------------------------- - // TreeNodeInfo methods + // Build methods //----------------------------------------------------------------------- + // The listNodePool is used to maintain the TreeNodeInfo for nodes that are "in flight" + // i.e. whose consuming node has not yet been handled. + LocationInfoListNodePool listNodePool; + // The defList is used for the transient TreeNodeInfo that is computed by - // the TreeNodeInfoInit methods, and used in building RefPositions. + // the Build methods, and used in building RefPositions. 
// When Def RefPositions are built for a node, their NodeInfo is placed // in the defList. As the consuming node is handled, it moves the NodeInfo // into an ordered useList corresponding to the uses for that node. LocationInfoList defList; - // The useList is constructed for each node by the TreeNodeInfoInit methods. + + // The useList is constructed for each node by the Build methods. // It contains the TreeNodeInfo for its operands, in their order of use. LocationInfoList useList; + // During the build phase, this is the NodeInfo for the current node. + TreeNodeInfo* currentNodeInfo; + // Remove the LocationInfoListNode for the given node from the defList, and put it into the useList. // The node must not be contained, and must have been processed by buildRefPositionsForNode(). void appendLocationInfoToList(GenTree* node) @@ -1545,34 +1635,34 @@ private: } // This is the main entry point for computing the TreeNodeInfo for a node. - void TreeNodeInfoInit(GenTree* stmt, TreeNodeInfo* info); + void BuildNode(GenTree* stmt); - void TreeNodeInfoInitCheckByteable(GenTree* tree, TreeNodeInfo* info); + void BuildCheckByteable(GenTree* tree); bool CheckAndSetDelayFree(GenTree* delayUseSrc); - void TreeNodeInfoInitSimple(GenTree* tree, TreeNodeInfo* info); + void BuildSimple(GenTree* tree); int GetOperandInfo(GenTree* node); int GetOperandInfo(GenTree* node, LocationInfoListNode** pFirstInfo); int GetIndirInfo(GenTreeIndir* indirTree); - void HandleFloatVarArgs(GenTreeCall* call, TreeNodeInfo* info, GenTree* argNode, bool* callHasFloatRegArgs); + void HandleFloatVarArgs(GenTreeCall* call, GenTree* argNode, bool* callHasFloatRegArgs); - void TreeNodeInfoInitStoreLoc(GenTree* tree, TreeNodeInfo* info); - void TreeNodeInfoInitReturn(GenTree* tree, TreeNodeInfo* info); + void BuildStoreLoc(GenTree* tree); + void BuildReturn(GenTree* tree); // This method, unlike the others, returns the number of sources, since it may be called when // 'tree' is contained. 
- int TreeNodeInfoInitShiftRotate(GenTree* tree, TreeNodeInfo* info); - void TreeNodeInfoInitPutArgReg(GenTreeUnOp* node, TreeNodeInfo* info); - void TreeNodeInfoInitCall(GenTreeCall* call, TreeNodeInfo* info); - void TreeNodeInfoInitCmp(GenTree* tree, TreeNodeInfo* info); - void TreeNodeInfoInitStructArg(GenTree* structArg, TreeNodeInfo* info); - void TreeNodeInfoInitBlockStore(GenTreeBlk* blkNode, TreeNodeInfo* info); - void TreeNodeInfoInitModDiv(GenTree* tree, TreeNodeInfo* info); - void TreeNodeInfoInitIntrinsic(GenTree* tree, TreeNodeInfo* info); - void TreeNodeInfoInitStoreLoc(GenTreeLclVarCommon* tree, TreeNodeInfo* info); - void TreeNodeInfoInitIndir(GenTreeIndir* indirTree, TreeNodeInfo* info); - void TreeNodeInfoInitGCWriteBarrier(GenTree* tree, TreeNodeInfo* info); - void TreeNodeInfoInitCast(GenTree* tree, TreeNodeInfo* info); + int BuildShiftRotate(GenTree* tree); + void BuildPutArgReg(GenTreeUnOp* node); + void BuildCall(GenTreeCall* call); + void BuildCmp(GenTree* tree); + void BuildStructArg(GenTree* structArg); + void BuildBlockStore(GenTreeBlk* blkNode); + void BuildModDiv(GenTree* tree); + void BuildIntrinsic(GenTree* tree); + void BuildStoreLoc(GenTreeLclVarCommon* tree); + void BuildIndir(GenTreeIndir* indirTree); + void BuildGCWriteBarrier(GenTree* tree); + void BuildCast(GenTree* tree); #ifdef _TARGET_X86_ bool ExcludeNonByteableRegisters(GenTree* tree); @@ -1581,23 +1671,23 @@ private: #if defined(_TARGET_XARCH_) // returns true if the tree can use the read-modify-write memory instruction form bool isRMWRegOper(GenTree* tree); - void TreeNodeInfoInitMul(GenTree* tree, TreeNodeInfo* info); + void BuildMul(GenTree* tree); void SetContainsAVXFlags(bool isFloatingPointType = true, unsigned sizeOfSIMDVector = 0); #endif // defined(_TARGET_XARCH_) #ifdef FEATURE_SIMD - void TreeNodeInfoInitSIMD(GenTreeSIMD* tree, TreeNodeInfo* info); + void BuildSIMD(GenTreeSIMD* tree); #endif // FEATURE_SIMD #ifdef FEATURE_HW_INTRINSICS - void TreeNodeInfoInitHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, TreeNodeInfo* info); + void BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree); #endif // FEATURE_HW_INTRINSICS - void TreeNodeInfoInitPutArgStk(GenTreePutArgStk* argNode, TreeNodeInfo* info); + void BuildPutArgStk(GenTreePutArgStk* argNode); #ifdef _TARGET_ARM_ - void TreeNodeInfoInitPutArgSplit(GenTreePutArgSplit* tree, TreeNodeInfo* info); + void BuildPutArgSplit(GenTreePutArgSplit* tree); #endif - void TreeNodeInfoInitLclHeap(GenTree* tree, TreeNodeInfo* info); + void BuildLclHeap(GenTree* tree); }; /*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX diff --git a/src/jit/lsraarm.cpp b/src/jit/lsraarm.cpp index 6e0cf22d40..fb3df19139 100644 --- a/src/jit/lsraarm.cpp +++ b/src/jit/lsraarm.cpp @@ -29,86 +29,9 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX #include "lower.h" #include "lsra.h" -//------------------------------------------------------------------------ -// TreeNodeInfoInitReturn: Set the NodeInfo for a GT_RETURN. -// -// Arguments: -// tree - The node of interest -// -// Return Value: -// None. 
-// -void LinearScan::TreeNodeInfoInitReturn(GenTree* tree, TreeNodeInfo* info) -{ - GenTree* op1 = tree->gtGetOp1(); - - assert(info->dstCount == 0); - if (tree->TypeGet() == TYP_LONG) - { - assert((op1->OperGet() == GT_LONG) && op1->isContained()); - GenTree* loVal = op1->gtGetOp1(); - GenTree* hiVal = op1->gtGetOp2(); - info->srcCount = 2; - LocationInfoListNode* loValInfo = getLocationInfo(loVal); - LocationInfoListNode* hiValInfo = getLocationInfo(hiVal); - loValInfo->info.setSrcCandidates(this, RBM_LNGRET_LO); - hiValInfo->info.setSrcCandidates(this, RBM_LNGRET_HI); - useList.Append(loValInfo); - useList.Append(hiValInfo); - } - else if ((tree->TypeGet() != TYP_VOID) && !op1->isContained()) - { - regMaskTP useCandidates = RBM_NONE; - - info->srcCount = ((tree->TypeGet() == TYP_VOID) || op1->isContained()) ? 0 : 1; - - if (varTypeIsStruct(tree)) - { - // op1 has to be either an lclvar or a multi-reg returning call - if (op1->OperGet() != GT_LCL_VAR) - { - noway_assert(op1->IsMultiRegCall()); - - ReturnTypeDesc* retTypeDesc = op1->AsCall()->GetReturnTypeDesc(); - info->srcCount = retTypeDesc->GetReturnRegCount(); - useCandidates = retTypeDesc->GetABIReturnRegs(); - } - } - else - { - // Non-struct type return - determine useCandidates - switch (tree->TypeGet()) - { - case TYP_VOID: - useCandidates = RBM_NONE; - break; - case TYP_FLOAT: - useCandidates = RBM_FLOATRET; - break; - case TYP_DOUBLE: - // We ONLY want the valid double register in the RBM_DOUBLERET mask. - useCandidates = (RBM_DOUBLERET & RBM_ALLDOUBLE); - break; - case TYP_LONG: - useCandidates = RBM_LNGRET; - break; - default: - useCandidates = RBM_INTRET; - break; - } - } - - LocationInfoListNode* locationInfo = getLocationInfo(op1); - if (useCandidates != RBM_NONE) - { - locationInfo->info.setSrcCandidates(this, useCandidates); - } - useList.Append(locationInfo); - } -} - -void LinearScan::TreeNodeInfoInitLclHeap(GenTree* tree, TreeNodeInfo* info) +void LinearScan::BuildLclHeap(GenTree* tree) { + TreeNodeInfo* info = currentNodeInfo; assert(info->dstCount == 1); // Need a variable number of temp regs (see genLclHeap() in codegenarm.cpp): @@ -190,7 +113,7 @@ void LinearScan::TreeNodeInfoInitLclHeap(GenTree* tree, TreeNodeInfo* info) } //------------------------------------------------------------------------ -// TreeNodeInfoInit: Set the register requirements for RA. +// BuildNode: Set the register requirements for RA. // // Notes: // Takes care of annotating the register requirements @@ -205,10 +128,11 @@ void LinearScan::TreeNodeInfoInitLclHeap(GenTree* tree, TreeNodeInfo* info) // requirements needed by LSRA to build the Interval Table (source, // destination and internal [temp] register counts). 
// -void LinearScan::TreeNodeInfoInit(GenTree* tree, TreeNodeInfo* info) +void LinearScan::BuildNode(GenTree* tree) { - unsigned kind = tree->OperKind(); - RegisterType registerType = TypeGet(tree); + TreeNodeInfo* info = currentNodeInfo; + unsigned kind = tree->OperKind(); + RegisterType registerType = TypeGet(tree); if (tree->isContained()) { @@ -238,7 +162,7 @@ void LinearScan::TreeNodeInfoInit(GenTree* tree, TreeNodeInfo* info) case GT_STORE_LCL_FLD: case GT_STORE_LCL_VAR: - TreeNodeInfoInitStoreLoc(tree->AsLclVarCommon(), info); + BuildStoreLoc(tree->AsLclVarCommon()); break; case GT_NOP: @@ -273,7 +197,7 @@ void LinearScan::TreeNodeInfoInit(GenTree* tree, TreeNodeInfo* info) assert(info->dstCount == 1); break; default: - NYI_ARM("LinearScan::TreeNodeInfoInit for GT_INTRINSIC"); + NYI_ARM("LinearScan::Build for GT_INTRINSIC"); break; } } @@ -492,7 +416,7 @@ void LinearScan::TreeNodeInfoInit(GenTree* tree, TreeNodeInfo* info) break; case GT_RETURN: - TreeNodeInfoInitReturn(tree, info); + BuildReturn(tree); break; case GT_RETFILT: @@ -630,7 +554,7 @@ void LinearScan::TreeNodeInfoInit(GenTree* tree, TreeNodeInfo* info) case GT_ROR: case GT_LSH_HI: case GT_RSH_LO: - TreeNodeInfoInitShiftRotate(tree, info); + BuildShiftRotate(tree); break; case GT_EQ: @@ -640,7 +564,7 @@ void LinearScan::TreeNodeInfoInit(GenTree* tree, TreeNodeInfo* info) case GT_GE: case GT_GT: case GT_CMP: - TreeNodeInfoInitCmp(tree, info); + BuildCmp(tree); break; case GT_CKFINITE: @@ -651,7 +575,7 @@ void LinearScan::TreeNodeInfoInit(GenTree* tree, TreeNodeInfo* info) break; case GT_CALL: - TreeNodeInfoInitCall(tree->AsCall(), info); + BuildCall(tree->AsCall()); break; case GT_ADDR: @@ -668,7 +592,7 @@ void LinearScan::TreeNodeInfoInit(GenTree* tree, TreeNodeInfo* info) case GT_STORE_BLK: case GT_STORE_OBJ: case GT_STORE_DYN_BLK: - TreeNodeInfoInitBlockStore(tree->AsBlk(), info); + BuildBlockStore(tree->AsBlk()); break; case GT_INIT_VAL: @@ -677,7 +601,7 @@ void LinearScan::TreeNodeInfoInit(GenTree* tree, TreeNodeInfo* info) break; case GT_LCLHEAP: - TreeNodeInfoInitLclHeap(tree, info); + BuildLclHeap(tree); break; case GT_STOREIND: @@ -688,11 +612,11 @@ void LinearScan::TreeNodeInfoInit(GenTree* tree, TreeNodeInfo* info) if (compiler->codeGen->gcInfo.gcIsWriteBarrierAsgNode(tree)) { info->srcCount = 2; - TreeNodeInfoInitGCWriteBarrier(tree, info); + BuildGCWriteBarrier(tree); break; } - TreeNodeInfoInitIndir(tree->AsIndir(), info); + BuildIndir(tree->AsIndir()); // No contained source on ARM. 
assert(!src->isContained()); info->srcCount++; @@ -712,7 +636,7 @@ void LinearScan::TreeNodeInfoInit(GenTree* tree, TreeNodeInfo* info) case GT_IND: assert(info->dstCount == 1); info->srcCount = 1; - TreeNodeInfoInitIndir(tree->AsIndir(), info); + BuildIndir(tree->AsIndir()); break; case GT_CATCH_ARG: @@ -751,15 +675,15 @@ void LinearScan::TreeNodeInfoInit(GenTree* tree, TreeNodeInfo* info) break; case GT_PUTARG_SPLIT: - TreeNodeInfoInitPutArgSplit(tree->AsPutArgSplit(), info); + BuildPutArgSplit(tree->AsPutArgSplit()); break; case GT_PUTARG_STK: - TreeNodeInfoInitPutArgStk(tree->AsPutArgStk(), info); + BuildPutArgStk(tree->AsPutArgStk()); break; case GT_PUTARG_REG: - TreeNodeInfoInitPutArgReg(tree->AsUnOp(), info); + BuildPutArgReg(tree->AsUnOp()); break; case GT_BITCAST: @@ -792,7 +716,7 @@ void LinearScan::TreeNodeInfoInit(GenTree* tree, TreeNodeInfo* info) GenTree::OpName(tree->OperGet())); NYIRAW(message); #else - NYI_ARM("TreeNodeInfoInit default case"); + NYI_ARM("BuildNode default case"); #endif case GT_LCL_FLD: case GT_LCL_FLD_ADDR: @@ -808,19 +732,7 @@ void LinearScan::TreeNodeInfoInit(GenTree* tree, TreeNodeInfo* info) case GT_SETCC: case GT_MEMORYBARRIER: case GT_OBJ: - assert(info->dstCount == (tree->IsValue() ? 1 : 0)); - if (kind & (GTK_CONST | GTK_LEAF)) - { - info->srcCount = 0; - } - else if (kind & (GTK_SMPOP)) - { - info->srcCount = appendBinaryLocationInfoToList(tree->AsOp()); - } - else - { - unreached(); - } + BuildSimple(tree); break; case GT_INDEX_ADDR: diff --git a/src/jit/lsraarm64.cpp b/src/jit/lsraarm64.cpp index 2bfcfa2651..990f39def0 100644 --- a/src/jit/lsraarm64.cpp +++ b/src/jit/lsraarm64.cpp @@ -29,7 +29,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX #include "lower.h" //------------------------------------------------------------------------ -// TreeNodeInfoInit: Set the register requirements for RA. +// BuildNode: Set the register requirements for RA. // // Notes: // Takes care of annotating the register requirements @@ -44,10 +44,11 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX // requirements needed by LSRA to build the Interval Table (source, // destination and internal [temp] register counts). 
// -void LinearScan::TreeNodeInfoInit(GenTree* tree, TreeNodeInfo* info) +void LinearScan::BuildNode(GenTree* tree) { - unsigned kind = tree->OperKind(); - RegisterType registerType = TypeGet(tree); + TreeNodeInfo* info = currentNodeInfo; + unsigned kind = tree->OperKind(); + RegisterType registerType = TypeGet(tree); if (tree->isContained()) { @@ -76,25 +77,14 @@ void LinearScan::TreeNodeInfoInit(GenTree* tree, TreeNodeInfo* info) GenTree* op2; default: - if (kind & (GTK_CONST | GTK_LEAF)) - { - info->srcCount = 0; - } - else if (kind & (GTK_SMPOP)) - { - info->srcCount = appendBinaryLocationInfoToList(tree->AsOp()); - } - else - { - unreached(); - } + BuildSimple(tree); break; case GT_STORE_LCL_FLD: case GT_STORE_LCL_VAR: info->srcCount = 1; assert(info->dstCount == 0); - TreeNodeInfoInitStoreLoc(tree->AsLclVarCommon(), info); + BuildStoreLoc(tree->AsLclVarCommon()); break; case GT_LIST: @@ -136,7 +126,7 @@ void LinearScan::TreeNodeInfoInit(GenTree* tree, TreeNodeInfo* info) break; case GT_RETURN: - TreeNodeInfoInitReturn(tree, info); + BuildReturn(tree); break; case GT_RETFILT: @@ -281,13 +271,13 @@ void LinearScan::TreeNodeInfoInit(GenTree* tree, TreeNodeInfo* info) #ifdef FEATURE_SIMD case GT_SIMD: - TreeNodeInfoInitSIMD(tree->AsSIMD(), info); + BuildSIMD(tree->AsSIMD()); break; #endif // FEATURE_SIMD #ifdef FEATURE_HW_INTRINSICS case GT_HWIntrinsic: - TreeNodeInfoInitHWIntrinsic(tree->AsHWIntrinsic(), info); + BuildHWIntrinsic(tree->AsHWIntrinsic()); break; #endif // FEATURE_HW_INTRINSICS @@ -350,7 +340,7 @@ void LinearScan::TreeNodeInfoInit(GenTree* tree, TreeNodeInfo* info) case GT_RSH: case GT_RSZ: case GT_ROR: - TreeNodeInfoInitShiftRotate(tree, info); + BuildShiftRotate(tree); break; case GT_EQ: @@ -362,7 +352,7 @@ void LinearScan::TreeNodeInfoInit(GenTree* tree, TreeNodeInfo* info) case GT_TEST_EQ: case GT_TEST_NE: case GT_JCMP: - TreeNodeInfoInitCmp(tree, info); + BuildCmp(tree); break; case GT_CKFINITE: @@ -429,15 +419,15 @@ void LinearScan::TreeNodeInfoInit(GenTree* tree, TreeNodeInfo* info) break; case GT_PUTARG_STK: - TreeNodeInfoInitPutArgStk(tree->AsPutArgStk(), info); + BuildPutArgStk(tree->AsPutArgStk()); break; case GT_PUTARG_REG: - TreeNodeInfoInitPutArgReg(tree->AsUnOp(), info); + BuildPutArgReg(tree->AsUnOp()); break; case GT_CALL: - TreeNodeInfoInitCall(tree->AsCall(), info); + BuildCall(tree->AsCall()); break; case GT_ADDR: @@ -461,7 +451,7 @@ void LinearScan::TreeNodeInfoInit(GenTree* tree, TreeNodeInfo* info) case GT_STORE_BLK: case GT_STORE_OBJ: case GT_STORE_DYN_BLK: - TreeNodeInfoInitBlockStore(tree->AsBlk(), info); + BuildBlockStore(tree->AsBlk()); break; case GT_INIT_VAL: @@ -669,11 +659,11 @@ void LinearScan::TreeNodeInfoInit(GenTree* tree, TreeNodeInfo* info) if (compiler->codeGen->gcInfo.gcIsWriteBarrierAsgNode(tree)) { info->srcCount = 2; - TreeNodeInfoInitGCWriteBarrier(tree, info); + BuildGCWriteBarrier(tree); break; } - TreeNodeInfoInitIndir(tree->AsIndir(), info); + BuildIndir(tree->AsIndir()); if (!tree->gtGetOp2()->isContained()) { appendLocationInfoToList(tree->gtGetOp2()); @@ -693,7 +683,7 @@ void LinearScan::TreeNodeInfoInit(GenTree* tree, TreeNodeInfo* info) case GT_IND: assert(info->dstCount == 1); - TreeNodeInfoInitIndir(tree->AsIndir(), info); + BuildIndir(tree->AsIndir()); break; case GT_CATCH_ARG: @@ -732,72 +722,9 @@ void LinearScan::TreeNodeInfoInit(GenTree* tree, TreeNodeInfo* info) assert(info->dstCount == tree->GetRegisterDstCount()); } -//------------------------------------------------------------------------ -// 
TreeNodeInfoInitReturn: Set the NodeInfo for a GT_RETURN. -// -// Arguments: -// tree - The node of interest -// -// Return Value: -// None. -// -void LinearScan::TreeNodeInfoInitReturn(GenTree* tree, TreeNodeInfo* info) -{ - GenTree* op1 = tree->gtGetOp1(); - regMaskTP useCandidates = RBM_NONE; - - info->srcCount = ((tree->TypeGet() == TYP_VOID) || op1->isContained()) ? 0 : 1; - assert(info->dstCount == 0); - - if ((tree->TypeGet() != TYP_VOID) && !op1->isContained()) - { - if (varTypeIsStruct(tree)) - { - // op1 has to be either an lclvar or a multi-reg returning call - if (op1->OperGet() != GT_LCL_VAR) - { - noway_assert(op1->IsMultiRegCall()); - - ReturnTypeDesc* retTypeDesc = op1->AsCall()->GetReturnTypeDesc(); - info->srcCount = retTypeDesc->GetReturnRegCount(); - useCandidates = retTypeDesc->GetABIReturnRegs(); - } - } - else - { - // Non-struct type return - determine useCandidates - switch (tree->TypeGet()) - { - case TYP_VOID: - useCandidates = RBM_NONE; - break; - case TYP_FLOAT: - useCandidates = RBM_FLOATRET; - break; - case TYP_DOUBLE: - useCandidates = RBM_DOUBLERET; - break; - case TYP_LONG: - useCandidates = RBM_LNGRET; - break; - default: - useCandidates = RBM_INTRET; - break; - } - } - - LocationInfoListNode* locationInfo = getLocationInfo(op1); - if (useCandidates != RBM_NONE) - { - locationInfo->info.setSrcCandidates(this, useCandidates); - } - useList.Append(locationInfo); - } -} - #ifdef FEATURE_SIMD //------------------------------------------------------------------------ -// TreeNodeInfoInitSIMD: Set the NodeInfo for a GT_SIMD tree. +// BuildSIMD: Set the NodeInfo for a GT_SIMD tree. // // Arguments: // tree - The GT_SIMD node of interest @@ -805,8 +732,9 @@ void LinearScan::TreeNodeInfoInitReturn(GenTree* tree, TreeNodeInfo* info) // Return Value: // None. -void LinearScan::TreeNodeInfoInitSIMD(GenTreeSIMD* simdTree, TreeNodeInfo* info) +void LinearScan::BuildSIMD(GenTreeSIMD* simdTree) { + TreeNodeInfo* info = currentNodeInfo; // Only SIMDIntrinsicInit can be contained if (simdTree->isContained()) { @@ -983,7 +911,7 @@ void LinearScan::TreeNodeInfoInitSIMD(GenTreeSIMD* simdTree, TreeNodeInfo* info) #ifdef FEATURE_HW_INTRINSICS //------------------------------------------------------------------------ -// TreeNodeInfoInitHWIntrinsic: Set the NodeInfo for a GT_HWIntrinsic tree. +// BuildHWIntrinsic: Set the NodeInfo for a GT_HWIntrinsic tree. // // Arguments: // tree - The GT_HWIntrinsic node of interest @@ -991,8 +919,9 @@ void LinearScan::TreeNodeInfoInitSIMD(GenTreeSIMD* simdTree, TreeNodeInfo* info) // Return Value: // None. 
-void LinearScan::TreeNodeInfoInitHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, TreeNodeInfo* info) +void LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree) { + TreeNodeInfo* info = currentNodeInfo; NamedIntrinsic intrinsicID = intrinsicTree->gtHWIntrinsicId; info->srcCount += GetOperandInfo(intrinsicTree->gtOp.gtOp1); if (intrinsicTree->gtGetOp2IfPresent() != nullptr) diff --git a/src/jit/lsraarmarch.cpp b/src/jit/lsraarmarch.cpp index 189a37697a..0bdc13ef42 100644 --- a/src/jit/lsraarmarch.cpp +++ b/src/jit/lsraarmarch.cpp @@ -29,142 +29,15 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX #include "lsra.h" //------------------------------------------------------------------------ -// TreeNodeInfoInitStoreLoc: Set register requirements for a store of a lclVar -// -// Arguments: -// storeLoc - the local store (GT_STORE_LCL_FLD or GT_STORE_LCL_VAR) -// -// Notes: -// This involves: -// - Setting the appropriate candidates for a store of a multi-reg call return value. -// - Handling of contained immediates. -// -void LinearScan::TreeNodeInfoInitStoreLoc(GenTreeLclVarCommon* storeLoc, TreeNodeInfo* info) -{ - GenTree* op1 = storeLoc->gtGetOp1(); - - assert(info->dstCount == 0); - if (op1->IsMultiRegCall()) - { - // This is the case of var = call where call is returning - // a value in multiple return registers. - // Must be a store lclvar. - assert(storeLoc->OperGet() == GT_STORE_LCL_VAR); - - // srcCount = number of registers in which the value is returned by call - GenTreeCall* call = op1->AsCall(); - ReturnTypeDesc* retTypeDesc = call->GetReturnTypeDesc(); - unsigned regCount = retTypeDesc->GetReturnRegCount(); - info->srcCount = regCount; - - // Call node srcCandidates = Bitwise-OR(allregs(GetReturnRegType(i))) for all i=0..RetRegCount-1 - regMaskTP srcCandidates = allMultiRegCallNodeRegs(call); - LocationInfoListNode* locInfo = getLocationInfo(op1); - locInfo->info.setSrcCandidates(this, srcCandidates); - useList.Append(locInfo); - } -#ifdef _TARGET_ARM_ - else if (varTypeIsLong(op1)) - { - // The only possible operands for a GT_STORE_LCL_VAR are a multireg call node, which we have - // handled above, or a GT_LONG node. - assert(!op1->OperIs(GT_LONG) || op1->isContained()); - info->srcCount = 2; - // TODO: Currently, GetOperandInfo always returns 1 for any non-contained node. - // Consider enhancing it to handle multi-reg nodes. - (void)GetOperandInfo(op1); - } -#endif // _TARGET_ARM_ - else if (op1->isContained()) - { - info->srcCount = 0; - } - else - { - info->srcCount = 1; - appendLocationInfoToList(op1); - } - -#ifdef FEATURE_SIMD - if (varTypeIsSIMD(storeLoc)) - { - if (storeLoc->TypeGet() == TYP_SIMD12) - { - // Need an additional register to extract upper 4 bytes of Vector3. - info->internalIntCount = 1; - } - } -#endif // FEATURE_SIMD -} - -//------------------------------------------------------------------------ -// TreeNodeInfoInitCmp: Lower a GT comparison node. -// -// Arguments: -// tree - the node to lower -// -// Return Value: -// None. -// -void LinearScan::TreeNodeInfoInitCmp(GenTree* tree, TreeNodeInfo* info) -{ - info->srcCount = appendBinaryLocationInfoToList(tree->AsOp()); - - assert((info->dstCount == 1) || (tree->TypeGet() == TYP_VOID)); - info->srcCount = tree->gtOp.gtOp2->isContained() ? 
1 : 2; -} - -void LinearScan::TreeNodeInfoInitGCWriteBarrier(GenTree* tree, TreeNodeInfo* info) -{ - GenTree* dst = tree; - GenTree* addr = tree->gtOp.gtOp1; - GenTree* src = tree->gtOp.gtOp2; - LocationInfoListNode* addrInfo = getLocationInfo(addr); - LocationInfoListNode* srcInfo = getLocationInfo(src); - - // In the case where we are doing a helper assignment, even if the dst - // is an indir through an lea, we need to actually instantiate the - // lea in a register - assert(!addr->isContained() && !src->isContained()); - useList.Append(addrInfo); - useList.Append(srcInfo); - info->srcCount = 2; - assert(info->dstCount == 0); - -#if NOGC_WRITE_BARRIERS - NYI_ARM("NOGC_WRITE_BARRIERS"); - - // For the NOGC JIT Helper calls - // - // the 'addr' goes into x14 (REG_WRITE_BARRIER_DST_BYREF) - // the 'src' goes into x15 (REG_WRITE_BARRIER) - // - addrInfo->info.setSrcCandidates(this, RBM_WRITE_BARRIER_DST_BYREF); - srcInfo->info.setSrcCandidates(this, RBM_WRITE_BARRIER); -#else - // For the standard JIT Helper calls - // op1 goes into REG_ARG_0 and - // op2 goes into REG_ARG_1 - // - addrInfo->info.setSrcCandidates(this, RBM_ARG_0); - srcInfo->info.setSrcCandidates(this, RBM_ARG_1); -#endif // NOGC_WRITE_BARRIERS - - // Both src and dst must reside in a register, which they should since we haven't set - // either of them as contained. - assert(addrInfo->info.dstCount == 1); - assert(srcInfo->info.dstCount == 1); -} - -//------------------------------------------------------------------------ -// TreeNodeInfoInitIndir: Specify register requirements for address expression +// BuildIndir: Specify register requirements for address expression // of an indirection operation. // // Arguments: // indirTree - GT_IND, GT_STOREIND or block gentree node // -void LinearScan::TreeNodeInfoInitIndir(GenTreeIndir* indirTree, TreeNodeInfo* info) +void LinearScan::BuildIndir(GenTreeIndir* indirTree) { + TreeNodeInfo* info = currentNodeInfo; // If this is the rhs of a block copy (i.e. non-enregisterable struct), // it has no register requirements. if (indirTree->TypeGet() == TYP_STRUCT) @@ -239,7 +112,7 @@ void LinearScan::TreeNodeInfoInitIndir(GenTreeIndir* indirTree, TreeNodeInfo* in } //------------------------------------------------------------------------ -// TreeNodeInfoInitShiftRotate: Set the NodeInfo for a shift or rotate. +// BuildShiftRotate: Set the NodeInfo for a shift or rotate. // // Arguments: // tree - The node of interest @@ -247,10 +120,11 @@ void LinearScan::TreeNodeInfoInitIndir(GenTreeIndir* indirTree, TreeNodeInfo* in // Return Value: // None. // -int LinearScan::TreeNodeInfoInitShiftRotate(GenTree* tree, TreeNodeInfo* info) +int LinearScan::BuildShiftRotate(GenTree* tree) { - GenTree* source = tree->gtOp.gtOp1; - GenTree* shiftBy = tree->gtOp.gtOp2; + TreeNodeInfo* info = currentNodeInfo; + GenTree* source = tree->gtOp.gtOp1; + GenTree* shiftBy = tree->gtOp.gtOp2; assert(info->dstCount == 1); if (!shiftBy->isContained()) { @@ -291,84 +165,7 @@ int LinearScan::TreeNodeInfoInitShiftRotate(GenTree* tree, TreeNodeInfo* info) } //------------------------------------------------------------------------ -// TreeNodeInfoInitPutArgReg: Set the NodeInfo for a PUTARG_REG. -// -// Arguments: -// node - The PUTARG_REG node. -// argReg - The register in which to pass the argument. -// info - The info for the node's using call. -// isVarArgs - True if the call uses a varargs calling convention. -// callHasFloatRegArgs - Set to true if this PUTARG_REG uses an FP register. -// -// Return Value: -// None. 
-// -void LinearScan::TreeNodeInfoInitPutArgReg(GenTreeUnOp* node, TreeNodeInfo* info) -{ - assert(node != nullptr); - assert(node->OperIsPutArgReg()); - info->srcCount = 1; - regNumber argReg = node->gtRegNum; - assert(argReg != REG_NA); - - // Set the register requirements for the node. - regMaskTP argMask = genRegMask(argReg); - -#ifdef _TARGET_ARM_ - // If type of node is `long` then it is actually `double`. - // The actual `long` types must have been transformed as a field list with two fields. - if (node->TypeGet() == TYP_LONG) - { - info->srcCount++; - info->dstCount = info->srcCount; - assert(genRegArgNext(argReg) == REG_NEXT(argReg)); - argMask |= genRegMask(REG_NEXT(argReg)); - } -#endif // _TARGET_ARM_ - info->setDstCandidates(this, argMask); - info->setSrcCandidates(this, argMask); - - // To avoid redundant moves, have the argument operand computed in the - // register in which the argument is passed to the call. - LocationInfoListNode* op1Info = getLocationInfo(node->gtOp.gtOp1); - op1Info->info.setSrcCandidates(this, info->getSrcCandidates(this)); - op1Info->info.isDelayFree = true; - useList.Append(op1Info); -} - -//------------------------------------------------------------------------ -// HandleFloatVarArgs: Handle additional register requirements for a varargs call -// -// Arguments: -// call - The call node of interest -// argNode - The current argument -// -// Return Value: -// None. -// -// Notes: -// In the case of a varargs call, the ABI dictates that if we have floating point args, -// we must pass the enregistered arguments in both the integer and floating point registers. -// Since the integer register is not associated with the arg node, we will reserve it as -// an internal register on the call so that it is not used during the evaluation of the call node -// (e.g. for the target). -void LinearScan::HandleFloatVarArgs(GenTreeCall* call, TreeNodeInfo* info, GenTree* argNode, bool* callHasFloatRegArgs) -{ -#if FEATURE_VARARG - if (call->IsVarargs() && varTypeIsFloating(argNode)) - { - *callHasFloatRegArgs = true; - - regNumber argReg = argNode->gtRegNum; - regNumber targetReg = compiler->getCallArgIntRegister(argReg); - info->setInternalIntCount(info->internalIntCount + 1); - info->addInternalCandidates(this, genRegMask(targetReg)); - } -#endif // FEATURE_VARARG -} - -//------------------------------------------------------------------------ -// TreeNodeInfoInitCall: Set the NodeInfo for a call. +// BuildCall: Set the NodeInfo for a call. // // Arguments: // call - The call node of interest @@ -376,8 +173,9 @@ void LinearScan::HandleFloatVarArgs(GenTreeCall* call, TreeNodeInfo* info, GenTr // Return Value: // None. // -void LinearScan::TreeNodeInfoInitCall(GenTreeCall* call, TreeNodeInfo* info) +void LinearScan::BuildCall(GenTreeCall* call) { + TreeNodeInfo* info = currentNodeInfo; bool hasMultiRegRetVal = false; ReturnTypeDesc* retTypeDesc = nullptr; @@ -478,7 +276,7 @@ void LinearScan::TreeNodeInfoInitCall(GenTreeCall* call, TreeNodeInfo* info) GenTree* argNode = list->Current(); #ifdef DEBUG - // During TreeNodeInfoInit, we only use the ArgTabEntry for validation, + // During Build, we only use the ArgTabEntry for validation, // as getting it is rather expensive. 
fgArgTabEntry* curArgTabEntry = compiler->gtArgEntryByNode(call, argNode); regNumber argReg = curArgTabEntry->regNum; @@ -551,7 +349,7 @@ void LinearScan::TreeNodeInfoInitCall(GenTreeCall* call, TreeNodeInfo* info) { assert(argNode->OperIs(GT_PUTARG_REG)); assert(argNode->gtRegNum == argReg); - HandleFloatVarArgs(call, info, argNode, &callHasFloatRegArgs); + HandleFloatVarArgs(call, argNode, &callHasFloatRegArgs); #ifdef _TARGET_ARM_ // The `double` types have been transformed to `long` on armel, // while the actual long types have been decomposed. @@ -636,7 +434,7 @@ void LinearScan::TreeNodeInfoInitCall(GenTreeCall* call, TreeNodeInfo* info) } //------------------------------------------------------------------------ -// TreeNodeInfoInitPutArgStk: Set the NodeInfo for a GT_PUTARG_STK node +// BuildPutArgStk: Set the NodeInfo for a GT_PUTARG_STK node // // Arguments: // argNode - a GT_PUTARG_STK node @@ -647,8 +445,9 @@ void LinearScan::TreeNodeInfoInitCall(GenTreeCall* call, TreeNodeInfo* info) // Notes: // Set the child node(s) to be contained when we have a multireg arg // -void LinearScan::TreeNodeInfoInitPutArgStk(GenTreePutArgStk* argNode, TreeNodeInfo* info) +void LinearScan::BuildPutArgStk(GenTreePutArgStk* argNode) { + TreeNodeInfo* info = currentNodeInfo; assert(argNode->gtOper == GT_PUTARG_STK); GenTree* putArgChild = argNode->gtOp.gtOp1; @@ -717,7 +516,7 @@ void LinearScan::TreeNodeInfoInitPutArgStk(GenTreePutArgStk* argNode, TreeNodeIn #ifdef _TARGET_ARM_ //------------------------------------------------------------------------ -// TreeNodeInfoInitPutArgSplit: Set the NodeInfo for a GT_PUTARG_SPLIT node +// BuildPutArgSplit: Set the NodeInfo for a GT_PUTARG_SPLIT node // // Arguments: // argNode - a GT_PUTARG_SPLIT node @@ -728,8 +527,9 @@ void LinearScan::TreeNodeInfoInitPutArgStk(GenTreePutArgStk* argNode, TreeNodeIn // Notes: // Set the child node(s) to be contained // -void LinearScan::TreeNodeInfoInitPutArgSplit(GenTreePutArgSplit* argNode, TreeNodeInfo* info) +void LinearScan::BuildPutArgSplit(GenTreePutArgSplit* argNode) { + TreeNodeInfo* info = currentNodeInfo; assert(argNode->gtOper == GT_PUTARG_SPLIT); GenTree* putArgChild = argNode->gtOp.gtOp1; @@ -807,7 +607,7 @@ void LinearScan::TreeNodeInfoInitPutArgSplit(GenTreePutArgSplit* argNode, TreeNo #endif // _TARGET_ARM_ //------------------------------------------------------------------------ -// TreeNodeInfoInitBlockStore: Set the NodeInfo for a block store. +// BuildBlockStore: Set the NodeInfo for a block store. // // Arguments: // blkNode - The block store node of interest @@ -815,11 +615,12 @@ void LinearScan::TreeNodeInfoInitPutArgSplit(GenTreePutArgSplit* argNode, TreeNo // Return Value: // None. // -void LinearScan::TreeNodeInfoInitBlockStore(GenTreeBlk* blkNode, TreeNodeInfo* info) +void LinearScan::BuildBlockStore(GenTreeBlk* blkNode) { - GenTree* dstAddr = blkNode->Addr(); - unsigned size = blkNode->gtBlkSize; - GenTree* source = blkNode->Data(); + TreeNodeInfo* info = currentNodeInfo; + GenTree* dstAddr = blkNode->Addr(); + unsigned size = blkNode->gtBlkSize; + GenTree* source = blkNode->Data(); LocationInfoListNode* dstAddrInfo = nullptr; LocationInfoListNode* sourceInfo = nullptr; diff --git a/src/jit/lsrabuild.cpp b/src/jit/lsrabuild.cpp new file mode 100644 index 0000000000..0d3cccb6b3 --- /dev/null +++ b/src/jit/lsrabuild.cpp @@ -0,0 +1,3205 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
+// See the LICENSE file in the project root for more information.
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX                                                                           XX
+XX                  Interval and RefPosition Building                        XX
+XX                                                                           XX
+XX This contains the logic for constructing Intervals and RefPositions that  XX
+XX is common across architectures. See lsra{arch}.cpp for the architecture-  XX
+XX specific methods for building.                                            XX
+XX                                                                           XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+#include "jitpch.h"
+#ifdef _MSC_VER
+#pragma hdrstop
+#endif
+
+#ifndef LEGACY_BACKEND // This file is ONLY used for the RyuJIT backend that uses the linear scan register allocator
+
+#include "lsra.h"
+
+//------------------------------------------------------------------------
+// LocationInfoListNodePool::LocationInfoListNodePool:
+//    Creates a pool of `LocationInfoListNode` values.
+//
+// Arguments:
+//    compiler    - The compiler context.
+//    preallocate - The number of nodes to preallocate.
+//
+LocationInfoListNodePool::LocationInfoListNodePool(Compiler* compiler, unsigned preallocate) : m_compiler(compiler)
+{
+    if (preallocate > 0)
+    {
+        size_t preallocateSize = sizeof(LocationInfoListNode) * preallocate;
+        LocationInfoListNode* preallocatedNodes =
+            reinterpret_cast<LocationInfoListNode*>(compiler->compGetMem(preallocateSize, CMK_LSRA));
+
+        LocationInfoListNode* head = preallocatedNodes;
+        head->m_next = nullptr;
+
+        for (unsigned i = 1; i < preallocate; i++)
+        {
+            LocationInfoListNode* node = &preallocatedNodes[i];
+            node->m_next = head;
+            head = node;
+        }
+
+        m_freeList = head;
+    }
+}
+
+//------------------------------------------------------------------------
+// LocationInfoListNodePool::GetNode: Fetches an unused node from the
+//                                    pool.
+//
+// Arguments:
+//    l      - The `LsraLocation` for the `LocationInfo` value.
+//    i      - The interval for the `LocationInfo` value.
+//    t      - The IR node for the `LocationInfo` value.
+//    regIdx - The register index for the `LocationInfo` value.
+//
+// Returns:
+//    A pooled or newly-allocated `LocationInfoListNode`, depending on the
+//    contents of the pool.
+LocationInfoListNode* LocationInfoListNodePool::GetNode(LsraLocation l, Interval* i, GenTree* t, unsigned regIdx)
+{
+    LocationInfoListNode* head = m_freeList;
+    if (head == nullptr)
+    {
+        head = reinterpret_cast<LocationInfoListNode*>(m_compiler->compGetMem(sizeof(LocationInfoListNode)));
+    }
+    else
+    {
+        m_freeList = head->m_next;
+    }
+
+    head->loc      = l;
+    head->interval = i;
+    head->treeNode = t;
+    head->m_next   = nullptr;
+
+    return head;
+}
+
+//------------------------------------------------------------------------
+// LocationInfoListNodePool::ReturnNodes: Returns a list of nodes to the node
+//                                        pool and clears the given list.
+//
+// Arguments:
+//    list - The list to return.
+//
+void LocationInfoListNodePool::ReturnNodes(LocationInfoList& list)
+{
+    assert(list.m_head != nullptr);
+    assert(list.m_tail != nullptr);
+
+    LocationInfoListNode* head = m_freeList;
+    list.m_tail->m_next = head;
+    m_freeList = list.m_head;
+
+    list.m_head = nullptr;
+    list.m_tail = nullptr;
+}
+
+//------------------------------------------------------------------------
+// newInterval: Create a new Interval of the given RegisterType.
+//
+// Arguments:
+//    theRegisterType - The type of Interval to create.
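An aside on the node pool defined above, before the doc comment continues: GetNode and ReturnNodes implement a classic intrusive free list, in which a whole list can be recycled with a single splice. A self-contained sketch of the same pattern, using hypothetical Toy* types rather than the JIT's:

#include <cassert>

// Toy stand-ins for LocationInfoListNode/LocationInfoListNodePool: an
// intrusive singly-linked free list that recycles nodes instead of freeing.
struct ToyListNode
{
    int          payload;
    ToyListNode* next;
};

struct ToyNodePool
{
    ToyListNode* freeList = nullptr;

    ToyListNode* getNode(int payload)
    {
        ToyListNode* node = freeList;
        if (node == nullptr)
        {
            node = new ToyListNode(); // pool empty: allocate fresh
        }
        else
        {
            freeList = node->next; // otherwise pop the free-list head
        }
        node->payload = payload;
        node->next    = nullptr;
        return node;
    }

    // Return a whole list (head..tail) in one O(1) splice, as ReturnNodes does.
    void returnNodes(ToyListNode* head, ToyListNode* tail)
    {
        tail->next = freeList;
        freeList   = head;
    }
};

int main()
{
    ToyNodePool pool;
    ToyListNode* a = pool.getNode(1);
    ToyListNode* b = pool.getNode(2);
    a->next = b; // build a two-node list

    pool.returnNodes(a, b); // recycle both nodes at once
    ToyListNode* c = pool.getNode(3);
    assert(c == a && pool.freeList == b); // 'a' was reused from the free list
    return 0;
}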
+//
+// TODO-Cleanup: Consider adding an overload that takes a varDsc, and can appropriately
+// set such fields as isStructField
+//
+Interval* LinearScan::newInterval(RegisterType theRegisterType)
+{
+    intervals.emplace_back(theRegisterType, allRegs(theRegisterType));
+    Interval* newInt = &intervals.back();
+
+#ifdef DEBUG
+    newInt->intervalIndex = static_cast<unsigned>(intervals.size() - 1);
+#endif // DEBUG
+
+    DBEXEC(VERBOSE, newInt->dump());
+    return newInt;
+}
+
+//------------------------------------------------------------------------
+// newRefPositionRaw: Create a new RefPosition
+//
+// Arguments:
+//    nodeLocation - The location of the reference.
+//    treeNode     - The GenTree of the reference.
+//    refType      - The type of reference
+//
+// Notes:
+//    This is used to create RefPositions for both RegRecords and Intervals,
+//    so it does only the common initialization.
+//
+RefPosition* LinearScan::newRefPositionRaw(LsraLocation nodeLocation, GenTree* treeNode, RefType refType)
+{
+    refPositions.emplace_back(curBBNum, nodeLocation, treeNode, refType);
+    RefPosition* newRP = &refPositions.back();
+#ifdef DEBUG
+    newRP->rpNum = static_cast<unsigned>(refPositions.size() - 1);
+#endif // DEBUG
+    return newRP;
+}
+
+//------------------------------------------------------------------------
+// resolveConflictingDefAndUse: Resolve the situation where we have conflicting def and use
+//                              register requirements on a single-def, single-use interval.
+//
+// Arguments:
+//    defRefPosition - The interval definition
+//    useRefPosition - The (sole) interval use
+//
+// Return Value:
+//    None.
+//
+// Assumptions:
+//    The two RefPositions are for the same interval, which is a tree-temp.
+//
+// Notes:
+//    We require some special handling for the case where the use is a "delayRegFree" case of a fixedReg.
+//    In that case, if we change the registerAssignment on the useRefPosition, we will lose the fact that,
+//    even if we assign a different register (and rely on codegen to do the copy), that fixedReg also needs
+//    to remain busy until the Def register has been allocated. In that case, we don't allow Case 1 or Case 4
+//    below.
+//    Here are the cases we consider (in this order):
+//    1. If the defRefPosition specifies a single register, and there are no conflicting
+//       FixedReg uses of it between the def and use, we use that register, and the code generator
+//       will insert the copy. Note that it cannot be in use because there is a FixedRegRef for the def.
+//    2. If the useRefPosition specifies a single register, and it is not in use, and there are no
+//       conflicting FixedReg uses of it between the def and use, we use that register, and the code generator
+//       will insert the copy.
+//    3. If the defRefPosition specifies a single register (but there are conflicts, as determined
+//       in 1.), and there are no conflicts with the useRefPosition register (if it's a single register),
+//       we set the register requirements on the defRefPosition to the use registers, and the
+//       code generator will insert a copy on the def. We can't rely on the code generator to put a copy
+//       on the use if it has multiple possible candidates, as it won't know which one has been allocated.
+//    4. If the useRefPosition specifies a single register, and there are no conflicts with the register
+//       on the defRefPosition, we leave the register requirements on the defRefPosition as-is, and set
+//       the useRefPosition to the def registers, for similar reasons to case #3.
+//    5. If both the defRefPosition and the useRefPosition specify single registers, but both have conflicts,
+//       we set the candidates on the defRefPosition to be all regs of the appropriate type, and since they are
+//       single registers, codegen can insert the copy.
+//    6. Finally, if the RefPositions specify disjoint subsets of the registers (or the use is fixed but
+//       has a conflict), we must insert a copy. The copy will be inserted before the use if the
+//       use is not fixed (in the fixed case, the code generator will insert the use).
+//
+// TODO-CQ: We get bad register allocation in case #3 in the situation where no register is
+// available for the lifetime. We end up allocating a register that must be spilled, and it probably
+// won't be the register that is actually defined by the target instruction. So, we have to copy it
+// and THEN spill it. In this case, we should be using the def requirement. But we need to change
+// the interface to this method a bit to make that work (e.g. returning a candidate set to use, but
+// leaving the registerAssignment as-is on the def, so that if we find that we need to spill anyway
+// we can use the fixed-reg on the def.)
+//
+
+void LinearScan::resolveConflictingDefAndUse(Interval* interval, RefPosition* defRefPosition)
+{
+    assert(!interval->isLocalVar);
+
+    RefPosition* useRefPosition   = defRefPosition->nextRefPosition;
+    regMaskTP    defRegAssignment = defRefPosition->registerAssignment;
+    regMaskTP    useRegAssignment = useRefPosition->registerAssignment;
+    RegRecord*   defRegRecord     = nullptr;
+    RegRecord*   useRegRecord     = nullptr;
+    regNumber    defReg           = REG_NA;
+    regNumber    useReg           = REG_NA;
+    bool         defRegConflict   = false;
+    bool         useRegConflict   = false;
+
+    // If the useRefPosition is a "delayRegFree", we can't change the registerAssignment
+    // on it, or we will fail to ensure that the fixedReg is busy at the time the target
+    // (of the node that uses this interval) is allocated.
+    bool canChangeUseAssignment = !useRefPosition->isFixedRegRef || !useRefPosition->delayRegFree;
+
+    INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_DEFUSE_CONFLICT));
+    if (!canChangeUseAssignment)
+    {
+        INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_DEFUSE_FIXED_DELAY_USE));
+    }
+    if (defRefPosition->isFixedRegRef)
+    {
+        defReg       = defRefPosition->assignedReg();
+        defRegRecord = getRegisterRecord(defReg);
+        if (canChangeUseAssignment)
+        {
+            RefPosition* currFixedRegRefPosition = defRegRecord->recentRefPosition;
+            assert(currFixedRegRefPosition != nullptr &&
+                   currFixedRegRefPosition->nodeLocation == defRefPosition->nodeLocation);
+
+            if (currFixedRegRefPosition->nextRefPosition == nullptr ||
+                currFixedRegRefPosition->nextRefPosition->nodeLocation > useRefPosition->getRefEndLocation())
+            {
+                // This is case #1. Use the defRegAssignment
+                INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_DEFUSE_CASE1));
+                useRefPosition->registerAssignment = defRegAssignment;
+                return;
+            }
+            else
+            {
+                defRegConflict = true;
+            }
+        }
+    }
+    if (useRefPosition->isFixedRegRef)
+    {
+        useReg                               = useRefPosition->assignedReg();
+        useRegRecord                         = getRegisterRecord(useReg);
+        RefPosition* currFixedRegRefPosition = useRegRecord->recentRefPosition;
+
+        // We know that useRefPosition is a fixed use, so the nextRefPosition must not be null.
+        RefPosition* nextFixedRegRefPosition = useRegRecord->getNextRefPosition();
+        assert(nextFixedRegRefPosition != nullptr &&
+               nextFixedRegRefPosition->nodeLocation <= useRefPosition->nodeLocation);
+
+        // First, check to see if there are any conflicting FixedReg references between the def and use.
+        if (nextFixedRegRefPosition->nodeLocation == useRefPosition->nodeLocation)
+        {
+            // OK, no conflicting FixedReg references.
+            // Now, check to see whether it is currently in use.
+            if (useRegRecord->assignedInterval != nullptr)
+            {
+                RefPosition* possiblyConflictingRef         = useRegRecord->assignedInterval->recentRefPosition;
+                LsraLocation possiblyConflictingRefLocation = possiblyConflictingRef->getRefEndLocation();
+                if (possiblyConflictingRefLocation >= defRefPosition->nodeLocation)
+                {
+                    useRegConflict = true;
+                }
+            }
+            if (!useRegConflict)
+            {
+                // This is case #2. Use the useRegAssignment
+                INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_DEFUSE_CASE2));
+                defRefPosition->registerAssignment = useRegAssignment;
+                return;
+            }
+        }
+        else
+        {
+            useRegConflict = true;
+        }
+    }
+    if (defRegRecord != nullptr && !useRegConflict)
+    {
+        // This is case #3.
+        INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_DEFUSE_CASE3));
+        defRefPosition->registerAssignment = useRegAssignment;
+        return;
+    }
+    if (useRegRecord != nullptr && !defRegConflict && canChangeUseAssignment)
+    {
+        // This is case #4.
+        INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_DEFUSE_CASE4));
+        useRefPosition->registerAssignment = defRegAssignment;
+        return;
+    }
+    if (defRegRecord != nullptr && useRegRecord != nullptr)
+    {
+        // This is case #5.
+        INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_DEFUSE_CASE5));
+        RegisterType regType = interval->registerType;
+        assert((getRegisterType(interval, defRefPosition) == regType) &&
+               (getRegisterType(interval, useRefPosition) == regType));
+        regMaskTP candidates               = allRegs(regType);
+        defRefPosition->registerAssignment = candidates;
+        return;
+    }
+    INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_DEFUSE_CASE6));
+    return;
+}
+
+//------------------------------------------------------------------------
+// applyCalleeSaveHeuristics: Set register preferences for an interval based on the given RefPosition
+//
+// Arguments:
+//    rp - The RefPosition of interest
+//
+// Notes:
+//    This is slightly more general than its name implies, and updates preferences not just
+//    for callee-save registers.
+//
+void LinearScan::applyCalleeSaveHeuristics(RefPosition* rp)
+{
+#ifdef _TARGET_AMD64_
+    if (compiler->opts.compDbgEnC)
+    {
+        // We only use RSI and RDI for EnC code, so we don't want to favor callee-save regs.
+        return;
+    }
+#endif // _TARGET_AMD64_
+
+    Interval* theInterval = rp->getInterval();
+
+#ifdef DEBUG
+    if (!doReverseCallerCallee())
+#endif // DEBUG
+    {
+        // Set preferences so that this register set will be preferred for earlier refs
+        theInterval->updateRegisterPreferences(rp->registerAssignment);
+    }
+}
+
+//------------------------------------------------------------------------
+// checkConflictingDefUse: Ensure that we have consistent def/use on SDSU temps.
+//
+// Arguments:
+//    useRP - The use RefPosition of a tree temp (SDSU Interval)
+//
+// Notes:
+//    There are a couple of cases where this may over-constrain allocation:
+//    1. In the case of a non-commutative rmw def (in which the rmw source must be delay-free), or
+//    2. In the case where the defining node requires a temp distinct from the target (also a
+//       delay-free case).
+//    In those cases, if we propagate a single-register restriction from the consumer to the producer
+//    the delayed uses will not see a fixed reference in the PhysReg at that position, and may
+//    incorrectly allocate that register.
+//    TODO-CQ: This means that we may often require a copy at the use of this node's result.
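To illustrate the consistency check described in these Notes, here is the core intersection rule of checkConflictingDefUse in miniature; the masks and the ToyRefPosition type are made up, and the isSingleRegister/hasInterferingUses guard from the real code is omitted:

#include <cassert>
#include <cstdint>

typedef uint64_t ToyRegMask;

struct ToyRefPosition
{
    ToyRegMask registerAssignment;
};

// Narrow the def's candidates by the use's; an empty intersection marks the
// interval as having a conflicting def/use (a copy will be needed).
bool narrowDefByUse(ToyRefPosition* def, const ToyRefPosition* use, bool* hasConflictingDefUse)
{
    ToyRegMask newAssignment = def->registerAssignment & use->registerAssignment;
    if (newAssignment != 0)
    {
        def->registerAssignment = newAssignment;
        return true;
    }
    *hasConflictingDefUse = true;
    return false;
}

int main()
{
    bool conflict = false;
    ToyRefPosition def = {0x6}, use = {0x2}; // overlapping candidate sets
    narrowDefByUse(&def, &use, &conflict);
    assert(def.registerAssignment == 0x2 && !conflict);

    ToyRefPosition def2 = {0x4}, use2 = {0x2}; // disjoint sets: conflict
    narrowDefByUse(&def2, &use2, &conflict);
    assert(conflict);
    return 0;
}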
+//    This case could be moved to BuildRefPositionsForNode, at the point where the def RefPosition is
+//    created, causing a RefTypeFixedRef to be added at that location. This, however, results in
+//    more PhysReg RefPositions (a throughput impact), and a large number of diffs that require
+//    further analysis to determine benefit.
+//    See Issue #11274.
+//
+void LinearScan::checkConflictingDefUse(RefPosition* useRP)
+{
+    assert(useRP->refType == RefTypeUse);
+    Interval* theInterval = useRP->getInterval();
+    assert(!theInterval->isLocalVar);
+
+    RefPosition* defRP = theInterval->firstRefPosition;
+
+    // All defs must have a valid treeNode, but we check it below to be conservative.
+    assert(defRP->treeNode != nullptr);
+    regMaskTP prevAssignment = defRP->registerAssignment;
+    regMaskTP newAssignment  = (prevAssignment & useRP->registerAssignment);
+    if (newAssignment != RBM_NONE)
+    {
+        if (!isSingleRegister(newAssignment) || !theInterval->hasInterferingUses)
+        {
+            defRP->registerAssignment = newAssignment;
+        }
+    }
+    else
+    {
+        theInterval->hasConflictingDefUse = true;
+    }
+}
+
+//------------------------------------------------------------------------
+// associateRefPosWithInterval: Update the Interval based on the given RefPosition.
+//
+// Arguments:
+//    rp - The RefPosition of interest
+//
+// Notes:
+//    This is called at the time when 'rp' has just been created, so it becomes
+//    the nextRefPosition of the recentRefPosition, and both the recentRefPosition
+//    and lastRefPosition of its referent.
+//
+void LinearScan::associateRefPosWithInterval(RefPosition* rp)
+{
+    Referenceable* theReferent = rp->referent;
+
+    if (theReferent != nullptr)
+    {
+        // All RefPositions except the dummy ones at the beginning of blocks
+
+        if (rp->isIntervalRef())
+        {
+            Interval* theInterval = rp->getInterval();
+
+            applyCalleeSaveHeuristics(rp);
+
+            if (theInterval->isLocalVar)
+            {
+                if (RefTypeIsUse(rp->refType))
+                {
+                    RefPosition* const prevRP = theInterval->recentRefPosition;
+                    if ((prevRP != nullptr) && (prevRP->bbNum == rp->bbNum))
+                    {
+                        prevRP->lastUse = false;
+                    }
+                }
+
+                rp->lastUse = (rp->refType != RefTypeExpUse) && (rp->refType != RefTypeParamDef) &&
+                              (rp->refType != RefTypeZeroInit) && !extendLifetimes();
+            }
+            else if (rp->refType == RefTypeUse)
+            {
+                checkConflictingDefUse(rp);
+                rp->lastUse = true;
+            }
+        }
+
+        RefPosition* prevRP = theReferent->recentRefPosition;
+        if (prevRP != nullptr)
+        {
+            prevRP->nextRefPosition = rp;
+        }
+        else
+        {
+            theReferent->firstRefPosition = rp;
+        }
+        theReferent->recentRefPosition = rp;
+        theReferent->lastRefPosition   = rp;
+    }
+    else
+    {
+        assert((rp->refType == RefTypeBB) || (rp->refType == RefTypeKillGCRefs));
+    }
+}
+
+//---------------------------------------------------------------------------
+// newRefPosition: allocate and initialize a new RefPosition.
+//
+// Arguments:
+//     reg         - reg number that identifies RegRecord to be associated
+//                   with this RefPosition
+//     theLocation - LSRA location of RefPosition
+//     theRefType  - RefPosition type
+//     theTreeNode - GenTree node for which this RefPosition is created
+//     mask        - Set of valid registers for this RefPosition
+//
+// Return Value:
+//     a new RefPosition
+//
+RefPosition* LinearScan::newRefPosition(
+    regNumber reg, LsraLocation theLocation, RefType theRefType, GenTree* theTreeNode, regMaskTP mask)
+{
+    RefPosition* newRP = newRefPositionRaw(theLocation, theTreeNode, theRefType);
+
+    newRP->setReg(getRegisterRecord(reg));
+    newRP->registerAssignment = mask;
+
+    newRP->setMultiRegIdx(0);
+    newRP->setAllocateIfProfitable(false);
+
+    associateRefPosWithInterval(newRP);
+
+    DBEXEC(VERBOSE, newRP->dump());
+    return newRP;
+}
+
+//---------------------------------------------------------------------------
+// newRefPosition: allocate and initialize a new RefPosition.
+//
+// Arguments:
+//     theInterval - interval with which this RefPosition is associated.
+//     theLocation - LSRA location of RefPosition
+//     theRefType  - RefPosition type
+//     theTreeNode - GenTree node for which this RefPosition is created
+//     mask        - Set of valid registers for this RefPosition
+//     multiRegIdx - register position if this RefPosition corresponds to a
+//                   multi-reg call node.
+//
+// Return Value:
+//     a new RefPosition
+//
+RefPosition* LinearScan::newRefPosition(Interval*    theInterval,
+                                        LsraLocation theLocation,
+                                        RefType      theRefType,
+                                        GenTree*     theTreeNode,
+                                        regMaskTP    mask,
+                                        unsigned     multiRegIdx /* = 0 */)
+{
+#ifdef DEBUG
+    if (theInterval != nullptr && regType(theInterval->registerType) == FloatRegisterType)
+    {
+        // In the case where we're using floating point registers we must make sure
+        // this flag was set previously in the compiler since this will mandate
+        // whether LSRA will take into consideration FP reg killsets.
+        assert(compiler->compFloatingPointUsed || ((mask & RBM_FLT_CALLEE_SAVED) == 0));
+    }
+#endif // DEBUG
+
+    // If this reference is constrained to a single register (and it's not a dummy
+    // or Kill reftype already), add a RefTypeFixedReg at this location so that its
+    // availability can be more accurately determined
+
+    bool isFixedRegister = isSingleRegister(mask);
+    bool insertFixedRef  = false;
+    if (isFixedRegister)
+    {
+        // Insert a RefTypeFixedReg for any normal def or use (not ParamDef or BB)
+        if (theRefType == RefTypeUse || theRefType == RefTypeDef)
+        {
+            insertFixedRef = true;
+        }
+    }
+
+    if (insertFixedRef)
+    {
+        regNumber    physicalReg = genRegNumFromMask(mask);
+        RefPosition* pos         = newRefPosition(physicalReg, theLocation, RefTypeFixedReg, nullptr, mask);
+        assert(theInterval != nullptr);
+        assert((allRegs(theInterval->registerType) & mask) != 0);
+    }
+
+    RefPosition* newRP = newRefPositionRaw(theLocation, theTreeNode, theRefType);
+
+    newRP->setInterval(theInterval);
+
+    // Spill info
+    newRP->isFixedRegRef = isFixedRegister;
+
+#ifndef _TARGET_AMD64_
+    // We don't need this for AMD64 because the PInvoke method epilog code is explicit
+    // at register allocation time.
+    if (theInterval != nullptr && theInterval->isLocalVar && compiler->info.compCallUnmanaged &&
+        theInterval->varNum == compiler->genReturnLocal)
+    {
+        mask &= ~(RBM_PINVOKE_TCB | RBM_PINVOKE_FRAME);
+        noway_assert(mask != RBM_NONE);
+    }
+#endif // !_TARGET_AMD64_
+    newRP->registerAssignment = mask;
+
+    newRP->setMultiRegIdx(multiRegIdx);
+    newRP->setAllocateIfProfitable(false);
+
+    associateRefPosWithInterval(newRP);
+
+    DBEXEC(VERBOSE, newRP->dump());
+    return newRP;
+}
+
+//------------------------------------------------------------------------
+// isContainableMemoryOp: Checks whether this is a memory op that can be contained.
+//
+// Arguments:
+//    node - the node of interest.
+//
+// Return value:
+//    True if this will definitely be a memory reference that could be contained.
+//
+// Notes:
+//    This differs from the isMemoryOp() method on GenTree because it checks for
+//    the case of a doNotEnregister local. This won't include locals that
+//    for some other reason do not become register candidates, nor those that get
+//    spilled.
+//    Also, because we usually call this before we redo dataflow, any new lclVars
+//    introduced after the last dataflow analysis will not yet be marked lvTracked,
+//    so we don't use that.
+//
+bool LinearScan::isContainableMemoryOp(GenTree* node)
+{
+    if (node->isMemoryOp())
+    {
+        return true;
+    }
+    if (node->IsLocal())
+    {
+        if (!enregisterLocalVars)
+        {
+            return true;
+        }
+        LclVarDsc* varDsc = &compiler->lvaTable[node->AsLclVar()->gtLclNum];
+        return varDsc->lvDoNotEnregister;
+    }
+    return false;
+}
+
+//------------------------------------------------------------------------
+// addRefsForPhysRegMask: Adds RefPositions of the given type for all the registers in 'mask'.
+//
+// Arguments:
+//    mask        - the mask (set) of registers.
+//    currentLoc  - the location at which they should be added
+//    refType     - the type of refposition
+//    isLastUse   - true IFF this is a last use of the register
+//
+void LinearScan::addRefsForPhysRegMask(regMaskTP mask, LsraLocation currentLoc, RefType refType, bool isLastUse)
+{
+    for (regNumber reg = REG_FIRST; mask; reg = REG_NEXT(reg), mask >>= 1)
+    {
+        if (mask & 1)
+        {
+            // This assumes that these are all "special" RefTypes that
+            // don't need to be recorded on the tree (hence treeNode is nullptr)
+            RefPosition* pos = newRefPosition(reg, currentLoc, refType, nullptr,
+                                              genRegMask(reg)); // This MUST occupy the physical register (obviously)
+
+            if (isLastUse)
+            {
+                pos->lastUse = true;
+            }
+        }
+    }
+}
+
+//------------------------------------------------------------------------
+// getKillSetForStoreInd: Determine the liveness kill set for a GT_STOREIND node.
+// If the GT_STOREIND will generate a write barrier, determine the specific kill
+// set required by the case-specific, platform-specific write barrier. If no
+// write barrier is required, the kill set will be RBM_NONE.
+//
+// Arguments:
+//    tree - the GT_STOREIND node
+//
+// Return Value: a register mask of the registers killed
+//
+regMaskTP LinearScan::getKillSetForStoreInd(GenTreeStoreInd* tree)
+{
+    assert(tree->OperIs(GT_STOREIND));
+
+    regMaskTP killMask = RBM_NONE;
+
+    GenTree* data = tree->Data();
+
+    GCInfo::WriteBarrierForm writeBarrierForm = compiler->codeGen->gcInfo.gcIsWriteBarrierCandidate(tree, data);
+    if (writeBarrierForm != GCInfo::WBF_NoBarrier)
+    {
+        if (compiler->codeGen->genUseOptimizedWriteBarriers(writeBarrierForm))
+        {
+            // We can't determine the exact helper to be used at this point, because it depends on
+            // the allocated register for the `data` operand. However, all the (x86) optimized
+            // helpers have the same kill set: EDX.
+            killMask = RBM_CALLEE_TRASH_NOGC;
+        }
+        else
+        {
+            // Figure out which helper we're going to use, and then get the kill set for that helper.
+            CorInfoHelpFunc helper =
+                compiler->codeGen->genWriteBarrierHelperForWriteBarrierForm(tree, writeBarrierForm);
+            killMask = compiler->compHelperCallKillSet(helper);
+        }
+    }
+
+    return killMask;
+}
+
+//------------------------------------------------------------------------
+// getKillSetForNode: Return the registers killed by the given tree node.
+//
+// Arguments:
+//    tree - the tree for which the kill set is needed.
+//
+// Return Value: a register mask of the registers killed
+//
+regMaskTP LinearScan::getKillSetForNode(GenTree* tree)
+{
+    regMaskTP killMask = RBM_NONE;
+    switch (tree->OperGet())
+    {
+#ifdef _TARGET_XARCH_
+        case GT_MUL:
+            // We use the 128-bit multiply when performing an overflow checking unsigned multiply
+            //
+            if (((tree->gtFlags & GTF_UNSIGNED) != 0) && tree->gtOverflowEx())
+            {
+                // Both RAX and RDX are killed by the operation
+                killMask = RBM_RAX | RBM_RDX;
+            }
+            break;
+
+        case GT_MULHI:
+#if defined(_TARGET_X86_) && !defined(LEGACY_BACKEND)
+        case GT_MUL_LONG:
+#endif
+            killMask = RBM_RAX | RBM_RDX;
+            break;
+
+        case GT_MOD:
+        case GT_DIV:
+        case GT_UMOD:
+        case GT_UDIV:
+            if (!varTypeIsFloating(tree->TypeGet()))
+            {
+                // Both RAX and RDX are killed by the operation
+                killMask = RBM_RAX | RBM_RDX;
+            }
+            break;
+#endif // _TARGET_XARCH_
+
+        case GT_STORE_OBJ:
+            if (tree->OperIsCopyBlkOp())
+            {
+                assert(tree->AsObj()->gtGcPtrCount != 0);
+                killMask = compiler->compHelperCallKillSet(CORINFO_HELP_ASSIGN_BYREF);
+                break;
+            }
+            __fallthrough;
+
+        case GT_STORE_BLK:
+        case GT_STORE_DYN_BLK:
+        {
+            GenTreeBlk* blkNode   = tree->AsBlk();
+            bool        isCopyBlk = varTypeIsStruct(blkNode->Data());
+            switch (blkNode->gtBlkOpKind)
+            {
+                case GenTreeBlk::BlkOpKindHelper:
+                    if (isCopyBlk)
+                    {
+                        killMask = compiler->compHelperCallKillSet(CORINFO_HELP_MEMCPY);
+                    }
+                    else
+                    {
+                        killMask = compiler->compHelperCallKillSet(CORINFO_HELP_MEMSET);
+                    }
+                    break;
+
+#ifdef _TARGET_XARCH_
+                case GenTreeBlk::BlkOpKindRepInstr:
+                    if (isCopyBlk)
+                    {
+                        // rep movs kills RCX, RDI and RSI
+                        killMask = RBM_RCX | RBM_RDI | RBM_RSI;
+                    }
+                    else
+                    {
+                        // rep stos kills RCX and RDI.
+                        // (Note that the Data() node, if not constant, will be assigned to
+                        // RCX, but it's fine that this kills it, as the value is not available
+                        // after this node in any case.)
+                        killMask = RBM_RDI | RBM_RCX;
+                    }
+                    break;
+#else
+                case GenTreeBlk::BlkOpKindRepInstr:
+#endif
+                case GenTreeBlk::BlkOpKindUnroll:
+                case GenTreeBlk::BlkOpKindInvalid:
+                    // for these 'gtBlkOpKind' kinds, we leave 'killMask' = RBM_NONE
+                    break;
+            }
+        }
+        break;
+
+        case GT_RETURNTRAP:
+            killMask = compiler->compHelperCallKillSet(CORINFO_HELP_STOP_FOR_GC);
+            break;
+        case GT_CALL:
+#ifdef _TARGET_X86_
+            if (compiler->compFloatingPointUsed)
+            {
+                if (tree->TypeGet() == TYP_DOUBLE)
+                {
+                    needDoubleTmpForFPCall = true;
+                }
+                else if (tree->TypeGet() == TYP_FLOAT)
+                {
+                    needFloatTmpForFPCall = true;
+                }
+            }
+#endif // _TARGET_X86_
+#if defined(_TARGET_X86_) || defined(_TARGET_ARM_)
+            if (tree->IsHelperCall())
+            {
+                GenTreeCall*    call     = tree->AsCall();
+                CorInfoHelpFunc helpFunc = compiler->eeGetHelperNum(call->gtCallMethHnd);
+                killMask                 = compiler->compHelperCallKillSet(helpFunc);
+            }
+            else
+#endif // defined(_TARGET_X86_) || defined(_TARGET_ARM_)
+            {
+                // if there is no FP used, we can ignore the FP kills
+                if (compiler->compFloatingPointUsed)
+                {
+                    killMask = RBM_CALLEE_TRASH;
+                }
+                else
+                {
+                    killMask = RBM_INT_CALLEE_TRASH;
+                }
+#ifdef _TARGET_ARM_
+                if (tree->AsCall()->IsVirtualStub())
+                {
+                    killMask |= compiler->virtualStubParamInfo->GetRegMask();
+                }
+#else  // !_TARGET_ARM_
+                // Verify that the special virtual stub call registers are in the kill mask.
+                // We don't just add them unconditionally to the killMask because for most architectures
+                // they are already in the RBM_CALLEE_TRASH set,
+                // and we don't want to introduce extra checks and calls in this hot function.
+                assert(!tree->AsCall()->IsVirtualStub() || ((killMask & compiler->virtualStubParamInfo->GetRegMask()) ==
+                                                            compiler->virtualStubParamInfo->GetRegMask()));
+#endif
+            }
+            break;
+        case GT_STOREIND:
+            killMask = getKillSetForStoreInd(tree->AsStoreInd());
+            break;
+
+#if defined(PROFILING_SUPPORTED)
+        // If this method requires profiler ELT hook then mark these nodes as killing
+        // callee trash registers (excluding RAX and XMM0). The reason for this is that
+        // profiler callback would trash these registers. See vm\amd64\asmhelpers.asm for
+        // more details.
+        case GT_RETURN:
+            if (compiler->compIsProfilerHookNeeded())
+            {
+                killMask = compiler->compHelperCallKillSet(CORINFO_HELP_PROF_FCN_LEAVE);
+            }
+            break;
+
+        case GT_PROF_HOOK:
+            if (compiler->compIsProfilerHookNeeded())
+            {
+                killMask = compiler->compHelperCallKillSet(CORINFO_HELP_PROF_FCN_TAILCALL);
+            }
+            break;
+#endif // PROFILING_SUPPORTED
+
+        default:
+            // for all other 'tree->OperGet()' kinds, leave 'killMask' = RBM_NONE
+            break;
+    }
+    return killMask;
+}
+
+//------------------------------------------------------------------------
+// buildKillPositionsForNode:
+// Given some tree node, add RefPositions for all the registers this node kills
+//
+// Arguments:
+//    tree       - the tree for which kill positions should be generated
+//    currentLoc - the location at which the kills should be added
+//
+// Return Value:
+//    true  - kills were inserted
+//    false - no kills were inserted
+//
+// Notes:
+//    The return value is needed because if we have any kills, we need to make sure that
+//    all defs are located AFTER the kills. On the other hand, if there aren't kills,
+//    the multiple defs for a regPair are in different locations.
+//    If we generate any kills, we will mark all currentLiveVars as being preferenced
+//    to avoid the killed registers. This is somewhat conservative.
+
+bool LinearScan::buildKillPositionsForNode(GenTree* tree, LsraLocation currentLoc)
+{
+    regMaskTP killMask   = getKillSetForNode(tree);
+    bool      isCallKill = ((killMask == RBM_INT_CALLEE_TRASH) || (killMask == RBM_CALLEE_TRASH));
+    if (killMask != RBM_NONE)
+    {
+        // The killMask identifies a set of registers that will be used during codegen.
+        // Mark these as modified here, so when we do final frame layout, we'll know about
+        // all these registers. This is especially important if killMask contains
+        // callee-saved registers, which affect the frame size since we need to save/restore them.
+        // In the case where we have a copyBlk with GC pointers, we may need to call the
+        // CORINFO_HELP_ASSIGN_BYREF helper, which kills callee-saved RSI and RDI; if
+        // LSRA doesn't assign RSI/RDI, they wouldn't get marked as modified until codegen,
+        // which is too late.
+        compiler->codeGen->regSet.rsSetRegsModified(killMask DEBUGARG(true));
+
+        addRefsForPhysRegMask(killMask, currentLoc, RefTypeKill, true);
+
+        // TODO-CQ: It appears to be valuable for both fp and int registers to avoid killing the callee
+        // save regs on infrequently executed paths. However, it results in a large number of asmDiffs,
+        // many of which appear to be regressions (because there is more spill on the infrequent path),
+        // but are not really because the frequent path becomes smaller. Validating these diffs will need
+        // to be done before making this change.
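As a small illustration of the preference update described in the notes above, where live variables are steered away from the registers a node kills, here is a standalone sketch with made-up masks (the real code works per-interval and also sets preferCalleeSave):

#include <cassert>
#include <cstdint>

typedef uint64_t ToyRegMask;

int main()
{
    const ToyRegMask allRegsOfType = 0xFFFF; // pretend 16 registers of this type
    const ToyRegMask killMask      = 0x00FF; // low half killed by, say, a call

    // Steer the live interval's preferences away from the killed registers.
    ToyRegMask newPreferences = allRegsOfType & ~killMask;
    if (newPreferences != 0)
    {
        // interval->updateRegisterPreferences(newPreferences) in the real code
        assert(newPreferences == 0xFF00);
    }
    return 0;
}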
+        // if (!blockSequence[curBBSeqNum]->isRunRarely())
+        if (enregisterLocalVars)
+        {
+            VarSetOps::Iter iter(compiler, currentLiveVars);
+            unsigned        varIndex = 0;
+            while (iter.NextElem(&varIndex))
+            {
+                unsigned   varNum = compiler->lvaTrackedToVarNum[varIndex];
+                LclVarDsc* varDsc = compiler->lvaTable + varNum;
+#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
+                if (varTypeNeedsPartialCalleeSave(varDsc->lvType))
+                {
+                    if (!VarSetOps::IsMember(compiler, largeVectorCalleeSaveCandidateVars, varIndex))
+                    {
+                        continue;
+                    }
+                }
+                else
+#endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE
+                    if (varTypeIsFloating(varDsc) &&
+                        !VarSetOps::IsMember(compiler, fpCalleeSaveCandidateVars, varIndex))
+                {
+                    continue;
+                }
+                Interval* interval = getIntervalForLocalVar(varIndex);
+                if (isCallKill)
+                {
+                    interval->preferCalleeSave = true;
+                }
+                regMaskTP newPreferences = allRegs(interval->registerType) & (~killMask);
+
+                if (newPreferences != RBM_NONE)
+                {
+                    interval->updateRegisterPreferences(newPreferences);
+                }
+                else
+                {
+                    // If there are no callee-saved registers, the call could kill all the registers.
+                    // This is a valid state, so in that case assert should not trigger. The RA will spill in order to
+                    // free a register later.
+                    assert(compiler->opts.compDbgEnC || (calleeSaveRegs(varDsc->lvType)) == RBM_NONE);
+                }
+            }
+        }
+
+        if (compiler->killGCRefs(tree))
+        {
+            RefPosition* pos = newRefPosition((Interval*)nullptr, currentLoc, RefTypeKillGCRefs, tree,
+                                              (allRegs(TYP_REF) & ~RBM_ARG_REGS));
+        }
+        return true;
+    }
+
+    return false;
+}
+
+//----------------------------------------------------------------------------
+// defineNewInternalTemp: Defines a ref position for an internal temp.
+//
+// Arguments:
+//     tree    - GenTree node requiring an internal register
+//     regType - Register type
+//     regMask - register mask of candidates for temp
+//
+RefPosition* LinearScan::defineNewInternalTemp(GenTree* tree, RegisterType regType, regMaskTP regMask)
+{
+    Interval* current = newInterval(regType);
+    current->isInternal = true;
+    return newRefPosition(current, currentLoc, RefTypeDef, tree, regMask, 0);
+}
+
+//------------------------------------------------------------------------
+// buildInternalRegisterDefsForNode - build Def positions for internal
+// registers required for tree node.
+//
+// Arguments:
+//   tree  - GenTree node that needs internal registers
+//   temps - in-out array which is populated with ref positions
+//           created for Def of internal registers
+//
+// Returns:
+//   The total number of Def positions created for internal registers of the tree node.
+int LinearScan::buildInternalRegisterDefsForNode(GenTree* tree, TreeNodeInfo* info, RefPosition* temps[])
+{
+    int       count;
+    int       internalIntCount = info->internalIntCount;
+    regMaskTP internalCands    = info->getInternalCandidates(this);
+
+    // If the number of internal integer registers required is the same as the number of candidate integer registers in
+    // the candidate set, then they must be handled as fixed registers.
+    // (E.g. for the integer registers that floating point arguments must be copied into for a varargs call.)
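A quick sketch of the "fixed registers" peeling that the comment above describes and the loop below implements: when exactly as many candidates exist as temps required, each temp is pinned to one candidate, carved off low-bit-first. The lowestBit helper is a toy stand-in for genFindLowestBit:

#include <cassert>
#include <cstdint>

typedef uint64_t ToyRegMask;

// Toy analog of genFindLowestBit: isolate the lowest set bit of a mask.
static ToyRegMask lowestBit(ToyRegMask mask)
{
    return mask & (0 - mask);
}

int main()
{
    ToyRegMask internalCands    = 0xA; // two candidate registers (bits 1 and 3)
    const int  internalIntCount = 2;   // and exactly two temps required
    ToyRegMask picked[2];
    for (int i = 0; i < internalIntCount; i++)
    {
        picked[i] = lowestBit(internalCands); // each temp gets one fixed, distinct reg
        internalCands &= ~picked[i];
    }
    assert(picked[0] == 0x2 && picked[1] == 0x8 && internalCands == 0);
    return 0;
}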
+    bool      fixedRegs             = false;
+    regMaskTP internalIntCandidates = (internalCands & allRegs(TYP_INT));
+    if (((int)genCountBits(internalIntCandidates)) == internalIntCount)
+    {
+        fixedRegs = true;
+    }
+
+    for (count = 0; count < internalIntCount; count++)
+    {
+        regMaskTP internalIntCands = (internalCands & allRegs(TYP_INT));
+        if (fixedRegs)
+        {
+            internalIntCands = genFindLowestBit(internalIntCands);
+            internalCands &= ~internalIntCands;
+        }
+        temps[count] = defineNewInternalTemp(tree, IntRegisterType, internalIntCands);
+    }
+
+    int internalFloatCount = info->internalFloatCount;
+    for (int i = 0; i < internalFloatCount; i++)
+    {
+        regMaskTP internalFPCands = (internalCands & internalFloatRegCandidates());
+        temps[count++] = defineNewInternalTemp(tree, FloatRegisterType, internalFPCands);
+    }
+
+    assert(count < MaxInternalRegisters);
+    assert(count == (internalIntCount + internalFloatCount));
+    return count;
+}
+
+//------------------------------------------------------------------------
+// buildInternalRegisterUsesForNode - adds Use positions for internal
+// registers required for tree node.
+//
+// Arguments:
+//   tree  - GenTree node that needs internal registers
+//   defs  - array containing the Def positions of the internal
+//           registers.
+//   total - Total number of Def positions in 'defs' array.
+//
+// Returns:
+//   Void.
+void LinearScan::buildInternalRegisterUsesForNode(GenTree* tree, TreeNodeInfo* info, RefPosition* defs[], int total)
+{
+    assert(total < MaxInternalRegisters);
+
+    // defs[] has been populated by buildInternalRegisterDefsForNode
+    // now just add uses to the defs previously added.
+    for (int i = 0; i < total; i++)
+    {
+        RefPosition* prevRefPosition = defs[i];
+        assert(prevRefPosition != nullptr);
+        regMaskTP mask = prevRefPosition->registerAssignment;
+        if (prevRefPosition->isPhysRegRef)
+        {
+            newRefPosition(defs[i]->getReg()->regNum, currentLoc, RefTypeUse, tree, mask);
+        }
+        else
+        {
+            RefPosition* newest = newRefPosition(defs[i]->getInterval(), currentLoc, RefTypeUse, tree, mask, 0);
+
+            if (info->isInternalRegDelayFree)
+            {
+                newest->delayRegFree = true;
+            }
+        }
+    }
+}
+
+#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
+//------------------------------------------------------------------------
+// buildUpperVectorSaveRefPositions - Create special RefPositions for saving
+//                                    the upper half of a set of large vectors.
+//
+// Arguments:
+//    tree       - The current node being handled
+//    currentLoc - The location of the current node
+//
+// Return Value: Returns the set of lclVars that are killed by this node, and therefore
+//               require RefTypeUpperVectorSaveDef RefPositions.
+//
+// Notes: The returned set is used by buildUpperVectorRestoreRefPositions.
+//
+VARSET_VALRET_TP
+LinearScan::buildUpperVectorSaveRefPositions(GenTree* tree, LsraLocation currentLoc)
+{
+    assert(enregisterLocalVars);
+    VARSET_TP liveLargeVectors(VarSetOps::MakeEmpty(compiler));
+    regMaskTP fpCalleeKillSet = RBM_NONE;
+    if (!VarSetOps::IsEmpty(compiler, largeVectorVars))
+    {
+        // We actually need to find any calls that kill the upper-half of the callee-save vector registers.
+        // But we will use as a proxy any node that kills floating point registers.
+        // (Note that some calls are masquerading as other nodes at this point so we can't just check for calls.)
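One subtle piece of the save/restore pair in this region is how a temp Interval is threaded through the lclVar interval's relatedInterval field and later unthreaded. A toy sketch of just that pointer dance, with a hypothetical ToyInterval in place of the JIT's Interval:

#include <cassert>

struct ToyInterval
{
    ToyInterval* relatedInterval = nullptr;
};

int main()
{
    ToyInterval varInterval, previousRelated, tempInterval;
    varInterval.relatedInterval = &previousRelated;

    // Save (as in buildUpperVectorSaveRefPositions): stash the old link on the
    // temp, then point the lclVar's interval at the temp.
    tempInterval.relatedInterval = varInterval.relatedInterval;
    varInterval.relatedInterval  = &tempInterval;

    // Restore (as in buildUpperVectorRestoreRefPositions): put the old link
    // back, and link the temp to the lclVar's interval.
    varInterval.relatedInterval  = tempInterval.relatedInterval;
    tempInterval.relatedInterval = &varInterval;

    assert(varInterval.relatedInterval == &previousRelated);
    assert(tempInterval.relatedInterval == &varInterval);
    return 0;
}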
+ fpCalleeKillSet = getKillSetForNode(tree); + if ((fpCalleeKillSet & RBM_FLT_CALLEE_TRASH) != RBM_NONE) + { + VarSetOps::AssignNoCopy(compiler, liveLargeVectors, + VarSetOps::Intersection(compiler, currentLiveVars, largeVectorVars)); + VarSetOps::Iter iter(compiler, liveLargeVectors); + unsigned varIndex = 0; + while (iter.NextElem(&varIndex)) + { + Interval* varInterval = getIntervalForLocalVar(varIndex); + Interval* tempInterval = newInterval(varInterval->registerType); + tempInterval->isInternal = true; + RefPosition* pos = + newRefPosition(tempInterval, currentLoc, RefTypeUpperVectorSaveDef, tree, RBM_FLT_CALLEE_SAVED); + // We are going to save the existing relatedInterval of varInterval on tempInterval, so that we can set + // the tempInterval as the relatedInterval of varInterval, so that we can build the corresponding + // RefTypeUpperVectorSaveUse RefPosition. We will then restore the relatedInterval onto varInterval, + // and set varInterval as the relatedInterval of tempInterval. + tempInterval->relatedInterval = varInterval->relatedInterval; + varInterval->relatedInterval = tempInterval; + } + } + } + return liveLargeVectors; +} + +// buildUpperVectorRestoreRefPositions - Create special RefPositions for restoring +// the upper half of a set of large vectors. +// +// Arguments: +// tree - The current node being handled +// currentLoc - The location of the current node +// liveLargeVectors - The set of lclVars needing restores (returned by buildUpperVectorSaveRefPositions) +// +void LinearScan::buildUpperVectorRestoreRefPositions(GenTree* tree, + LsraLocation currentLoc, + VARSET_VALARG_TP liveLargeVectors) +{ + assert(enregisterLocalVars); + if (!VarSetOps::IsEmpty(compiler, liveLargeVectors)) + { + VarSetOps::Iter iter(compiler, liveLargeVectors); + unsigned varIndex = 0; + while (iter.NextElem(&varIndex)) + { + Interval* varInterval = getIntervalForLocalVar(varIndex); + Interval* tempInterval = varInterval->relatedInterval; + assert(tempInterval->isInternal == true); + RefPosition* pos = + newRefPosition(tempInterval, currentLoc, RefTypeUpperVectorSaveUse, tree, RBM_FLT_CALLEE_SAVED); + // Restore the relatedInterval onto varInterval, and set varInterval as the relatedInterval + // of tempInterval. + varInterval->relatedInterval = tempInterval->relatedInterval; + tempInterval->relatedInterval = varInterval; + } + } +} +#endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE + +#ifdef DEBUG +//------------------------------------------------------------------------ +// ComputeOperandDstCount: computes the number of registers defined by a +// node. +// +// For most nodes, this is simple: +// - Nodes that do not produce values (e.g. stores and other void-typed +// nodes) and nodes that immediately use the registers they define +// produce no registers +// - Nodes that are marked as defining N registers define N registers. +// +// For contained nodes, however, things are more complicated: for purposes +// of bookkeeping, a contained node is treated as producing the transitive +// closure of the registers produced by its sources. +// +// Arguments: +// operand - The operand for which to compute a register count. +// +// Returns: +// The number of registers defined by `operand`. +// +// static +int LinearScan::ComputeOperandDstCount(GenTree* operand) +{ + // GT_ARGPLACE is the only non-LIR node that is currently in the trees at this stage, though + // note that it is not in the linear order. 
It seems best to check for !IsLIR() rather than + // GT_ARGPLACE directly, since it's that characteristic that makes it irrelevant for this method. + if (!operand->IsLIR()) + { + return 0; + } + if (operand->isContained()) + { + int dstCount = 0; + for (GenTree* op : operand->Operands()) + { + dstCount += ComputeOperandDstCount(op); + } + + return dstCount; + } + if (operand->IsUnusedValue()) + { + // Operands that define an unused value do not produce any registers. + return 0; + } + if (operand->IsValue()) + { + // Operands that are values and are not contained consume all of their operands + // and produce one or more registers. + return operand->GetRegisterDstCount(); + } + else + { + // This must be one of the operand types that are neither contained nor produce a value. + // Stores and void-typed operands may be encountered when processing call nodes, which contain + // pointers to argument setup stores. + assert(operand->OperIsStore() || operand->OperIsBlkOp() || operand->OperIsPutArgStk() || + operand->OperIsCompare() || operand->OperIs(GT_CMP) || operand->IsSIMDEqualityOrInequality() || + operand->TypeGet() == TYP_VOID); + return 0; + } +} + +//------------------------------------------------------------------------ +// ComputeAvailableSrcCount: computes the number of registers available as +// sources for a node. +// +// This is simply the sum of the number of registers produced by each +// operand to the node. +// +// Arguments: +// node - The node for which to compute a source count. +// +// Return Value: +// The number of registers available as sources for `node`. +// +// static +int LinearScan::ComputeAvailableSrcCount(GenTree* node) +{ + int numSources = 0; + for (GenTree* operand : node->Operands()) + { + numSources += ComputeOperandDstCount(operand); + } + + return numSources; +} +#endif // DEBUG + +//------------------------------------------------------------------------ +// buildRefPositionsForNode: The main entry point for building the RefPositions +// and "tree temp" Intervals for a given node. +// +// Arguments: +// tree - The node for which we are building RefPositions +// block - The BasicBlock in which the node resides +// currentLoc - The LsraLocation of the given node +// +void LinearScan::buildRefPositionsForNode(GenTree* tree, BasicBlock* block, LsraLocation currentLoc) +{ +#ifdef _TARGET_ARM_ + assert(!isRegPairType(tree->TypeGet())); +#endif // _TARGET_ARM_ + + // The LIR traversal doesn't visit GT_LIST or GT_ARGPLACE nodes. + // GT_CLS_VAR nodes should have been eliminated by rationalizer. + assert(tree->OperGet() != GT_ARGPLACE); + assert(tree->OperGet() != GT_LIST); + assert(tree->OperGet() != GT_CLS_VAR); + + // The LIR traversal visits only the first node in a GT_FIELD_LIST. + assert((tree->OperGet() != GT_FIELD_LIST) || tree->AsFieldList()->IsFieldListHead()); + + // The set of internal temporary registers used by this node are stored in the + // gtRsvdRegs register mask. Clear it out. + tree->gtRsvdRegs = RBM_NONE; + +#ifdef DEBUG + if (VERBOSE) + { + dumpDefList(); + compiler->gtDispTree(tree, nullptr, nullptr, true); + } +#endif // DEBUG + + // If the node produces a value that will be consumed by a parent node, its TreeNodeInfo will + // be allocated in the LocationInfoListNode. Otherwise, we'll just use a local value that will + // be thrown away when we're done. 
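+    // That is: a value-producing node gets a pooled LocationInfoListNode whose
+    // TreeNodeInfo must outlive this call (it is appended to defList below), while
+    // a void-typed node uses the stack-allocated tempInfo, which is discarded.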
+ LocationInfoListNode* locationInfo = nullptr; + TreeNodeInfo tempInfo; + TreeNodeInfo* info = nullptr; + int consume = 0; + int produce = 0; + if (!tree->isContained()) + { + if (tree->IsValue()) + { + locationInfo = listNodePool.GetNode(currentLoc, nullptr, tree); + currentNodeInfo = &locationInfo->info; + } + else + { + currentNodeInfo = &tempInfo; + } + info = currentNodeInfo; + info->Initialize(this, tree); + BuildNode(tree); + assert(info->IsValid(this)); + consume = info->srcCount; + produce = info->dstCount; +#ifdef DEBUG + if (VERBOSE) + { + printf(" +"); + info->dump(this); + tree->dumpLIRFlags(); + printf("\n"); + } +#endif // DEBUG + } + +#ifdef DEBUG + if (VERBOSE) + { + if (tree->isContained()) + { + JITDUMP("Contained\n"); + } + else if (tree->OperIs(GT_LCL_VAR, GT_LCL_FLD) && tree->IsUnusedValue()) + { + JITDUMP("Unused\n"); + } + else + { + JITDUMP(" consume=%d produce=%d\n", consume, produce); + } + } +#endif // DEBUG + + assert(((consume == 0) && (produce == 0)) || (ComputeAvailableSrcCount(tree) == consume)); + + if (tree->OperIs(GT_LCL_VAR, GT_LCL_FLD)) + { + LclVarDsc* const varDsc = &compiler->lvaTable[tree->AsLclVarCommon()->gtLclNum]; + if (isCandidateVar(varDsc)) + { + assert(consume == 0); + + // We handle tracked variables differently from non-tracked ones. If it is tracked, + // we simply add a use or def of the tracked variable. Otherwise, for a use we need + // to actually add the appropriate references for loading or storing the variable. + // + // It won't actually get used or defined until the appropriate ancestor tree node + // is processed, unless this is marked "isLocalDefUse" because it is a stack-based argument + // to a call + + assert(varDsc->lvTracked); + unsigned varIndex = varDsc->lvVarIndex; + + // We have only approximate last-use information at this point. This is because the + // execution order doesn't actually reflect the true order in which the localVars + // are referenced - but the order of the RefPositions will, so we recompute it after + // RefPositions are built. + // Use the old value for setting currentLiveVars - note that we do this with the + // not-quite-correct setting of lastUse. However, this is OK because + // 1) this is only for preferencing, which doesn't require strict correctness, and + // 2) the cases where these out-of-order uses occur should not overlap a kill. + // TODO-Throughput: clean this up once we have the execution order correct. At that point + // we can update currentLiveVars at the same place that we create the RefPosition. 
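+            // For example (hypothetical): a use of V01 may appear, in linear order,
+            // after the reference that carries GTF_VAR_DEATH; until the RefPositions
+            // are built, currentLiveVars then briefly under-approximates liveness,
+            // which is benign for the two reasons given above.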
+ if ((tree->gtFlags & GTF_VAR_DEATH) != 0) + { + VarSetOps::RemoveElemD(compiler, currentLiveVars, varIndex); + } + + if (!tree->IsUnusedValue() && !tree->isContained()) + { + assert(produce != 0); + + locationInfo->interval = getIntervalForLocalVar(varIndex); + defList.Append(locationInfo); + } + return; + } + } + if (tree->isContained()) + { + return; + } + + // Handle the case of local variable assignment + Interval* varDefInterval = nullptr; + + GenTree* defNode = tree; + + // noAdd means the node creates a def but for purposes of map + // management do not add it because data is not flowing up the + // tree + + bool noAdd = info->isLocalDefUse; + RefPosition* prevPos = nullptr; + + bool isSpecialPutArg = false; + + assert(!tree->OperIsAssignment()); + if (tree->OperIsLocalStore()) + { + GenTreeLclVarCommon* const store = tree->AsLclVarCommon(); + assert((consume > 1) || (regType(store->gtOp1->TypeGet()) == regType(store->TypeGet()))); + + LclVarDsc* varDsc = &compiler->lvaTable[store->gtLclNum]; + if (isCandidateVar(varDsc)) + { + // We always push the tracked lclVar intervals + assert(varDsc->lvTracked); + unsigned varIndex = varDsc->lvVarIndex; + varDefInterval = getIntervalForLocalVar(varIndex); + assert((store->gtFlags & GTF_VAR_DEF) != 0); + defNode = tree; + if (produce == 0) + { + produce = 1; + noAdd = true; + } + + assert(consume <= MAX_RET_REG_COUNT); + if (consume == 1) + { + // Get the location info for the register defined by the first operand. + LocationInfoListNode& operandInfo = *(useList.Begin()); + assert(operandInfo.treeNode == tree->gtGetOp1()); + + Interval* srcInterval = operandInfo.interval; + if (srcInterval->relatedInterval == nullptr) + { + // Preference the source to the dest, unless this is a non-last-use localVar. + // Note that the last-use info is not correct, but it is a better approximation than preferencing + // the source to the dest, if the source's lifetime extends beyond the dest. + if (!srcInterval->isLocalVar || (operandInfo.treeNode->gtFlags & GTF_VAR_DEATH) != 0) + { + srcInterval->assignRelatedInterval(varDefInterval); + } + } + else if (!srcInterval->isLocalVar) + { + // Preference the source to dest, if src is not a local var. + srcInterval->assignRelatedInterval(varDefInterval); + } + } + + if ((tree->gtFlags & GTF_VAR_DEATH) == 0) + { + VarSetOps::AddElemD(compiler, currentLiveVars, varIndex); + } + } + else if (store->gtOp1->OperIs(GT_BITCAST)) + { + store->gtType = store->gtOp1->gtType = store->gtOp1->AsUnOp()->gtOp1->TypeGet(); + + // Get the location info for the register defined by the first operand. + LocationInfoListNode& operandInfo = *(useList.Begin()); + assert(operandInfo.treeNode == tree->gtGetOp1()); + + Interval* srcInterval = operandInfo.interval; + srcInterval->registerType = regType(store->TypeGet()); + + RefPosition* srcDefPosition = srcInterval->firstRefPosition; + assert(srcDefPosition != nullptr); + assert(srcDefPosition->refType == RefTypeDef); + assert(srcDefPosition->treeNode == store->gtOp1); + + srcDefPosition->registerAssignment = allRegs(store->TypeGet()); + operandInfo.info.setSrcCandidates(this, allRegs(store->TypeGet())); + } + } + else if (noAdd && produce == 0) + { + // Dead nodes may remain after tree rationalization, decomposition or lowering. + // They should be marked as UnusedValue. + // TODO-Cleanup: Identify and remove these dead nodes prior to register allocation. 
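+        // The assert below is intentionally unsatisfiable on this path: reaching it
+        // means a dead node was neither removed nor marked as UnusedValue.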
+        assert(!noAdd || (produce != 0));
+    }
+
+    Interval* prefSrcInterval = nullptr;
+
+    // If this is a binary operator that will be encoded with 2 operand fields
+    // (i.e. the target is read-modify-write), preference the dst to op1.
+
+    bool hasDelayFreeSrc = info->hasDelayFreeSrc;
+
+#if defined(DEBUG) && defined(_TARGET_X86_)
+    // On x86, `LSRA_LIMIT_CALLER` is too restrictive to allow the use of special put args: this stress mode
+    // leaves only three registers allocatable--eax, ecx, and edx--of which the latter two are also used for the
+    // first two integral arguments to a call. This can leave us with too few registers to successfully allocate in
+    // situations like the following:
+    //
+    //     t1026 = lclVar ref V52 tmp35 u:3 REG NA <l:$3a1, c:$98d>
+    //
+    //             /--* t1026 ref
+    //     t1352 = * putarg_reg ref REG NA
+    //
+    //      t342 = lclVar int V14 loc6 u:4 REG NA $50c
+    //
+    //      t343 = const int 1 REG NA $41
+    //
+    //             /--* t342 int
+    //             +--* t343 int
+    //      t344 = * + int REG NA $495
+    //
+    //      t345 = lclVar int V04 arg4 u:2 REG NA $100
+    //
+    //             /--* t344 int
+    //             +--* t345 int
+    //      t346 = * % int REG NA $496
+    //
+    //             /--* t346 int
+    //     t1353 = * putarg_reg int REG NA
+    //
+    //     t1354 = lclVar ref V52 tmp35 (last use) REG NA
+    //
+    //             /--* t1354 ref
+    //     t1355 = * lea(b+0) byref REG NA
+    //
+    // Here, the first `putarg_reg` would normally be considered a special put arg, which would remove `ecx` from the
+    // set of allocatable registers, leaving only `eax` and `edx`. The allocator will then fail to allocate a register
+    // for the def of `t345` if arg4 is not a register candidate: the corresponding ref position will be constrained to
+    // { `ecx`, `ebx`, `esi`, `edi` }, which `LSRA_LIMIT_CALLER` will further constrain to `ecx`, which will not be
+    // available due to the special put arg.
+    const bool supportsSpecialPutArg = getStressLimitRegs() != LSRA_LIMIT_CALLER;
+#else
+    const bool supportsSpecialPutArg = true;
+#endif
+
+    if (supportsSpecialPutArg && tree->OperGet() == GT_PUTARG_REG && isCandidateLocalRef(tree->gtGetOp1()) &&
+        (tree->gtGetOp1()->gtFlags & GTF_VAR_DEATH) == 0)
+    {
+        // This is the case for a "pass-through" copy of a lclVar. In the case where it is a non-last-use,
+        // we don't want the def of the copy to kill the lclVar register, if it is assigned the same register
+        // (which is actually what we hope will happen).
+        JITDUMP("Setting putarg_reg as a pass-through of a non-last use lclVar\n");
+
+        // Get the register information for the first operand of the node.
+        LocationInfoListNode* operandDef = useList.Begin();
+        assert(operandDef->treeNode == tree->gtGetOp1());
+
+        // Preference the destination to the interval of the first register defined by the first operand.
+        Interval* srcInterval = operandDef->interval;
+        assert(srcInterval->isLocalVar);
+        prefSrcInterval = srcInterval;
+        isSpecialPutArg = true;
+    }
+
+    RefPosition* internalRefs[MaxInternalRegisters];
+
+#ifdef DEBUG
+    // If we are constraining the registers for allocation, we will modify all the RefPositions
+    // we've built for this node after we've created them. In order to do that, we'll remember
+    // the last RefPosition prior to those created for this node.
+    RefPositionIterator refPositionMark = refPositions.backPosition();
+#endif // DEBUG
+
+    // Make intervals for all the 'internal' register requirements for this node,
+    // where internal means additional registers required temporarily.
+    // Create a RefTypeDef RefPosition for each such interval.
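+    // internalRefs[] receives one RefTypeDef per internal register here; the
+    // matching RefTypeUse positions are added after the source uses, below.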
+ int internalCount = buildInternalRegisterDefsForNode(tree, info, internalRefs); + + // Make use RefPositions for all used values. + int consumed = 0; + for (LocationInfoListNode *listNode = useList.Begin(), *end = useList.End(); listNode != end; + listNode = listNode->Next()) + { + LocationInfo& locInfo = *static_cast<LocationInfo*>(listNode); + + // For tree temps, a use is always a last use and the end of the range; + // this is set by default in newRefPosition + GenTree* const useNode = locInfo.treeNode; + assert(useNode != nullptr); + + Interval* srcInterval = locInfo.interval; + TreeNodeInfo& useNodeInfo = locInfo.info; + if (useNodeInfo.isTgtPref) + { + prefSrcInterval = srcInterval; + } + + const bool delayRegFree = (hasDelayFreeSrc && useNodeInfo.isDelayFree); + + regMaskTP candidates = useNodeInfo.getSrcCandidates(this); +#ifdef _TARGET_ARM_ + regMaskTP allCandidates = candidates; + + if (useNode->OperIsPutArgSplit() || useNode->OperIsMultiRegOp()) + { + // get i-th candidate, set bits in useCandidates must be in sequential order. + candidates = genFindLowestReg(allCandidates); + allCandidates &= ~candidates; + } +#endif // _TARGET_ARM_ + + assert((candidates & allRegs(srcInterval->registerType)) != 0); + + // For non-localVar uses we record nothing, as nothing needs to be written back to the tree. + GenTree* const refPosNode = srcInterval->isLocalVar ? useNode : nullptr; + RefPosition* pos = newRefPosition(srcInterval, currentLoc, RefTypeUse, refPosNode, candidates, 0); + if (delayRegFree) + { + pos->delayRegFree = true; + } + + if (useNode->IsRegOptional()) + { + pos->setAllocateIfProfitable(true); + } + consumed++; + + // Create additional use RefPositions for multi-reg nodes. + for (int idx = 1; idx < locInfo.info.dstCount; idx++) + { + noway_assert(srcInterval->relatedInterval != nullptr); + srcInterval = srcInterval->relatedInterval; +#ifdef _TARGET_ARM_ + if (useNode->OperIsPutArgSplit() || + (compiler->opts.compUseSoftFP && (useNode->OperIsPutArgReg() || useNode->OperGet() == GT_BITCAST))) + { + // get first candidate, set bits in useCandidates must be in sequential order. + candidates = genFindLowestReg(allCandidates); + allCandidates &= ~candidates; + } +#endif // _TARGET_ARM_ + RefPosition* pos = newRefPosition(srcInterval, currentLoc, RefTypeUse, refPosNode, candidates, idx); + consumed++; + } + } + + assert(consumed == consume); + if (consume != 0) + { + listNodePool.ReturnNodes(useList); + } + + buildInternalRegisterUsesForNode(tree, info, internalRefs, internalCount); + + RegisterType registerType = getDefType(tree); + regMaskTP candidates = info->getDstCandidates(this); + regMaskTP useCandidates = info->getSrcCandidates(this); + +#ifdef DEBUG + if (VERBOSE && produce) + { + printf("Def candidates "); + dumpRegMask(candidates); + printf(", Use candidates "); + dumpRegMask(useCandidates); + printf("\n"); + } +#endif // DEBUG + +#if defined(_TARGET_AMD64_) + // Multi-reg call node is the only node that could produce multi-reg value + assert(produce <= 1 || (tree->IsMultiRegCall() && produce == MAX_RET_REG_COUNT)); +#endif // _TARGET_xxx_ + + // Add kill positions before adding def positions + buildKillPositionsForNode(tree, currentLoc + 1); + +#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE + VARSET_TP liveLargeVectors(VarSetOps::UninitVal()); + if (enregisterLocalVars && (RBM_FLT_CALLEE_SAVED != RBM_NONE)) + { + // Build RefPositions for saving any live large vectors. + // This must be done after the kills, so that we know which large vectors are still live. 
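+        // The uses above were built at currentLoc, and the kills at currentLoc + 1;
+        // building the saves at currentLoc + 1 as well means only vectors that are
+        // still live across this node get an upper-half save.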
+ VarSetOps::AssignNoCopy(compiler, liveLargeVectors, buildUpperVectorSaveRefPositions(tree, currentLoc + 1)); + } +#endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE + + ReturnTypeDesc* retTypeDesc = nullptr; + bool isMultiRegCall = tree->IsMultiRegCall(); + if (isMultiRegCall) + { + retTypeDesc = tree->AsCall()->GetReturnTypeDesc(); + assert((int)genCountBits(candidates) == produce); + assert(candidates == retTypeDesc->GetABIReturnRegs()); + } + + // push defs + LocationInfoList locationInfoList; + LsraLocation defLocation = currentLoc + 1; + Interval* interval = varDefInterval; + // For nodes that define multiple registers, subsequent intervals will be linked using the 'relatedInterval' field. + // Keep track of the previous interval allocated, for that purpose. + Interval* prevInterval = nullptr; + for (int i = 0; i < produce; i++) + { + regMaskTP currCandidates = candidates; + + // In case of multi-reg call node, registerType is given by + // the type of ith position return register. + if (isMultiRegCall) + { + registerType = retTypeDesc->GetReturnRegType((unsigned)i); + currCandidates = genRegMask(retTypeDesc->GetABIReturnReg(i)); + useCandidates = allRegs(registerType); + } + +#ifdef _TARGET_ARM_ + // If oper is GT_PUTARG_REG, set bits in useCandidates must be in sequential order. + if (tree->OperIsPutArgSplit() || tree->OperIsMultiRegOp()) + { + // get i-th candidate + currCandidates = genFindLowestReg(candidates); + candidates &= ~currCandidates; + } +#endif // _TARGET_ARM_ + + if (interval == nullptr) + { + // Make a new interval + interval = newInterval(registerType); + if (hasDelayFreeSrc || info->isInternalRegDelayFree) + { + interval->hasInterferingUses = true; + } + else if (tree->OperIsConst()) + { + assert(!tree->IsReuseRegVal()); + interval->isConstant = true; + } + + if ((currCandidates & useCandidates) != RBM_NONE) + { + interval->updateRegisterPreferences(currCandidates & useCandidates); + } + + if (isSpecialPutArg) + { + interval->isSpecialPutArg = true; + } + } + else + { + assert(registerTypesEquivalent(interval->registerType, registerType)); + } + + if (prefSrcInterval != nullptr) + { + interval->assignRelatedIntervalIfUnassigned(prefSrcInterval); + } + + // for assignments, we want to create a refposition for the def + // but not push it + if (!noAdd) + { + if (i == 0) + { + locationInfo->interval = interval; + prevInterval = interval; + defList.Append(locationInfo); + } + else + { + // This is the 2nd or subsequent register defined by a multi-reg node. + // Connect them using 'relatedInterval'. + noway_assert(prevInterval != nullptr); + prevInterval->relatedInterval = interval; + prevInterval = interval; + prevInterval->isMultiReg = true; + interval->isMultiReg = true; + } + } + + RefPosition* pos = newRefPosition(interval, defLocation, RefTypeDef, defNode, currCandidates, (unsigned)i); + if (info->isLocalDefUse) + { + // This must be an unused value, OR it is a special node for which we allocate + // a target register even though it produces no value. + assert(defNode->IsUnusedValue() || (defNode->gtOper == GT_LOCKADD)); + pos->isLocalDefUse = true; + pos->lastUse = true; + } + interval->updateRegisterPreferences(currCandidates); + interval->updateRegisterPreferences(useCandidates); + interval = nullptr; + } + +#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE + // SaveDef position must be at the same location as Def position of call node. 
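+    // Hence the restores below are built at defLocation (currentLoc + 1) rather
+    // than at currentLoc.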
+ if (enregisterLocalVars) + { + buildUpperVectorRestoreRefPositions(tree, defLocation, liveLargeVectors); + } +#endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE + +#ifdef DEBUG + // If we are constraining registers, modify all the RefPositions we've just built to specify the + // minimum reg count required. + if ((getStressLimitRegs() != LSRA_LIMIT_NONE) || (getSelectionHeuristics() != LSRA_SELECT_DEFAULT)) + { + // The number of registers required for a tree node is the sum of + // consume + produce + internalCount. This is the minimum + // set of registers that needs to be ensured in candidate + // set of ref positions created. + unsigned minRegCount = consume + produce + info->internalIntCount + info->internalFloatCount; + + for (refPositionMark++; refPositionMark != refPositions.end(); refPositionMark++) + { + RefPosition* newRefPosition = &(*refPositionMark); + unsigned minRegCountForRef = minRegCount; + if (RefTypeIsUse(newRefPosition->refType) && newRefPosition->delayRegFree) + { + // If delayRegFree, then Use will interfere with the destination of the consuming node. + // Therefore, we also need add the kill set of the consuming node to minRegCount. + // + // For example consider the following IR on x86, where v01 and v02 + // are method args coming in ecx and edx respectively. + // GT_DIV(v01, v02) + // + // For GT_DIV, the minRegCount will be 3 without adding kill set of GT_DIV node. + // + // Assume further JitStressRegs=2, which would constrain candidates to callee trashable + // regs { eax, ecx, edx } on use positions of v01 and v02. LSRA allocates ecx for v01. + // The use position of v02 cannot be allocated a reg since it is marked delay-reg free and + // {eax,edx} are getting killed before the def of GT_DIV. For this reason, minRegCount for + // the use position of v02 also needs to take into account the kill set of its consuming node. + regMaskTP killMask = getKillSetForNode(tree); + if (killMask != RBM_NONE) + { + minRegCountForRef += genCountBits(killMask); + } + } + newRefPosition->minRegCandidateCount = minRegCountForRef; + if (newRefPosition->IsActualRef() && doReverseCallerCallee()) + { + Interval* interval = newRefPosition->getInterval(); + regMaskTP oldAssignment = newRefPosition->registerAssignment; + regMaskTP calleeSaveMask = calleeSaveRegs(interval->registerType); + newRefPosition->registerAssignment = + getConstrainedRegMask(oldAssignment, calleeSaveMask, minRegCountForRef); + if ((newRefPosition->registerAssignment != oldAssignment) && (newRefPosition->refType == RefTypeUse) && + !interval->isLocalVar) + { + checkConflictingDefUse(newRefPosition); + } + } + } + } +#endif // DEBUG + JITDUMP("\n"); +} + +//------------------------------------------------------------------------ +// buildPhysRegRecords: Make an interval for each physical register +// +void LinearScan::buildPhysRegRecords() +{ + RegisterType regType = IntRegisterType; + for (regNumber reg = REG_FIRST; reg < ACTUAL_REG_COUNT; reg = REG_NEXT(reg)) + { + RegRecord* curr = &physRegs[reg]; + curr->init(reg); + } +} + +//------------------------------------------------------------------------ +// getNonEmptyBlock: Return the first non-empty block starting with 'block' +// +// Arguments: +// block - the BasicBlock from which we start looking +// +// Return Value: +// The first non-empty BasicBlock we find. 
+// +BasicBlock* getNonEmptyBlock(BasicBlock* block) +{ + while (block != nullptr && block->bbTreeList == nullptr) + { + BasicBlock* nextBlock = block->bbNext; + // Note that here we use the version of NumSucc that does not take a compiler. + // That way this doesn't have to take a compiler, or be an instance method, e.g. of LinearScan. + // If we have an empty block, it must have jump type BBJ_NONE or BBJ_ALWAYS, in which + // case we don't need the version that takes a compiler. + assert(block->NumSucc() == 1 && ((block->bbJumpKind == BBJ_ALWAYS) || (block->bbJumpKind == BBJ_NONE))); + // sometimes the first block is empty and ends with an uncond branch + // assert( block->GetSucc(0) == nextBlock); + block = nextBlock; + } + assert(block != nullptr && block->bbTreeList != nullptr); + return block; +} + +//------------------------------------------------------------------------ +// insertZeroInitRefPositions: Handle lclVars that are live-in to the first block +// +// Notes: +// Prior to calling this method, 'currentLiveVars' must be set to the set of register +// candidate variables that are liveIn to the first block. +// For each register candidate that is live-in to the first block: +// - If it is a GC ref, or if compInitMem is set, a ZeroInit RefPosition will be created. +// - Otherwise, it will be marked as spilled, since it will not be assigned a register +// on entry and will be loaded from memory on the undefined path. +// Note that, when the compInitMem option is not set, we may encounter these on +// paths that are protected by the same condition as an earlier def. However, since +// we don't do the analysis to determine this - and couldn't rely on always identifying +// such cases even if we tried - we must conservatively treat the undefined path as +// being possible. This is a relatively rare case, so the introduced conservatism is +// not expected to warrant the analysis required to determine the best placement of +// an initialization. +// +void LinearScan::insertZeroInitRefPositions() +{ + assert(enregisterLocalVars); +#ifdef DEBUG + VARSET_TP expectedLiveVars(VarSetOps::Intersection(compiler, registerCandidateVars, compiler->fgFirstBB->bbLiveIn)); + assert(VarSetOps::Equal(compiler, currentLiveVars, expectedLiveVars)); +#endif // DEBUG + + // insert defs for this, then a block boundary + + VarSetOps::Iter iter(compiler, currentLiveVars); + unsigned varIndex = 0; + while (iter.NextElem(&varIndex)) + { + unsigned varNum = compiler->lvaTrackedToVarNum[varIndex]; + LclVarDsc* varDsc = compiler->lvaTable + varNum; + if (!varDsc->lvIsParam && isCandidateVar(varDsc)) + { + JITDUMP("V%02u was live in to first block:", varNum); + Interval* interval = getIntervalForLocalVar(varIndex); + if (compiler->info.compInitMem || varTypeIsGC(varDsc->TypeGet())) + { + JITDUMP(" creating ZeroInit\n"); + GenTree* firstNode = getNonEmptyBlock(compiler->fgFirstBB)->firstNode(); + RefPosition* pos = + newRefPosition(interval, MinLocation, RefTypeZeroInit, firstNode, allRegs(interval->registerType)); + varDsc->lvMustInit = true; + } + else + { + setIntervalAsSpilled(interval); + JITDUMP(" marking as spilled\n"); + } + } + } +} + +#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) +//------------------------------------------------------------------------ +// unixAmd64UpdateRegStateForArg: Sets the register state for an argument of type STRUCT for System V systems. 
+// +// Arguments: +// argDsc - the LclVarDsc for the argument of interest +// +// Notes: +// See Compiler::raUpdateRegStateForArg(RegState *regState, LclVarDsc *argDsc) in regalloc.cpp +// for how state for argument is updated for unix non-structs and Windows AMD64 structs. +// +void LinearScan::unixAmd64UpdateRegStateForArg(LclVarDsc* argDsc) +{ + assert(varTypeIsStruct(argDsc)); + RegState* intRegState = &compiler->codeGen->intRegState; + RegState* floatRegState = &compiler->codeGen->floatRegState; + + if ((argDsc->lvArgReg != REG_STK) && (argDsc->lvArgReg != REG_NA)) + { + if (genRegMask(argDsc->lvArgReg) & (RBM_ALLFLOAT)) + { + assert(genRegMask(argDsc->lvArgReg) & (RBM_FLTARG_REGS)); + floatRegState->rsCalleeRegArgMaskLiveIn |= genRegMask(argDsc->lvArgReg); + } + else + { + assert(genRegMask(argDsc->lvArgReg) & (RBM_ARG_REGS)); + intRegState->rsCalleeRegArgMaskLiveIn |= genRegMask(argDsc->lvArgReg); + } + } + + if ((argDsc->lvOtherArgReg != REG_STK) && (argDsc->lvOtherArgReg != REG_NA)) + { + if (genRegMask(argDsc->lvOtherArgReg) & (RBM_ALLFLOAT)) + { + assert(genRegMask(argDsc->lvOtherArgReg) & (RBM_FLTARG_REGS)); + floatRegState->rsCalleeRegArgMaskLiveIn |= genRegMask(argDsc->lvOtherArgReg); + } + else + { + assert(genRegMask(argDsc->lvOtherArgReg) & (RBM_ARG_REGS)); + intRegState->rsCalleeRegArgMaskLiveIn |= genRegMask(argDsc->lvOtherArgReg); + } + } +} + +#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + +//------------------------------------------------------------------------ +// updateRegStateForArg: Updates rsCalleeRegArgMaskLiveIn for the appropriate +// regState (either compiler->intRegState or compiler->floatRegState), +// with the lvArgReg on "argDsc" +// +// Arguments: +// argDsc - the argument for which the state is to be updated. +// +// Return Value: None +// +// Assumptions: +// The argument is live on entry to the function +// (or is untracked and therefore assumed live) +// +// Notes: +// This relies on a method in regAlloc.cpp that is shared between LSRA +// and regAlloc. It is further abstracted here because regState is updated +// separately for tracked and untracked variables in LSRA. +// +void LinearScan::updateRegStateForArg(LclVarDsc* argDsc) +{ +#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + // For System V AMD64 calls the argDsc can have 2 registers (for structs.) + // Handle them here. + if (varTypeIsStruct(argDsc)) + { + unixAmd64UpdateRegStateForArg(argDsc); + } + else +#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + { + RegState* intRegState = &compiler->codeGen->intRegState; + RegState* floatRegState = &compiler->codeGen->floatRegState; + // In the case of AMD64 we'll still use the floating point registers + // to model the register usage for argument on vararg calls, so + // we will ignore the varargs condition to determine whether we use + // XMM registers or not for setting up the call. 
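+        // For example, on Windows x64 a double received in the second argument slot
+        // of a varargs method is passed in both XMM1 and RDX, which is why the
+        // compIsVarArgs test is skipped on AMD64 below.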
+ bool isFloat = (isFloatRegType(argDsc->lvType) +#ifndef _TARGET_AMD64_ + && !compiler->info.compIsVarArgs +#endif + && !compiler->opts.compUseSoftFP); + + if (argDsc->lvIsHfaRegArg()) + { + isFloat = true; + } + + if (isFloat) + { + JITDUMP("Float arg V%02u in reg %s\n", (argDsc - compiler->lvaTable), getRegName(argDsc->lvArgReg)); + compiler->raUpdateRegStateForArg(floatRegState, argDsc); + } + else + { + JITDUMP("Int arg V%02u in reg %s\n", (argDsc - compiler->lvaTable), getRegName(argDsc->lvArgReg)); +#if FEATURE_MULTIREG_ARGS + if (argDsc->lvOtherArgReg != REG_NA) + { + JITDUMP("(second half) in reg %s\n", getRegName(argDsc->lvOtherArgReg)); + } +#endif // FEATURE_MULTIREG_ARGS + compiler->raUpdateRegStateForArg(intRegState, argDsc); + } + } +} + +//------------------------------------------------------------------------ +// buildIntervals: The main entry point for building the data structures over +// which we will do register allocation. +// +void LinearScan::buildIntervals() +{ + BasicBlock* block; + + JITDUMP("\nbuildIntervals ========\n"); + + // Build (empty) records for all of the physical registers + buildPhysRegRecords(); + +#ifdef DEBUG + if (VERBOSE) + { + printf("\n-----------------\n"); + printf("LIVENESS:\n"); + printf("-----------------\n"); + foreach_block(compiler, block) + { + printf("BB%02u use def in out\n", block->bbNum); + dumpConvertedVarSet(compiler, block->bbVarUse); + printf("\n"); + dumpConvertedVarSet(compiler, block->bbVarDef); + printf("\n"); + dumpConvertedVarSet(compiler, block->bbLiveIn); + printf("\n"); + dumpConvertedVarSet(compiler, block->bbLiveOut); + printf("\n"); + } + } +#endif // DEBUG + +#if DOUBLE_ALIGN + // We will determine whether we should double align the frame during + // identifyCandidates(), but we initially assume that we will not. + doDoubleAlign = false; +#endif + + identifyCandidates(); + + // Figure out if we're going to use a frame pointer. We need to do this before building + // the ref positions, because those objects will embed the frame register in various register masks + // if the frame pointer is not reserved. If we decide to have a frame pointer, setFrameType() will + // remove the frame pointer from the masks. + setFrameType(); + + DBEXEC(VERBOSE, TupleStyleDump(LSRA_DUMP_PRE)); + + // second part: + JITDUMP("\nbuildIntervals second part ========\n"); + currentLoc = 0; + // TODO-Cleanup: This duplicates prior behavior where entry (ParamDef) RefPositions were + // being assigned the bbNum of the last block traversed in the 2nd phase of Lowering. + // Previously, the block sequencing was done for the (formerly separate) Build pass, + // and the curBBNum was left as the last block sequenced. This block was then used to set the + // weight for the entry (ParamDef) RefPositions. It would be logical to set this to the + // normalized entry weight (compiler->fgCalledCount), but that results in a net regression. + if (!blockSequencingDone) + { + setBlockSequence(); + } + curBBNum = blockSequence[bbSeqCount - 1]->bbNum; + + // Next, create ParamDef RefPositions for all the tracked parameters, in order of their varIndex. + // Assign these RefPositions to the (nonexistent) BB0. 
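+    // bbNum 0 cannot collide with a real block, since BasicBlock numbering starts
+    // at 1; the entry RefPositions are thereby kept distinct from all block data.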
+ curBBNum = 0; + + LclVarDsc* argDsc; + unsigned int lclNum; + + RegState* intRegState = &compiler->codeGen->intRegState; + RegState* floatRegState = &compiler->codeGen->floatRegState; + intRegState->rsCalleeRegArgMaskLiveIn = RBM_NONE; + floatRegState->rsCalleeRegArgMaskLiveIn = RBM_NONE; + + for (unsigned int varIndex = 0; varIndex < compiler->lvaTrackedCount; varIndex++) + { + lclNum = compiler->lvaTrackedToVarNum[varIndex]; + argDsc = &(compiler->lvaTable[lclNum]); + + if (!argDsc->lvIsParam) + { + continue; + } + + // Only reserve a register if the argument is actually used. + // Is it dead on entry? If compJmpOpUsed is true, then the arguments + // have to be kept alive, so we have to consider it as live on entry. + // Use lvRefCnt instead of checking bbLiveIn because if it's volatile we + // won't have done dataflow on it, but it needs to be marked as live-in so + // it will get saved in the prolog. + if (!compiler->compJmpOpUsed && argDsc->lvRefCnt == 0 && !compiler->opts.compDbgCode) + { + continue; + } + + if (argDsc->lvIsRegArg) + { + updateRegStateForArg(argDsc); + } + + if (isCandidateVar(argDsc)) + { + Interval* interval = getIntervalForLocalVar(varIndex); + regMaskTP mask = allRegs(TypeGet(argDsc)); + if (argDsc->lvIsRegArg) + { + // Set this interval as currently assigned to that register + regNumber inArgReg = argDsc->lvArgReg; + assert(inArgReg < REG_COUNT); + mask = genRegMask(inArgReg); + assignPhysReg(inArgReg, interval); + } + RefPosition* pos = newRefPosition(interval, MinLocation, RefTypeParamDef, nullptr, mask); + } + else if (varTypeIsStruct(argDsc->lvType)) + { + for (unsigned fieldVarNum = argDsc->lvFieldLclStart; + fieldVarNum < argDsc->lvFieldLclStart + argDsc->lvFieldCnt; ++fieldVarNum) + { + LclVarDsc* fieldVarDsc = &(compiler->lvaTable[fieldVarNum]); + if (fieldVarDsc->lvLRACandidate) + { + assert(fieldVarDsc->lvTracked); + Interval* interval = getIntervalForLocalVar(fieldVarDsc->lvVarIndex); + RefPosition* pos = + newRefPosition(interval, MinLocation, RefTypeParamDef, nullptr, allRegs(TypeGet(fieldVarDsc))); + } + } + } + else + { + // We can overwrite the register (i.e. codegen saves it on entry) + assert(argDsc->lvRefCnt == 0 || !argDsc->lvIsRegArg || argDsc->lvDoNotEnregister || + !argDsc->lvLRACandidate || (varTypeIsFloating(argDsc->TypeGet()) && compiler->opts.compDbgCode)); + } + } + + // Now set up the reg state for the non-tracked args + // (We do this here because we want to generate the ParamDef RefPositions in tracked + // order, so that loop doesn't hit the non-tracked args) + + for (unsigned argNum = 0; argNum < compiler->info.compArgsCount; argNum++, argDsc++) + { + argDsc = &(compiler->lvaTable[argNum]); + + if (argDsc->lvPromotedStruct()) + { + noway_assert(argDsc->lvFieldCnt == 1); // We only handle one field here + + unsigned fieldVarNum = argDsc->lvFieldLclStart; + argDsc = &(compiler->lvaTable[fieldVarNum]); + } + noway_assert(argDsc->lvIsParam); + if (!argDsc->lvTracked && argDsc->lvIsRegArg) + { + updateRegStateForArg(argDsc); + } + } + + // If there is a secret stub param, it is also live in + if (compiler->info.compPublishStubParam) + { + intRegState->rsCalleeRegArgMaskLiveIn |= RBM_SECRET_STUB_PARAM; + } + + BasicBlock* predBlock = nullptr; + BasicBlock* prevBlock = nullptr; + + // Initialize currentLiveVars to the empty set. We will set it to the current + // live-in at the entry to each block (this will include the incoming args on + // the first block). 
+    VarSetOps::AssignNoCopy(compiler, currentLiveVars, VarSetOps::MakeEmpty(compiler));
+
+    for (block = startBlockSequence(); block != nullptr; block = moveToNextBlock())
+    {
+        JITDUMP("\nNEW BLOCK BB%02u\n", block->bbNum);
+
+        bool predBlockIsAllocated = false;
+        predBlock                 = findPredBlockForLiveIn(block, prevBlock DEBUGARG(&predBlockIsAllocated));
+        if (predBlock)
+        {
+            JITDUMP("\n\nSetting BB%02u as the predecessor for determining incoming variable registers of BB%02u\n",
+                    block->bbNum, predBlock->bbNum);
+            assert(predBlock->bbNum <= bbNumMaxBeforeResolution);
+            blockInfo[block->bbNum].predBBNum = predBlock->bbNum;
+        }
+
+        if (enregisterLocalVars)
+        {
+            VarSetOps::AssignNoCopy(compiler, currentLiveVars,
+                                    VarSetOps::Intersection(compiler, registerCandidateVars, block->bbLiveIn));
+
+            if (block == compiler->fgFirstBB)
+            {
+                insertZeroInitRefPositions();
+                // The first real location is at 1; 0 is for the entry.
+                currentLoc = 1;
+            }
+
+            // Any lclVars live-in to a block are resolution candidates.
+            VarSetOps::UnionD(compiler, resolutionCandidateVars, currentLiveVars);
+
+            // Determine if we need any DummyDefs.
+            // We need DummyDefs for cases where "predBlock" isn't really a predecessor.
+            // Note that it's possible to have uses of uninitialized variables, in which case even the first
+            // block may require DummyDefs, which we are not currently adding - this means that these variables
+            // will always be considered to be in memory on entry (and reloaded when the use is encountered).
+            // TODO-CQ: Consider how best to tune this. Currently, if we create DummyDefs for uninitialized
+            // variables (which may actually be initialized along the dynamically executed paths, but not
+            // on all static paths), we wind up with excessive liveranges for some of these variables.
+            VARSET_TP newLiveIn(VarSetOps::MakeCopy(compiler, currentLiveVars));
+            if (predBlock)
+            {
+                // Compute set difference: newLiveIn = currentLiveVars - predBlock->bbLiveOut
+                VarSetOps::DiffD(compiler, newLiveIn, predBlock->bbLiveOut);
+            }
+            bool needsDummyDefs = (!VarSetOps::IsEmpty(compiler, newLiveIn) && block != compiler->fgFirstBB);
+
+            // Create dummy def RefPositions
+
+            if (needsDummyDefs)
+            {
+                // If we are using locations from a predecessor, we should never require DummyDefs.
+                assert(!predBlockIsAllocated);
+
+                JITDUMP("Creating dummy definitions\n");
+                VarSetOps::Iter iter(compiler, newLiveIn);
+                unsigned        varIndex = 0;
+                while (iter.NextElem(&varIndex))
+                {
+                    unsigned   varNum = compiler->lvaTrackedToVarNum[varIndex];
+                    LclVarDsc* varDsc = compiler->lvaTable + varNum;
+                    // Add a dummyDef for any candidate vars that are in the "newLiveIn" set.
+                    // If this is the entry block, don't add any incoming parameters (they're handled with ParamDefs).
+                    if (isCandidateVar(varDsc) && (predBlock != nullptr || !varDsc->lvIsParam))
+                    {
+                        Interval*    interval = getIntervalForLocalVar(varIndex);
+                        RefPosition* pos      = newRefPosition(interval, currentLoc, RefTypeDummyDef, nullptr,
+                                                          allRegs(interval->registerType));
+                    }
+                }
+                JITDUMP("Finished creating dummy definitions\n\n");
+            }
+        }
+
+        // Add a dummy RefPosition to mark the block boundary.
+        // Note that we do this AFTER adding the exposed uses above, because the
+        // register positions for those exposed uses need to be recorded at
+        // this point.
+
+        RefPosition* pos = newRefPosition((Interval*)nullptr, currentLoc, RefTypeBB, nullptr, RBM_NONE);
+        currentLoc += 2;
+        JITDUMP("\n");
+
+        LIR::Range& blockRange = LIR::AsRange(block);
+        for (GenTree* node : blockRange.NonPhiNodes())
+        {
+            // We increment the number position of each tree node by 2 to simplify the logic when there's the case of
+            // a tree that implicitly does a dual-definition of temps (the long case). In this case it is easier to
+            // already have an idle spot to handle a dual-def instead of making some messy adjustments if we only
+            // increment the number position by one.
+            CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef DEBUG
+            node->gtSeqNum = currentLoc;
+            // In DEBUG, we want to set the gtRegTag to GT_REGTAG_REG, so that subsequent dumps will show the register
+            // value.
+            // Although this looks like a no-op it sets the tag.
+            node->gtRegNum = node->gtRegNum;
+#endif
+
+            buildRefPositionsForNode(node, block, currentLoc);
+
+#ifdef DEBUG
+            if (currentLoc > maxNodeLocation)
+            {
+                maxNodeLocation = currentLoc;
+            }
+#endif // DEBUG
+            currentLoc += 2;
+        }
+
+        // Note: the visited set is cleared in LinearScan::doLinearScan()
+        markBlockVisited(block);
+        assert(defList.IsEmpty());
+
+        if (enregisterLocalVars)
+        {
+            // Insert exposed uses for a lclVar that is live-out of 'block' but not live-in to the
+            // next block, or any unvisited successors.
+            // This will address lclVars that are live on a backedge, as well as those that are kept
+            // live at a GT_JMP.
+            //
+            // Blocks ending with "jmp method" are marked as BBJ_HAS_JMP,
+            // and jmp call is represented using GT_JMP node which is a leaf node.
+            // Liveness phase keeps all the arguments of the method live till the end of
+            // block by adding them to liveout set of the block containing GT_JMP.
+            //
+            // The target of a GT_JMP implicitly uses all the current method arguments, however
+            // there are no actual references to them. This can cause LSRA to assert, because
+            // the variables are live but it sees no references. In order to correctly model the
+            // liveness of these arguments, we add dummy exposed uses, in the same manner as for
+            // backward branches. This will happen automatically via expUseSet.
+            //
+            // Note that a block ending with GT_JMP has no successors and hence the variables
+            // for which dummy use ref positions are added are arguments of the method.
+
+            VARSET_TP expUseSet(VarSetOps::MakeCopy(compiler, block->bbLiveOut));
+            VarSetOps::IntersectionD(compiler, expUseSet, registerCandidateVars);
+            BasicBlock* nextBlock = getNextBlock();
+            if (nextBlock != nullptr)
+            {
+                VarSetOps::DiffD(compiler, expUseSet, nextBlock->bbLiveIn);
+            }
+            for (BasicBlock* succ : block->GetAllSuccs(compiler))
+            {
+                if (VarSetOps::IsEmpty(compiler, expUseSet))
+                {
+                    break;
+                }
+
+                if (isBlockVisited(succ))
+                {
+                    continue;
+                }
+                VarSetOps::DiffD(compiler, expUseSet, succ->bbLiveIn);
+            }
+
+            if (!VarSetOps::IsEmpty(compiler, expUseSet))
+            {
+                JITDUMP("Exposed uses:");
+                VarSetOps::Iter iter(compiler, expUseSet);
+                unsigned        varIndex = 0;
+                while (iter.NextElem(&varIndex))
+                {
+                    unsigned   varNum = compiler->lvaTrackedToVarNum[varIndex];
+                    LclVarDsc* varDsc = compiler->lvaTable + varNum;
+                    assert(isCandidateVar(varDsc));
+                    Interval*    interval = getIntervalForLocalVar(varIndex);
+                    RefPosition* pos =
+                        newRefPosition(interval, currentLoc, RefTypeExpUse, nullptr, allRegs(interval->registerType));
+                    JITDUMP(" V%02u", varNum);
+                }
+                JITDUMP("\n");
+            }
+
+            // Clear the "last use" flag on any vars that are live-out from this block.
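+            // A var that is live-out of this block cannot have its last use here;
+            // the flag was set from the (approximate) GTF_VAR_DEATH information and
+            // is corrected now.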
+ { + VarSetOps::Iter iter(compiler, block->bbLiveOut); + unsigned varIndex = 0; + while (iter.NextElem(&varIndex)) + { + unsigned varNum = compiler->lvaTrackedToVarNum[varIndex]; + LclVarDsc* const varDsc = &compiler->lvaTable[varNum]; + if (isCandidateVar(varDsc)) + { + RefPosition* const lastRP = getIntervalForLocalVar(varIndex)->lastRefPosition; + if ((lastRP != nullptr) && (lastRP->bbNum == block->bbNum)) + { + lastRP->lastUse = false; + } + } + } + } + +#ifdef DEBUG + checkLastUses(block); + + if (VERBOSE) + { + printf("use: "); + dumpConvertedVarSet(compiler, block->bbVarUse); + printf("\ndef: "); + dumpConvertedVarSet(compiler, block->bbVarDef); + printf("\n"); + } +#endif // DEBUG + } + + prevBlock = block; + } + + if (enregisterLocalVars) + { + if (compiler->lvaKeepAliveAndReportThis()) + { + // If we need to KeepAliveAndReportThis, add a dummy exposed use of it at the end + unsigned keepAliveVarNum = compiler->info.compThisArg; + assert(compiler->info.compIsStatic == false); + LclVarDsc* varDsc = compiler->lvaTable + keepAliveVarNum; + if (isCandidateVar(varDsc)) + { + JITDUMP("Adding exposed use of this, for lvaKeepAliveAndReportThis\n"); + Interval* interval = getIntervalForLocalVar(varDsc->lvVarIndex); + RefPosition* pos = + newRefPosition(interval, currentLoc, RefTypeExpUse, nullptr, allRegs(interval->registerType)); + } + } + +#ifdef DEBUG + if (getLsraExtendLifeTimes()) + { + LclVarDsc* varDsc; + for (lclNum = 0, varDsc = compiler->lvaTable; lclNum < compiler->lvaCount; lclNum++, varDsc++) + { + if (varDsc->lvLRACandidate) + { + JITDUMP("Adding exposed use of V%02u for LsraExtendLifetimes\n", lclNum); + Interval* interval = getIntervalForLocalVar(varDsc->lvVarIndex); + RefPosition* pos = + newRefPosition(interval, currentLoc, RefTypeExpUse, nullptr, allRegs(interval->registerType)); + } + } + } +#endif // DEBUG + } + + // If the last block has successors, create a RefTypeBB to record + // what's live + + if (prevBlock->NumSucc(compiler) > 0) + { + RefPosition* pos = newRefPosition((Interval*)nullptr, currentLoc, RefTypeBB, nullptr, RBM_NONE); + } + +#ifdef DEBUG + // Make sure we don't have any blocks that were not visited + foreach_block(compiler, block) + { + assert(isBlockVisited(block)); + } + + if (VERBOSE) + { + lsraDumpIntervals("BEFORE VALIDATING INTERVALS"); + dumpRefPositions("BEFORE VALIDATING INTERVALS"); + validateIntervals(); + } +#endif // DEBUG +} + +#ifdef DEBUG +//------------------------------------------------------------------------ +// validateIntervals: A DEBUG-only method that checks that the lclVar RefPositions +// do not reflect uses of undefined values +// +// Notes: If an undefined use is encountered, it merely prints a message. +// +// TODO-Cleanup: This should probably assert, or at least print the message only +// when doing a JITDUMP. 
+// +void LinearScan::validateIntervals() +{ + if (enregisterLocalVars) + { + for (unsigned i = 0; i < compiler->lvaTrackedCount; i++) + { + if (!compiler->lvaTable[compiler->lvaTrackedToVarNum[i]].lvLRACandidate) + { + continue; + } + Interval* interval = getIntervalForLocalVar(i); + + bool defined = false; + printf("-----------------\n"); + for (RefPosition* ref = interval->firstRefPosition; ref != nullptr; ref = ref->nextRefPosition) + { + ref->dump(); + RefType refType = ref->refType; + if (!defined && RefTypeIsUse(refType)) + { + if (compiler->info.compMethodName != nullptr) + { + printf("%s: ", compiler->info.compMethodName); + } + printf("LocalVar V%02u: undefined use at %u\n", interval->varNum, ref->nodeLocation); + } + // Note that there can be multiple last uses if they are on disjoint paths, + // so we can't really check the lastUse flag + if (ref->lastUse) + { + defined = false; + } + if (RefTypeIsDef(refType)) + { + defined = true; + } + } + } + } +} +#endif // DEBUG + +//------------------------------------------------------------------------ +// GetIndirInfo: Get the source registers for an indirection that might be contained. +// +// Arguments: +// node - The node of interest +// +// Return Value: +// The number of source registers used by the *parent* of this node. +// +// Notes: +// Adds the defining node for each register to the useList. +// +int LinearScan::GetIndirInfo(GenTreeIndir* indirTree) +{ + GenTree* const addr = indirTree->gtOp1; + if (!addr->isContained()) + { + appendLocationInfoToList(addr); + return 1; + } + if (!addr->OperIs(GT_LEA)) + { + return 0; + } + + GenTreeAddrMode* const addrMode = addr->AsAddrMode(); + + unsigned srcCount = 0; + if ((addrMode->Base() != nullptr) && !addrMode->Base()->isContained()) + { + appendLocationInfoToList(addrMode->Base()); + srcCount++; + } + if ((addrMode->Index() != nullptr) && !addrMode->Index()->isContained()) + { + appendLocationInfoToList(addrMode->Index()); + srcCount++; + } + return srcCount; +} + +//------------------------------------------------------------------------ +// GetOperandInfo: Get the source registers for an operand that might be contained. +// +// Arguments: +// node - The node of interest +// useList - The list of uses for the node that we're currently processing +// +// Return Value: +// The number of source registers used by the *parent* of this node. +// +// Notes: +// Adds the defining node for each register to the given useList. +// +int LinearScan::GetOperandInfo(GenTree* node) +{ + if (!node->isContained()) + { + appendLocationInfoToList(node); + return 1; + } + +#if !defined(_TARGET_64BIT_) + if (node->OperIs(GT_LONG)) + { + return appendBinaryLocationInfoToList(node->AsOp()); + } +#endif // !defined(_TARGET_64BIT_) + if (node->OperIsIndir()) + { + const unsigned srcCount = GetIndirInfo(node->AsIndir()); + return srcCount; + } + + return 0; +} + +//------------------------------------------------------------------------ +// GetOperandInfo: Get the source registers for an operand that might be contained. +// +// Arguments: +// node - The node of interest +// useList - The list of uses for the node that we're currently processing +// +// Return Value: +// The number of source registers used by the *parent* of this node. +// +// Notes: +// Adds the defining node for each register to the useList. 
+// +int LinearScan::GetOperandInfo(GenTree* node, LocationInfoListNode** pFirstInfo) +{ + LocationInfoListNode* prevLast = useList.Last(); + int srcCount = GetOperandInfo(node); + if (prevLast == nullptr) + { + *pFirstInfo = useList.Begin(); + } + else + { + *pFirstInfo = prevLast->Next(); + } + return srcCount; +} + +void TreeNodeInfo::Initialize(LinearScan* lsra, GenTree* node) +{ + _dstCount = 0; + _srcCount = 0; + _internalIntCount = 0; + _internalFloatCount = 0; + + isLocalDefUse = false; + isDelayFree = false; + hasDelayFreeSrc = false; + isTgtPref = false; + isInternalRegDelayFree = false; + + regMaskTP dstCandidates; + + // if there is a reg indicated on the tree node, use that for dstCandidates + // the exception is the NOP, which sometimes show up around late args. + // TODO-Cleanup: get rid of those NOPs. + if (node->gtRegNum == REG_STK) + { + dstCandidates = RBM_NONE; + } + else if (node->gtRegNum == REG_NA || node->gtOper == GT_NOP) + { +#ifdef ARM_SOFTFP + if (node->OperGet() == GT_PUTARG_REG) + { + dstCandidates = lsra->allRegs(TYP_INT); + } + else +#endif + { + dstCandidates = lsra->allRegs(node->TypeGet()); + } + } + else + { + dstCandidates = genRegMask(node->gtRegNum); + } + + setDstCandidates(lsra, dstCandidates); + srcCandsIndex = dstCandsIndex; + + setInternalCandidates(lsra, lsra->allRegs(TYP_INT)); + +#ifdef DEBUG + isInitialized = true; +#endif + + assert(IsValid(lsra)); +} + +//------------------------------------------------------------------------ +// getSrcCandidates: Get the source candidates (candidates for the consumer +// of the node) from the TreeNodeInfo +// +// Arguments: +// lsra - the LinearScan object +// +// Return Value: +// The set of registers (as a register mask) that are candidates for the +// consumer of the node +// +// Notes: +// The LinearScan object maintains the mapping from the indices kept in the +// TreeNodeInfo to the actual register masks. 
+// +regMaskTP TreeNodeInfo::getSrcCandidates(LinearScan* lsra) +{ + return lsra->GetRegMaskForIndex(srcCandsIndex); +} + +//------------------------------------------------------------------------ +// setSrcCandidates: Set the source candidates (candidates for the consumer +// of the node) on the TreeNodeInfo +// +// Arguments: +// lsra - the LinearScan object +// +// Notes: see getSrcCandidates +// +void TreeNodeInfo::setSrcCandidates(LinearScan* lsra, regMaskTP mask) +{ + LinearScan::RegMaskIndex i = lsra->GetIndexForRegMask(mask); + assert(FitsIn<unsigned char>(i)); + srcCandsIndex = (unsigned char)i; +} + +//------------------------------------------------------------------------ +// getDstCandidates: Get the dest candidates (candidates for the definition +// of the node) from the TreeNodeInfo +// +// Arguments: +// lsra - the LinearScan object +// +// Return Value: +// The set of registers (as a register mask) that are candidates for the +// node itself +// +// Notes: see getSrcCandidates +// +regMaskTP TreeNodeInfo::getDstCandidates(LinearScan* lsra) +{ + return lsra->GetRegMaskForIndex(dstCandsIndex); +} + +//------------------------------------------------------------------------ +// setDstCandidates: Set the dest candidates (candidates for the definition +// of the node) on the TreeNodeInfo +// +// Arguments: +// lsra - the LinearScan object +// +// Notes: see getSrcCandidates +// +void TreeNodeInfo::setDstCandidates(LinearScan* lsra, regMaskTP mask) +{ + LinearScan::RegMaskIndex i = lsra->GetIndexForRegMask(mask); + assert(FitsIn<unsigned char>(i)); + dstCandsIndex = (unsigned char)i; +} + +//------------------------------------------------------------------------ +// getInternalCandidates: Get the internal candidates (candidates for the internal +// temporary registers used by a node) from the TreeNodeInfo +// +// Arguments: +// lsra - the LinearScan object +// +// Return Value: +// The set of registers (as a register mask) that are candidates for the +// internal temporary registers. 
+//
+// Notes: see getSrcCandidates
+//
+regMaskTP TreeNodeInfo::getInternalCandidates(LinearScan* lsra)
+{
+    return lsra->GetRegMaskForIndex(internalCandsIndex);
+}
+
+//------------------------------------------------------------------------
+// setInternalCandidates: Set the internal candidates (candidates for the internal
+// temporary registers used by a node) on the TreeNodeInfo
+//
+// Arguments:
+//    lsra - the LinearScan object
+//
+// Notes: see getSrcCandidates
+//
+void TreeNodeInfo::setInternalCandidates(LinearScan* lsra, regMaskTP mask)
+{
+    LinearScan::RegMaskIndex i = lsra->GetIndexForRegMask(mask);
+    assert(FitsIn<unsigned char>(i));
+    internalCandsIndex = (unsigned char)i;
+}
+
+//------------------------------------------------------------------------
+// addInternalCandidates: Add internal candidates (candidates for the internal
+// temporary registers used by a node) on the TreeNodeInfo
+//
+// Arguments:
+//    lsra - the LinearScan object
+//
+// Notes: see getSrcCandidates
+//
+void TreeNodeInfo::addInternalCandidates(LinearScan* lsra, regMaskTP mask)
+{
+    LinearScan::RegMaskIndex i = lsra->GetIndexForRegMask(lsra->GetRegMaskForIndex(internalCandsIndex) | mask);
+    assert(FitsIn<unsigned char>(i));
+    internalCandsIndex = (unsigned char)i;
+}
+
+//------------------------------------------------------------------------
+// BuildStoreLoc: Set register requirements for a store of a lclVar
+//
+// Arguments:
+//    storeLoc - the local store (GT_STORE_LCL_FLD or GT_STORE_LCL_VAR)
+//
+// Notes:
+//    This involves:
+//    - Setting the appropriate candidates for a store of a multi-reg call return value.
+//    - Handling of contained immediates.
+//    - Requesting an internal register for SIMD12 stores.
+//
+void LinearScan::BuildStoreLoc(GenTreeLclVarCommon* storeLoc)
+{
+    TreeNodeInfo* info = currentNodeInfo;
+    GenTree*      op1  = storeLoc->gtGetOp1();
+
+    assert(info->dstCount == 0);
+
+    if (op1->IsMultiRegCall())
+    {
+        // This is the case of var = call where call is returning
+        // a value in multiple return registers.
+        // Must be a store lclvar.
+        assert(storeLoc->OperGet() == GT_STORE_LCL_VAR);
+
+        // srcCount = number of registers in which the value is returned by call
+        GenTreeCall*    call        = op1->AsCall();
+        ReturnTypeDesc* retTypeDesc = call->GetReturnTypeDesc();
+        unsigned        regCount    = retTypeDesc->GetReturnRegCount();
+        info->srcCount              = regCount;
+
+        // Call node srcCandidates = Bitwise-OR(allregs(GetReturnRegType(i))) for all i=0..RetRegCount-1
+        regMaskTP             srcCandidates = allMultiRegCallNodeRegs(call);
+        LocationInfoListNode* locInfo       = getLocationInfo(op1);
+        locInfo->info.setSrcCandidates(this, srcCandidates);
+        useList.Append(locInfo);
+    }
+#ifndef _TARGET_64BIT_
+    else if (varTypeIsLong(op1))
+    {
+        if (op1->OperIs(GT_MUL_LONG))
+        {
+#ifdef _TARGET_X86_
+            // This is actually a bug. A GT_MUL_LONG produces two registers, but is modeled as only producing
+            // eax (and killing edx). This only works because it always occurs as var = GT_MUL_LONG (ensured by
+            // DecomposeMul), and therefore edx won't be reused before the store.
+            // TODO-X86-Cleanup: GT_MUL_LONG should be a multireg node on x86, just as on ARM.
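+            // (A hypothetical fix would model GT_MUL_LONG with dstCount == 2 and
+            // consume both defs here, as the ARM decomposition already does.)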
+ info->srcCount = 1; +#else + info->srcCount = 2; +#endif + appendLocationInfoToList(op1); + } + else + { + assert(op1->OperIs(GT_LONG)); + assert(op1->isContained() && !op1->gtOp.gtOp1->isContained() && !op1->gtOp.gtOp2->isContained()); + info->srcCount = appendBinaryLocationInfoToList(op1->AsOp()); + assert(info->srcCount == 2); + } + } +#endif // !_TARGET_64BIT_ + else if (op1->isContained()) + { + info->srcCount = 0; + } + else + { + info->srcCount = 1; + appendLocationInfoToList(op1); + } + +#ifdef FEATURE_SIMD + if (varTypeIsSIMD(storeLoc)) + { + if (!op1->isContained() && (storeLoc->TypeGet() == TYP_SIMD12)) + { +// Need an additional register to extract upper 4 bytes of Vector3. +#ifdef _TARGET_XARCH_ + info->internalFloatCount = 1; + info->setInternalCandidates(this, allSIMDRegs()); +#elif defined(_TARGET_ARM64_) + info->internalIntCount = 1; +#else +#error "Unknown target architecture for STORE_LCL_VAR of SIMD12" +#endif + } + } +#endif // FEATURE_SIMD +} + +//------------------------------------------------------------------------ +// BuildSimple: Sets the srcCount for all trees that require no special +// handling based on the tree node type. +// +// Arguments: +// tree - The node of interest +// +// Return Value: +// None. +// +void LinearScan::BuildSimple(GenTree* tree) +{ + TreeNodeInfo* info = currentNodeInfo; + unsigned kind = tree->OperKind(); + assert(info->dstCount == (tree->IsValue() ? 1 : 0)); + if (kind & (GTK_CONST | GTK_LEAF)) + { + info->srcCount = 0; + } + else if (kind & (GTK_SMPOP)) + { + info->srcCount = appendBinaryLocationInfoToList(tree->AsOp()); + } + else + { + unreached(); + } +} + +//------------------------------------------------------------------------ +// BuildReturn: Set the NodeInfo for a GT_RETURN. +// +// Arguments: +// tree - The node of interest +// +// Return Value: +// None. +// +void LinearScan::BuildReturn(GenTree* tree) +{ + TreeNodeInfo* info = currentNodeInfo; + assert(info->dstCount == 0); + GenTree* op1 = tree->gtGetOp1(); + +#if !defined(_TARGET_64BIT_) + if (tree->TypeGet() == TYP_LONG) + { + assert((op1->OperGet() == GT_LONG) && op1->isContained()); + GenTree* loVal = op1->gtGetOp1(); + GenTree* hiVal = op1->gtGetOp2(); + info->srcCount = 2; + LocationInfoListNode* loValInfo = getLocationInfo(loVal); + LocationInfoListNode* hiValInfo = getLocationInfo(hiVal); + loValInfo->info.setSrcCandidates(this, RBM_LNGRET_LO); + hiValInfo->info.setSrcCandidates(this, RBM_LNGRET_HI); + useList.Append(loValInfo); + useList.Append(hiValInfo); + } + else +#endif // !defined(_TARGET_64BIT_) + if ((tree->TypeGet() != TYP_VOID) && !op1->isContained()) + { + regMaskTP useCandidates = RBM_NONE; + + info->srcCount = 1; + +#if FEATURE_MULTIREG_RET + if (varTypeIsStruct(tree)) + { + // op1 has to be either an lclvar or a multi-reg returning call + if (op1->OperGet() != GT_LCL_VAR) + { + noway_assert(op1->IsMultiRegCall()); + + ReturnTypeDesc* retTypeDesc = op1->AsCall()->GetReturnTypeDesc(); + info->srcCount = retTypeDesc->GetReturnRegCount(); + useCandidates = retTypeDesc->GetABIReturnRegs(); + } + } + else +#endif // FEATURE_MULTIREG_RET + { + // Non-struct type return - determine useCandidates + switch (tree->TypeGet()) + { + case TYP_VOID: + useCandidates = RBM_NONE; + break; + case TYP_FLOAT: + useCandidates = RBM_FLOATRET; + break; + case TYP_DOUBLE: + // We ONLY want the valid double register in the RBM_DOUBLERET mask.
+ useCandidates = (RBM_DOUBLERET & RBM_ALLDOUBLE); + break; + case TYP_LONG: + useCandidates = RBM_LNGRET; + break; + default: + useCandidates = RBM_INTRET; + break; + } + } + + LocationInfoListNode* locationInfo = getLocationInfo(op1); + if (useCandidates != RBM_NONE) + { + locationInfo->info.setSrcCandidates(this, useCandidates); + } + useList.Append(locationInfo); + } +} + +//------------------------------------------------------------------------ +// BuildPutArgReg: Set the NodeInfo for a PUTARG_REG. +// +// Arguments: +// node - The PUTARG_REG node. +// +// Return Value: +// None. +// +void LinearScan::BuildPutArgReg(GenTreeUnOp* node) +{ + TreeNodeInfo* info = currentNodeInfo; + assert(node != nullptr); + assert(node->OperIsPutArgReg()); + info->srcCount = 1; + regNumber argReg = node->gtRegNum; + assert(argReg != REG_NA); + + // Set the register requirements for the node. + regMaskTP argMask = genRegMask(argReg); + +#ifdef _TARGET_ARM_ + // If the type of the node is `long` then it is actually `double`. + // The actual `long` types must have been transformed into a field list with two fields. + if (node->TypeGet() == TYP_LONG) + { + info->srcCount++; + info->dstCount = info->srcCount; + assert(genRegArgNext(argReg) == REG_NEXT(argReg)); + argMask |= genRegMask(REG_NEXT(argReg)); + } +#endif // _TARGET_ARM_ + info->setDstCandidates(this, argMask); + info->setSrcCandidates(this, argMask); + + // To avoid redundant moves, have the argument operand computed in the + // register in which the argument is passed to the call. + LocationInfoListNode* op1Info = getLocationInfo(node->gtOp.gtOp1); + op1Info->info.setSrcCandidates(this, info->getSrcCandidates(this)); + op1Info->info.isDelayFree = true; + useList.Append(op1Info); +} + +//------------------------------------------------------------------------ +// HandleFloatVarArgs: Handle additional register requirements for a varargs call +// +// Arguments: +// call - The call node of interest +// argNode - The current argument +// +// Return Value: +// None. +// +// Notes: +// In the case of a varargs call, the ABI dictates that if we have floating point args, +// we must pass the enregistered arguments in both the integer and floating point registers. +// Since the integer register is not associated with the arg node, we will reserve it as +// an internal register on the call so that it is not used during the evaluation of the call node +// (e.g. for the target).
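Concretely, on Windows x64 the shadow register that getCallArgIntRegister computes pairs the first four floating-point argument registers with the four integer argument registers: XMM0 with RCX, XMM1 with RDX, XMM2 with R8, and XMM3 with R9. A sketch of that mapping under those assumptions (the enum here is hypothetical; the JIT's regNumber and getCallArgIntRegister are the real counterparts):

    #include <cassert>

    // Illustrative register ids, not the JIT's regNumber enum.
    enum RegNum { REG_RCX, REG_RDX, REG_R8, REG_R9, REG_XMM0, REG_XMM1, REG_XMM2, REG_XMM3 };

    // Windows x64 varargs: a float argument passed in XMMn must also be
    // placed in the n-th integer argument register, since the callee may
    // spill its arguments as if they were all integers.
    RegNum callArgIntRegister(RegNum floatReg)
    {
        switch (floatReg)
        {
            case REG_XMM0: return REG_RCX;
            case REG_XMM1: return REG_RDX;
            case REG_XMM2: return REG_R8;
            case REG_XMM3: return REG_R9;
            default: assert(!"not a float argument register"); return REG_RCX;
        }
    }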
+void LinearScan::HandleFloatVarArgs(GenTreeCall* call, GenTree* argNode, bool* callHasFloatRegArgs) +{ +#if FEATURE_VARARG + TreeNodeInfo* info = currentNodeInfo; + if (call->IsVarargs() && varTypeIsFloating(argNode)) + { + *callHasFloatRegArgs = true; + + regNumber argReg = argNode->gtRegNum; + regNumber targetReg = compiler->getCallArgIntRegister(argReg); + info->setInternalIntCount(info->internalIntCount + 1); + info->addInternalCandidates(this, genRegMask(targetReg)); + } +#endif // FEATURE_VARARG +} + +//------------------------------------------------------------------------ +// BuildGCWriteBarrier: Handle additional register requirements for a GC write barrier +// +// Arguments: +// tree - The STORE_IND for which a write barrier is required +// +void LinearScan::BuildGCWriteBarrier(GenTree* tree) +{ + TreeNodeInfo* info = currentNodeInfo; + GenTree* dst = tree; + GenTree* addr = tree->gtOp.gtOp1; + GenTree* src = tree->gtOp.gtOp2; + LocationInfoListNode* addrInfo = getLocationInfo(addr); + LocationInfoListNode* srcInfo = getLocationInfo(src); + + // In the case where we are doing a helper assignment, even if the dst + // is an indir through an lea, we need to actually instantiate the + // lea in a register + assert(!addr->isContained() && !src->isContained()); + useList.Append(addrInfo); + useList.Append(srcInfo); + info->srcCount = 2; + assert(info->dstCount == 0); + bool customSourceRegs = false; + +#if NOGC_WRITE_BARRIERS + +#if defined(_TARGET_ARM64_) + // For the NOGC JIT Helper calls + // + // the 'addr' goes into x14 (REG_WRITE_BARRIER_DST_BYREF) + // the 'src' goes into x15 (REG_WRITE_BARRIER) + // + addrInfo->info.setSrcCandidates(this, RBM_WRITE_BARRIER_DST_BYREF); + srcInfo->info.setSrcCandidates(this, RBM_WRITE_BARRIER); + customSourceRegs = true; + +#elif defined(_TARGET_X86_) + + bool useOptimizedWriteBarrierHelper = compiler->codeGen->genUseOptimizedWriteBarriers(tree, src); + + if (useOptimizedWriteBarrierHelper) + { + // Special write barrier: + // op1 (addr) goes into REG_WRITE_BARRIER (rdx) and + // op2 (src) goes into any int register. + addrInfo->info.setSrcCandidates(this, RBM_WRITE_BARRIER); + srcInfo->info.setSrcCandidates(this, RBM_WRITE_BARRIER_SRC); + customSourceRegs = true; + } +#else // !defined(_TARGET_X86_) && !defined(_TARGET_ARM64_) +#error "NOGC_WRITE_BARRIERS is not supported" +#endif // !defined(_TARGET_X86_) && !defined(_TARGET_ARM64_) + +#endif // NOGC_WRITE_BARRIERS + + if (!customSourceRegs) + { + // For the standard JIT Helper calls: + // op1 (addr) goes into REG_ARG_0 and + // op2 (src) goes into REG_ARG_1 + addrInfo->info.setSrcCandidates(this, RBM_ARG_0); + srcInfo->info.setSrcCandidates(this, RBM_ARG_1); + } + + // Both src and dst must reside in a register, which they should since we haven't set + // either of them as contained. + assert(addrInfo->info.dstCount == 1); + assert(srcInfo->info.dstCount == 1); +} + +//------------------------------------------------------------------------ +// BuildCmp: Set the register requirements for a compare. +// +// Arguments: +// tree - The node of interest +// +// Return Value: +// None. +// +void LinearScan::BuildCmp(GenTree* tree) +{ + TreeNodeInfo* info = currentNodeInfo; + assert(tree->OperIsCompare() || tree->OperIs(GT_CMP) || tree->OperIs(GT_JCMP)); + + info->srcCount = 0; + assert((info->dstCount == 1) || (tree->TypeGet() == TYP_VOID)); + +#ifdef _TARGET_X86_ + // If the compare is used by a jump, we just need to set the condition codes.
If not, then we need + // to store the result into the low byte of a register, which requires the dst be a byteable register. + // We always set the dst candidates, though, because if this compare is consumed by a jump, they + // won't be used. We might be able to use GTF_RELOP_JMP_USED to determine this case, but it's not clear + // that flag is maintained until this location (especially for decomposed long compares). + info->setDstCandidates(this, RBM_BYTE_REGS); +#endif // _TARGET_X86_ + + info->srcCount = appendBinaryLocationInfoToList(tree->AsOp()); +} + +#endif // !LEGACY_BACKEND diff --git a/src/jit/lsraxarch.cpp b/src/jit/lsraxarch.cpp index b376788a7a..3fe91765b3 100644 --- a/src/jit/lsraxarch.cpp +++ b/src/jit/lsraxarch.cpp @@ -29,75 +29,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX #include "lower.h" //------------------------------------------------------------------------ -// TreeNodeInfoInitStoreLoc: Set register requirements for a store of a lclVar -// -// Arguments: -// storeLoc - the local store (GT_STORE_LCL_FLD or GT_STORE_LCL_VAR) -// -// Notes: -// This involves: -// - Setting the appropriate candidates for a store of a multi-reg call return value. -// - Requesting an internal register for SIMD12 stores. -// -void LinearScan::TreeNodeInfoInitStoreLoc(GenTreeLclVarCommon* storeLoc, TreeNodeInfo* info) -{ - assert(info->dstCount == 0); - GenTree* op1 = storeLoc->gtGetOp1(); - -#ifdef _TARGET_X86_ - if (op1->OperGet() == GT_LONG) - { - assert(op1->isContained() && !op1->gtOp.gtOp1->isContained() && !op1->gtOp.gtOp2->isContained()); - info->srcCount = appendBinaryLocationInfoToList(op1->AsOp()); - assert(info->srcCount == 2); - } - else -#endif // _TARGET_X86_ - if (op1->isContained()) - { - info->srcCount = 0; - } - else if (op1->IsMultiRegCall()) - { - // This is the case of var = call where call is returning - // a value in multiple return registers. - // Must be a store lclvar. - assert(storeLoc->OperGet() == GT_STORE_LCL_VAR); - - // srcCount = number of registers in which the value is returned by call - GenTreeCall* call = op1->AsCall(); - ReturnTypeDesc* retTypeDesc = call->GetReturnTypeDesc(); - unsigned regCount = retTypeDesc->GetReturnRegCount(); - info->srcCount = regCount; - - // Call node srcCandidates = Bitwise-OR(allregs(GetReturnRegType(i))) for all i=0..RetRegCount-1 - regMaskTP srcCandidates = allMultiRegCallNodeRegs(call); - LocationInfoListNode* locInfo = getLocationInfo(op1); - locInfo->info.setSrcCandidates(this, srcCandidates); - useList.Append(locInfo); - } - else - { - info->srcCount = 1; - appendLocationInfoToList(op1); - } - -#ifdef FEATURE_SIMD - if (varTypeIsSIMD(storeLoc)) - { - if (!op1->IsCnsIntOrI() && (storeLoc->TypeGet() == TYP_SIMD12)) - { - // Need an additional register to extract upper 4 bytes of Vector3. - info->internalFloatCount = 1; - info->setInternalCandidates(this, allSIMDRegs()); - } - return; - } -#endif // FEATURE_SIMD -} - -//------------------------------------------------------------------------ -// TreeNodeInfoInit: Set register requirements for a node +// BuildNode: Set register requirements for a node // // Arguments: // treeNode - the node of interest @@ -111,16 +43,11 @@ void LinearScan::TreeNodeInfoInitStoreLoc(GenTreeLclVarCommon* storeLoc, TreeNod // requirements needed by LSRA to build the Interval Table (source, // destination and internal [temp] register counts).
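The _TARGET_X86_ note in BuildCmp above exists because a compare whose result is materialized goes through SETcc, which writes an 8-bit register, and 32-bit x86 encodings only expose a low byte (AL, CL, DL, BL) on EAX, ECX, EDX and EBX; RBM_BYTE_REGS is exactly that subset. A toy standalone version of the mask, with illustrative register ids rather than the JIT's:

    #include <cstdint>
    #include <cstdio>

    using regMask = uint32_t;

    enum { REG_EAX, REG_ECX, REG_EDX, REG_EBX, REG_ESP, REG_EBP, REG_ESI, REG_EDI };

    constexpr regMask regBit(int reg) { return regMask(1) << reg; }

    // Only EAX, ECX, EDX and EBX have byte-addressable low halves on x86.
    constexpr regMask RBM_BYTE_REGS = regBit(REG_EAX) | regBit(REG_ECX) | regBit(REG_EDX) | regBit(REG_EBX);

    int main()
    {
        for (int reg = REG_EAX; reg <= REG_EDI; reg++)
        {
            bool byteable = (RBM_BYTE_REGS & regBit(reg)) != 0;
            printf("reg %d byteable: %d\n", reg, (int)byteable);
        }
        return 0;
    }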
// -void LinearScan::TreeNodeInfoInit(GenTree* tree, TreeNodeInfo* info) +void LinearScan::BuildNode(GenTree* tree) { - if (tree->isContained()) - { - info->dstCount = 0; - assert(info->srcCount == 0); - return; - } + TreeNodeInfo* info = currentNodeInfo; + assert(!tree->isContained()); - // Set the default dstCount. This may be modified below. if (tree->IsValue()) { info->dstCount = 1; @@ -139,7 +66,7 @@ void LinearScan::TreeNodeInfoInit(GenTree* tree, TreeNodeInfo* info) switch (tree->OperGet()) { default: - TreeNodeInfoInitSimple(tree, info); + BuildSimple(tree); break; case GT_LCL_VAR: @@ -181,7 +108,7 @@ void LinearScan::TreeNodeInfoInit(GenTree* tree, TreeNodeInfo* info) case GT_STORE_LCL_FLD: case GT_STORE_LCL_VAR: - TreeNodeInfoInitStoreLoc(tree->AsLclVarCommon(), info); + BuildStoreLoc(tree->AsLclVarCommon()); break; case GT_LIST: @@ -225,7 +152,7 @@ void LinearScan::TreeNodeInfoInit(GenTree* tree, TreeNodeInfo* info) break; case GT_RETURN: - TreeNodeInfoInitReturn(tree, info); + BuildReturn(tree); break; case GT_RETFILT: @@ -341,7 +268,7 @@ void LinearScan::TreeNodeInfoInit(GenTree* tree, TreeNodeInfo* info) case GT_DIV: case GT_UMOD: case GT_UDIV: - TreeNodeInfoInitModDiv(tree->AsOp(), info); + BuildModDiv(tree->AsOp()); break; case GT_MUL: @@ -349,27 +276,27 @@ void LinearScan::TreeNodeInfoInit(GenTree* tree, TreeNodeInfo* info) #if defined(_TARGET_X86_) && !defined(LEGACY_BACKEND) case GT_MUL_LONG: #endif - TreeNodeInfoInitMul(tree->AsOp(), info); + BuildMul(tree->AsOp()); break; case GT_INTRINSIC: - TreeNodeInfoInitIntrinsic(tree->AsOp(), info); + BuildIntrinsic(tree->AsOp()); break; #ifdef FEATURE_SIMD case GT_SIMD: - TreeNodeInfoInitSIMD(tree->AsSIMD(), info); + BuildSIMD(tree->AsSIMD()); break; #endif // FEATURE_SIMD #ifdef FEATURE_HW_INTRINSICS case GT_HWIntrinsic: - TreeNodeInfoInitHWIntrinsic(tree->AsHWIntrinsic(), info); + BuildHWIntrinsic(tree->AsHWIntrinsic()); break; #endif // FEATURE_HW_INTRINSICS case GT_CAST: - TreeNodeInfoInitCast(tree, info); + BuildCast(tree); break; case GT_BITCAST: @@ -421,7 +348,7 @@ void LinearScan::TreeNodeInfoInit(GenTree* tree, TreeNodeInfo* info) case GT_LSH_HI: case GT_RSH_LO: #endif - (void)TreeNodeInfoInitShiftRotate(tree, info); + (void)BuildShiftRotate(tree); break; case GT_EQ: @@ -433,7 +360,7 @@ void LinearScan::TreeNodeInfoInit(GenTree* tree, TreeNodeInfo* info) case GT_TEST_EQ: case GT_TEST_NE: case GT_CMP: - TreeNodeInfoInitCmp(tree, info); + BuildCmp(tree); break; case GT_CKFINITE: @@ -469,11 +396,11 @@ void LinearScan::TreeNodeInfoInit(GenTree* tree, TreeNodeInfo* info) break; case GT_PUTARG_REG: - TreeNodeInfoInitPutArgReg(tree->AsUnOp(), info); + BuildPutArgReg(tree->AsUnOp()); break; case GT_CALL: - TreeNodeInfoInitCall(tree->AsCall(), info); + BuildCall(tree->AsCall()); break; case GT_ADDR: @@ -499,14 +426,14 @@ void LinearScan::TreeNodeInfoInit(GenTree* tree, TreeNodeInfo* info) #ifdef FEATURE_PUT_STRUCT_ARG_STK case GT_PUTARG_STK: - TreeNodeInfoInitPutArgStk(tree->AsPutArgStk(), info); + BuildPutArgStk(tree->AsPutArgStk()); break; #endif // FEATURE_PUT_STRUCT_ARG_STK case GT_STORE_BLK: case GT_STORE_OBJ: case GT_STORE_DYN_BLK: - TreeNodeInfoInitBlockStore(tree->AsBlk(), info); + BuildBlockStore(tree->AsBlk()); break; case GT_INIT_VAL: @@ -515,7 +442,7 @@ void LinearScan::TreeNodeInfoInit(GenTree* tree, TreeNodeInfo* info) break; case GT_LCLHEAP: - TreeNodeInfoInitLclHeap(tree, info); + BuildLclHeap(tree); break; case GT_ARR_BOUNDS_CHECK: @@ -591,10 +518,10 @@ void LinearScan::TreeNodeInfoInit(GenTree* tree, 
TreeNodeInfo* info) case GT_STOREIND: if (compiler->codeGen->gcInfo.gcIsWriteBarrierAsgNode(tree)) { - TreeNodeInfoInitGCWriteBarrier(tree, info); + BuildGCWriteBarrier(tree); break; } - TreeNodeInfoInitIndir(tree->AsIndir(), info); + BuildIndir(tree->AsIndir()); break; case GT_NULLCHECK: @@ -604,7 +531,7 @@ void LinearScan::TreeNodeInfoInit(GenTree* tree, TreeNodeInfo* info) break; case GT_IND: - TreeNodeInfoInitIndir(tree->AsIndir(), info); + BuildIndir(tree->AsIndir()); assert(info->dstCount == 1); break; @@ -734,7 +661,7 @@ void LinearScan::TreeNodeInfoInit(GenTree* tree, TreeNodeInfo* info) } } - TreeNodeInfoInitCheckByteable(tree, info); + BuildCheckByteable(tree); // We need to be sure that we've set info->srcCount and info->dstCount appropriately assert((info->dstCount < 2) || (tree->IsMultiRegCall() && info->dstCount == MAX_RET_REG_COUNT)); @@ -790,7 +717,7 @@ bool LinearScan::CheckAndSetDelayFree(GenTree* delayUseSrc) } //------------------------------------------------------------------------ -// TreeNodeInfoInitCheckByteable: Check the tree to see if "byte-able" registers are +// BuildCheckByteable: Check the tree to see if "byte-able" registers are // required, and set the tree node info accordingly. // // Arguments: @@ -799,9 +726,10 @@ bool LinearScan::CheckAndSetDelayFree(GenTree* delayUseSrc) // Return Value: // None. // -void LinearScan::TreeNodeInfoInitCheckByteable(GenTree* tree, TreeNodeInfo* info) +void LinearScan::BuildCheckByteable(GenTree* tree) { #ifdef _TARGET_X86_ + TreeNodeInfo* info = currentNodeInfo; // Exclude RBM_NON_BYTE_REGS from dst candidates of tree node and src candidates of operands // if the tree node is a byte type. // @@ -893,39 +821,7 @@ bool LinearScan::isRMWRegOper(GenTree* tree) } //------------------------------------------------------------------------ -// TreeNodeInfoInitSimple: Sets the srcCount for all the trees -// without special handling based on the tree node type. -// -// Arguments: -// tree - The node of interest -// -// Return Value: -// None. -// -void LinearScan::TreeNodeInfoInitSimple(GenTree* tree, TreeNodeInfo* info) -{ - if (tree->isContained()) - { - info->srcCount = 0; - return; - } - unsigned kind = tree->OperKind(); - if (kind & (GTK_CONST | GTK_LEAF)) - { - info->srcCount = 0; - } - else if (kind & (GTK_SMPOP)) - { - info->srcCount = appendBinaryLocationInfoToList(tree->AsOp()); - } - else - { - unreached(); - } -} - -//------------------------------------------------------------------------ -// TreeNodeInfoInitReturn: Set the NodeInfo for a GT_RETURN. +// BuildShiftRotate: Set the NodeInfo for a shift or rotate. // // Arguments: // tree - The node of interest @@ -933,90 +829,9 @@ void LinearScan::TreeNodeInfoInitSimple(GenTree* tree, TreeNodeInfo* info) // Return Value: // None. 
// -void LinearScan::TreeNodeInfoInitReturn(GenTree* tree, TreeNodeInfo* info) -{ - assert(info->dstCount == 0); - GenTree* op1 = tree->gtGetOp1(); - -#if !defined(_TARGET_64BIT_) - if (tree->TypeGet() == TYP_LONG) - { - assert((op1->OperGet() == GT_LONG) && op1->isContained()); - GenTree* loVal = op1->gtGetOp1(); - GenTree* hiVal = op1->gtGetOp2(); - info->srcCount = 2; - LocationInfoListNode* loValInfo = getLocationInfo(loVal); - LocationInfoListNode* hiValInfo = getLocationInfo(hiVal); - loValInfo->info.setSrcCandidates(this, RBM_LNGRET_LO); - hiValInfo->info.setSrcCandidates(this, RBM_LNGRET_HI); - useList.Append(loValInfo); - useList.Append(hiValInfo); - } - else -#endif // !defined(_TARGET_64BIT_) - if ((tree->TypeGet() != TYP_VOID) && !op1->isContained()) - { - regMaskTP useCandidates = RBM_NONE; - - info->srcCount = 1; - -#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING - if (varTypeIsStruct(tree)) - { - if (op1->OperGet() != GT_LCL_VAR) - { - noway_assert(op1->IsMultiRegCall()); - - ReturnTypeDesc* retTypeDesc = op1->AsCall()->GetReturnTypeDesc(); - info->srcCount = retTypeDesc->GetReturnRegCount(); - useCandidates = retTypeDesc->GetABIReturnRegs(); - } - } - else -#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING - { - // Non-struct type return - determine useCandidates - switch (tree->TypeGet()) - { - case TYP_VOID: - unreached(); - case TYP_FLOAT: - useCandidates = RBM_FLOATRET; - break; - case TYP_DOUBLE: - useCandidates = RBM_DOUBLERET; - break; -#if defined(_TARGET_64BIT_) - case TYP_LONG: - useCandidates = RBM_LNGRET; - break; -#endif // defined(_TARGET_64BIT_) - default: - useCandidates = RBM_INTRET; - break; - } - } - - LocationInfoListNode* locationInfo = getLocationInfo(op1); - if (useCandidates != RBM_NONE) - { - locationInfo->info.setSrcCandidates(this, useCandidates); - } - useList.Append(locationInfo); - } -} - -//------------------------------------------------------------------------ -// TreeNodeInfoInitShiftRotate: Set the NodeInfo for a shift or rotate. -// -// Arguments: -// tree - The node of interest -// -// Return Value: -// None. -// -int LinearScan::TreeNodeInfoInitShiftRotate(GenTree* tree, TreeNodeInfo* info) +int LinearScan::BuildShiftRotate(GenTree* tree) { + TreeNodeInfo* info = currentNodeInfo; // For shift operations, we need that the number // of bits moved gets stored in CL in case // the number of bits to shift is not a constant. @@ -1044,16 +859,17 @@ int LinearScan::TreeNodeInfoInitShiftRotate(GenTree* tree, TreeNodeInfo* info) } } -// Note that Rotate Left/Right instructions don't set ZF and SF flags. -// -// If the operand being shifted is 32-bits then upper three bits are masked -// by hardware to get actual shift count. Similarly for 64-bit operands -// shift count is narrowed to [0..63]. If the resulting shift count is zero, -// then shift operation won't modify flags. -// -// TODO-CQ-XARCH: We can optimize generating 'test' instruction for GT_EQ/NE(shift, 0) -// if the shift count is known to be non-zero and in the range depending on the -// operand size. + // Note that Rotate Left/Right instructions don't set ZF and SF flags. + // + // If the operand being shifted is 32-bits then upper three bits are masked + // by hardware to get actual shift count. Similarly for 64-bit operands + // shift count is narrowed to [0..63]. If the resulting shift count is zero, + // then shift operation won't modify flags. 
+ // + // TODO-CQ-XARCH: We can optimize generating 'test' instruction for GT_EQ/NE(shift, 0) + // if the shift count is known to be non-zero and in the range depending on the + // operand size. + CLANG_FORMAT_COMMENT_ANCHOR; #ifdef _TARGET_X86_ // The first operand of a GT_LSH_HI and GT_RSH_LO oper is a GT_LONG so that @@ -1096,72 +912,7 @@ int LinearScan::TreeNodeInfoInitShiftRotate(GenTree* tree, TreeNodeInfo* info) } //------------------------------------------------------------------------ -// TreeNodeInfoInitPutArgReg: Set the NodeInfo for a PUTARG_REG. -// -// Arguments: -// node - The PUTARG_REG node. -// argReg - The register in which to pass the argument. -// info - The info for the node's using call. -// isVarArgs - True if the call uses a varargs calling convention. -// callHasFloatRegArgs - Set to true if this PUTARG_REG uses an FP register. -// -// Return Value: -// None. -// -void LinearScan::TreeNodeInfoInitPutArgReg(GenTreeUnOp* node, TreeNodeInfo* info) -{ - assert(node != nullptr); - assert(node->OperIsPutArgReg()); - info->srcCount = 1; - regNumber argReg = node->gtRegNum; - assert(argReg != REG_NA); - - // Set the register requirements for the node. - const regMaskTP argMask = genRegMask(argReg); - info->setDstCandidates(this, argMask); - info->setSrcCandidates(this, argMask); - - // To avoid redundant moves, have the argument operand computed in the - // register in which the argument is passed to the call. - LocationInfoListNode* op1Info = getLocationInfo(node->gtOp.gtOp1); - op1Info->info.setSrcCandidates(this, info->getSrcCandidates(this)); - op1Info->info.isDelayFree = true; - useList.Append(op1Info); -} - -//------------------------------------------------------------------------ -// HandleFloatVarArgs: Handle additional register requirements for a varargs call -// -// Arguments: -// call - The call node of interest -// argNode - The current argument -// -// Return Value: -// None. -// -// Notes: -// In the case of a varargs call, the ABI dictates that if we have floating point args, -// we must pass the enregistered arguments in both the integer and floating point registers. -// Since the integer register is not associated with the arg node, we will reserve it as -// an internal register on the call so that it is not used during the evaluation of the call node -// (e.g. for the target). -void LinearScan::HandleFloatVarArgs(GenTreeCall* call, TreeNodeInfo* info, GenTree* argNode, bool* callHasFloatRegArgs) -{ -#if FEATURE_VARARG - if (call->IsVarargs() && varTypeIsFloating(argNode)) - { - *callHasFloatRegArgs = true; - - regNumber argReg = argNode->gtRegNum; - regNumber targetReg = compiler->getCallArgIntRegister(argReg); - info->setInternalIntCount(info->internalIntCount + 1); - info->addInternalCandidates(this, genRegMask(targetReg)); - } -#endif // FEATURE_VARARG -} - -//------------------------------------------------------------------------ -// TreeNodeInfoInitCall: Set the NodeInfo for a call. +// BuildCall: Set the NodeInfo for a call. // // Arguments: // call - The call node of interest @@ -1169,8 +920,9 @@ void LinearScan::HandleFloatVarArgs(GenTreeCall* call, TreeNodeInfo* info, GenTr // Return Value: // None. 
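The shift-count comment above leans on the hardware masking rule: x86 masks a variable count to five bits for 32-bit operands and six bits for 64-bit operands, so a dynamic count that masks to zero neither changes the value nor updates the flags. A small standalone demo of the masked semantics (the mask is written out explicitly, both to mirror the hardware and because C++ leaves over-wide shifts undefined):

    #include <cstdint>
    #include <cstdio>

    // Emulate x86 variable-count shifts: 32-bit uses count & 31, 64-bit count & 63.
    uint32_t shl32(uint32_t value, unsigned count) { return value << (count & 31); }
    uint64_t shl64(uint64_t value, unsigned count) { return value << (count & 63); }

    int main()
    {
        printf("%u\n", shl32(1u, 33));                         // 2: count 33 masks to 1
        printf("%llu\n", (unsigned long long)shl64(1ull, 65)); // 2: count 65 masks to 1
        printf("%u\n", shl32(5u, 32));                         // 5: count masks to 0, value (and flags) unchanged
        return 0;
    }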
// -void LinearScan::TreeNodeInfoInitCall(GenTreeCall* call, TreeNodeInfo* info) +void LinearScan::BuildCall(GenTreeCall* call) { + TreeNodeInfo* info = currentNodeInfo; bool hasMultiRegRetVal = false; ReturnTypeDesc* retTypeDesc = nullptr; @@ -1276,7 +1028,7 @@ void LinearScan::TreeNodeInfoInitCall(GenTreeCall* call, TreeNodeInfo* info) if (argNode->OperIsPutArgReg()) { info->srcCount++; - HandleFloatVarArgs(call, info, argNode, &callHasFloatRegArgs); + HandleFloatVarArgs(call, argNode, &callHasFloatRegArgs); appendLocationInfoToList(argNode); } #ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING @@ -1286,7 +1038,7 @@ void LinearScan::TreeNodeInfoInitCall(GenTreeCall* call, TreeNodeInfo* info) { assert(entry->Current()->OperIsPutArgReg()); info->srcCount++; - HandleFloatVarArgs(call, info, argNode, &callHasFloatRegArgs); + HandleFloatVarArgs(call, argNode, &callHasFloatRegArgs); appendLocationInfoToList(entry->Current()); } } @@ -1416,7 +1168,7 @@ void LinearScan::TreeNodeInfoInitCall(GenTreeCall* call, TreeNodeInfo* info) } //------------------------------------------------------------------------ -// TreeNodeInfoInitBlockStore: Set the NodeInfo for a block store. +// BuildBlockStore: Set the NodeInfo for a block store. // // Arguments: // blkNode - The block store node of interest @@ -1424,11 +1176,12 @@ void LinearScan::TreeNodeInfoInitCall(GenTreeCall* call, TreeNodeInfo* info) // Return Value: // None. // -void LinearScan::TreeNodeInfoInitBlockStore(GenTreeBlk* blkNode, TreeNodeInfo* info) +void LinearScan::BuildBlockStore(GenTreeBlk* blkNode) { - GenTree* dstAddr = blkNode->Addr(); - unsigned size = blkNode->gtBlkSize; - GenTree* source = blkNode->Data(); + TreeNodeInfo* info = currentNodeInfo; + GenTree* dstAddr = blkNode->Addr(); + unsigned size = blkNode->gtBlkSize; + GenTree* source = blkNode->Data(); LocationInfoListNode* dstAddrInfo = nullptr; LocationInfoListNode* sourceInfo = nullptr; @@ -1663,7 +1416,7 @@ void LinearScan::TreeNodeInfoInitBlockStore(GenTreeBlk* blkNode, TreeNodeInfo* i #ifdef FEATURE_PUT_STRUCT_ARG_STK //------------------------------------------------------------------------ -// TreeNodeInfoInitPutArgStk: Set the NodeInfo for a GT_PUTARG_STK. +// BuildPutArgStk: Set the NodeInfo for a GT_PUTARG_STK. // // Arguments: // tree - The node of interest @@ -1671,9 +1424,10 @@ void LinearScan::TreeNodeInfoInitBlockStore(GenTreeBlk* blkNode, TreeNodeInfo* i // Return Value: // None. // -void LinearScan::TreeNodeInfoInitPutArgStk(GenTreePutArgStk* putArgStk, TreeNodeInfo* info) +void LinearScan::BuildPutArgStk(GenTreePutArgStk* putArgStk) { - info->srcCount = 0; + TreeNodeInfo* info = currentNodeInfo; + info->srcCount = 0; assert(info->dstCount == 0); if (putArgStk->gtOp1->gtOper == GT_FIELD_LIST) @@ -1774,7 +1528,7 @@ void LinearScan::TreeNodeInfoInitPutArgStk(GenTreePutArgStk* putArgStk, TreeNode if (type != TYP_STRUCT) { - TreeNodeInfoInitSimple(putArgStk, info); + BuildSimple(putArgStk); return; } @@ -1838,7 +1592,7 @@ void LinearScan::TreeNodeInfoInitPutArgStk(GenTreePutArgStk* putArgStk, TreeNode #endif // FEATURE_PUT_STRUCT_ARG_STK //------------------------------------------------------------------------ -// TreeNodeInfoInitLclHeap: Set the NodeInfo for a GT_LCLHEAP. +// BuildLclHeap: Set the NodeInfo for a GT_LCLHEAP. // // Arguments: // tree - The node of interest @@ -1846,9 +1600,10 @@ void LinearScan::TreeNodeInfoInitPutArgStk(GenTreePutArgStk* putArgStk, TreeNode // Return Value: // None. 
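BuildLclHeap, which follows, sizes its temporaries from the requested allocation, and GT_LCLHEAP (localloc) must keep the stack pointer aligned, so a constant size is first rounded up to the stack alignment before the temp-register table in genLclHeap() applies. A sketch of that rounding, assuming the usual 16-byte STACK_ALIGN on x64:

    #include <cstdint>
    #include <cstdio>

    // Round 'size' up to a power-of-two 'alignment'.
    constexpr uint64_t roundUp(uint64_t size, uint64_t alignment)
    {
        return (size + alignment - 1) & ~(alignment - 1);
    }

    int main()
    {
        printf("%llu\n", (unsigned long long)roundUp(1, 16));  // 16
        printf("%llu\n", (unsigned long long)roundUp(16, 16)); // 16
        printf("%llu\n", (unsigned long long)roundUp(17, 16)); // 32
        return 0;
    }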
// -void LinearScan::TreeNodeInfoInitLclHeap(GenTree* tree, TreeNodeInfo* info) +void LinearScan::BuildLclHeap(GenTree* tree) { - info->srcCount = 1; + TreeNodeInfo* info = currentNodeInfo; + info->srcCount = 1; assert(info->dstCount == 1); // Need a variable number of temp regs (see genLclHeap() in codegenamd64.cpp): @@ -1932,7 +1687,7 @@ void LinearScan::TreeNodeInfoInitLclHeap(GenTree* tree, TreeNodeInfo* info) } //------------------------------------------------------------------------ -// TreeNodeInfoInitModDiv: Set the NodeInfo for GT_MOD/GT_DIV/GT_UMOD/GT_UDIV. +// BuildModDiv: Set the NodeInfo for GT_MOD/GT_DIV/GT_UMOD/GT_UDIV. // // Arguments: // tree - The node of interest @@ -1940,10 +1695,11 @@ void LinearScan::TreeNodeInfoInitLclHeap(GenTree* tree, TreeNodeInfo* info) // Return Value: // None. // -void LinearScan::TreeNodeInfoInitModDiv(GenTree* tree, TreeNodeInfo* info) +void LinearScan::BuildModDiv(GenTree* tree) { - GenTree* op1 = tree->gtGetOp1(); - GenTree* op2 = tree->gtGetOp2(); + TreeNodeInfo* info = currentNodeInfo; + GenTree* op1 = tree->gtGetOp1(); + GenTree* op2 = tree->gtGetOp2(); assert(info->dstCount == 1); @@ -2013,7 +1769,7 @@ void LinearScan::TreeNodeInfoInitModDiv(GenTree* tree, TreeNodeInfo* info) } //------------------------------------------------------------------------ -// TreeNodeInfoInitIntrinsic: Set the NodeInfo for a GT_INTRINSIC. +// BuildIntrinsic: Set the NodeInfo for a GT_INTRINSIC. // // Arguments: // tree - The node of interest @@ -2021,8 +1777,9 @@ void LinearScan::TreeNodeInfoInitModDiv(GenTree* tree, TreeNodeInfo* info) // Return Value: // None. // -void LinearScan::TreeNodeInfoInitIntrinsic(GenTree* tree, TreeNodeInfo* info) +void LinearScan::BuildIntrinsic(GenTree* tree) { + TreeNodeInfo* info = currentNodeInfo; // Both operand and its result must be of floating point type. GenTree* op1 = tree->gtGetOp1(); assert(varTypeIsFloating(op1)); @@ -2081,7 +1838,7 @@ void LinearScan::TreeNodeInfoInitIntrinsic(GenTree* tree, TreeNodeInfo* info) #ifdef FEATURE_SIMD //------------------------------------------------------------------------ -// TreeNodeInfoInitSIMD: Set the NodeInfo for a GT_SIMD tree. +// BuildSIMD: Set the NodeInfo for a GT_SIMD tree. // // Arguments: // tree - The GT_SIMD node of interest @@ -2089,8 +1846,9 @@ void LinearScan::TreeNodeInfoInitIntrinsic(GenTree* tree, TreeNodeInfo* info) // Return Value: // None. -void LinearScan::TreeNodeInfoInitSIMD(GenTreeSIMD* simdTree, TreeNodeInfo* info) +void LinearScan::BuildSIMD(GenTreeSIMD* simdTree) { + TreeNodeInfo* info = currentNodeInfo; // Only SIMDIntrinsicInit can be contained. Other than that, // only SIMDIntrinsicOpEquality and SIMDIntrinsicOpInEquality can have 0 dstCount. if (simdTree->isContained()) @@ -2481,7 +2239,7 @@ void LinearScan::TreeNodeInfoInitSIMD(GenTreeSIMD* simdTree, TreeNodeInfo* info) #ifdef FEATURE_HW_INTRINSICS //------------------------------------------------------------------------ -// TreeNodeInfoInitHWIntrinsic: Set the NodeInfo for a GT_HWIntrinsic tree. +// BuildHWIntrinsic: Set the NodeInfo for a GT_HWIntrinsic tree. // // Arguments: // tree - The GT_HWIntrinsic node of interest @@ -2489,8 +2247,9 @@ void LinearScan::TreeNodeInfoInitSIMD(GenTreeSIMD* simdTree, TreeNodeInfo* info) // Return Value: // None. 
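BuildModDiv, renamed in the hunk above, pins register candidates because x86 integer division is fixed-register: idiv takes its dividend in EDX:EAX and returns the quotient in EAX and the remainder in EDX. A portable sketch of the quotient/remainder pairing the instruction produces (C++ division truncates toward zero, matching idiv):

    #include <cstdint>
    #include <cstdio>

    struct DivResult { int32_t quotient; int32_t remainder; };

    // On x86 an optimizing compiler folds this pair into one idiv:
    // eax receives the quotient and edx the remainder.
    DivResult divMod(int32_t dividend, int32_t divisor)
    {
        return { dividend / divisor, dividend % divisor };
    }

    int main()
    {
        DivResult r = divMod(17, 5);
        printf("q=%d r=%d\n", r.quotient, r.remainder); // q=3 r=2
        return 0;
    }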
-void LinearScan::TreeNodeInfoInitHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, TreeNodeInfo* info) +void LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree) { + TreeNodeInfo* info = currentNodeInfo; NamedIntrinsic intrinsicID = intrinsicTree->gtHWIntrinsicId; InstructionSet isa = Compiler::isaOfHWIntrinsic(intrinsicID); if (isa == InstructionSet_AVX || isa == InstructionSet_AVX2) @@ -2601,7 +2360,7 @@ void LinearScan::TreeNodeInfoInitHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, #endif //------------------------------------------------------------------------ -// TreeNodeInfoInitCast: Set the NodeInfo for a GT_CAST. +// BuildCast: Set the NodeInfo for a GT_CAST. // // Arguments: // tree - The node of interest @@ -2609,8 +2368,9 @@ void LinearScan::TreeNodeInfoInitHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, // Return Value: // None. // -void LinearScan::TreeNodeInfoInitCast(GenTree* tree, TreeNodeInfo* info) +void LinearScan::BuildCast(GenTree* tree) { + TreeNodeInfo* info = currentNodeInfo; // TODO-XArch-CQ: Int-To-Int conversions - castOp cannot be a memory op and must have an assigned register. // see CodeGen::genIntToIntCast() @@ -2642,77 +2402,15 @@ void LinearScan::TreeNodeInfoInitCast(GenTree* tree, TreeNodeInfo* info) } } -//------------------------------------------------------------------------ -// TreeNodeInfoInitGCWriteBarrier: Set the NodeInfo for a GT_STOREIND requiring a write barrier. -// -// Arguments: -// tree - The node of interest -// -// Return Value: -// None. -// -void LinearScan::TreeNodeInfoInitGCWriteBarrier(GenTree* tree, TreeNodeInfo* info) -{ - assert(tree->OperGet() == GT_STOREIND); - - GenTreeStoreInd* dst = tree->AsStoreInd(); - GenTree* addr = dst->Addr(); - GenTree* src = dst->Data(); - LocationInfoListNode* addrInfo = getLocationInfo(addr); - LocationInfoListNode* srcInfo = getLocationInfo(src); - - // In the case where we are doing a helper assignment, we need to actually instantiate the - // address in a register. - assert(!addr->isContained() && !src->isContained()); - useList.Append(addrInfo); - useList.Append(srcInfo); - info->srcCount = 2; - assert(info->dstCount == 0); - - bool useOptimizedWriteBarrierHelper = compiler->codeGen->genUseOptimizedWriteBarriers(tree, src); - -#if NOGC_WRITE_BARRIERS - -#if defined(_TARGET_X86_) - - if (useOptimizedWriteBarrierHelper) - { - // Special write barrier: - // op1 (addr) goes into REG_WRITE_BARRIER (rdx) and - // op2 (src) goes into any int register. - addrInfo->info.setSrcCandidates(this, RBM_WRITE_BARRIER); - srcInfo->info.setSrcCandidates(this, RBM_WRITE_BARRIER_SRC); - } - -#else // !defined(_TARGET_X86_) -#error "NOGC_WRITE_BARRIERS is not supported" -#endif // !defined(_TARGET_X86_) - -#endif // NOGC_WRITE_BARRIERS - - if (!useOptimizedWriteBarrierHelper) - { - // For the standard JIT Helper calls: - // op1 (addr) goes into REG_ARG_0 and - // op2 (src) goes into REG_ARG_1 - addrInfo->info.setSrcCandidates(this, RBM_ARG_0); - srcInfo->info.setSrcCandidates(this, RBM_ARG_1); - } - - // Both src and dst must reside in a register, which they should since we haven't set - // either of them as contained. - assert(addrInfo->info.dstCount == 1); - assert(srcInfo->info.dstCount == 1); -} - //----------------------------------------------------------------------------------------- -// TreeNodeInfoInitIndir: Specify register requirements for address expression of an indirection operation. +// BuildIndir: Specify register requirements for address expression of an indirection operation. 
// // Arguments: // indirTree - GT_IND or GT_STOREIND gentree node // -void LinearScan::TreeNodeInfoInitIndir(GenTreeIndir* indirTree, TreeNodeInfo* info) +void LinearScan::BuildIndir(GenTreeIndir* indirTree) { + TreeNodeInfo* info = currentNodeInfo; // If this is the rhs of a block copy (i.e. non-enregisterable struct), // it has no register requirements. if (indirTree->TypeGet() == TYP_STRUCT) @@ -2733,7 +2431,7 @@ void LinearScan::TreeNodeInfoInitIndir(GenTreeIndir* indirTree, TreeNodeInfo* in if (source->OperIsShiftOrRotate()) { - info->srcCount += TreeNodeInfoInitShiftRotate(source, info); + info->srcCount += BuildShiftRotate(source); } else { @@ -2817,40 +2515,7 @@ void LinearScan::TreeNodeInfoInitIndir(GenTreeIndir* indirTree, TreeNodeInfo* in } //------------------------------------------------------------------------ -// TreeNodeInfoInitCmp: Set the register requirements for a compare. -// -// Arguments: -// tree - The node of interest -// -// Return Value: -// None. -// -void LinearScan::TreeNodeInfoInitCmp(GenTree* tree, TreeNodeInfo* info) -{ - assert(tree->OperIsCompare() || tree->OperIs(GT_CMP)); - - info->srcCount = 0; - assert((info->dstCount == 1) || (tree->TypeGet() == TYP_VOID)); - -#ifdef _TARGET_X86_ - // If the compare is used by a jump, we just need to set the condition codes. If not, then we need - // to store the result into the low byte of a register, which requires the dst be a byteable register. - // We always set the dst candidates, though, because if this is compare is consumed by a jump, they - // won't be used. We might be able to use GTF_RELOP_JMP_USED to determine this case, but it's not clear - // that flag is maintained until this location (especially for decomposed long compares). - info->setDstCandidates(this, RBM_BYTE_REGS); -#endif // _TARGET_X86_ - - GenTree* op1 = tree->gtOp.gtOp1; - GenTree* op2 = tree->gtOp.gtOp2; - var_types op1Type = op1->TypeGet(); - var_types op2Type = op2->TypeGet(); - - info->srcCount = appendBinaryLocationInfoToList(tree->AsOp()); -} - -//------------------------------------------------------------------------ -// TreeNodeInfoInitMul: Set the NodeInfo for a multiply. +// BuildMul: Set the NodeInfo for a multiply. // // Arguments: // tree - The node of interest @@ -2858,8 +2523,9 @@ void LinearScan::TreeNodeInfoInitCmp(GenTree* tree, TreeNodeInfo* info) // Return Value: // None. // -void LinearScan::TreeNodeInfoInitMul(GenTree* tree, TreeNodeInfo* info) +void LinearScan::BuildMul(GenTree* tree) { + TreeNodeInfo* info = currentNodeInfo; #if defined(_TARGET_X86_) assert(tree->OperIs(GT_MUL, GT_MULHI, GT_MUL_LONG)); #else |