Diffstat (limited to 'src/jit/lsra.cpp')
-rw-r--r-- | src/jit/lsra.cpp | 11578
1 files changed, 11578 insertions, 0 deletions
diff --git a/src/jit/lsra.cpp b/src/jit/lsra.cpp new file mode 100644 index 0000000000..317b976e42 --- /dev/null +++ b/src/jit/lsra.cpp @@ -0,0 +1,11578 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +/* +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX + + Linear Scan Register Allocation + + a.k.a. LSRA + + Preconditions + - All register requirements are expressed in the code stream, either as destination + registers of tree nodes, or as internal registers. These requirements are + expressed in the TreeNodeInfo (gtLsraInfo) on each node, which includes: + - The number of register sources and destinations. + - The register restrictions (candidates) of the target register, both from itself, + as producer of the value (dstCandidates), and from its consuming node (srcCandidates). + Note that the srcCandidates field of TreeNodeInfo refers to the destination register + (not any of its sources). + - The number (internalCount) of registers required, and their register restrictions (internalCandidates). + These are neither inputs nor outputs of the node, but used in the sequence of code generated for the tree. + "Internal registers" are registers used during the code sequence generated for the node. + The register lifetimes must obey the following lifetime model: + - First, any internal registers are defined. + - Next, any source registers are used (and are then freed if they are last use and are not identified as + "delayRegFree"). + - Next, the internal registers are used (and are then freed). + - Next, any registers in the kill set for the instruction are killed. + - Next, the destination register(s) are defined (multiple destination registers are only supported on ARM) + - Finally, any "delayRegFree" source registers are freed. + There are several things to note about this order: + - The internal registers will never overlap any use, but they may overlap a destination register. + - Internal registers are never live beyond the node. + - The "delayRegFree" annotation is used for instructions that are only available in a Read-Modify-Write form. + That is, the destination register is one of the sources. In this case, we must not use the same register for + the non-RMW operand as for the destination. + + Overview (doLinearScan): + - Walk all blocks, building intervals and RefPositions (buildIntervals) + - Traverse the RefPositions, marking last uses (setLastUses) + - Note that this is necessary because the execution order doesn't accurately reflect use order. + There is a "TODO-Throughput" to eliminate this. + - Allocate registers (allocateRegisters) + - Annotate nodes with register assignments (resolveRegisters) + - Add move nodes as needed to resolve conflicting register + assignments across non-adjacent edges. (resolveEdges, called from resolveRegisters) + + Postconditions: + + Tree nodes (GenTree): + - GenTree::gtRegNum (and gtRegPair for ARM) is annotated with the register + assignment for a node. If the node does not require a register, it is + annotated as such (for single registers, gtRegNum = REG_NA; for register + pair type, gtRegPair = REG_PAIR_NONE). For a variable definition or interior + tree node (an "implicit" definition), this is the register to put the result. 
+ For an expression use, this is the place to find the value that has previously + been computed. + - In most cases, this register must satisfy the constraints specified by the TreeNodeInfo. + - In some cases, this is difficult: + - If a lclVar node currently lives in some register, it may not be desirable to move it + (i.e. its current location may be desirable for future uses, e.g. if it's a callee save register, + but needs to be in a specific arg register for a call). + - In other cases there may be conflicts on the restrictions placed by the defining node and the node which + consumes it + - If such a node is constrained to a single fixed register (e.g. an arg register, or a return from a call), + then LSRA is free to annotate the node with a different register. The code generator must issue the appropriate + move. + - However, if such a node is constrained to a set of registers, and its current location does not satisfy that + requirement, LSRA must insert a GT_COPY node between the node and its parent. The gtRegNum on the GT_COPY node + must satisfy the register requirement of the parent. + - GenTree::gtRsvdRegs has a set of registers used for internal temps. + - A tree node is marked GTF_SPILL if the tree node must be spilled by the code generator after it has been + evaluated. + - LSRA currently does not set GTF_SPILLED on such nodes, because it caused problems in the old code generator. + In the new backend perhaps this should change (see also the note below under CodeGen). + - A tree node is marked GTF_SPILLED if it is a lclVar that must be reloaded prior to use. + - The register (gtRegNum) on the node indicates the register to which it must be reloaded. + - For lclVar nodes, since the uses and defs are distinct tree nodes, it is always possible to annotate the node + with the register to which the variable must be reloaded. + - For other nodes, since they represent both the def and use, if the value must be reloaded to a different + register, LSRA must insert a GT_RELOAD node in order to specify the register to which it should be reloaded. + + Local variable table (LclVarDsc): + - LclVarDsc::lvRegister is set to true if a local variable has the + same register assignment for its entire lifetime. + - LclVarDsc::lvRegNum / lvOtherReg: these are initialized to their + first value at the end of LSRA (it looks like lvOtherReg isn't? + This is probably a bug (ARM)). Codegen will set them to their current value + as it processes the trees, since a variable can (now) be assigned different + registers over its lifetimes. 
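 An editorial example to make the lifetime model above concrete (the node and
 register names here are hypothetical, not taken from this change). Consider a
 read-modify-write node t3 = ADD(t1, t2) on x86, where the encoding requires the
 destination register to be the same as the first source:
   - No internal registers are defined for ADD.
   - t1 and t2 are used; t1 (the RMW operand) is freed at its last use, but t2 is
     marked "delayRegFree" and is not freed yet.
   - ADD kills no registers.
   - t3 is defined; it may be assigned t1's just-freed register, but because t2 is
     still considered busy, t3 can never be assigned t2's register.
   - Finally, t2 is freed.
 This is how the "delayRegFree" annotation keeps the non-RMW source from being
 clobbered by the destination.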
+ +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +*/ + +#include "jitpch.h" +#ifdef _MSC_VER +#pragma hdrstop +#endif + +#ifndef LEGACY_BACKEND // This file is ONLY used for the RyuJIT backend that uses the linear scan register allocator + +#include "lsra.h" + +#ifdef DEBUG +const char* LinearScan::resolveTypeName[] = {"Split", "Join", "Critical", "SharedCritical"}; +#endif // DEBUG + +/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XX XX +XX Small Helper functions XX +XX XX +XX XX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +*/ + +//-------------------------------------------------------------- +// lsraAssignRegToTree: Assign the given reg to tree node. +// +// Arguments: +// tree - Gentree node +// reg - register to be assigned +// regIdx - register idx, if tree is a multi-reg call node. +// regIdx will be zero for single-reg result producing tree nodes. +// +// Return Value: +// None +// +void lsraAssignRegToTree(GenTreePtr tree, regNumber reg, unsigned regIdx) +{ + if (regIdx == 0) + { + tree->gtRegNum = reg; + } + else + { + assert(tree->IsMultiRegCall()); + GenTreeCall* call = tree->AsCall(); + call->SetRegNumByIdx(reg, regIdx); + } +} + +//------------------------------------------------------------- +// getWeight: Returns the weight of the RefPosition. +// +// Arguments: +// refPos - ref position +// +// Returns: +// Weight of ref position. +unsigned LinearScan::getWeight(RefPosition* refPos) +{ + unsigned weight; + GenTreePtr treeNode = refPos->treeNode; + + if (treeNode != nullptr) + { + if (isCandidateLocalRef(treeNode)) + { + // Tracked locals: use weighted ref cnt as the weight of the + // ref position. + GenTreeLclVarCommon* lclCommon = treeNode->AsLclVarCommon(); + LclVarDsc* varDsc = &(compiler->lvaTable[lclCommon->gtLclNum]); + weight = varDsc->lvRefCntWtd; + } + else + { + // Non-candidate local ref or non-lcl tree node. + // These are considered to have two references in the basic block: + // a def and a use and hence weighted ref count is 2 times + // the basic block weight in which they appear. + weight = 2 * this->blockInfo[refPos->bbNum].weight; + } + } + else + { + // Non-tree node ref positions. These will have a single + // reference in the basic block and hence their weighted + // refcount is equal to the block weight in which they + // appear. + weight = this->blockInfo[refPos->bbNum].weight; + } + + return weight; +} + +// allRegs represents a set of registers that can +// be used to allocate the specified type in any point +// in time (more of a 'bank' of registers). +regMaskTP LinearScan::allRegs(RegisterType rt) +{ + if (rt == TYP_FLOAT) + { + return availableFloatRegs; + } + else if (rt == TYP_DOUBLE) + { + return availableDoubleRegs; +#ifdef FEATURE_SIMD + // TODO-Cleanup: Add an RBM_ALLSIMD + } + else if (varTypeIsSIMD(rt)) + { + return availableDoubleRegs; +#endif // FEATURE_SIMD + } + else + { + return availableIntRegs; + } +} + +//-------------------------------------------------------------------------- +// allMultiRegCallNodeRegs: represents a set of registers that can be used +// to allocate a multi-reg call node. 
+//
+// Arguments:
+//    call   - Multi-reg call node
+//
+// Return Value:
+//    Mask representing the set of available registers for multi-reg call
+//    node.
+//
+// Note:
+//    Multi-reg call node available regs = Bitwise-OR(allRegs(GetReturnRegType(i)))
+//    for all i=0..RetRegCount-1.
+regMaskTP LinearScan::allMultiRegCallNodeRegs(GenTreeCall* call)
+{
+    assert(call->HasMultiRegRetVal());
+
+    ReturnTypeDesc* retTypeDesc = call->GetReturnTypeDesc();
+    regMaskTP resultMask = allRegs(retTypeDesc->GetReturnRegType(0));
+
+    unsigned count = retTypeDesc->GetReturnRegCount();
+    for (unsigned i = 1; i < count; ++i)
+    {
+        resultMask |= allRegs(retTypeDesc->GetReturnRegType(i));
+    }
+
+    return resultMask;
+}
+
+//--------------------------------------------------------------------------
+// allRegs: returns the set of registers that can accommodate the type of
+// the given node.
+//
+// Arguments:
+//    tree   - GenTree node
+//
+// Return Value:
+//    Mask representing the set of available registers for the given tree
+//
+// Note: In the case of a multi-reg call node, the full set of registers must be
+// determined by looking at the types of the individual return registers.
+// In this case, the registers may include registers from different register
+// sets and will not be limited to the actual ABI return registers.
+regMaskTP LinearScan::allRegs(GenTree* tree)
+{
+    regMaskTP resultMask;
+
+    // In the case of multi-reg calls, allRegs is defined as
+    // Bitwise-Or(allRegs(GetReturnRegType(i))) for i=0..ReturnRegCount-1
+    if (tree->IsMultiRegCall())
+    {
+        resultMask = allMultiRegCallNodeRegs(tree->AsCall());
+    }
+    else
+    {
+        resultMask = allRegs(tree->TypeGet());
+    }
+
+    return resultMask;
+}
+
+regMaskTP LinearScan::allSIMDRegs()
+{
+    return availableFloatRegs;
+}
+
+//------------------------------------------------------------------------
+// internalFloatRegCandidates: Return the set of registers that are appropriate
+// for use as internal float registers.
+//
+// Return Value:
+//    The set of registers (as a regMaskTP).
+//
+// Notes:
+//    compFloatingPointUsed is only required to be set if it is possible that we
+//    will use floating point callee-save registers.
+//    It is unlikely, if an internal register is the only use of floating point,
+//    that it will select a callee-save register. But to be safe, we restrict
+//    the set of candidates if compFloatingPointUsed is not already set.
+
+regMaskTP LinearScan::internalFloatRegCandidates()
+{
+    if (compiler->compFloatingPointUsed)
+    {
+        return allRegs(TYP_FLOAT);
+    }
+    else
+    {
+        return RBM_FLT_CALLEE_TRASH;
+    }
+}
+
+/*****************************************************************************
+ * Register types
+ *****************************************************************************/
+template <class T>
+RegisterType regType(T type)
+{
+#ifdef FEATURE_SIMD
+    if (varTypeIsSIMD(type))
+    {
+        return FloatRegisterType;
+    }
+#endif // FEATURE_SIMD
+    return varTypeIsFloating(TypeGet(type)) ?
FloatRegisterType : IntRegisterType; +} + +bool useFloatReg(var_types type) +{ + return (regType(type) == FloatRegisterType); +} + +bool registerTypesEquivalent(RegisterType a, RegisterType b) +{ + return varTypeIsIntegralOrI(a) == varTypeIsIntegralOrI(b); +} + +bool isSingleRegister(regMaskTP regMask) +{ + return (regMask != RBM_NONE && genMaxOneBit(regMask)); +} + +/***************************************************************************** + * Inline functions for RegRecord + *****************************************************************************/ + +bool RegRecord::isFree() +{ + return ((assignedInterval == nullptr || !assignedInterval->isActive) && !isBusyUntilNextKill); +} + +/***************************************************************************** + * Inline functions for LinearScan + *****************************************************************************/ +RegRecord* LinearScan::getRegisterRecord(regNumber regNum) +{ + return &physRegs[regNum]; +} + +#ifdef DEBUG +//------------------------------------------------------------------------ +// stressLimitRegs: Given a set of registers, expressed as a register mask, reduce +// them based on the current stress options. +// +// Arguments: +// mask - The current mask of register candidates for a node +// +// Return Value: +// A possibly-modified mask, based on the value of COMPlus_JitStressRegs. +// +// Notes: +// This is the method used to implement the stress options that limit +// the set of registers considered for allocation. + +regMaskTP LinearScan::stressLimitRegs(RefPosition* refPosition, regMaskTP mask) +{ + if (getStressLimitRegs() != LSRA_LIMIT_NONE) + { + switch (getStressLimitRegs()) + { + case LSRA_LIMIT_CALLEE: + if (!compiler->opts.compDbgEnC && (mask & RBM_CALLEE_SAVED) != RBM_NONE) + { + mask &= RBM_CALLEE_SAVED; + } + break; + case LSRA_LIMIT_CALLER: + if ((mask & RBM_CALLEE_TRASH) != RBM_NONE) + { + mask &= RBM_CALLEE_TRASH; + } + break; + case LSRA_LIMIT_SMALL_SET: + if ((mask & LsraLimitSmallIntSet) != RBM_NONE) + { + mask &= LsraLimitSmallIntSet; + } + else if ((mask & LsraLimitSmallFPSet) != RBM_NONE) + { + mask &= LsraLimitSmallFPSet; + } + break; + default: + unreached(); + } + if (refPosition != nullptr && refPosition->isFixedRegRef) + { + mask |= refPosition->registerAssignment; + } + } + return mask; +} +#endif // DEBUG + +// TODO-Cleanup: Consider adding an overload that takes a varDsc, and can appropriately +// set such fields as isStructField + +Interval* LinearScan::newInterval(RegisterType theRegisterType) +{ + intervals.emplace_back(theRegisterType, allRegs(theRegisterType)); + Interval* newInt = &intervals.back(); + +#ifdef DEBUG + newInt->intervalIndex = static_cast<unsigned>(intervals.size() - 1); +#endif // DEBUG + + DBEXEC(VERBOSE, newInt->dump()); + return newInt; +} + +RefPosition* LinearScan::newRefPositionRaw(LsraLocation nodeLocation, GenTree* treeNode, RefType refType) +{ + refPositions.emplace_back(curBBNum, nodeLocation, treeNode, refType); + RefPosition* newRP = &refPositions.back(); +#ifdef DEBUG + newRP->rpNum = static_cast<unsigned>(refPositions.size() - 1); +#endif // DEBUG + return newRP; +} + +//------------------------------------------------------------------------ +// resolveConflictingDefAndUse: Resolve the situation where we have conflicting def and use +// register requirements on a single-def, single-use interval. +// +// Arguments: +// defRefPosition - The interval definition +// useRefPosition - The (sole) interval use +// +// Return Value: +// None. 
+//
+// Assumptions:
+//    The two RefPositions are for the same interval, which is a tree-temp.
+//
+// Notes:
+//    We require some special handling for the case where the use is a "delayRegFree" case of a fixedReg.
+//    In that case, if we change the registerAssignment on the useRefPosition, we will lose the fact that,
+//    even if we assign a different register (and rely on codegen to do the copy), that fixedReg also needs
+//    to remain busy until the Def register has been allocated. In that case, we don't allow Case 1 or Case 4
+//    below.
+//    Here are the cases we consider (in this order):
+//    1. If the defRefPosition specifies a single register, and there are no conflicting
+//       FixedReg uses of it between the def and use, we use that register, and the code generator
+//       will insert the copy. Note that it cannot be in use because there is a FixedRegRef for the def.
+//    2. If the useRefPosition specifies a single register, and it is not in use, and there are no
+//       conflicting FixedReg uses of it between the def and use, we use that register, and the code generator
+//       will insert the copy.
+//    3. If the defRefPosition specifies a single register (but there are conflicts, as determined
+//       in 1.), and there are no conflicts with the useRefPosition register (if it's a single register),
+//       we set the register requirements on the defRefPosition to the use registers, and the
+//       code generator will insert a copy on the def. We can't rely on the code generator to put a copy
+//       on the use if it has multiple possible candidates, as it won't know which one has been allocated.
+//    4. If the useRefPosition specifies a single register, and there are no conflicts with the register
+//       on the defRefPosition, we leave the register requirements on the defRefPosition as-is, and set
+//       the useRefPosition to the def registers, for similar reasons to case #3.
+//    5. If both the defRefPosition and the useRefPosition specify single registers, but both have conflicts,
+//       we set the candidates on the defRefPosition to be all regs of the appropriate type, and since they are
+//       single registers, codegen can insert the copy.
+//    6. Finally, if the RefPositions specify disjoint subsets of the registers (or the use is fixed but
+//       has a conflict), we must insert a copy. The copy will be inserted before the use if the
+//       use is not fixed (in the fixed case, the code generator will insert the use).
+//
+// TODO-CQ: We get bad register allocation in case #3 in the situation where no register is
+// available for the lifetime. We end up allocating a register that must be spilled, and it probably
+// won't be the register that is actually defined by the target instruction. So, we have to copy it
+// and THEN spill it. In this case, we should be using the def requirement. But we need to change
+// the interface to this method a bit to make that work (e.g. returning a candidate set to use, but
+// leaving the registerAssignment as-is on the def, so that if we find that we need to spill anyway
+// we can use the fixed-reg on the def.)
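As a condensed editorial sketch of the case ordering above (this is an illustration,
not part of this change: defMask/useMask stand in for the registerAssignment fields of
the def and use RefPositions, and the bool parameters summarize the conflict checks
described in the notes):

    static void resolveSketch(regMaskTP& defMask, regMaskTP& useMask,
                              bool defFixed, bool defConflict,
                              bool useFixed, bool useConflict,
                              bool canChangeUse, regMaskTP allRegsOfType)
    {
        if (defFixed && canChangeUse && !defConflict) { useMask = defMask;       return; } // case 1
        if (useFixed && !useConflict)                 { defMask = useMask;       return; } // case 2
        if (defFixed && !useConflict)                 { defMask = useMask;       return; } // case 3
        if (useFixed && !defConflict && canChangeUse) { useMask = defMask;       return; } // case 4
        if (defFixed && useFixed)                     { defMask = allRegsOfType; return; } // case 5
        // case 6: leave both sides unchanged; a copy will be inserted at the use.
    }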
+// + +void LinearScan::resolveConflictingDefAndUse(Interval* interval, RefPosition* defRefPosition) +{ + assert(!interval->isLocalVar); + + RefPosition* useRefPosition = defRefPosition->nextRefPosition; + regMaskTP defRegAssignment = defRefPosition->registerAssignment; + regMaskTP useRegAssignment = useRefPosition->registerAssignment; + RegRecord* defRegRecord = nullptr; + RegRecord* useRegRecord = nullptr; + regNumber defReg = REG_NA; + regNumber useReg = REG_NA; + bool defRegConflict = false; + bool useRegConflict = false; + + // If the useRefPosition is a "delayRegFree", we can't change the registerAssignment + // on it, or we will fail to ensure that the fixedReg is busy at the time the target + // (of the node that uses this interval) is allocated. + bool canChangeUseAssignment = !useRefPosition->isFixedRegRef || !useRefPosition->delayRegFree; + + INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_DEFUSE_CONFLICT)); + if (!canChangeUseAssignment) + { + INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_DEFUSE_FIXED_DELAY_USE)); + } + if (defRefPosition->isFixedRegRef) + { + defReg = defRefPosition->assignedReg(); + defRegRecord = getRegisterRecord(defReg); + if (canChangeUseAssignment) + { + RefPosition* currFixedRegRefPosition = defRegRecord->recentRefPosition; + assert(currFixedRegRefPosition != nullptr && + currFixedRegRefPosition->nodeLocation == defRefPosition->nodeLocation); + + if (currFixedRegRefPosition->nextRefPosition == nullptr || + currFixedRegRefPosition->nextRefPosition->nodeLocation > useRefPosition->getRefEndLocation()) + { + // This is case #1. Use the defRegAssignment + INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_DEFUSE_CASE1)); + useRefPosition->registerAssignment = defRegAssignment; + return; + } + else + { + defRegConflict = true; + } + } + } + if (useRefPosition->isFixedRegRef) + { + useReg = useRefPosition->assignedReg(); + useRegRecord = getRegisterRecord(useReg); + RefPosition* currFixedRegRefPosition = useRegRecord->recentRefPosition; + + // We know that useRefPosition is a fixed use, so the nextRefPosition must not be null. + RefPosition* nextFixedRegRefPosition = useRegRecord->getNextRefPosition(); + assert(nextFixedRegRefPosition != nullptr && + nextFixedRegRefPosition->nodeLocation <= useRefPosition->nodeLocation); + + // First, check to see if there are any conflicting FixedReg references between the def and use. + if (nextFixedRegRefPosition->nodeLocation == useRefPosition->nodeLocation) + { + // OK, no conflicting FixedReg references. + // Now, check to see whether it is currently in use. + if (useRegRecord->assignedInterval != nullptr) + { + RefPosition* possiblyConflictingRef = useRegRecord->assignedInterval->recentRefPosition; + LsraLocation possiblyConflictingRefLocation = possiblyConflictingRef->getRefEndLocation(); + if (possiblyConflictingRefLocation >= defRefPosition->nodeLocation) + { + useRegConflict = true; + } + } + if (!useRegConflict) + { + // This is case #2. Use the useRegAssignment + INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_DEFUSE_CASE2)); + defRefPosition->registerAssignment = useRegAssignment; + return; + } + } + else + { + useRegConflict = true; + } + } + if (defRegRecord != nullptr && !useRegConflict) + { + // This is case #3. + INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_DEFUSE_CASE3)); + defRefPosition->registerAssignment = useRegAssignment; + return; + } + if (useRegRecord != nullptr && !defRegConflict && canChangeUseAssignment) + { + // This is case #4. 
+ INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_DEFUSE_CASE4)); + useRefPosition->registerAssignment = defRegAssignment; + return; + } + if (defRegRecord != nullptr && useRegRecord != nullptr) + { + // This is case #5. + INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_DEFUSE_CASE5)); + RegisterType regType = interval->registerType; + assert((getRegisterType(interval, defRefPosition) == regType) && + (getRegisterType(interval, useRefPosition) == regType)); + regMaskTP candidates = allRegs(regType); + defRefPosition->registerAssignment = candidates; + return; + } + INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_DEFUSE_CASE6)); + return; +} + +//------------------------------------------------------------------------ +// conflictingFixedRegReference: Determine whether the current RegRecord has a +// fixed register use that conflicts with 'refPosition' +// +// Arguments: +// refPosition - The RefPosition of interest +// +// Return Value: +// Returns true iff the given RefPosition is NOT a fixed use of this register, +// AND either: +// - there is a RefPosition on this RegRecord at the nodeLocation of the given RefPosition, or +// - the given RefPosition has a delayRegFree, and there is a RefPosition on this RegRecord at +// the nodeLocation just past the given RefPosition. +// +// Assumptions: +// 'refPosition is non-null. + +bool RegRecord::conflictingFixedRegReference(RefPosition* refPosition) +{ + // Is this a fixed reference of this register? If so, there is no conflict. + if (refPosition->isFixedRefOfRegMask(genRegMask(regNum))) + { + return false; + } + // Otherwise, check for conflicts. + // There is a conflict if: + // 1. There is a recent RefPosition on this RegRecord that is at this location, + // except in the case where it is a special "putarg" that is associated with this interval, OR + // 2. There is an upcoming RefPosition at this location, or at the next location + // if refPosition is a delayed use (i.e. must be kept live through the next/def location). + + LsraLocation refLocation = refPosition->nodeLocation; + if (recentRefPosition != nullptr && recentRefPosition->refType != RefTypeKill && + recentRefPosition->nodeLocation == refLocation && + (!isBusyUntilNextKill || assignedInterval != refPosition->getInterval())) + { + return true; + } + LsraLocation nextPhysRefLocation = getNextRefLocation(); + if (nextPhysRefLocation == refLocation || (refPosition->delayRegFree && nextPhysRefLocation == (refLocation + 1))) + { + return true; + } + return false; +} + +void LinearScan::applyCalleeSaveHeuristics(RefPosition* rp) +{ +#ifdef _TARGET_AMD64_ + if (compiler->opts.compDbgEnC) + { + // We only use RSI and RDI for EnC code, so we don't want to favor callee-save regs. 
+ return; + } +#endif // _TARGET_AMD64_ + + Interval* theInterval = rp->getInterval(); +#ifdef DEBUG + regMaskTP calleeSaveMask = calleeSaveRegs(getRegisterType(theInterval, rp)); + if (doReverseCallerCallee()) + { + regMaskTP newAssignment = rp->registerAssignment; + newAssignment &= calleeSaveMask; + if (newAssignment != RBM_NONE) + { + rp->registerAssignment = newAssignment; + } + } + else +#endif // DEBUG + { + // Set preferences so that this register set will be preferred for earlier refs + theInterval->updateRegisterPreferences(rp->registerAssignment); + } +} + +void LinearScan::associateRefPosWithInterval(RefPosition* rp) +{ + Referenceable* theReferent = rp->referent; + + if (theReferent != nullptr) + { + // All RefPositions except the dummy ones at the beginning of blocks + + if (rp->isIntervalRef()) + { + Interval* theInterval = rp->getInterval(); + + applyCalleeSaveHeuristics(rp); + + // Ensure that we have consistent def/use on SDSU temps. + // However, in the case of a non-commutative rmw def, we must avoid over-constraining + // the def, so don't propagate a single-register restriction from the consumer to the producer + + if (RefTypeIsUse(rp->refType) && !theInterval->isLocalVar) + { + RefPosition* prevRefPosition = theInterval->recentRefPosition; + assert(prevRefPosition != nullptr && theInterval->firstRefPosition == prevRefPosition); + regMaskTP prevAssignment = prevRefPosition->registerAssignment; + regMaskTP newAssignment = (prevAssignment & rp->registerAssignment); + if (newAssignment != RBM_NONE) + { + if (!theInterval->hasNonCommutativeRMWDef || !isSingleRegister(newAssignment)) + { + prevRefPosition->registerAssignment = newAssignment; + } + } + else + { + theInterval->hasConflictingDefUse = true; + } + } + } + + RefPosition* prevRP = theReferent->recentRefPosition; + if (prevRP != nullptr) + { + prevRP->nextRefPosition = rp; + } + else + { + theReferent->firstRefPosition = rp; + } + theReferent->recentRefPosition = rp; + theReferent->lastRefPosition = rp; + } + else + { + assert((rp->refType == RefTypeBB) || (rp->refType == RefTypeKillGCRefs)); + } +} + +//--------------------------------------------------------------------------- +// newRefPosition: allocate and initialize a new RefPosition. +// +// Arguments: +// reg - reg number that identifies RegRecord to be associated +// with this RefPosition +// theLocation - LSRA location of RefPosition +// theRefType - RefPosition type +// theTreeNode - GenTree node for which this RefPosition is created +// mask - Set of valid registers for this RefPosition +// multiRegIdx - register position if this RefPosition corresponds to a +// multi-reg call node. +// +// Return Value: +// a new RefPosition +// +RefPosition* LinearScan::newRefPosition( + regNumber reg, LsraLocation theLocation, RefType theRefType, GenTree* theTreeNode, regMaskTP mask) +{ + RefPosition* newRP = newRefPositionRaw(theLocation, theTreeNode, theRefType); + + newRP->setReg(getRegisterRecord(reg)); + newRP->registerAssignment = mask; + + newRP->setMultiRegIdx(0); + newRP->setAllocateIfProfitable(0); + + associateRefPosWithInterval(newRP); + + DBEXEC(VERBOSE, newRP->dump()); + return newRP; +} + +//--------------------------------------------------------------------------- +// newRefPosition: allocate and initialize a new RefPosition. +// +// Arguments: +// theInterval - interval to which RefPosition is associated with. 
+// theLocation - LSRA location of RefPosition +// theRefType - RefPosition type +// theTreeNode - GenTree node for which this RefPosition is created +// mask - Set of valid registers for this RefPosition +// multiRegIdx - register position if this RefPosition corresponds to a +// multi-reg call node. +// +// Return Value: +// a new RefPosition +// +RefPosition* LinearScan::newRefPosition(Interval* theInterval, + LsraLocation theLocation, + RefType theRefType, + GenTree* theTreeNode, + regMaskTP mask, + unsigned multiRegIdx /* = 0 */) +{ +#ifdef DEBUG + if (theInterval != nullptr && regType(theInterval->registerType) == FloatRegisterType) + { + // In the case we're using floating point registers we must make sure + // this flag was set previously in the compiler since this will mandate + // whether LSRA will take into consideration FP reg killsets. + assert(compiler->compFloatingPointUsed || ((mask & RBM_FLT_CALLEE_SAVED) == 0)); + } +#endif // DEBUG + + // If this reference is constrained to a single register (and it's not a dummy + // or Kill reftype already), add a RefTypeFixedReg at this location so that its + // availability can be more accurately determined + + bool isFixedRegister = isSingleRegister(mask); + bool insertFixedRef = false; + if (isFixedRegister) + { + // Insert a RefTypeFixedReg for any normal def or use (not ParamDef or BB) + if (theRefType == RefTypeUse || theRefType == RefTypeDef) + { + insertFixedRef = true; + } + } + + if (insertFixedRef) + { + regNumber physicalReg = genRegNumFromMask(mask); + RefPosition* pos = newRefPosition(physicalReg, theLocation, RefTypeFixedReg, nullptr, mask); + assert(theInterval != nullptr); + assert((allRegs(theInterval->registerType) & mask) != 0); + } + + RefPosition* newRP = newRefPositionRaw(theLocation, theTreeNode, theRefType); + + newRP->setInterval(theInterval); + + // Spill info + newRP->isFixedRegRef = isFixedRegister; + +#ifndef _TARGET_AMD64_ + // We don't need this for AMD because the PInvoke method epilog code is explicit + // at register allocation time. 
+ if (theInterval != nullptr && theInterval->isLocalVar && compiler->info.compCallUnmanaged && + theInterval->varNum == compiler->genReturnLocal) + { + mask &= ~(RBM_PINVOKE_TCB | RBM_PINVOKE_FRAME); + noway_assert(mask != RBM_NONE); + } +#endif // !_TARGET_AMD64_ + newRP->registerAssignment = mask; + + newRP->setMultiRegIdx(multiRegIdx); + newRP->setAllocateIfProfitable(0); + + associateRefPosWithInterval(newRP); + + DBEXEC(VERBOSE, newRP->dump()); + return newRP; +} + +/***************************************************************************** + * Inline functions for Interval + *****************************************************************************/ +RefPosition* Referenceable::getNextRefPosition() +{ + if (recentRefPosition == nullptr) + { + return firstRefPosition; + } + else + { + return recentRefPosition->nextRefPosition; + } +} + +LsraLocation Referenceable::getNextRefLocation() +{ + RefPosition* nextRefPosition = getNextRefPosition(); + if (nextRefPosition == nullptr) + { + return MaxLocation; + } + else + { + return nextRefPosition->nodeLocation; + } +} + +// Iterate through all the registers of the given type +class RegisterIterator +{ + friend class Registers; + +public: + RegisterIterator(RegisterType type) : regType(type) + { + if (useFloatReg(regType)) + { + currentRegNum = REG_FP_FIRST; + } + else + { + currentRegNum = REG_INT_FIRST; + } + } + +protected: + static RegisterIterator Begin(RegisterType regType) + { + return RegisterIterator(regType); + } + static RegisterIterator End(RegisterType regType) + { + RegisterIterator endIter = RegisterIterator(regType); + // This assumes only integer and floating point register types + // if we target a processor with additional register types, + // this would have to change + if (useFloatReg(regType)) + { + // This just happens to work for both double & float + endIter.currentRegNum = REG_NEXT(REG_FP_LAST); + } + else + { + endIter.currentRegNum = REG_NEXT(REG_INT_LAST); + } + return endIter; + } + +public: + void operator++(int dummy) // int dummy is c++ for "this is postfix ++" + { + currentRegNum = REG_NEXT(currentRegNum); +#ifdef _TARGET_ARM_ + if (regType == TYP_DOUBLE) + currentRegNum = REG_NEXT(currentRegNum); +#endif + } + void operator++() // prefix operator++ + { + currentRegNum = REG_NEXT(currentRegNum); +#ifdef _TARGET_ARM_ + if (regType == TYP_DOUBLE) + currentRegNum = REG_NEXT(currentRegNum); +#endif + } + regNumber operator*() + { + return currentRegNum; + } + bool operator!=(const RegisterIterator& other) + { + return other.currentRegNum != currentRegNum; + } + +private: + regNumber currentRegNum; + RegisterType regType; +}; + +class Registers +{ +public: + friend class RegisterIterator; + RegisterType type; + Registers(RegisterType t) + { + type = t; + } + RegisterIterator begin() + { + return RegisterIterator::Begin(type); + } + RegisterIterator end() + { + return RegisterIterator::End(type); + } +}; + +#ifdef DEBUG +void LinearScan::dumpVarToRegMap(VarToRegMap map) +{ + bool anyPrinted = false; + for (unsigned varIndex = 0; varIndex < compiler->lvaTrackedCount; varIndex++) + { + unsigned varNum = compiler->lvaTrackedToVarNum[varIndex]; + if (map[varIndex] != REG_STK) + { + printf("V%02u=%s ", varNum, getRegName(map[varIndex])); + anyPrinted = true; + } + } + if (!anyPrinted) + { + printf("none"); + } + printf("\n"); +} + +void LinearScan::dumpInVarToRegMap(BasicBlock* block) +{ + printf("Var=Reg beg of BB%02u: ", block->bbNum); + VarToRegMap map = getInVarToRegMap(block->bbNum); + dumpVarToRegMap(map); 
+} + +void LinearScan::dumpOutVarToRegMap(BasicBlock* block) +{ + printf("Var=Reg end of BB%02u: ", block->bbNum); + VarToRegMap map = getOutVarToRegMap(block->bbNum); + dumpVarToRegMap(map); +} + +#endif // DEBUG + +LinearScanInterface* getLinearScanAllocator(Compiler* comp) +{ + return new (comp, CMK_LSRA) LinearScan(comp); +} + +//------------------------------------------------------------------------ +// LSRA constructor +// +// Arguments: +// theCompiler +// +// Notes: +// The constructor takes care of initializing the data structures that are used +// during Lowering, including (in DEBUG) getting the stress environment variables, +// as they may affect the block ordering. + +LinearScan::LinearScan(Compiler* theCompiler) + : compiler(theCompiler) +#if MEASURE_MEM_ALLOC + , lsraIAllocator(nullptr) +#endif // MEASURE_MEM_ALLOC + , intervals(LinearScanMemoryAllocatorInterval(theCompiler)) + , refPositions(LinearScanMemoryAllocatorRefPosition(theCompiler)) +{ +#ifdef DEBUG + maxNodeLocation = 0; + activeRefPosition = nullptr; + + // Get the value of the environment variable that controls stress for register allocation + lsraStressMask = JitConfig.JitStressRegs(); +#if 0 +#ifdef DEBUG + if (lsraStressMask != 0) + { + // The code in this #if can be used to debug JitStressRegs issues according to + // method hash. To use, simply set environment variables JitStressRegsHashLo and JitStressRegsHashHi + unsigned methHash = compiler->info.compMethodHash(); + char* lostr = getenv("JitStressRegsHashLo"); + unsigned methHashLo = 0; + bool dump = false; + if (lostr != nullptr) + { + sscanf_s(lostr, "%x", &methHashLo); + dump = true; + } + char* histr = getenv("JitStressRegsHashHi"); + unsigned methHashHi = UINT32_MAX; + if (histr != nullptr) + { + sscanf_s(histr, "%x", &methHashHi); + dump = true; + } + if (methHash < methHashLo || methHash > methHashHi) + { + lsraStressMask = 0; + } + else if (dump == true) + { + printf("JitStressRegs = %x for method %s, hash = 0x%x.\n", + lsraStressMask, compiler->info.compFullName, compiler->info.compMethodHash()); + printf(""); // in our logic this causes a flush + } + } +#endif // DEBUG +#endif + + dumpTerse = (JitConfig.JitDumpTerseLsra() != 0); + +#endif // DEBUG + availableIntRegs = (RBM_ALLINT & ~compiler->codeGen->regSet.rsMaskResvd); +#if ETW_EBP_FRAMED + availableIntRegs &= ~RBM_FPBASE; +#endif // ETW_EBP_FRAMED + availableFloatRegs = RBM_ALLFLOAT; + availableDoubleRegs = RBM_ALLDOUBLE; + +#ifdef _TARGET_AMD64_ + if (compiler->opts.compDbgEnC) + { + // On x64 when the EnC option is set, we always save exactly RBP, RSI and RDI. + // RBP is not available to the register allocator, so RSI and RDI are the only + // callee-save registers available. + availableIntRegs &= ~RBM_CALLEE_SAVED | RBM_RSI | RBM_RDI; + availableFloatRegs &= ~RBM_CALLEE_SAVED; + availableDoubleRegs &= ~RBM_CALLEE_SAVED; + } +#endif // _TARGET_AMD64_ + compiler->rpFrameType = FT_NOT_SET; + compiler->rpMustCreateEBPCalled = false; + + compiler->codeGen->intRegState.rsIsFloat = false; + compiler->codeGen->floatRegState.rsIsFloat = true; + + // Block sequencing (the order in which we schedule). + // Note that we don't initialize the bbVisitedSet until we do the first traversal + // (currently during Lowering's second phase, where it sets the TreeNodeInfo). + // This is so that any blocks that are added during the first phase of Lowering + // are accounted for (and we don't have BasicBlockEpoch issues). 
+ blockSequencingDone = false; + blockSequence = nullptr; + blockSequenceWorkList = nullptr; + curBBSeqNum = 0; + bbSeqCount = 0; + + // Information about each block, including predecessor blocks used for variable locations at block entry. + blockInfo = nullptr; + + // Populate the register mask table. + // The first two masks in the table are allint/allfloat + // The next N are the masks for each single register. + // After that are the dynamically added ones. + regMaskTable = new (compiler, CMK_LSRA) regMaskTP[numMasks]; + regMaskTable[ALLINT_IDX] = allRegs(TYP_INT); + regMaskTable[ALLFLOAT_IDX] = allRegs(TYP_DOUBLE); + + regNumber reg; + for (reg = REG_FIRST; reg < REG_COUNT; reg = REG_NEXT(reg)) + { + regMaskTable[FIRST_SINGLE_REG_IDX + reg - REG_FIRST] = (reg == REG_STK) ? RBM_NONE : genRegMask(reg); + } + nextFreeMask = FIRST_SINGLE_REG_IDX + REG_COUNT; + noway_assert(nextFreeMask <= numMasks); +} + +// Return the reg mask corresponding to the given index. +regMaskTP LinearScan::GetRegMaskForIndex(RegMaskIndex index) +{ + assert(index < numMasks); + assert(index < nextFreeMask); + return regMaskTable[index]; +} + +// Given a reg mask, return the index it corresponds to. If it is not a 'well known' reg mask, +// add it at the end. This method has linear behavior in the worst cases but that is fairly rare. +// Most methods never use any but the well-known masks, and when they do use more +// it is only one or two more. +LinearScan::RegMaskIndex LinearScan::GetIndexForRegMask(regMaskTP mask) +{ + RegMaskIndex result; + if (isSingleRegister(mask)) + { + result = genRegNumFromMask(mask) + FIRST_SINGLE_REG_IDX; + } + else if (mask == allRegs(TYP_INT)) + { + result = ALLINT_IDX; + } + else if (mask == allRegs(TYP_DOUBLE)) + { + result = ALLFLOAT_IDX; + } + else + { + for (int i = FIRST_SINGLE_REG_IDX + REG_COUNT; i < nextFreeMask; i++) + { + if (regMaskTable[i] == mask) + { + return i; + } + } + + // We only allocate a fixed number of masks. Since we don't reallocate, we will throw a + // noway_assert if we exceed this limit. + noway_assert(nextFreeMask < numMasks); + + regMaskTable[nextFreeMask] = mask; + result = nextFreeMask; + nextFreeMask++; + } + assert(mask == regMaskTable[result]); + return result; +} + +// We've decided that we can't use a register during register allocation (probably FPBASE), +// but we've already added it to the register masks. Go through the masks and remove it. +void LinearScan::RemoveRegisterFromMasks(regNumber reg) +{ + JITDUMP("Removing register %s from LSRA register masks\n", getRegName(reg)); + + regMaskTP mask = ~genRegMask(reg); + for (int i = 0; i < nextFreeMask; i++) + { + regMaskTable[i] &= mask; + } + + JITDUMP("After removing register:\n"); + DBEXEC(VERBOSE, dspRegisterMaskTable()); +} + +#ifdef DEBUG +void LinearScan::dspRegisterMaskTable() +{ + printf("LSRA register masks. Total allocated: %d, total used: %d\n", numMasks, nextFreeMask); + for (int i = 0; i < nextFreeMask; i++) + { + printf("%2u: ", i); + dspRegMask(regMaskTable[i]); + printf("\n"); + } +} +#endif // DEBUG + +//------------------------------------------------------------------------ +// getNextCandidateFromWorkList: Get the next candidate for block sequencing +// +// Arguments: +// None. +// +// Return Value: +// The next block to be placed in the sequence. 
+// +// Notes: +// This method currently always returns the next block in the list, and relies on having +// blocks added to the list only when they are "ready", and on the +// addToBlockSequenceWorkList() method to insert them in the proper order. +// However, a block may be in the list and already selected, if it was subsequently +// encountered as both a flow and layout successor of the most recently selected +// block. + +BasicBlock* LinearScan::getNextCandidateFromWorkList() +{ + BasicBlockList* nextWorkList = nullptr; + for (BasicBlockList* workList = blockSequenceWorkList; workList != nullptr; workList = nextWorkList) + { + nextWorkList = workList->next; + BasicBlock* candBlock = workList->block; + removeFromBlockSequenceWorkList(workList, nullptr); + if (!isBlockVisited(candBlock)) + { + return candBlock; + } + } + return nullptr; +} + +//------------------------------------------------------------------------ +// setBlockSequence:Determine the block order for register allocation. +// +// Arguments: +// None +// +// Return Value: +// None +// +// Notes: +// On return, the blockSequence array contains the blocks, in the order in which they +// will be allocated. +// This method clears the bbVisitedSet on LinearScan, and when it returns the set +// contains all the bbNums for the block. +// This requires a traversal of the BasicBlocks, and could potentially be +// combined with the first traversal (currently the one in Lowering that sets the +// TreeNodeInfo). + +void LinearScan::setBlockSequence() +{ + // Reset the "visited" flag on each block. + compiler->EnsureBasicBlockEpoch(); + bbVisitedSet = BlockSetOps::MakeEmpty(compiler); + BlockSet BLOCKSET_INIT_NOCOPY(readySet, BlockSetOps::MakeEmpty(compiler)); + assert(blockSequence == nullptr && bbSeqCount == 0); + blockSequence = new (compiler, CMK_LSRA) BasicBlock*[compiler->fgBBcount]; + bbNumMaxBeforeResolution = compiler->fgBBNumMax; + blockInfo = new (compiler, CMK_LSRA) LsraBlockInfo[bbNumMaxBeforeResolution + 1]; + + assert(blockSequenceWorkList == nullptr); + + bool addedInternalBlocks = false; + verifiedAllBBs = false; + BasicBlock* nextBlock; + for (BasicBlock* block = compiler->fgFirstBB; block != nullptr; block = nextBlock) + { + blockSequence[bbSeqCount] = block; + markBlockVisited(block); + bbSeqCount++; + nextBlock = nullptr; + + // Initialize the blockInfo. + // predBBNum will be set later. 0 is never used as a bbNum. + blockInfo[block->bbNum].predBBNum = 0; + // We check for critical edges below, but initialize to false. + blockInfo[block->bbNum].hasCriticalInEdge = false; + blockInfo[block->bbNum].hasCriticalOutEdge = false; + blockInfo[block->bbNum].weight = block->bbWeight; + + if (block->GetUniquePred(compiler) == nullptr) + { + for (flowList* pred = block->bbPreds; pred != nullptr; pred = pred->flNext) + { + BasicBlock* predBlock = pred->flBlock; + if (predBlock->NumSucc(compiler) > 1) + { + blockInfo[block->bbNum].hasCriticalInEdge = true; + break; + } + else if (predBlock->bbJumpKind == BBJ_SWITCH) + { + assert(!"Switch with single successor"); + } + } + } + + // Determine which block to schedule next. + + // First, update the NORMAL successors of the current block, adding them to the worklist + // according to the desired order. We will handle the EH successors below. 
+        bool checkForCriticalOutEdge = (block->NumSucc(compiler) > 1);
+        if (!checkForCriticalOutEdge && block->bbJumpKind == BBJ_SWITCH)
+        {
+            assert(!"Switch with single successor");
+        }
+
+        for (unsigned succIndex = 0; succIndex < block->NumSucc(compiler); succIndex++)
+        {
+            BasicBlock* succ = block->GetSucc(succIndex, compiler);
+            if (checkForCriticalOutEdge && succ->GetUniquePred(compiler) == nullptr)
+            {
+                blockInfo[block->bbNum].hasCriticalOutEdge = true;
+                // We can stop checking now.
+                checkForCriticalOutEdge = false;
+            }
+
+            if (isTraversalLayoutOrder() || isBlockVisited(succ))
+            {
+                continue;
+            }
+
+            // We've now seen a predecessor, so add it to the work list and the "readySet".
+            // It will be inserted in the worklist according to the specified traversal order
+            // (i.e. pred-first or random, since layout order is handled above).
+            if (!BlockSetOps::IsMember(compiler, readySet, succ->bbNum))
+            {
+                addToBlockSequenceWorkList(readySet, succ);
+                BlockSetOps::AddElemD(compiler, readySet, succ->bbNum);
+            }
+        }
+
+        // For layout order, simply use bbNext
+        if (isTraversalLayoutOrder())
+        {
+            nextBlock = block->bbNext;
+            continue;
+        }
+
+        while (nextBlock == nullptr)
+        {
+            nextBlock = getNextCandidateFromWorkList();
+
+            // TODO-Throughput: We would like to bypass this traversal if we know we've handled all
+            // the blocks - but fgBBcount does not appear to be updated when blocks are removed.
+            if (nextBlock == nullptr /* && bbSeqCount != compiler->fgBBcount*/ && !verifiedAllBBs)
+            {
+                // If we don't encounter all blocks by traversing the regular successor links, do a full
+                // traversal of all the blocks, and add them in layout order.
+                // This may include:
+                //   - internal-only blocks (in the fgAddCodeList) which may not be in the flow graph
+                //     (these are not even in the bbNext links).
+                //   - blocks that have become unreachable due to optimizations, but that are strongly
+                //     connected (these are not removed)
+                //   - EH blocks
+
+                for (Compiler::AddCodeDsc* desc = compiler->fgAddCodeList; desc != nullptr; desc = desc->acdNext)
+                {
+                    if (!isBlockVisited(block))
+                    {
+                        addToBlockSequenceWorkList(readySet, block);
+                        BlockSetOps::AddElemD(compiler, readySet, block->bbNum);
+                    }
+                }
+
+                for (BasicBlock* block = compiler->fgFirstBB; block; block = block->bbNext)
+                {
+                    if (!isBlockVisited(block))
+                    {
+                        addToBlockSequenceWorkList(readySet, block);
+                        BlockSetOps::AddElemD(compiler, readySet, block->bbNum);
+                    }
+                }
+                verifiedAllBBs = true;
+            }
+            else
+            {
+                break;
+            }
+        }
+    }
+    blockSequencingDone = true;
+
+#ifdef DEBUG
+    // Make sure that we've visited all the blocks.
+    for (BasicBlock* block = compiler->fgFirstBB; block != nullptr; block = block->bbNext)
+    {
+        assert(isBlockVisited(block));
+    }
+
+    JITDUMP("LSRA Block Sequence: ");
+    int i = 1;
+    for (BasicBlock *block = startBlockSequence(); block != nullptr; ++i, block = moveToNextBlock())
+    {
+        JITDUMP("BB%02u", block->bbNum);
+
+        if (block->isMaxBBWeight())
+        {
+            JITDUMP("(MAX) ");
+        }
+        else
+        {
+            JITDUMP("(%6s) ", refCntWtd2str(block->getBBWeight(compiler)));
+        }
+
+        if (i % 10 == 0)
+        {
+            JITDUMP("\n ");
+        }
+    }
+    JITDUMP("\n\n");
+#endif
+}
+
+//------------------------------------------------------------------------
+// compareBlocksForSequencing: Compare two basic blocks for sequencing order.
+//
+// Arguments:
+//    block1            - the first block for comparison
+//    block2            - the second block for comparison
+//    useBlockWeights   - whether to use block weights for comparison
+//
+// Return Value:
+//    -1 if block1 is preferred.
+//    0 if the blocks are equivalent.
+//    1 if block2 is preferred.
+//
+// Notes:
+//    See addToBlockSequenceWorkList.
+int LinearScan::compareBlocksForSequencing(BasicBlock* block1, BasicBlock* block2, bool useBlockWeights)
+{
+    if (useBlockWeights)
+    {
+        unsigned weight1 = block1->getBBWeight(compiler);
+        unsigned weight2 = block2->getBBWeight(compiler);
+
+        if (weight1 > weight2)
+        {
+            return -1;
+        }
+        else if (weight1 < weight2)
+        {
+            return 1;
+        }
+    }
+
+    // If weights are the same prefer LOWER bbnum
+    if (block1->bbNum < block2->bbNum)
+    {
+        return -1;
+    }
+    else if (block1->bbNum == block2->bbNum)
+    {
+        return 0;
+    }
+    else
+    {
+        return 1;
+    }
+}
+
+//------------------------------------------------------------------------
+// addToBlockSequenceWorkList: Add a BasicBlock to the work list for sequencing.
+//
+// Arguments:
+//    sequencedBlockSet - the set of blocks that are already sequenced
+//    block             - the new block to be added
+//
+// Return Value:
+//    None.
+//
+// Notes:
+//    The first block in the list will be the next one to be sequenced, as soon
+//    as we encounter a block whose successors have all been sequenced, in pred-first
+//    order, or the very next block if we are traversing in random order (once implemented).
+//    This method uses a comparison method to determine the order in which to place
+//    the blocks in the list. This method queries whether all predecessors of the
+//    block are sequenced at the time it is added to the list and if so uses block weights
+//    for inserting the block. A block is never inserted ahead of its predecessors.
+//    A block at the time of insertion may not have all its predecessors sequenced, in
+//    which case it will be sequenced based on its block number. Once a block is inserted,
+//    its priority/order will not be changed later once its remaining predecessors are
+//    sequenced. This means that the work list may not be sorted entirely by block
+//    weights alone.
+//
+//    Note also that, when random traversal order is implemented, this method
+//    should insert the blocks into the list in random order, so that we can always
+//    simply select the first block in the list.
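To see the comparator contract in isolation, here is a small standalone toy (the
ToyBlock type and this program are hypothetical, not JIT code): heavier blocks sort
first, and a lower bbNum breaks ties.

    #include <algorithm>
    #include <cstdio>
    #include <vector>

    struct ToyBlock
    {
        unsigned bbNum;
        unsigned weight;
    };

    // Mirrors the -1/0/1 contract of compareBlocksForSequencing.
    static int compareToyBlocks(const ToyBlock& b1, const ToyBlock& b2, bool useWeights)
    {
        if (useWeights)
        {
            if (b1.weight > b2.weight) return -1; // prefer the heavier block
            if (b1.weight < b2.weight) return 1;
        }
        if (b1.bbNum < b2.bbNum)  return -1;      // tie-break: lower bbNum first
        if (b1.bbNum == b2.bbNum) return 0;
        return 1;
    }

    int main()
    {
        std::vector<ToyBlock> blocks = {{3, 100}, {1, 100}, {2, 400}};
        std::sort(blocks.begin(), blocks.end(), [](const ToyBlock& a, const ToyBlock& b) {
            return compareToyBlocks(a, b, /* useWeights */ true) < 0;
        });
        for (const ToyBlock& b : blocks)
        {
            printf("BB%02u(%u) ", b.bbNum, b.weight); // prints: BB02(400) BB01(100) BB03(100)
        }
        printf("\n");
        return 0;
    }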
+void LinearScan::addToBlockSequenceWorkList(BlockSet sequencedBlockSet, BasicBlock* block) +{ + // The block that is being added is not already sequenced + assert(!BlockSetOps::IsMember(compiler, sequencedBlockSet, block->bbNum)); + + // Get predSet of block + BlockSet BLOCKSET_INIT_NOCOPY(predSet, BlockSetOps::MakeEmpty(compiler)); + flowList* pred; + for (pred = block->bbPreds; pred != nullptr; pred = pred->flNext) + { + BlockSetOps::AddElemD(compiler, predSet, pred->flBlock->bbNum); + } + + // If either a rarely run block or all its preds are already sequenced, use block's weight to sequence + bool useBlockWeight = block->isRunRarely() || BlockSetOps::IsSubset(compiler, sequencedBlockSet, predSet); + + BasicBlockList* prevNode = nullptr; + BasicBlockList* nextNode = blockSequenceWorkList; + + while (nextNode != nullptr) + { + int seqResult; + + if (nextNode->block->isRunRarely()) + { + // If the block that is yet to be sequenced is a rarely run block, always use block weights for sequencing + seqResult = compareBlocksForSequencing(nextNode->block, block, true); + } + else if (BlockSetOps::IsMember(compiler, predSet, nextNode->block->bbNum)) + { + // always prefer unsequenced pred blocks + seqResult = -1; + } + else + { + seqResult = compareBlocksForSequencing(nextNode->block, block, useBlockWeight); + } + + if (seqResult > 0) + { + break; + } + + prevNode = nextNode; + nextNode = nextNode->next; + } + + BasicBlockList* newListNode = new (compiler, CMK_LSRA) BasicBlockList(block, nextNode); + if (prevNode == nullptr) + { + blockSequenceWorkList = newListNode; + } + else + { + prevNode->next = newListNode; + } +} + +void LinearScan::removeFromBlockSequenceWorkList(BasicBlockList* listNode, BasicBlockList* prevNode) +{ + if (listNode == blockSequenceWorkList) + { + assert(prevNode == nullptr); + blockSequenceWorkList = listNode->next; + } + else + { + assert(prevNode != nullptr && prevNode->next == listNode); + prevNode->next = listNode->next; + } + // TODO-Cleanup: consider merging Compiler::BlockListNode and BasicBlockList + // compiler->FreeBlockListNode(listNode); +} + +// Initialize the block order for allocation (called each time a new traversal begins). +BasicBlock* LinearScan::startBlockSequence() +{ + if (!blockSequencingDone) + { + setBlockSequence(); + } + BasicBlock* curBB = compiler->fgFirstBB; + curBBSeqNum = 0; + curBBNum = curBB->bbNum; + clearVisitedBlocks(); + assert(blockSequence[0] == compiler->fgFirstBB); + markBlockVisited(curBB); + return curBB; +} + +//------------------------------------------------------------------------ +// moveToNextBlock: Move to the next block in order for allocation or resolution. +// +// Arguments: +// None +// +// Return Value: +// The next block. +// +// Notes: +// This method is used when the next block is actually going to be handled. +// It changes curBBNum. + +BasicBlock* LinearScan::moveToNextBlock() +{ + BasicBlock* nextBlock = getNextBlock(); + curBBSeqNum++; + if (nextBlock != nullptr) + { + curBBNum = nextBlock->bbNum; + } + return nextBlock; +} + +//------------------------------------------------------------------------ +// getNextBlock: Get the next block in order for allocation or resolution. +// +// Arguments: +// None +// +// Return Value: +// The next block. +// +// Notes: +// This method does not actually change the current block - it is used simply +// to determine which block will be next. 
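A brief editorial usage note: the traversal API above is consumed in the same shape as
the debug-dump loop in setBlockSequence earlier in this file ('lsra' below is a
hypothetical LinearScan instance).

    for (BasicBlock* block = lsra->startBlockSequence(); block != nullptr;
         block = lsra->moveToNextBlock())
    {
        // Visit 'block' in allocation order. moveToNextBlock() advances curBBNum,
        // whereas getNextBlock() only peeks at the upcoming block without changing state.
    }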
+ +BasicBlock* LinearScan::getNextBlock() +{ + assert(blockSequencingDone); + unsigned int nextBBSeqNum = curBBSeqNum + 1; + if (nextBBSeqNum < bbSeqCount) + { + return blockSequence[nextBBSeqNum]; + } + return nullptr; +} + +//------------------------------------------------------------------------ +// doLinearScan: The main method for register allocation. +// +// Arguments: +// None +// +// Return Value: +// None. +// +// Assumptions: +// Lowering must have set the NodeInfo (gtLsraInfo) on each node to communicate +// the register requirements. + +void LinearScan::doLinearScan() +{ +#ifdef DEBUG + if (VERBOSE) + { + printf("*************** In doLinearScan\n"); + printf("Trees before linear scan register allocator (LSRA)\n"); + compiler->fgDispBasicBlocks(true); + } +#endif // DEBUG + + splitBBNumToTargetBBNumMap = nullptr; + + // This is complicated by the fact that physical registers have refs associated + // with locations where they are killed (e.g. calls), but we don't want to + // count these as being touched. + + compiler->codeGen->regSet.rsClearRegsModified(); + + // Figure out if we're going to use an RSP frame or an RBP frame. We need to do this + // before building the intervals and ref positions, because those objects will embed + // RBP in various register masks (like preferences) if RBP is allowed to be allocated. + setFrameType(); + + initMaxSpill(); + buildIntervals(); + DBEXEC(VERBOSE, TupleStyleDump(LSRA_DUMP_REFPOS)); + compiler->EndPhase(PHASE_LINEAR_SCAN_BUILD); + + DBEXEC(VERBOSE, lsraDumpIntervals("after buildIntervals")); + + BlockSetOps::ClearD(compiler, bbVisitedSet); + initVarRegMaps(); + allocateRegisters(); + compiler->EndPhase(PHASE_LINEAR_SCAN_ALLOC); + resolveRegisters(); + compiler->EndPhase(PHASE_LINEAR_SCAN_RESOLVE); + + DBEXEC(VERBOSE, TupleStyleDump(LSRA_DUMP_POST)); + + compiler->compLSRADone = true; +} + +//------------------------------------------------------------------------ +// recordVarLocationsAtStartOfBB: Update live-in LclVarDscs with the appropriate +// register location at the start of a block, during codegen. +// +// Arguments: +// bb - the block for which code is about to be generated. +// +// Return Value: +// None. +// +// Assumptions: +// CodeGen will take care of updating the reg masks and the current var liveness, +// after calling this method. +// This is because we need to kill off the dead registers before setting the newly live ones. 
+ +void LinearScan::recordVarLocationsAtStartOfBB(BasicBlock* bb) +{ + JITDUMP("Recording Var Locations at start of BB%02u\n", bb->bbNum); + VarToRegMap map = getInVarToRegMap(bb->bbNum); + unsigned count = 0; + + VARSET_ITER_INIT(compiler, iter, bb->bbLiveIn, varIndex); + while (iter.NextElem(compiler, &varIndex)) + { + unsigned varNum = compiler->lvaTrackedToVarNum[varIndex]; + LclVarDsc* varDsc = &(compiler->lvaTable[varNum]); + regNumber regNum = getVarReg(map, varNum); + + regNumber oldRegNum = varDsc->lvRegNum; + regNumber newRegNum = regNum; + + if (oldRegNum != newRegNum) + { + JITDUMP(" V%02u(%s->%s)", varNum, compiler->compRegVarName(oldRegNum), + compiler->compRegVarName(newRegNum)); + varDsc->lvRegNum = newRegNum; + count++; + } + else if (newRegNum != REG_STK) + { + JITDUMP(" V%02u(%s)", varNum, compiler->compRegVarName(newRegNum)); + count++; + } + } + + if (count == 0) + { + JITDUMP(" <none>\n"); + } + + JITDUMP("\n"); +} + +void Interval::setLocalNumber(unsigned lclNum, LinearScan* linScan) +{ + linScan->localVarIntervals[lclNum] = this; + + assert(linScan->getIntervalForLocalVar(lclNum) == this); + this->isLocalVar = true; + this->varNum = lclNum; +} + +// identify the candidates which we are not going to enregister due to +// being used in EH in a way we don't want to deal with +// this logic cloned from fgInterBlockLocalVarLiveness +void LinearScan::identifyCandidatesExceptionDataflow() +{ + VARSET_TP VARSET_INIT_NOCOPY(exceptVars, VarSetOps::MakeEmpty(compiler)); + VARSET_TP VARSET_INIT_NOCOPY(filterVars, VarSetOps::MakeEmpty(compiler)); + VARSET_TP VARSET_INIT_NOCOPY(finallyVars, VarSetOps::MakeEmpty(compiler)); + BasicBlock* block; + + foreach_block(compiler, block) + { + if (block->bbCatchTyp != BBCT_NONE) + { + // live on entry to handler + VarSetOps::UnionD(compiler, exceptVars, block->bbLiveIn); + } + + if (block->bbJumpKind == BBJ_EHFILTERRET) + { + // live on exit from filter + VarSetOps::UnionD(compiler, filterVars, block->bbLiveOut); + } + else if (block->bbJumpKind == BBJ_EHFINALLYRET) + { + // live on exit from finally + VarSetOps::UnionD(compiler, finallyVars, block->bbLiveOut); + } +#if FEATURE_EH_FUNCLETS + // Funclets are called and returned from, as such we can only count on the frame + // pointer being restored, and thus everything live in or live out must be on the + // stack + if (block->bbFlags & BBF_FUNCLET_BEG) + { + VarSetOps::UnionD(compiler, exceptVars, block->bbLiveIn); + } + if ((block->bbJumpKind == BBJ_EHFINALLYRET) || (block->bbJumpKind == BBJ_EHFILTERRET) || + (block->bbJumpKind == BBJ_EHCATCHRET)) + { + VarSetOps::UnionD(compiler, exceptVars, block->bbLiveOut); + } +#endif // FEATURE_EH_FUNCLETS + } + + // slam them all together (there was really no need to use more than 2 bitvectors here) + VarSetOps::UnionD(compiler, exceptVars, filterVars); + VarSetOps::UnionD(compiler, exceptVars, finallyVars); + + /* Mark all pointer variables live on exit from a 'finally' + block as either volatile for non-GC ref types or as + 'explicitly initialized' (volatile and must-init) for GC-ref types */ + + VARSET_ITER_INIT(compiler, iter, exceptVars, varIndex); + while (iter.NextElem(compiler, &varIndex)) + { + unsigned varNum = compiler->lvaTrackedToVarNum[varIndex]; + LclVarDsc* varDsc = compiler->lvaTable + varNum; + + compiler->lvaSetVarDoNotEnregister(varNum DEBUGARG(Compiler::DNER_LiveInOutOfHandler)); + + if (varTypeIsGC(varDsc)) + { + if (VarSetOps::IsMember(compiler, finallyVars, varIndex) && !varDsc->lvIsParam) + { + varDsc->lvMustInit = true; + 
+ }
+ }
+ }
+}
+
+bool LinearScan::isRegCandidate(LclVarDsc* varDsc)
+{
+ // Check to see if opt settings permit register variables
+ if ((compiler->opts.compFlags & CLFLG_REGVAR) == 0)
+ {
+ return false;
+ }
+
+ // If we have JMP, reg args must be put on the stack
+
+ if (compiler->compJmpOpUsed && varDsc->lvIsRegArg)
+ {
+ return false;
+ }
+
+ if (!varDsc->lvTracked)
+ {
+ return false;
+ }
+
+ // Don't allocate registers for dependently promoted struct fields
+ if (compiler->lvaIsFieldOfDependentlyPromotedStruct(varDsc))
+ {
+ return false;
+ }
+ return true;
+}
+
+// Identify locals & compiler temps that are register candidates
+// TODO-Cleanup: This was cloned from Compiler::lvaSortByRefCount() in lclvars.cpp in order
+// to avoid perturbation, but should be merged.
+
+void LinearScan::identifyCandidates()
+{
+ if (compiler->lvaCount == 0)
+ {
+ return;
+ }
+
+ if (compiler->compHndBBtabCount > 0)
+ {
+ identifyCandidatesExceptionDataflow();
+ }
+
+ // initialize mapping from local to interval
+ localVarIntervals = new (compiler, CMK_LSRA) Interval*[compiler->lvaCount];
+
+ unsigned lclNum;
+ LclVarDsc* varDsc;
+
+ // While we build intervals for the candidate lclVars, we will determine the floating point
+ // lclVars, if any, to consider for callee-save register preferencing.
+ // We maintain two sets of FP vars - those that meet the first threshold of weighted ref count,
+ // and those that meet the second.
+ // The first threshold is used for methods that are heuristically deemed either to have light
+ // fp usage, or to exhibit other factors that encourage conservative use of callee-save registers,
+ // such as multiple exits (where there might be an early exit that would be excessively penalized by
+ // lots of prolog/epilog saves & restores).
+ // The second threshold is used where there are factors deemed to make it more likely that
+ // fp callee-save registers will be needed, such as loops or many fp vars.
+ // We keep two sets of vars, since we collect some of the information to determine which set to
+ // use as we iterate over the vars.
+ // When we are generating AVX code on non-Unix (FEATURE_PARTIAL_SIMD_CALLEE_SAVE), we maintain an
+ // additional set of LargeVectorType vars, and there is a separate threshold defined for those.
+ // It is assumed that if we encounter these, we should consider this a "high use" scenario,
+ // so we don't maintain two sets of these vars.
+ // This is defined as thresholdLargeVectorRefCntWtd, as we are likely to use the same mechanism
+ // for vectors on Arm64, though the actual value may differ.
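+ //
+ // For example (illustrative arithmetic only, assuming BB_UNITY_WEIGHT == 100): the thresholds
+ // computed below are thresholdFPRefCntWtd == 400 and maybeFPRefCntWtd == 200, so an fp lclVar
+ // with a weighted ref count of 250 is added only to fpMaybeCandidateVars, and becomes a
+ // callee-save candidate only if the "likely high fp use" check later in this method fires.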
+
+ VarSetOps::AssignNoCopy(compiler, fpCalleeSaveCandidateVars, VarSetOps::MakeEmpty(compiler));
+ VARSET_TP VARSET_INIT_NOCOPY(fpMaybeCandidateVars, VarSetOps::MakeEmpty(compiler));
+ unsigned int floatVarCount = 0;
+ unsigned int thresholdFPRefCntWtd = 4 * BB_UNITY_WEIGHT;
+ unsigned int maybeFPRefCntWtd = 2 * BB_UNITY_WEIGHT;
+#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
+ VarSetOps::AssignNoCopy(compiler, largeVectorVars, VarSetOps::MakeEmpty(compiler));
+ VarSetOps::AssignNoCopy(compiler, largeVectorCalleeSaveCandidateVars, VarSetOps::MakeEmpty(compiler));
+ unsigned int largeVectorVarCount = 0;
+ unsigned int thresholdLargeVectorRefCntWtd = 4 * BB_UNITY_WEIGHT;
+#endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE
+
+ for (lclNum = 0, varDsc = compiler->lvaTable; lclNum < compiler->lvaCount; lclNum++, varDsc++)
+ {
+ // Assign intervals to all the variables - this makes it easier to map
+ // them back
+ var_types intervalType = (var_types)varDsc->lvType;
+ Interval* newInt = newInterval(intervalType);
+
+ newInt->setLocalNumber(lclNum, this);
+ if (varDsc->lvIsStructField)
+ {
+ newInt->isStructField = true;
+ }
+
+ // Initialize all variables to REG_STK
+ varDsc->lvRegNum = REG_STK;
+#ifndef _TARGET_64BIT_
+ varDsc->lvOtherReg = REG_STK;
+#endif // _TARGET_64BIT_
+
+#if !defined(_TARGET_64BIT_)
+ if (intervalType == TYP_LONG)
+ {
+ // Long variables should not be register candidates.
+ // Lowering will have split any candidate lclVars into lo/hi vars.
+ varDsc->lvLRACandidate = 0;
+ continue;
+ }
+#endif // !defined(_TARGET_64BIT_)
+
+ /* Track all locals that can be enregistered */
+
+ varDsc->lvLRACandidate = 1;
+
+ if (!isRegCandidate(varDsc))
+ {
+ varDsc->lvLRACandidate = 0;
+ continue;
+ }
+
+ // Start with lvRegister as false - set it true only if the variable gets
+ // the same register assignment throughout
+ varDsc->lvRegister = false;
+
+ /* If the ref count is zero */
+ if (varDsc->lvRefCnt == 0)
+ {
+ /* Zero ref count, make this untracked */
+ varDsc->lvRefCntWtd = 0;
+ varDsc->lvLRACandidate = 0;
+ }
+
+ // Variables that are address-exposed are never enregistered or tracked.
+ // A struct may be promoted, and a struct that fits in a register may be fully enregistered.
+ // Pinned variables may not be tracked (a condition of the GCInfo representation)
+ // or enregistered, on x86 -- it is believed that we can enregister pinned (more properly, "pinning")
+ // references when using the general GC encoding.
+
+ if (varDsc->lvAddrExposed || !varTypeIsEnregisterableStruct(varDsc))
+ {
+ varDsc->lvLRACandidate = 0;
+#ifdef DEBUG
+ Compiler::DoNotEnregisterReason dner = Compiler::DNER_AddrExposed;
+ if (!varDsc->lvAddrExposed)
+ {
+ dner = Compiler::DNER_IsStruct;
+ }
+#endif // DEBUG
+ compiler->lvaSetVarDoNotEnregister(lclNum DEBUGARG(dner));
+ }
+ else if (varDsc->lvPinned)
+ {
+ varDsc->lvTracked = 0;
+#ifdef JIT32_GCENCODER
+ compiler->lvaSetVarDoNotEnregister(lclNum DEBUGARG(Compiler::DNER_PinningRef));
+#endif // JIT32_GCENCODER
+ }
+
+ // Are we not optimizing, and do we have exception handlers?
+ // If so, mark all args and locals as volatile so that they
+ // won't ever get enregistered.
+ //
+ if (compiler->opts.MinOpts() && compiler->compHndBBtabCount > 0)
+ {
+ compiler->lvaSetVarDoNotEnregister(lclNum DEBUGARG(Compiler::DNER_LiveInOutOfHandler));
+ varDsc->lvLRACandidate = 0;
+ continue;
+ }
+
+ if (varDsc->lvDoNotEnregister)
+ {
+ varDsc->lvLRACandidate = 0;
+ continue;
+ }
+
+ var_types type = genActualType(varDsc->TypeGet());
+
+ switch (type)
+ {
+#if CPU_HAS_FP_SUPPORT
+ case TYP_FLOAT:
+ case TYP_DOUBLE:
+ if (compiler->opts.compDbgCode)
+ {
+ varDsc->lvLRACandidate = 0;
+ }
+ break;
+#endif // CPU_HAS_FP_SUPPORT
+
+ case TYP_INT:
+ case TYP_LONG:
+ case TYP_REF:
+ case TYP_BYREF:
+ break;
+
+#ifdef FEATURE_SIMD
+ case TYP_SIMD12:
+ case TYP_SIMD16:
+ case TYP_SIMD32:
+ if (varDsc->lvPromoted)
+ {
+ varDsc->lvLRACandidate = 0;
+ }
+ break;
+ // TODO-1stClassStructs: Move TYP_SIMD8 up with the other SIMD types, after handling the param issue
+ // (passing & returning as TYP_LONG).
+ case TYP_SIMD8:
+#endif // FEATURE_SIMD
+
+ case TYP_STRUCT:
+ {
+ varDsc->lvLRACandidate = 0;
+ }
+ break;
+
+ case TYP_UNDEF:
+ case TYP_UNKNOWN:
+ noway_assert(!"lvType not set correctly");
+ varDsc->lvType = TYP_INT;
+
+ __fallthrough;
+
+ default:
+ varDsc->lvLRACandidate = 0;
+ }
+
+ // We will set this later, when we have determined liveness.
+ if (varDsc->lvLRACandidate)
+ {
+ varDsc->lvMustInit = false;
+ }
+
+ // We maintain two sets of FP vars - those that meet the first threshold of weighted ref count,
+ // and those that meet the second (see the definitions of thresholdFPRefCntWtd and maybeFPRefCntWtd
+ // above).
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
+ // Additionally, when we are generating AVX on non-UNIX amd64, we keep a separate set of the LargeVectorType
+ // vars.
+ if (varDsc->lvType == LargeVectorType)
+ {
+ largeVectorVarCount++;
+ VarSetOps::AddElemD(compiler, largeVectorVars, varDsc->lvVarIndex);
+ unsigned refCntWtd = varDsc->lvRefCntWtd;
+ if (refCntWtd >= thresholdLargeVectorRefCntWtd)
+ {
+ VarSetOps::AddElemD(compiler, largeVectorCalleeSaveCandidateVars, varDsc->lvVarIndex);
+ }
+ }
+ else
+#endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE
+ if (regType(newInt->registerType) == FloatRegisterType)
+ {
+ floatVarCount++;
+ unsigned refCntWtd = varDsc->lvRefCntWtd;
+ if (varDsc->lvIsRegArg)
+ {
+ // Don't count the initial reference for register params. In those cases,
+ // using a callee-save causes an extra copy.
+ refCntWtd -= BB_UNITY_WEIGHT;
+ }
+ if (refCntWtd >= thresholdFPRefCntWtd)
+ {
+ VarSetOps::AddElemD(compiler, fpCalleeSaveCandidateVars, varDsc->lvVarIndex);
+ }
+ else if (refCntWtd >= maybeFPRefCntWtd)
+ {
+ VarSetOps::AddElemD(compiler, fpMaybeCandidateVars, varDsc->lvVarIndex);
+ }
+ }
+ }
+
+ // The factors we consider to determine which set of fp vars to use as candidates for callee-save
+ // registers currently include the number of fp vars, whether there are loops, and whether there are
+ // multiple exits. These have been selected somewhat empirically, but there is probably room for
+ // more tuning.
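+ // For example (hypothetical): a method with 8 fp lclVars, a loop, and a single return block
+ // satisfies the check below and merges fpMaybeCandidateVars into fpCalleeSaveCandidateVars;
+ // with only 4 fp lclVars, just the first-threshold set is used.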
+ CLANG_FORMAT_COMMENT_ANCHOR; + +#ifdef DEBUG + if (VERBOSE) + { + printf("\nFP callee save candidate vars: "); + if (!VarSetOps::IsEmpty(compiler, fpCalleeSaveCandidateVars)) + { + dumpConvertedVarSet(compiler, fpCalleeSaveCandidateVars); + printf("\n"); + } + else + { + printf("None\n\n"); + } + } +#endif + + JITDUMP("floatVarCount = %d; hasLoops = %d, singleExit = %d\n", floatVarCount, compiler->fgHasLoops, + (compiler->fgReturnBlocks == nullptr || compiler->fgReturnBlocks->next == nullptr)); + + // Determine whether to use the 2nd, more aggressive, threshold for fp callee saves. + if (floatVarCount > 6 && compiler->fgHasLoops && + (compiler->fgReturnBlocks == nullptr || compiler->fgReturnBlocks->next == nullptr)) + { +#ifdef DEBUG + if (VERBOSE) + { + printf("Adding additional fp callee save candidates: \n"); + if (!VarSetOps::IsEmpty(compiler, fpMaybeCandidateVars)) + { + dumpConvertedVarSet(compiler, fpMaybeCandidateVars); + printf("\n"); + } + else + { + printf("None\n\n"); + } + } +#endif + VarSetOps::UnionD(compiler, fpCalleeSaveCandidateVars, fpMaybeCandidateVars); + } + +#ifdef _TARGET_ARM_ +#ifdef DEBUG + if (VERBOSE) + { + // Frame layout is only pre-computed for ARM + printf("\nlvaTable after IdentifyCandidates\n"); + compiler->lvaTableDump(); + } +#endif // DEBUG +#endif // _TARGET_ARM_ +} + +// TODO-Throughput: This mapping can surely be more efficiently done +void LinearScan::initVarRegMaps() +{ + assert(compiler->lvaTrackedFixed); // We should have already set this to prevent us from adding any new tracked + // variables. + + // The compiler memory allocator requires that the allocation be an + // even multiple of int-sized objects + unsigned int varCount = compiler->lvaTrackedCount; + regMapCount = (unsigned int)roundUp(varCount, sizeof(int)); + + // Not sure why blocks aren't numbered from zero, but they don't appear to be. + // So, if we want to index by bbNum we have to know the maximum value. + unsigned int bbCount = compiler->fgBBNumMax + 1; + + inVarToRegMaps = new (compiler, CMK_LSRA) regNumber*[bbCount]; + outVarToRegMaps = new (compiler, CMK_LSRA) regNumber*[bbCount]; + + if (varCount > 0) + { + // This VarToRegMap is used during the resolution of critical edges. 
+ sharedCriticalVarToRegMap = new (compiler, CMK_LSRA) regNumber[regMapCount]; + + for (unsigned int i = 0; i < bbCount; i++) + { + regNumber* inVarToRegMap = new (compiler, CMK_LSRA) regNumber[regMapCount]; + regNumber* outVarToRegMap = new (compiler, CMK_LSRA) regNumber[regMapCount]; + + for (unsigned int j = 0; j < regMapCount; j++) + { + inVarToRegMap[j] = REG_STK; + outVarToRegMap[j] = REG_STK; + } + inVarToRegMaps[i] = inVarToRegMap; + outVarToRegMaps[i] = outVarToRegMap; + } + } + else + { + sharedCriticalVarToRegMap = nullptr; + for (unsigned int i = 0; i < bbCount; i++) + { + inVarToRegMaps[i] = nullptr; + outVarToRegMaps[i] = nullptr; + } + } +} + +void LinearScan::setInVarRegForBB(unsigned int bbNum, unsigned int varNum, regNumber reg) +{ + assert(reg < UCHAR_MAX && varNum < compiler->lvaCount); + inVarToRegMaps[bbNum][compiler->lvaTable[varNum].lvVarIndex] = reg; +} + +void LinearScan::setOutVarRegForBB(unsigned int bbNum, unsigned int varNum, regNumber reg) +{ + assert(reg < UCHAR_MAX && varNum < compiler->lvaCount); + outVarToRegMaps[bbNum][compiler->lvaTable[varNum].lvVarIndex] = reg; +} + +LinearScan::SplitEdgeInfo LinearScan::getSplitEdgeInfo(unsigned int bbNum) +{ + SplitEdgeInfo splitEdgeInfo; + assert(bbNum <= compiler->fgBBNumMax); + assert(bbNum > bbNumMaxBeforeResolution); + assert(splitBBNumToTargetBBNumMap != nullptr); + splitBBNumToTargetBBNumMap->Lookup(bbNum, &splitEdgeInfo); + assert(splitEdgeInfo.toBBNum <= bbNumMaxBeforeResolution); + assert(splitEdgeInfo.fromBBNum <= bbNumMaxBeforeResolution); + return splitEdgeInfo; +} + +VarToRegMap LinearScan::getInVarToRegMap(unsigned int bbNum) +{ + assert(bbNum <= compiler->fgBBNumMax); + // For the blocks inserted to split critical edges, the inVarToRegMap is + // equal to the outVarToRegMap at the "from" block. + if (bbNum > bbNumMaxBeforeResolution) + { + SplitEdgeInfo splitEdgeInfo = getSplitEdgeInfo(bbNum); + unsigned fromBBNum = splitEdgeInfo.fromBBNum; + if (fromBBNum == 0) + { + assert(splitEdgeInfo.toBBNum != 0); + return inVarToRegMaps[splitEdgeInfo.toBBNum]; + } + else + { + return outVarToRegMaps[fromBBNum]; + } + } + + return inVarToRegMaps[bbNum]; +} + +VarToRegMap LinearScan::getOutVarToRegMap(unsigned int bbNum) +{ + assert(bbNum <= compiler->fgBBNumMax); + // For the blocks inserted to split critical edges, the outVarToRegMap is + // equal to the inVarToRegMap at the target. + if (bbNum > bbNumMaxBeforeResolution) + { + // If this is an empty block, its in and out maps are both the same. + // We identify this case by setting fromBBNum or toBBNum to 0, and using only the other. 
+ SplitEdgeInfo splitEdgeInfo = getSplitEdgeInfo(bbNum);
+ unsigned toBBNum = splitEdgeInfo.toBBNum;
+ if (toBBNum == 0)
+ {
+ assert(splitEdgeInfo.fromBBNum != 0);
+ return outVarToRegMaps[splitEdgeInfo.fromBBNum];
+ }
+ else
+ {
+ return inVarToRegMaps[toBBNum];
+ }
+ }
+ return outVarToRegMaps[bbNum];
+}
+
+regNumber LinearScan::getVarReg(VarToRegMap bbVarToRegMap, unsigned int varNum)
+{
+ assert(compiler->lvaTable[varNum].lvTracked);
+ return bbVarToRegMap[compiler->lvaTable[varNum].lvVarIndex];
+}
+
+// Initialize the incoming VarToRegMap to the given map values (generally a predecessor of
+// the block)
+VarToRegMap LinearScan::setInVarToRegMap(unsigned int bbNum, VarToRegMap srcVarToRegMap)
+{
+ VarToRegMap inVarToRegMap = inVarToRegMaps[bbNum];
+ memcpy(inVarToRegMap, srcVarToRegMap, (regMapCount * sizeof(regNumber)));
+ return inVarToRegMap;
+}
+
+// Find the last node in the tree, in execution order.
+// TODO-Throughput: this is inefficient!
+GenTree* lastNodeInTree(GenTree* tree)
+{
+ // There is no gtPrev on the top-level tree node, so apparently the way to walk
+ // a tree backwards is to walk it forward, find the last node, and walk back from there.
+
+ GenTree* last = nullptr;
+ if (tree->OperGet() == GT_STMT)
+ {
+ GenTree* statement = tree;
+
+ foreach_treenode_execution_order(tree, statement)
+ {
+ last = tree;
+ }
+ return last;
+ }
+ else
+ {
+ while (tree)
+ {
+ last = tree;
+ tree = tree->gtNext;
+ }
+ return last;
+ }
+}
+
+// Given a local variable reference node, return the RefType for the reference:
+// RefTypeDef for a def, RefTypeUse otherwise.
+RefType refTypeForLocalRefNode(GenTree* node)
+{
+ assert(node->IsLocal());
+
+ // We don't support updates
+ assert((node->gtFlags & GTF_VAR_USEASG) == 0);
+
+ if (node->gtFlags & GTF_VAR_DEF)
+ {
+ return RefTypeDef;
+ }
+ else
+ {
+ return RefTypeUse;
+ }
+}
+
+// This function sets RefPosition last uses by walking the RefPositions, instead of walking the
+// tree nodes in execution order (as was done in a previous version).
+// This is because the execution order isn't strictly correct, specifically for
+// references to local variables that occur in arg lists.
+//
+// TODO-Throughput: This function should eventually be eliminated, as we should be able to rely on last uses
+// being set by dataflow analysis. It is necessary to do it this way only because the execution
+// order wasn't strictly correct.
+
+void LinearScan::setLastUses(BasicBlock* block)
+{
+#ifdef DEBUG
+ if (VERBOSE)
+ {
+ JITDUMP("\n\nCALCULATING LAST USES for block %u, liveout=", block->bbNum);
+ dumpConvertedVarSet(compiler, block->bbLiveOut);
+ JITDUMP("\n==============================\n");
+ }
+#endif // DEBUG
+
+ unsigned keepAliveVarNum = BAD_VAR_NUM;
+ if (compiler->lvaKeepAliveAndReportThis())
+ {
+ keepAliveVarNum = compiler->info.compThisArg;
+ assert(compiler->info.compIsStatic == false);
+ }
+
+ // Find which uses are lastUses.
+
+ // Work backwards starting with live out.
+ // 'temp' is updated to include any exposed use (including those in this
+ // block that we've already seen). When we encounter a use, if it's
+ // not in that set, then it's a last use.
+
+ VARSET_TP VARSET_INIT(compiler, temp, block->bbLiveOut);
+
+ auto currentRefPosition = refPositions.rbegin();
+
+ while (currentRefPosition->refType != RefTypeBB)
+ {
+ // We should never see ParamDefs or ZeroInits within a basic block.
+ assert(currentRefPosition->refType != RefTypeParamDef && currentRefPosition->refType != RefTypeZeroInit);
+ if (currentRefPosition->isIntervalRef() && currentRefPosition->getInterval()->isLocalVar)
+ {
+ unsigned varNum = currentRefPosition->getInterval()->varNum;
+ unsigned varIndex = currentRefPosition->getInterval()->getVarIndex(compiler);
+ // We should always have a tree node for a localVar, except for the "special" RefPositions.
+ GenTreePtr tree = currentRefPosition->treeNode;
+ assert(tree != nullptr || currentRefPosition->refType == RefTypeExpUse ||
+ currentRefPosition->refType == RefTypeDummyDef);
+ if (!VarSetOps::IsMember(compiler, temp, varIndex) && varNum != keepAliveVarNum)
+ {
+ // There was no exposed use, so this is a
+ // "last use" (and we mark it thus even if it's a def)
+
+ if (tree != nullptr)
+ {
+ tree->gtFlags |= GTF_VAR_DEATH;
+ }
+ LsraLocation loc = currentRefPosition->nodeLocation;
+#ifdef DEBUG
+ if (getLsraExtendLifeTimes())
+ {
+ JITDUMP("last use of V%02u @%u (not marked as last use for LSRA due to extendLifetimes stress "
+ "option)\n",
+ compiler->lvaTrackedToVarNum[varIndex], loc);
+ }
+ else
+#endif // DEBUG
+ {
+ JITDUMP("last use of V%02u @%u\n", compiler->lvaTrackedToVarNum[varIndex], loc);
+ currentRefPosition->lastUse = true;
+ }
+ VarSetOps::AddElemD(compiler, temp, varIndex);
+ }
+ else
+ {
+ currentRefPosition->lastUse = false;
+ if (tree != nullptr)
+ {
+ tree->gtFlags &= ~GTF_VAR_DEATH;
+ }
+ }
+
+ if (currentRefPosition->refType == RefTypeDef || currentRefPosition->refType == RefTypeDummyDef)
+ {
+ VarSetOps::RemoveElemD(compiler, temp, varIndex);
+ }
+ }
+ assert(currentRefPosition != refPositions.rend());
+ ++currentRefPosition;
+ }
+
+#ifdef DEBUG
+ VARSET_TP VARSET_INIT(compiler, temp2, block->bbLiveIn);
+ VarSetOps::DiffD(compiler, temp2, temp);
+ VarSetOps::DiffD(compiler, temp, block->bbLiveIn);
+ bool foundDiff = false;
+
+ {
+ VARSET_ITER_INIT(compiler, iter, temp, varIndex);
+ while (iter.NextElem(compiler, &varIndex))
+ {
+ unsigned varNum = compiler->lvaTrackedToVarNum[varIndex];
+ if (compiler->lvaTable[varNum].lvLRACandidate)
+ {
+ JITDUMP("BB%02u: V%02u is computed live, but not in LiveIn set.\n", block->bbNum, varNum);
+ foundDiff = true;
+ }
+ }
+ }
+
+ {
+ VARSET_ITER_INIT(compiler, iter, temp2, varIndex);
+ while (iter.NextElem(compiler, &varIndex))
+ {
+ unsigned varNum = compiler->lvaTrackedToVarNum[varIndex];
+ if (compiler->lvaTable[varNum].lvLRACandidate)
+ {
+ JITDUMP("BB%02u: V%02u is in LiveIn set, but not computed live.\n", block->bbNum, varNum);
+ foundDiff = true;
+ }
+ }
+ }
+
+ assert(!foundDiff);
+#endif // DEBUG
+}
+
+void LinearScan::addRefsForPhysRegMask(regMaskTP mask, LsraLocation currentLoc, RefType refType, bool isLastUse)
+{
+ for (regNumber reg = REG_FIRST; mask; reg = REG_NEXT(reg), mask >>= 1)
+ {
+ if (mask & 1)
+ {
+ // This assumes that these are all "special" RefTypes that
+ // don't need to be recorded on the tree (hence treeNode is nullptr)
+ RefPosition* pos = newRefPosition(reg, currentLoc, refType, nullptr,
+ genRegMask(reg)); // This MUST occupy the physical register (obviously)
+
+ if (isLastUse)
+ {
+ pos->lastUse = true;
+ }
+ }
+ }
+}
+
+//------------------------------------------------------------------------
+// getKillSetForNode: Return the registers killed by the given tree node.
+//
+// Arguments:
+// tree - the tree for which the kill set is needed.
+//
+// Return Value: a register mask of the registers killed
+//
+regMaskTP LinearScan::getKillSetForNode(GenTree* tree)
+{
+ regMaskTP killMask = RBM_NONE;
+ switch (tree->OperGet())
+ {
+#ifdef _TARGET_XARCH_
+ case GT_MUL:
+ // We use the 128-bit multiply when performing an overflow-checking unsigned multiply.
+ //
+ if (((tree->gtFlags & GTF_UNSIGNED) != 0) && tree->gtOverflowEx())
+ {
+ // Both RAX and RDX are killed by the operation
+ killMask = RBM_RAX | RBM_RDX;
+ }
+ break;
+
+ case GT_MULHI:
+ killMask = RBM_RAX | RBM_RDX;
+ break;
+
+ case GT_MOD:
+ case GT_DIV:
+ case GT_UMOD:
+ case GT_UDIV:
+ if (!varTypeIsFloating(tree->TypeGet()))
+ {
+ // RDX needs to be killed early, because it must not be used as a source register
+ // (unlike most cases, where the kill happens AFTER the uses). So for this kill,
+ // we add the RefPosition at the tree loc (where the uses are located) instead of the
+ // usual kill location, which is the same as the defs at tree loc+1.
+ // Note that we don't have to add interference for the live vars, because that
+ // will be done below, and is not sensitive to the precise location.
+ LsraLocation currentLoc = tree->gtLsraInfo.loc;
+ assert(currentLoc != 0);
+ addRefsForPhysRegMask(RBM_RDX, currentLoc, RefTypeKill, true);
+ // Both RAX and RDX are killed by the operation
+ killMask = RBM_RAX | RBM_RDX;
+ }
+ break;
+#endif // _TARGET_XARCH_
+
+ case GT_STORE_OBJ:
+ if (tree->OperIsCopyBlkOp())
+ {
+ assert(tree->AsObj()->gtGcPtrCount != 0);
+ killMask = compiler->compHelperCallKillSet(CORINFO_HELP_ASSIGN_BYREF);
+ break;
+ }
+ __fallthrough;
+
+ case GT_STORE_BLK:
+ case GT_STORE_DYN_BLK:
+ {
+ GenTreeBlk* blkNode = tree->AsBlk();
+ bool isCopyBlk = varTypeIsStruct(blkNode->Data());
+ switch (blkNode->gtBlkOpKind)
+ {
+ case GenTreeBlk::BlkOpKindHelper:
+ if (isCopyBlk)
+ {
+ killMask = compiler->compHelperCallKillSet(CORINFO_HELP_MEMCPY);
+ }
+ else
+ {
+ killMask = compiler->compHelperCallKillSet(CORINFO_HELP_MEMSET);
+ }
+ break;
+
+#ifdef _TARGET_XARCH_
+ case GenTreeBlk::BlkOpKindRepInstr:
+ if (isCopyBlk)
+ {
+ // rep movs kills RCX, RDI and RSI
+ killMask = RBM_RCX | RBM_RDI | RBM_RSI;
+ }
+ else
+ {
+ // rep stos kills RCX and RDI.
+ // (Note that the Data() node, if not constant, will be assigned to
+ // RCX, but it's fine that this kills it, as the value is not available
+ // after this node in any case.)
+ killMask = RBM_RDI | RBM_RCX;
+ }
+ break;
+#else
+ case GenTreeBlk::BlkOpKindRepInstr:
+#endif
+ case GenTreeBlk::BlkOpKindUnroll:
+ case GenTreeBlk::BlkOpKindInvalid:
+ // for these 'gtBlkOpKind' kinds, we leave 'killMask' = RBM_NONE
+ break;
+ }
+ }
+ break;
+
+ case GT_LSH:
+ case GT_RSH:
+ case GT_RSZ:
+ case GT_ROL:
+ case GT_ROR:
+ if (tree->gtLsraInfo.isHelperCallWithKills)
+ {
+ killMask = RBM_CALLEE_TRASH;
+ }
+ break;
+ case GT_RETURNTRAP:
+ killMask = compiler->compHelperCallKillSet(CORINFO_HELP_STOP_FOR_GC);
+ break;
+ case GT_CALL:
+#ifdef _TARGET_X86_
+ if (compiler->compFloatingPointUsed)
+ {
+ if (tree->TypeGet() == TYP_DOUBLE)
+ {
+ needDoubleTmpForFPCall = true;
+ }
+ else if (tree->TypeGet() == TYP_FLOAT)
+ {
+ needFloatTmpForFPCall = true;
+ }
+ }
+ if (tree->IsHelperCall())
+ {
+ GenTreeCall* call = tree->AsCall();
+ CorInfoHelpFunc helpFunc = compiler->eeGetHelperNum(call->gtCallMethHnd);
+ killMask = compiler->compHelperCallKillSet(helpFunc);
+ }
+ else
+#endif // _TARGET_X86_
+ {
+ // If there is no FP used, we can ignore the FP kills.
+ if (compiler->compFloatingPointUsed)
+ {
+ killMask = RBM_CALLEE_TRASH;
+ }
+ else
+ {
+ killMask = RBM_INT_CALLEE_TRASH;
+ }
+ }
+ break;
+ case GT_STOREIND:
+ if (compiler->codeGen->gcInfo.gcIsWriteBarrierAsgNode(tree))
+ {
+ killMask = RBM_CALLEE_TRASH_NOGC;
+#if !NOGC_WRITE_BARRIERS && (defined(_TARGET_ARM_) || defined(_TARGET_AMD64_))
+ killMask |= (RBM_ARG_0 | RBM_ARG_1);
+#endif // !NOGC_WRITE_BARRIERS && (defined(_TARGET_ARM_) || defined(_TARGET_AMD64_))
+ }
+ break;
+
+#if defined(PROFILING_SUPPORTED) && defined(_TARGET_AMD64_)
+ // If this method requires a profiler ELT hook, then mark these nodes as killing
+ // callee-trash registers (excluding RAX and XMM0). The reason for this is that
+ // the profiler callback would trash these registers. See vm\amd64\asmhelpers.asm for
+ // more details.
+ case GT_RETURN:
+ if (compiler->compIsProfilerHookNeeded())
+ {
+ killMask = compiler->compHelperCallKillSet(CORINFO_HELP_PROF_FCN_LEAVE);
+ }
+ break;
+
+ case GT_PROF_HOOK:
+ if (compiler->compIsProfilerHookNeeded())
+ {
+ killMask = compiler->compHelperCallKillSet(CORINFO_HELP_PROF_FCN_TAILCALL);
+ }
+ break;
+#endif // PROFILING_SUPPORTED && _TARGET_AMD64_
+
+ default:
+ // for all other 'tree->OperGet()' kinds, leave 'killMask' = RBM_NONE
+ break;
+ }
+ return killMask;
+}
+
+//------------------------------------------------------------------------
+// buildKillPositionsForNode:
+// Given some tree node, add refpositions for all the registers this node kills
+//
+// Arguments:
+// tree - the tree for which kill positions should be generated
+// currentLoc - the location at which the kills should be added
+//
+// Return Value:
+// true - kills were inserted
+// false - no kills were inserted
+//
+// Notes:
+// The return value is needed because if we have any kills, we need to make sure that
+// all defs are located AFTER the kills. On the other hand, if there aren't kills,
+// the multiple defs for a regPair are in different locations.
+// If we generate any kills, we will mark all currentLiveVars as being preferenced
+// to avoid the killed registers. This is somewhat conservative.
+
+bool LinearScan::buildKillPositionsForNode(GenTree* tree, LsraLocation currentLoc)
+{
+ regMaskTP killMask = getKillSetForNode(tree);
+ bool isCallKill = ((killMask == RBM_INT_CALLEE_TRASH) || (killMask == RBM_CALLEE_TRASH));
+ if (killMask != RBM_NONE)
+ {
+ // The killMask identifies a set of registers that will be used during codegen.
+ // Mark these as modified here, so when we do final frame layout, we'll know about
+ // all these registers. This is especially important if killMask contains
+ // callee-saved registers, which affect the frame size since we need to save/restore them.
+ // In the case where we have a copyBlk with GC pointers, we may need to call the
+ // CORINFO_HELP_ASSIGN_BYREF helper, which kills callee-saved RSI and RDI; if
+ // LSRA doesn't assign RSI/RDI, they wouldn't get marked as modified until codegen,
+ // which is too late.
+ compiler->codeGen->regSet.rsSetRegsModified(killMask DEBUGARG(dumpTerse));
+
+ addRefsForPhysRegMask(killMask, currentLoc, RefTypeKill, true);
+
+ // TODO-CQ: It appears to be valuable for both fp and int registers to avoid killing the callee-save
+ // regs on infrequently executed paths. However, it results in a large number of asmDiffs,
+ // many of which appear to be regressions (because there is more spill on the infrequent path),
+ // but are not really, because the frequent path becomes smaller. Validating these diffs will need
+ // to be done before making this change.
+ // if (!blockSequence[curBBSeqNum]->isRunRarely())
+ {
+
+ VARSET_ITER_INIT(compiler, iter, currentLiveVars, varIndex);
+ while (iter.NextElem(compiler, &varIndex))
+ {
+ unsigned varNum = compiler->lvaTrackedToVarNum[varIndex];
+ LclVarDsc* varDsc = compiler->lvaTable + varNum;
+#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
+ if (varDsc->lvType == LargeVectorType)
+ {
+ if (!VarSetOps::IsMember(compiler, largeVectorCalleeSaveCandidateVars, varIndex))
+ {
+ continue;
+ }
+ }
+ else
+#endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE
+ if (varTypeIsFloating(varDsc) &&
+ !VarSetOps::IsMember(compiler, fpCalleeSaveCandidateVars, varIndex))
+ {
+ continue;
+ }
+ Interval* interval = getIntervalForLocalVar(varNum);
+ if (isCallKill)
+ {
+ interval->preferCalleeSave = true;
+ }
+ regMaskTP newPreferences = allRegs(interval->registerType) & (~killMask);
+
+ if (newPreferences != RBM_NONE)
+ {
+ interval->updateRegisterPreferences(newPreferences);
+ }
+ else
+ {
+ // If there are no callee-saved registers, the call could kill all the registers.
+ // This is a valid state, so in that case the assert should not trigger. The RA will spill in order to
+ // free a register later.
+ assert(compiler->opts.compDbgEnC || (calleeSaveRegs(varDsc->lvType)) == RBM_NONE);
+ }
+ }
+ }
+
+ if (tree->IsCall() && (tree->gtFlags & GTF_CALL_UNMANAGED) != 0)
+ {
+ RefPosition* pos = newRefPosition((Interval*)nullptr, currentLoc, RefTypeKillGCRefs, tree,
+ (allRegs(TYP_REF) & ~RBM_ARG_REGS));
+ }
+ return true;
+ }
+
+ return false;
+}
+
+RefPosition* LinearScan::defineNewInternalTemp(GenTree* tree,
+ RegisterType regType,
+ LsraLocation currentLoc,
+ regMaskTP regMask)
+{
+ Interval* current = newInterval(regType);
+ current->isInternal = true;
+ return newRefPosition(current, currentLoc, RefTypeDef, tree, regMask);
+}
+
+int LinearScan::buildInternalRegisterDefsForNode(GenTree* tree,
+ LsraLocation currentLoc,
+ RefPosition* temps[]) // populates
+{
+ int count;
+ int internalIntCount = tree->gtLsraInfo.internalIntCount;
+ regMaskTP internalCands = tree->gtLsraInfo.getInternalCandidates(this);
+
+ // If the number of internal integer registers required is the same as the number of candidate integer registers in
+ // the candidate set, then they must be handled as fixed registers.
+ // (E.g. for the integer registers that floating point arguments must be copied into for a varargs call.)
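+ // For example (hypothetical masks): if internalIntCount == 2 and the internal candidates are
+ // exactly {RCX, RDX}, genCountBits matches the count, so the loop below peels off one fixed
+ // register per internal def (RCX first, via genFindLowestBit, then RDX) rather than letting
+ // each def float over the whole candidate set.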
+ bool fixedRegs = false; + regMaskTP internalIntCandidates = (internalCands & allRegs(TYP_INT)); + if (((int)genCountBits(internalIntCandidates)) == internalIntCount) + { + fixedRegs = true; + } + + for (count = 0; count < internalIntCount; count++) + { + regMaskTP internalIntCands = (internalCands & allRegs(TYP_INT)); + if (fixedRegs) + { + internalIntCands = genFindLowestBit(internalIntCands); + internalCands &= ~internalIntCands; + } + temps[count] = defineNewInternalTemp(tree, IntRegisterType, currentLoc, internalIntCands); + } + + int internalFloatCount = tree->gtLsraInfo.internalFloatCount; + for (int i = 0; i < internalFloatCount; i++) + { + regMaskTP internalFPCands = (internalCands & internalFloatRegCandidates()); + temps[count++] = defineNewInternalTemp(tree, FloatRegisterType, currentLoc, internalFPCands); + } + + noway_assert(count < MaxInternalRegisters); + assert(count == (internalIntCount + internalFloatCount)); + return count; +} + +void LinearScan::buildInternalRegisterUsesForNode(GenTree* tree, + LsraLocation currentLoc, + RefPosition* defs[], + int total) +{ + assert(total < MaxInternalRegisters); + + // defs[] has been populated by buildInternalRegisterDefsForNode + // now just add uses to the defs previously added. + for (int i = 0; i < total; i++) + { + RefPosition* prevRefPosition = defs[i]; + assert(prevRefPosition != nullptr); + regMaskTP mask = prevRefPosition->registerAssignment; + if (prevRefPosition->isPhysRegRef) + { + newRefPosition(defs[i]->getReg()->regNum, currentLoc, RefTypeUse, tree, mask); + } + else + { + RefPosition* newest = newRefPosition(defs[i]->getInterval(), currentLoc, RefTypeUse, tree, mask); + newest->lastUse = true; + } + } +} + +regMaskTP LinearScan::getUseCandidates(GenTree* useNode) +{ + TreeNodeInfo info = useNode->gtLsraInfo; + return info.getSrcCandidates(this); +} + +regMaskTP LinearScan::getDefCandidates(GenTree* tree) +{ + TreeNodeInfo info = tree->gtLsraInfo; + return info.getDstCandidates(this); +} + +RegisterType LinearScan::getDefType(GenTree* tree) +{ + return tree->TypeGet(); +} + +regMaskTP fixedCandidateMask(var_types type, regMaskTP candidates) +{ + if (genMaxOneBit(candidates)) + { + return candidates; + } + return RBM_NONE; +} + +//------------------------------------------------------------------------ +// LocationInfoListNode: used to store a single `LocationInfo` value for a +// node during `buildIntervals`. +// +// This is the node type for `LocationInfoList` below. +// +class LocationInfoListNode final : public LocationInfo +{ + friend class LocationInfoList; + friend class LocationInfoListNodePool; + + LocationInfoListNode* m_next; // The next node in the list + +public: + LocationInfoListNode(LsraLocation l, Interval* i, GenTree* t, unsigned regIdx = 0) : LocationInfo(l, i, t, regIdx) + { + } + + //------------------------------------------------------------------------ + // LocationInfoListNode::Next: Returns the next node in the list. + LocationInfoListNode* Next() const + { + return m_next; + } +}; + +//------------------------------------------------------------------------ +// LocationInfoList: used to store a list of `LocationInfo` values for a +// node during `buildIntervals`. +// +// Given an IR node that either directly defines N registers or that is a +// contained node with uses that define a total of N registers, that node +// will map to N `LocationInfo` values. These values are stored as a +// linked list of `LocationInfoListNode` values. 
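+// For example (illustrative): a node that defines a single register maps to a one-node list,
+// while a contained node whose two operands each define one register maps to the two-node
+// concatenation of its operands' lists.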
+// +class LocationInfoList final +{ + friend class LocationInfoListNodePool; + + LocationInfoListNode* m_head; // The head of the list + LocationInfoListNode* m_tail; // The tail of the list + +public: + LocationInfoList() : m_head(nullptr), m_tail(nullptr) + { + } + + LocationInfoList(LocationInfoListNode* node) : m_head(node), m_tail(node) + { + assert(m_head->m_next == nullptr); + } + + //------------------------------------------------------------------------ + // LocationInfoList::IsEmpty: Returns true if the list is empty. + // + bool IsEmpty() const + { + return m_head == nullptr; + } + + //------------------------------------------------------------------------ + // LocationInfoList::Begin: Returns the first node in the list. + // + LocationInfoListNode* Begin() const + { + return m_head; + } + + //------------------------------------------------------------------------ + // LocationInfoList::End: Returns the position after the last node in the + // list. The returned value is suitable for use as + // a sentinel for iteration. + // + LocationInfoListNode* End() const + { + return nullptr; + } + + //------------------------------------------------------------------------ + // LocationInfoList::Append: Appends a node to the list. + // + // Arguments: + // node - The node to append. Must not be part of an existing list. + // + void Append(LocationInfoListNode* node) + { + assert(node->m_next == nullptr); + + if (m_tail == nullptr) + { + assert(m_head == nullptr); + m_head = node; + } + else + { + m_tail->m_next = node; + } + + m_tail = node; + } + + //------------------------------------------------------------------------ + // LocationInfoList::Append: Appends another list to this list. + // + // Arguments: + // other - The list to append. + // + void Append(LocationInfoList other) + { + if (m_tail == nullptr) + { + assert(m_head == nullptr); + m_head = other.m_head; + } + else + { + m_tail->m_next = other.m_head; + } + + m_tail = other.m_tail; + } +}; + +//------------------------------------------------------------------------ +// LocationInfoListNodePool: manages a pool of `LocationInfoListNode` +// values to decrease overall memory usage +// during `buildIntervals`. +// +// `buildIntervals` involves creating a list of location info values per +// node that either directly produces a set of registers or that is a +// contained node with register-producing sources. However, these lists +// are short-lived: they are destroyed once the use of the corresponding +// node is processed. As such, there is typically only a small number of +// `LocationInfoListNode` values in use at any given time. Pooling these +// values avoids otherwise frequent allocations. +class LocationInfoListNodePool final +{ + LocationInfoListNode* m_freeList; + Compiler* m_compiler; + +public: + //------------------------------------------------------------------------ + // LocationInfoListNodePool::LocationInfoListNodePool: + // Creates a pool of `LocationInfoListNode` values. + // + // Arguments: + // compiler - The compiler context. + // preallocate - The number of nodes to preallocate. 
+ //
+ LocationInfoListNodePool(Compiler* compiler, unsigned preallocate = 0) : m_compiler(compiler)
+ {
+ if (preallocate > 0)
+ {
+ size_t preallocateSize = sizeof(LocationInfoListNode) * preallocate;
+ auto* preallocatedNodes = reinterpret_cast<LocationInfoListNode*>(compiler->compGetMem(preallocateSize));
+
+ LocationInfoListNode* head = preallocatedNodes;
+ head->m_next = nullptr;
+
+ for (unsigned i = 1; i < preallocate; i++)
+ {
+ LocationInfoListNode* node = &preallocatedNodes[i];
+ node->m_next = head;
+ head = node;
+ }
+
+ m_freeList = head;
+ }
+ }
+
+ //------------------------------------------------------------------------
+ // LocationInfoListNodePool::GetNode: Fetches an unused node from the
+ // pool.
+ //
+ // Arguments:
+ // l - The `LsraLocation` for the `LocationInfo` value.
+ // i - The interval for the `LocationInfo` value.
+ // t - The IR node for the `LocationInfo` value.
+ // regIdx - The register index for the `LocationInfo` value.
+ //
+ // Returns:
+ // A pooled or newly-allocated `LocationInfoListNode`, depending on the
+ // contents of the pool.
+ LocationInfoListNode* GetNode(LsraLocation l, Interval* i, GenTree* t, unsigned regIdx = 0)
+ {
+ LocationInfoListNode* head = m_freeList;
+ if (head == nullptr)
+ {
+ head = reinterpret_cast<LocationInfoListNode*>(m_compiler->compGetMem(sizeof(LocationInfoListNode)));
+ }
+ else
+ {
+ m_freeList = head->m_next;
+ }
+
+ head->loc = l;
+ head->interval = i;
+ head->treeNode = t;
+ head->multiRegIdx = regIdx;
+ head->m_next = nullptr;
+
+ return head;
+ }
+
+ //------------------------------------------------------------------------
+ // LocationInfoListNodePool::ReturnNodes: Returns a list of nodes to the
+ // pool.
+ //
+ // Arguments:
+ // list - The list to return.
+ //
+ void ReturnNodes(LocationInfoList& list)
+ {
+ assert(list.m_head != nullptr);
+ assert(list.m_tail != nullptr);
+
+ LocationInfoListNode* head = m_freeList;
+ list.m_tail->m_next = head;
+ m_freeList = list.m_head;
+ }
+};
+
+#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
+VARSET_VALRET_TP
+LinearScan::buildUpperVectorSaveRefPositions(GenTree* tree, LsraLocation currentLoc)
+{
+ VARSET_TP VARSET_INIT_NOCOPY(liveLargeVectors, VarSetOps::MakeEmpty(compiler));
+ regMaskTP fpCalleeKillSet = RBM_NONE;
+ if (!VarSetOps::IsEmpty(compiler, largeVectorVars))
+ {
+ // We actually need to find any calls that kill the upper half of the callee-save vector registers.
+ // But we will use as a proxy any node that kills floating point registers.
+ // (Note that some calls are masquerading as other nodes at this point so we can't just check for calls.)
+ fpCalleeKillSet = getKillSetForNode(tree);
+ if ((fpCalleeKillSet & RBM_FLT_CALLEE_TRASH) != RBM_NONE)
+ {
+ VarSetOps::AssignNoCopy(compiler, liveLargeVectors,
+ VarSetOps::Intersection(compiler, currentLiveVars, largeVectorVars));
+ VARSET_ITER_INIT(compiler, iter, liveLargeVectors, varIndex);
+ while (iter.NextElem(compiler, &varIndex))
+ {
+ unsigned varNum = compiler->lvaTrackedToVarNum[varIndex];
+ Interval* varInterval = getIntervalForLocalVar(varNum);
+ Interval* tempInterval = newInterval(LargeVectorType);
+ tempInterval->isInternal = true;
+ RefPosition* pos =
+ newRefPosition(tempInterval, currentLoc, RefTypeUpperVectorSaveDef, tree, RBM_FLT_CALLEE_SAVED);
+ // We are going to save the existing relatedInterval of varInterval on tempInterval, so that we can set
+ // the tempInterval as the relatedInterval of varInterval, so that we can build the corresponding
+ // RefTypeUpperVectorSaveUse RefPosition.
+ // We will then restore the relatedInterval onto varInterval,
+ // and set varInterval as the relatedInterval of tempInterval.
+ tempInterval->relatedInterval = varInterval->relatedInterval;
+ varInterval->relatedInterval = tempInterval;
+ }
+ }
+ }
+ return liveLargeVectors;
+}
+
+void LinearScan::buildUpperVectorRestoreRefPositions(GenTree* tree,
+ LsraLocation currentLoc,
+ VARSET_VALARG_TP liveLargeVectors)
+{
+ if (!VarSetOps::IsEmpty(compiler, liveLargeVectors))
+ {
+ VARSET_ITER_INIT(compiler, iter, liveLargeVectors, varIndex);
+ while (iter.NextElem(compiler, &varIndex))
+ {
+ unsigned varNum = compiler->lvaTrackedToVarNum[varIndex];
+ Interval* varInterval = getIntervalForLocalVar(varNum);
+ Interval* tempInterval = varInterval->relatedInterval;
+ assert(tempInterval->isInternal == true);
+ RefPosition* pos =
+ newRefPosition(tempInterval, currentLoc, RefTypeUpperVectorSaveUse, tree, RBM_FLT_CALLEE_SAVED);
+ // Restore the relatedInterval onto varInterval, and set varInterval as the relatedInterval
+ // of tempInterval.
+ varInterval->relatedInterval = tempInterval->relatedInterval;
+ tempInterval->relatedInterval = varInterval;
+ }
+ }
+}
+#endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE
+
+#ifdef DEBUG
+//------------------------------------------------------------------------
+// ComputeOperandDstCount: computes the number of registers defined by a
+// node.
+//
+// For most nodes, this is simple:
+// - Nodes that do not produce values (e.g. stores and other void-typed
+// nodes) and nodes that immediately use the registers they define
+// produce no registers
+// - Nodes that are marked as defining N registers define N registers.
+//
+// For contained nodes, however, things are more complicated: for purposes
+// of bookkeeping, a contained node is treated as producing the transitive
+// closure of the registers produced by its sources.
+//
+// Arguments:
+// operand - The operand for which to compute a register count.
+//
+// Returns:
+// The number of registers defined by `operand`.
+//
+static int ComputeOperandDstCount(GenTree* operand)
+{
+ TreeNodeInfo& operandInfo = operand->gtLsraInfo;
+
+ if (operandInfo.isLocalDefUse)
+ {
+ // Operands that define an unused value do not produce any registers.
+ return 0;
+ }
+ else if (operandInfo.dstCount != 0)
+ {
+ // Operands that have a specified number of destination registers consume all of their operands
+ // and therefore produce exactly that number of registers.
+ return operandInfo.dstCount;
+ }
+ else if (operandInfo.srcCount != 0)
+ {
+ // If an operand has no destination registers but does have source registers, it must be a store
+ // or a compare.
+ assert(operand->OperIsStore() || operand->OperIsBlkOp() || operand->OperIsPutArgStk() ||
+ operand->OperIsCompare());
+ return 0;
+ }
+ else if (!operand->OperIsAggregate() && (operand->OperIsStore() || operand->TypeGet() == TYP_VOID))
+ {
+ // Stores and void-typed operands may be encountered when processing call nodes, which contain
+ // pointers to argument setup stores.
+ return 0;
+ }
+ else
+ {
+ // If an aggregate or non-void-typed operand is not an unused value and does not have source registers,
+ // that argument is contained within its parent and produces `sum(operand_dst_count)` registers.
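+ // For example (hypothetical): a contained GT_LEA whose base and index are each
+ // register-producing lclVars reports 1 + 1 = 2 registers here.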
+ int dstCount = 0;
+ for (GenTree* op : operand->Operands())
+ {
+ dstCount += ComputeOperandDstCount(op);
+ }
+
+ return dstCount;
+ }
+}
+
+//------------------------------------------------------------------------
+// ComputeAvailableSrcCount: computes the number of registers available as
+// sources for a node.
+//
+// This is simply the sum of the number of registers produced by each
+// operand to the node.
+//
+// Arguments:
+// node - The node for which to compute a source count.
+//
+// Returns:
+// The number of registers available as sources for `node`.
+//
+static int ComputeAvailableSrcCount(GenTree* node)
+{
+ int numSources = 0;
+ for (GenTree* operand : node->Operands())
+ {
+ numSources += ComputeOperandDstCount(operand);
+ }
+
+ return numSources;
+}
+#endif
+
+void LinearScan::buildRefPositionsForNode(GenTree* tree,
+ BasicBlock* block,
+ LocationInfoListNodePool& listNodePool,
+ HashTableBase<GenTree*, LocationInfoList>& operandToLocationInfoMap,
+ LsraLocation currentLoc)
+{
+#ifdef _TARGET_ARM_
+ assert(!isRegPairType(tree->TypeGet()));
+#endif // _TARGET_ARM_
+
+ // The LIR traversal doesn't visit non-aggregate GT_LIST or GT_ARGPLACE nodes
+ assert(tree->OperGet() != GT_ARGPLACE);
+ assert((tree->OperGet() != GT_LIST) || tree->AsArgList()->IsAggregate());
+
+ // These nodes are eliminated by the Rationalizer.
+ if (tree->OperGet() == GT_CLS_VAR)
+ {
+ JITDUMP("Unexpected node %s in LSRA.\n", GenTree::NodeName(tree->OperGet()));
+ assert(!"Unexpected node in LSRA.");
+ }
+
+ // The set of internal temporary registers used by this node is stored in the
+ // gtRsvdRegs register mask. Clear it out.
+ tree->gtRsvdRegs = RBM_NONE;
+
+#ifdef DEBUG
+ if (VERBOSE)
+ {
+ JITDUMP("at start of tree, map contains: { ");
+ bool first = true;
+ for (auto kvp : operandToLocationInfoMap)
+ {
+ GenTree* node = kvp.Key();
+ LocationInfoList defList = kvp.Value();
+
+ JITDUMP("%sN%03u. %s -> (", first ? "" : "; ", node->gtSeqNum, GenTree::NodeName(node->OperGet()));
+ for (LocationInfoListNode *def = defList.Begin(), *end = defList.End(); def != end; def = def->Next())
+ {
+ JITDUMP("%s%d.N%03u", def == defList.Begin() ? "" : ", ", def->loc, def->treeNode->gtSeqNum);
+ }
+ JITDUMP(")");
+
+ first = false;
+ }
+ JITDUMP(" }\n");
+ }
+#endif // DEBUG
+
+ TreeNodeInfo info = tree->gtLsraInfo;
+ assert(info.IsValid(this));
+ int consume = info.srcCount;
+ int produce = info.dstCount;
+
+ assert(((consume == 0) && (produce == 0)) || (ComputeAvailableSrcCount(tree) == consume));
+
+ if (isCandidateLocalRef(tree) && !tree->OperIsLocalStore())
+ {
+ assert(consume == 0);
+
+ // We handle tracked variables differently from non-tracked ones. If it is tracked,
+ // we simply add a use or def of the tracked variable. Otherwise, for a use we need
+ // to actually add the appropriate references for loading or storing the variable.
+ //
+ // It won't actually get used or defined until the appropriate ancestor tree node
+ // is processed, unless this is marked "isLocalDefUse" because it is a stack-based argument
+ // to a call
+
+ Interval* interval = getIntervalForLocalVar(tree->gtLclVarCommon.gtLclNum);
+ regMaskTP candidates = getUseCandidates(tree);
+ regMaskTP fixedAssignment = fixedCandidateMask(tree->TypeGet(), candidates);
+
+ // We have only approximate last-use information at this point.
This is because the + // execution order doesn't actually reflect the true order in which the localVars + // are referenced - but the order of the RefPositions will, so we recompute it after + // RefPositions are built. + // Use the old value for setting currentLiveVars - note that we do this with the + // not-quite-correct setting of lastUse. However, this is OK because + // 1) this is only for preferencing, which doesn't require strict correctness, and + // 2) the cases where these out-of-order uses occur should not overlap a kill. + // TODO-Throughput: clean this up once we have the execution order correct. At that point + // we can update currentLiveVars at the same place that we create the RefPosition. + if ((tree->gtFlags & GTF_VAR_DEATH) != 0) + { + VarSetOps::RemoveElemD(compiler, currentLiveVars, + compiler->lvaTable[tree->gtLclVarCommon.gtLclNum].lvVarIndex); + } + + JITDUMP("t%u (i:%u)\n", currentLoc, interval->intervalIndex); + + if (!info.isLocalDefUse) + { + if (produce != 0) + { + LocationInfoList list(listNodePool.GetNode(currentLoc, interval, tree)); + bool added = operandToLocationInfoMap.AddOrUpdate(tree, list); + assert(added); + + tree->gtLsraInfo.definesAnyRegisters = true; + } + + return; + } + else + { + JITDUMP(" Not added to map\n"); + regMaskTP candidates = getUseCandidates(tree); + + if (fixedAssignment != RBM_NONE) + { + candidates = fixedAssignment; + } + RefPosition* pos = newRefPosition(interval, currentLoc, RefTypeUse, tree, candidates); + pos->isLocalDefUse = true; + bool isLastUse = ((tree->gtFlags & GTF_VAR_DEATH) != 0); + pos->lastUse = isLastUse; + pos->setAllocateIfProfitable(tree->IsRegOptional()); + DBEXEC(VERBOSE, pos->dump()); + return; + } + } + +#ifdef DEBUG + if (VERBOSE) + { + lsraDispNode(tree, LSRA_DUMP_REFPOS, (produce != 0)); + JITDUMP("\n"); + JITDUMP(" consume=%d produce=%d\n", consume, produce); + } +#endif // DEBUG + + // Handle the case of local variable assignment + Interval* varDefInterval = nullptr; + RefType defRefType = RefTypeDef; + + GenTree* defNode = tree; + + // noAdd means the node creates a def but for purposes of map + // management do not add it because data is not flowing up the + // tree but over (as in ASG nodes) + + bool noAdd = info.isLocalDefUse; + RefPosition* prevPos = nullptr; + + bool isSpecialPutArg = false; + + assert(!tree->OperIsAssignment()); + if (tree->OperIsLocalStore()) + { + if (isCandidateLocalRef(tree)) + { + // We always push the tracked lclVar intervals + varDefInterval = getIntervalForLocalVar(tree->gtLclVarCommon.gtLclNum); + defRefType = refTypeForLocalRefNode(tree); + defNode = tree; + if (produce == 0) + { + produce = 1; + noAdd = true; + } + + assert(consume <= MAX_RET_REG_COUNT); + if (consume == 1) + { + // Get the location info for the register defined by the first operand. + LocationInfoList operandDefs; + bool found = operandToLocationInfoMap.TryGetValue(*(tree->OperandsBegin()), &operandDefs); + assert(found); + + // Since we only expect to consume one register, we should only have a single register to + // consume. + assert(operandDefs.Begin()->Next() == operandDefs.End()); + + LocationInfo& operandInfo = *static_cast<LocationInfo*>(operandDefs.Begin()); + + Interval* srcInterval = operandInfo.interval; + if (srcInterval->relatedInterval == nullptr) + { + // Preference the source to the dest, unless this is a non-last-use localVar. 
+ // Note that the last-use info is not correct, but it is a better approximation than preferencing + // the source to the dest, if the source's lifetime extends beyond the dest. + if (!srcInterval->isLocalVar || (operandInfo.treeNode->gtFlags & GTF_VAR_DEATH) != 0) + { + srcInterval->assignRelatedInterval(varDefInterval); + } + } + else if (!srcInterval->isLocalVar) + { + // Preference the source to dest, if src is not a local var. + srcInterval->assignRelatedInterval(varDefInterval); + } + + // We can have a case where the source of the store has a different register type, + // e.g. when the store is of a return value temp, and op1 is a Vector2 + // (TYP_SIMD8). We will need to set the + // src candidates accordingly on op1 so that LSRA will generate a copy. + // We could do this during Lowering, but at that point we don't know whether + // this lclVar will be a register candidate, and if not, we would prefer to leave + // the type alone. + if (regType(tree->gtGetOp1()->TypeGet()) != regType(tree->TypeGet())) + { + tree->gtGetOp1()->gtLsraInfo.setSrcCandidates(this, allRegs(tree->TypeGet())); + } + } + + if ((tree->gtFlags & GTF_VAR_DEATH) == 0) + { + VarSetOps::AddElemD(compiler, currentLiveVars, + compiler->lvaTable[tree->gtLclVarCommon.gtLclNum].lvVarIndex); + } + } + } + else if (noAdd && produce == 0) + { + // This is the case for dead nodes that occur after + // tree rationalization + // TODO-Cleanup: Identify and remove these dead nodes prior to register allocation. + if (tree->IsMultiRegCall()) + { + // In case of multi-reg call node, produce = number of return registers + produce = tree->AsCall()->GetReturnTypeDesc()->GetReturnRegCount(); + } + else + { + produce = 1; + } + } + +#ifdef DEBUG + if (VERBOSE) + { + if (produce) + { + if (varDefInterval != nullptr) + { + printf("t%u (i:%u) = op ", currentLoc, varDefInterval->intervalIndex); + } + else + { + for (int i = 0; i < produce; i++) + { + printf("t%u ", currentLoc); + } + printf("= op "); + } + } + else + { + printf(" op "); + } + printf("\n"); + } +#endif // DEBUG + + Interval* prefSrcInterval = nullptr; + + // If this is a binary operator that will be encoded with 2 operand fields + // (i.e. the target is read-modify-write), preference the dst to op1. + + bool hasDelayFreeSrc = tree->gtLsraInfo.hasDelayFreeSrc; + if (tree->OperGet() == GT_PUTARG_REG && isCandidateLocalRef(tree->gtGetOp1()) && + (tree->gtGetOp1()->gtFlags & GTF_VAR_DEATH) == 0) + { + // This is the case for a "pass-through" copy of a lclVar. In the case where it is a non-last-use, + // we don't want the def of the copy to kill the lclVar register, if it is assigned the same register + // (which is actually what we hope will happen). + JITDUMP("Setting putarg_reg as a pass-through of a non-last use lclVar\n"); + + // Get the register information for the first operand of the node. + LocationInfoList operandDefs; + bool found = operandToLocationInfoMap.TryGetValue(*(tree->OperandsBegin()), &operandDefs); + assert(found); + + // Preference the destination to the interval of the first register defined by the first operand. 
+ Interval* srcInterval = operandDefs.Begin()->interval;
+ assert(srcInterval->isLocalVar);
+ prefSrcInterval = srcInterval;
+ isSpecialPutArg = true;
+ }
+
+ RefPosition* internalRefs[MaxInternalRegisters];
+
+ // Make intervals for all the 'internal' register requirements for this node,
+ // where 'internal' means additional registers required temporarily.
+ int internalCount = buildInternalRegisterDefsForNode(tree, currentLoc, internalRefs);
+
+ // pop all ref'd tree temps
+ GenTreeOperandIterator iterator = tree->OperandsBegin();
+
+ // `operandDefs` holds the list of `LocationInfo` values for the registers defined by the current
+ // operand. `operandDefsIterator` points to the current `LocationInfo` value in `operandDefs`.
+ LocationInfoList operandDefs;
+ LocationInfoListNode* operandDefsIterator = operandDefs.End();
+ for (int useIndex = 0; useIndex < consume; useIndex++)
+ {
+ // If we've consumed all of the registers defined by the current operand, advance to the next
+ // operand that defines any registers.
+ if (operandDefsIterator == operandDefs.End())
+ {
+ // Skip operands that do not define any registers, whether directly or indirectly.
+ GenTree* operand;
+ do
+ {
+ assert(iterator != tree->OperandsEnd());
+ operand = *iterator;
+
+ ++iterator;
+ } while (!operand->gtLsraInfo.definesAnyRegisters);
+
+ // If we have already processed a previous operand, return its `LocationInfo` list to the
+ // pool.
+ if (useIndex > 0)
+ {
+ assert(!operandDefs.IsEmpty());
+ listNodePool.ReturnNodes(operandDefs);
+ }
+
+ // Remove the list of registers defined by the current operand from the map. Note that this
+ // is only correct because tree nodes are singly-used: if this property ever changes (e.g.
+ // if tree nodes are eventually allowed to be multiply-used), then the removal is only
+ // correct at the last use.
+ bool removed = operandToLocationInfoMap.TryRemove(operand, &operandDefs);
+ assert(removed);
+
+ // Move the operand def iterator to the `LocationInfo` for the first register defined by the
+ // current operand.
+ operandDefsIterator = operandDefs.Begin();
+ assert(operandDefsIterator != operandDefs.End());
+ }
+
+ LocationInfo& locInfo = *static_cast<LocationInfo*>(operandDefsIterator);
+ operandDefsIterator = operandDefsIterator->Next();
+
+ JITDUMP("t%u ", locInfo.loc);
+
+ // For interstitial tree temps, a use is always a last use (it ends the interval);
+ // this is set by default in newRefPosition.
+ GenTree* useNode = locInfo.treeNode;
+ assert(useNode != nullptr);
+ var_types type = useNode->TypeGet();
+ regMaskTP candidates = getUseCandidates(useNode);
+ Interval* i = locInfo.interval;
+ unsigned multiRegIdx = locInfo.multiRegIdx;
+
+#ifdef FEATURE_SIMD
+ // In case of multi-reg call store to a local, there won't be any mismatch of
+ // use candidates with the type of the tree node.
+ if (tree->OperIsLocalStore() && varDefInterval == nullptr && !useNode->IsMultiRegCall())
+ {
+ // This is a non-candidate store. If this is a SIMD type, the use candidates
+ // may not match the type of the tree node. If that is the case, change the
+ // type of the tree node to match, so that we do the right kind of store.
+ if ((candidates & allRegs(tree->gtType)) == RBM_NONE)
+ {
+ noway_assert((candidates & allRegs(useNode->gtType)) != RBM_NONE);
+ // Currently, the only case where this should happen is for a TYP_LONG
+ // source and a TYP_SIMD8 target.
+ assert((useNode->gtType == TYP_LONG && tree->gtType == TYP_SIMD8) || + (useNode->gtType == TYP_SIMD8 && tree->gtType == TYP_LONG)); + tree->gtType = useNode->gtType; + } + } +#endif // FEATURE_SIMD + + bool delayRegFree = (hasDelayFreeSrc && useNode->gtLsraInfo.isDelayFree); + if (useNode->gtLsraInfo.isTgtPref) + { + prefSrcInterval = i; + } + + bool regOptionalAtUse = useNode->IsRegOptional(); + bool isLastUse = true; + if (isCandidateLocalRef(useNode)) + { + isLastUse = ((useNode->gtFlags & GTF_VAR_DEATH) != 0); + } + else + { + // For non-localVar uses we record nothing, + // as nothing needs to be written back to the tree. + useNode = nullptr; + } + + regMaskTP fixedAssignment = fixedCandidateMask(type, candidates); + if (fixedAssignment != RBM_NONE) + { + candidates = fixedAssignment; + } + + RefPosition* pos; + if ((candidates & allRegs(i->registerType)) == 0) + { + // This should only occur where we've got a type mismatch due to SIMD + // pointer-size types that are passed & returned as longs. + i->hasConflictingDefUse = true; + if (fixedAssignment != RBM_NONE) + { + // Explicitly insert a FixedRefPosition and fake the candidates, because otherwise newRefPosition + // will complain about the types not matching. + regNumber physicalReg = genRegNumFromMask(fixedAssignment); + RefPosition* pos = newRefPosition(physicalReg, currentLoc, RefTypeFixedReg, nullptr, fixedAssignment); + } + pos = newRefPosition(i, currentLoc, RefTypeUse, useNode, allRegs(i->registerType), multiRegIdx); + pos->registerAssignment = candidates; + } + else + { + pos = newRefPosition(i, currentLoc, RefTypeUse, useNode, candidates, multiRegIdx); + } + if (delayRegFree) + { + hasDelayFreeSrc = true; + pos->delayRegFree = true; + } + + if (isLastUse) + { + pos->lastUse = true; + } + + if (regOptionalAtUse) + { + pos->setAllocateIfProfitable(1); + } + } + JITDUMP("\n"); + + if (!operandDefs.IsEmpty()) + { + listNodePool.ReturnNodes(operandDefs); + } + + buildInternalRegisterUsesForNode(tree, currentLoc, internalRefs, internalCount); + + RegisterType registerType = getDefType(tree); + regMaskTP candidates = getDefCandidates(tree); + regMaskTP useCandidates = getUseCandidates(tree); + +#ifdef DEBUG + if (VERBOSE) + { + printf("Def candidates "); + dumpRegMask(candidates); + printf(", Use candidates "); + dumpRegMask(useCandidates); + printf("\n"); + } +#endif // DEBUG + +#if defined(_TARGET_AMD64_) + // Multi-reg call node is the only node that could produce multi-reg value + assert(produce <= 1 || (tree->IsMultiRegCall() && produce == MAX_RET_REG_COUNT)); +#elif defined(_TARGET_ARM_) + assert(!varTypeIsMultiReg(tree->TypeGet())); +#endif // _TARGET_xxx_ + + // Add kill positions before adding def positions + buildKillPositionsForNode(tree, currentLoc + 1); + +#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE + VARSET_TP VARSET_INIT_NOCOPY(liveLargeVectors, VarSetOps::UninitVal()); + if (RBM_FLT_CALLEE_SAVED != RBM_NONE) + { + // Build RefPositions for saving any live large vectors. + // This must be done after the kills, so that we know which large vectors are still live. 
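+ // For example: on AVX targets a call preserves at most the lower 128 bits of the callee-saved
+ // vector registers, so a live TYP_SIMD32 lclVar gets a RefTypeUpperVectorSaveDef here and a
+ // matching RefTypeUpperVectorSaveUse after the node (see buildUpperVectorRestoreRefPositions).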
+ VarSetOps::AssignNoCopy(compiler, liveLargeVectors, buildUpperVectorSaveRefPositions(tree, currentLoc)); + } +#endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE + + ReturnTypeDesc* retTypeDesc = nullptr; + bool isMultiRegCall = tree->IsMultiRegCall(); + if (isMultiRegCall) + { + retTypeDesc = tree->AsCall()->GetReturnTypeDesc(); + assert((int)genCountBits(candidates) == produce); + assert(candidates == retTypeDesc->GetABIReturnRegs()); + } + + // push defs + LocationInfoList locationInfoList; + LsraLocation defLocation = currentLoc + 1; + for (int i = 0; i < produce; i++) + { + regMaskTP currCandidates = candidates; + Interval* interval = varDefInterval; + + // In case of multi-reg call node, registerType is given by + // the type of ith position return register. + if (isMultiRegCall) + { + registerType = retTypeDesc->GetReturnRegType((unsigned)i); + currCandidates = genRegMask(retTypeDesc->GetABIReturnReg(i)); + useCandidates = allRegs(registerType); + } + + if (interval == nullptr) + { + // Make a new interval + interval = newInterval(registerType); + if (hasDelayFreeSrc) + { + interval->hasNonCommutativeRMWDef = true; + } + else if (tree->OperIsConst()) + { + assert(!tree->IsReuseRegVal()); + interval->isConstant = true; + } + + if ((currCandidates & useCandidates) != RBM_NONE) + { + interval->updateRegisterPreferences(currCandidates & useCandidates); + } + + if (isSpecialPutArg) + { + interval->isSpecialPutArg = true; + } + } + else + { + assert(registerTypesEquivalent(interval->registerType, registerType)); + } + + if (prefSrcInterval != nullptr) + { + interval->assignRelatedIntervalIfUnassigned(prefSrcInterval); + } + + // for assignments, we want to create a refposition for the def + // but not push it + if (!noAdd) + { + locationInfoList.Append(listNodePool.GetNode(defLocation, interval, tree, (unsigned)i)); + } + + RefPosition* pos = newRefPosition(interval, defLocation, defRefType, defNode, currCandidates, (unsigned)i); + if (info.isLocalDefUse) + { + pos->isLocalDefUse = true; + pos->lastUse = true; + } + DBEXEC(VERBOSE, pos->dump()); + interval->updateRegisterPreferences(currCandidates); + interval->updateRegisterPreferences(useCandidates); + } + +#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE + buildUpperVectorRestoreRefPositions(tree, currentLoc, liveLargeVectors); +#endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE + + bool isContainedNode = + !noAdd && consume == 0 && produce == 0 && (tree->OperIsAggregate() || (tree->TypeGet() != TYP_VOID && !tree->OperIsStore())); + if (isContainedNode) + { + // Contained nodes map to the concatenated lists of their operands. 
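+ // For example, a contained address mode (GT_LEA) defines no register of its own;
+ // the registers defined by its base and index operands are forwarded here so that
+ // the node consuming the address can find and consume them.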
+ for (GenTree* op : tree->Operands()) + { + if (!op->gtLsraInfo.definesAnyRegisters) + { + assert(ComputeOperandDstCount(op) == 0); + continue; + } + + LocationInfoList operandList; + bool removed = operandToLocationInfoMap.TryRemove(op, &operandList); + assert(removed); + + locationInfoList.Append(operandList); + } + } + + if (!locationInfoList.IsEmpty()) + { + bool added = operandToLocationInfoMap.AddOrUpdate(tree, locationInfoList); + assert(added); + tree->gtLsraInfo.definesAnyRegisters = true; + } +} + +// make an interval for each physical register +void LinearScan::buildPhysRegRecords() +{ + RegisterType regType = IntRegisterType; + for (regNumber reg = REG_FIRST; reg < ACTUAL_REG_COUNT; reg = REG_NEXT(reg)) + { + RegRecord* curr = &physRegs[reg]; + curr->init(reg); + } +} + +BasicBlock* getNonEmptyBlock(BasicBlock* block) +{ + while (block != nullptr && block->bbTreeList == nullptr) + { + BasicBlock* nextBlock = block->bbNext; + // Note that here we use the version of NumSucc that does not take a compiler. + // That way this doesn't have to take a compiler, or be an instance method, e.g. of LinearScan. + // If we have an empty block, it must have jump type BBJ_NONE or BBJ_ALWAYS, in which + // case we don't need the version that takes a compiler. + assert(block->NumSucc() == 1 && ((block->bbJumpKind == BBJ_ALWAYS) || (block->bbJumpKind == BBJ_NONE))); + // sometimes the first block is empty and ends with an uncond branch + // assert( block->GetSucc(0) == nextBlock); + block = nextBlock; + } + assert(block != nullptr && block->bbTreeList != nullptr); + return block; +} + +void LinearScan::insertZeroInitRefPositions() +{ + // insert defs for this, then a block boundary + + VARSET_ITER_INIT(compiler, iter, compiler->fgFirstBB->bbLiveIn, varIndex); + while (iter.NextElem(compiler, &varIndex)) + { + unsigned varNum = compiler->lvaTrackedToVarNum[varIndex]; + LclVarDsc* varDsc = compiler->lvaTable + varNum; + if (!varDsc->lvIsParam && isCandidateVar(varDsc) && + (compiler->info.compInitMem || varTypeIsGC(varDsc->TypeGet()))) + { + GenTree* firstNode = getNonEmptyBlock(compiler->fgFirstBB)->firstNode(); + JITDUMP("V%02u was live in\n", varNum); + Interval* interval = getIntervalForLocalVar(varNum); + RefPosition* pos = + newRefPosition(interval, MinLocation, RefTypeZeroInit, firstNode, allRegs(interval->registerType)); + varDsc->lvMustInit = true; + } + } +} + +#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) +// ----------------------------------------------------------------------- +// Sets the register state for an argument of type STRUCT for System V systems. +// See Compiler::raUpdateRegStateForArg(RegState *regState, LclVarDsc *argDsc) in regalloc.cpp +// for how state for argument is updated for unix non-structs and Windows AMD64 structs. 
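+// On SysV AMD64, a struct argument may be passed in up to two registers: lvArgReg holds
+// the register for the first eightbyte and lvOtherArgReg the register for the second,
+// and each may independently be an integer or a floating point register, depending on
+// the classification of the corresponding eightbyte.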
+void LinearScan::unixAmd64UpdateRegStateForArg(LclVarDsc* argDsc) +{ + assert(varTypeIsStruct(argDsc)); + RegState* intRegState = &compiler->codeGen->intRegState; + RegState* floatRegState = &compiler->codeGen->floatRegState; + + if ((argDsc->lvArgReg != REG_STK) && (argDsc->lvArgReg != REG_NA)) + { + if (genRegMask(argDsc->lvArgReg) & (RBM_ALLFLOAT)) + { + assert(genRegMask(argDsc->lvArgReg) & (RBM_FLTARG_REGS)); + floatRegState->rsCalleeRegArgMaskLiveIn |= genRegMask(argDsc->lvArgReg); + } + else + { + assert(genRegMask(argDsc->lvArgReg) & (RBM_ARG_REGS)); + intRegState->rsCalleeRegArgMaskLiveIn |= genRegMask(argDsc->lvArgReg); + } + } + + if ((argDsc->lvOtherArgReg != REG_STK) && (argDsc->lvOtherArgReg != REG_NA)) + { + if (genRegMask(argDsc->lvOtherArgReg) & (RBM_ALLFLOAT)) + { + assert(genRegMask(argDsc->lvOtherArgReg) & (RBM_FLTARG_REGS)); + floatRegState->rsCalleeRegArgMaskLiveIn |= genRegMask(argDsc->lvOtherArgReg); + } + else + { + assert(genRegMask(argDsc->lvOtherArgReg) & (RBM_ARG_REGS)); + intRegState->rsCalleeRegArgMaskLiveIn |= genRegMask(argDsc->lvOtherArgReg); + } + } +} + +#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + +//------------------------------------------------------------------------ +// updateRegStateForArg: Updates rsCalleeRegArgMaskLiveIn for the appropriate +// regState (either compiler->intRegState or compiler->floatRegState), +// with the lvArgReg on "argDsc" +// +// Arguments: +// argDsc - the argument for which the state is to be updated. +// +// Return Value: None +// +// Assumptions: +// The argument is live on entry to the function +// (or is untracked and therefore assumed live) +// +// Notes: +// This relies on a method in regAlloc.cpp that is shared between LSRA +// and regAlloc. It is further abstracted here because regState is updated +// separately for tracked and untracked variables in LSRA. +// +void LinearScan::updateRegStateForArg(LclVarDsc* argDsc) +{ +#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + // For System V AMD64 calls the argDsc can have 2 registers (for structs.) + // Handle them here. + if (varTypeIsStruct(argDsc)) + { + unixAmd64UpdateRegStateForArg(argDsc); + } + else +#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + { + RegState* intRegState = &compiler->codeGen->intRegState; + RegState* floatRegState = &compiler->codeGen->floatRegState; + // In the case of AMD64 we'll still use the floating point registers + // to model the register usage for argument on vararg calls, so + // we will ignore the varargs condition to determine whether we use + // XMM registers or not for setting up the call. + bool isFloat = (isFloatRegType(argDsc->lvType) +#ifndef _TARGET_AMD64_ + && !compiler->info.compIsVarArgs +#endif + ); + + if (argDsc->lvIsHfaRegArg()) + { + isFloat = true; + } + + if (isFloat) + { + JITDUMP("Float arg V%02u in reg %s\n", (argDsc - compiler->lvaTable), getRegName(argDsc->lvArgReg)); + compiler->raUpdateRegStateForArg(floatRegState, argDsc); + } + else + { + JITDUMP("Int arg V%02u in reg %s\n", (argDsc - compiler->lvaTable), getRegName(argDsc->lvArgReg)); +#if FEATURE_MULTIREG_ARGS + if (argDsc->lvOtherArgReg != REG_NA) + { + JITDUMP("(second half) in reg %s\n", getRegName(argDsc->lvOtherArgReg)); + } +#endif // FEATURE_MULTIREG_ARGS + compiler->raUpdateRegStateForArg(intRegState, argDsc); + } + } +} + +//------------------------------------------------------------------------ +// findPredBlockForLiveIn: Determine which block should be used for the register locations of the live-in variables. 
+//
+// Arguments:
+// block - The block for which we're selecting a predecessor.
+// prevBlock - The previous block in allocation order.
+// pPredBlockIsAllocated - A debug-only argument that indicates whether any of the predecessors have been seen
+// in allocation order.
+//
+// Return Value:
+// The selected predecessor.
+//
+// Assumptions:
+// in DEBUG, caller initializes *pPredBlockIsAllocated to false, and it will be set to true if the block
+// returned is in fact a predecessor.
+//
+// Notes:
+// This will select a predecessor based on the heuristics obtained by getLsraBlockBoundaryLocations(), which can be
+// one of:
+// LSRA_BLOCK_BOUNDARY_PRED - Use the register locations of a predecessor block (default)
+// LSRA_BLOCK_BOUNDARY_LAYOUT - Use the register locations of the previous block in layout order.
+// This is the only case where this actually returns a different block.
+// LSRA_BLOCK_BOUNDARY_ROTATE - Rotate the register locations from a predecessor.
+// For this case, the block returned is the same as for LSRA_BLOCK_BOUNDARY_PRED, but
+// the register locations will be "rotated" to stress the resolution and allocation
+// code.
+
+BasicBlock* LinearScan::findPredBlockForLiveIn(BasicBlock* block,
+ BasicBlock* prevBlock DEBUGARG(bool* pPredBlockIsAllocated))
+{
+ BasicBlock* predBlock = nullptr;
+#ifdef DEBUG
+ assert(*pPredBlockIsAllocated == false);
+ if (getLsraBlockBoundaryLocations() == LSRA_BLOCK_BOUNDARY_LAYOUT)
+ {
+ if (prevBlock != nullptr)
+ {
+ predBlock = prevBlock;
+ }
+ }
+ else
+#endif // DEBUG
+ if (block != compiler->fgFirstBB)
+ {
+ predBlock = block->GetUniquePred(compiler);
+ if (predBlock != nullptr)
+ {
+ if (isBlockVisited(predBlock))
+ {
+ if (predBlock->bbJumpKind == BBJ_COND)
+ {
+ // Special handling to improve matching on backedges.
+ BasicBlock* otherBlock = (block == predBlock->bbNext) ? predBlock->bbJumpDest : predBlock->bbNext;
+ noway_assert(otherBlock != nullptr);
+ if (isBlockVisited(otherBlock))
+ {
+ // This is the case when we have a conditional branch where one target has already
+ // been visited. It would be best to use the same incoming regs as that block,
+ // so that we are less likely to have to move registers.
+ // For example, in determining the block to use for the starting register locations for
+ // "block" in the following example, we'd like to use the same predecessor for "block"
+ // as for "otherBlock", so that both successors of predBlock have the same locations, reducing
+ // the likelihood of needing a split block on a backedge:
+ //
+ // otherPred
+ // |
+ // otherBlock <-+
+ // . . .
| + // | + // predBlock----+ + // | + // block + // + for (flowList* pred = otherBlock->bbPreds; pred != nullptr; pred = pred->flNext) + { + BasicBlock* otherPred = pred->flBlock; + if (otherPred->bbNum == blockInfo[otherBlock->bbNum].predBBNum) + { + predBlock = otherPred; + break; + } + } + } + } + } + else + { + predBlock = nullptr; + } + } + else + { + for (flowList* pred = block->bbPreds; pred != nullptr; pred = pred->flNext) + { + BasicBlock* candidatePredBlock = pred->flBlock; + if (isBlockVisited(candidatePredBlock)) + { + if (predBlock == nullptr || predBlock->bbWeight < candidatePredBlock->bbWeight) + { + predBlock = candidatePredBlock; + INDEBUG(*pPredBlockIsAllocated = true;) + } + } + } + } + if (predBlock == nullptr) + { + predBlock = prevBlock; + assert(predBlock != nullptr); + JITDUMP("\n\nNo allocated predecessor; "); + } + } + return predBlock; +} + +void LinearScan::buildIntervals() +{ + BasicBlock* block; + + // start numbering at 1; 0 is the entry + LsraLocation currentLoc = 1; + + JITDUMP("\nbuildIntervals ========\n"); + + // Now build (empty) records for all of the physical registers + buildPhysRegRecords(); + +#ifdef DEBUG + if (VERBOSE) + { + printf("\n-----------------\n"); + printf("LIVENESS:\n"); + printf("-----------------\n"); + foreach_block(compiler, block) + { + printf("BB%02u use def in out\n", block->bbNum); + dumpConvertedVarSet(compiler, block->bbVarUse); + printf("\n"); + dumpConvertedVarSet(compiler, block->bbVarDef); + printf("\n"); + dumpConvertedVarSet(compiler, block->bbLiveIn); + printf("\n"); + dumpConvertedVarSet(compiler, block->bbLiveOut); + printf("\n"); + } + } +#endif // DEBUG + + identifyCandidates(); + + DBEXEC(VERBOSE, TupleStyleDump(LSRA_DUMP_PRE)); + + // second part: + JITDUMP("\nbuildIntervals second part ========\n"); + currentLoc = 0; + + // Next, create ParamDef RefPositions for all the tracked parameters, + // in order of their varIndex + + LclVarDsc* argDsc; + unsigned int lclNum; + + RegState* intRegState = &compiler->codeGen->intRegState; + RegState* floatRegState = &compiler->codeGen->floatRegState; + intRegState->rsCalleeRegArgMaskLiveIn = RBM_NONE; + floatRegState->rsCalleeRegArgMaskLiveIn = RBM_NONE; + + for (unsigned int varIndex = 0; varIndex < compiler->lvaTrackedCount; varIndex++) + { + lclNum = compiler->lvaTrackedToVarNum[varIndex]; + argDsc = &(compiler->lvaTable[lclNum]); + + if (!argDsc->lvIsParam) + { + continue; + } + + // Only reserve a register if the argument is actually used. + // Is it dead on entry? If compJmpOpUsed is true, then the arguments + // have to be kept alive, so we have to consider it as live on entry. + // Use lvRefCnt instead of checking bbLiveIn because if it's volatile we + // won't have done dataflow on it, but it needs to be marked as live-in so + // it will get saved in the prolog. 
+ if (!compiler->compJmpOpUsed && argDsc->lvRefCnt == 0 && !compiler->opts.compDbgCode)
+ {
+ continue;
+ }
+
+ if (argDsc->lvIsRegArg)
+ {
+ updateRegStateForArg(argDsc);
+ }
+
+ if (isCandidateVar(argDsc))
+ {
+ Interval* interval = getIntervalForLocalVar(lclNum);
+ regMaskTP mask = allRegs(TypeGet(argDsc));
+ if (argDsc->lvIsRegArg)
+ {
+ // Set this interval as currently assigned to that register
+ regNumber inArgReg = argDsc->lvArgReg;
+ assert(inArgReg < REG_COUNT);
+ mask = genRegMask(inArgReg);
+ assignPhysReg(inArgReg, interval);
+ }
+ RefPosition* pos = newRefPosition(interval, MinLocation, RefTypeParamDef, nullptr, mask);
+ }
+ else if (varTypeIsStruct(argDsc->lvType))
+ {
+ for (unsigned fieldVarNum = argDsc->lvFieldLclStart;
+ fieldVarNum < argDsc->lvFieldLclStart + argDsc->lvFieldCnt; ++fieldVarNum)
+ {
+ LclVarDsc* fieldVarDsc = &(compiler->lvaTable[fieldVarNum]);
+ if (fieldVarDsc->lvLRACandidate)
+ {
+ Interval* interval = getIntervalForLocalVar(fieldVarNum);
+ RefPosition* pos =
+ newRefPosition(interval, MinLocation, RefTypeParamDef, nullptr, allRegs(TypeGet(fieldVarDsc)));
+ }
+ }
+ }
+ else
+ {
+ // We can overwrite the register (i.e. codegen saves it on entry)
+ assert(argDsc->lvRefCnt == 0 || !argDsc->lvIsRegArg || argDsc->lvDoNotEnregister ||
+ !argDsc->lvLRACandidate || (varTypeIsFloating(argDsc->TypeGet()) && compiler->opts.compDbgCode));
+ }
+ }
+
+ // Now set up the reg state for the non-tracked args
+ // (We do this here because we want to generate the ParamDef RefPositions in tracked
+ // order, so that loop doesn't hit the non-tracked args)
+
+ for (unsigned argNum = 0; argNum < compiler->info.compArgsCount; argNum++, argDsc++)
+ {
+ argDsc = &(compiler->lvaTable[argNum]);
+
+ if (argDsc->lvPromotedStruct())
+ {
+ noway_assert(argDsc->lvFieldCnt == 1); // We only handle one field here
+
+ unsigned fieldVarNum = argDsc->lvFieldLclStart;
+ argDsc = &(compiler->lvaTable[fieldVarNum]);
+ }
+ noway_assert(argDsc->lvIsParam);
+ if (!argDsc->lvTracked && argDsc->lvIsRegArg)
+ {
+ updateRegStateForArg(argDsc);
+ }
+ }
+
+ // If there is a secret stub param, it is also live in
+ if (compiler->info.compPublishStubParam)
+ {
+ intRegState->rsCalleeRegArgMaskLiveIn |= RBM_SECRET_STUB_PARAM;
+ }
+
+ LocationInfoListNodePool listNodePool(compiler, 8);
+ SmallHashTable<GenTree*, LocationInfoList, 32> operandToLocationInfoMap(compiler);
+
+ BasicBlock* predBlock = nullptr;
+ BasicBlock* prevBlock = nullptr;
+
+ // Initialize currentLiveVars to the empty set. We will set it to the current
+ // live-in at the entry to each block (this will include the incoming args on
+ // the first block).
+ VarSetOps::AssignNoCopy(compiler, currentLiveVars, VarSetOps::MakeEmpty(compiler));
+
+ for (block = startBlockSequence(); block != nullptr; block = moveToNextBlock())
+ {
+ JITDUMP("\nNEW BLOCK BB%02u\n", block->bbNum);
+
+ bool predBlockIsAllocated = false;
+ predBlock = findPredBlockForLiveIn(block, prevBlock DEBUGARG(&predBlockIsAllocated));
+
+ if (block == compiler->fgFirstBB)
+ {
+ insertZeroInitRefPositions();
+ }
+
+ // Determine if we need any DummyDefs.
+ // We need DummyDefs for cases where "predBlock" isn't really a predecessor.
+ // Note that it's possible to have uses of uninitialized variables, in which case even the first
+ // block may require DummyDefs, which we are not currently adding - this means that these variables
+ // will always be considered to be in memory on entry (and reloaded when the use is encountered).
+ // TODO-CQ: Consider how best to tune this. Currently, if we create DummyDefs for uninitialized
+ // variables (which may actually be initialized along the dynamically executed paths, but not
+ // on all static paths), we wind up with excessive live ranges for some of these variables.
+ VARSET_TP VARSET_INIT(compiler, newLiveIn, block->bbLiveIn);
+ if (predBlock)
+ {
+ JITDUMP("\n\nSetting incoming variable registers of BB%02u to outVarToRegMap of BB%02u\n", block->bbNum,
+ predBlock->bbNum);
+ assert(predBlock->bbNum <= bbNumMaxBeforeResolution);
+ blockInfo[block->bbNum].predBBNum = predBlock->bbNum;
+ // Compute set difference: newLiveIn = block->bbLiveIn - predBlock->bbLiveOut
+ VarSetOps::DiffD(compiler, newLiveIn, predBlock->bbLiveOut);
+ }
+ bool needsDummyDefs = (!VarSetOps::IsEmpty(compiler, newLiveIn) && block != compiler->fgFirstBB);
+
+ // Create dummy def RefPositions
+
+ if (needsDummyDefs)
+ {
+ // If we are using locations from a predecessor, we should never require DummyDefs.
+ assert(!predBlockIsAllocated);
+
+ JITDUMP("Creating dummy definitions\n");
+ VARSET_ITER_INIT(compiler, iter, newLiveIn, varIndex);
+ while (iter.NextElem(compiler, &varIndex))
+ {
+ unsigned varNum = compiler->lvaTrackedToVarNum[varIndex];
+ LclVarDsc* varDsc = compiler->lvaTable + varNum;
+ // Add a dummyDef for any candidate vars that are in the "newLiveIn" set.
+ // If this is the entry block, don't add any incoming parameters (they're handled with ParamDefs).
+ if (isCandidateVar(varDsc) && (predBlock != nullptr || !varDsc->lvIsParam))
+ {
+ Interval* interval = getIntervalForLocalVar(varNum);
+ RefPosition* pos =
+ newRefPosition(interval, currentLoc, RefTypeDummyDef, nullptr, allRegs(interval->registerType));
+ }
+ }
+ JITDUMP("Finished creating dummy definitions\n\n");
+ }
+
+ // Add a dummy RefPosition to mark the block boundary.
+ // Note that we do this AFTER adding the exposed uses above, because the
+ // register positions for those exposed uses need to be recorded at
+ // this point.
+
+ RefPosition* pos = newRefPosition((Interval*)nullptr, currentLoc, RefTypeBB, nullptr, RBM_NONE);
+
+ VarSetOps::Assign(compiler, currentLiveVars, block->bbLiveIn);
+
+ LIR::Range& blockRange = LIR::AsRange(block);
+ for (GenTree* node : blockRange.NonPhiNodes())
+ {
+ assert(node->gtLsraInfo.loc >= currentLoc);
+ assert(((node->gtLIRFlags & LIR::Flags::IsUnusedValue) == 0) || node->gtLsraInfo.isLocalDefUse);
+
+ currentLoc = node->gtLsraInfo.loc;
+ buildRefPositionsForNode(node, block, listNodePool, operandToLocationInfoMap, currentLoc);
+
+#ifdef DEBUG
+ if (currentLoc > maxNodeLocation)
+ {
+ maxNodeLocation = currentLoc;
+ }
+#endif // DEBUG
+ }
+
+ // Increment the LsraLocation at this point, so that the dummy RefPositions
+ // will not have the same LsraLocation as any "real" RefPosition.
+ currentLoc += 2;
+
+ // Note: the visited set is cleared in LinearScan::doLinearScan()
+ markBlockVisited(block);
+
+ // Insert exposed uses for a lclVar that is live-out of 'block' but not live-in to the
+ // next block, or any unvisited successors.
+ // This will address lclVars that are live on a backedge, as well as those that are kept
+ // live at a GT_JMP.
+ //
+ // Blocks ending with "jmp method" are marked as BBJ_HAS_JMP,
+ // and a jmp call is represented using a GT_JMP node, which is a leaf node.
+ // The liveness phase keeps all the arguments of the method live until the end of the
+ // block by adding them to the live-out set of the block containing the GT_JMP.
+ // + // The target of a GT_JMP implicitly uses all the current method arguments, however + // there are no actual references to them. This can cause LSRA to assert, because + // the variables are live but it sees no references. In order to correctly model the + // liveness of these arguments, we add dummy exposed uses, in the same manner as for + // backward branches. This will happen automatically via expUseSet. + // + // Note that a block ending with GT_JMP has no successors and hence the variables + // for which dummy use ref positions are added are arguments of the method. + + VARSET_TP VARSET_INIT(compiler, expUseSet, block->bbLiveOut); + BasicBlock* nextBlock = getNextBlock(); + if (nextBlock != nullptr) + { + VarSetOps::DiffD(compiler, expUseSet, nextBlock->bbLiveIn); + } + AllSuccessorIter succsEnd = block->GetAllSuccs(compiler).end(); + for (AllSuccessorIter succs = block->GetAllSuccs(compiler).begin(); + succs != succsEnd && !VarSetOps::IsEmpty(compiler, expUseSet); ++succs) + { + BasicBlock* succ = (*succs); + if (isBlockVisited(succ)) + { + continue; + } + VarSetOps::DiffD(compiler, expUseSet, succ->bbLiveIn); + } + + if (!VarSetOps::IsEmpty(compiler, expUseSet)) + { + JITDUMP("Exposed uses:"); + VARSET_ITER_INIT(compiler, iter, expUseSet, varIndex); + while (iter.NextElem(compiler, &varIndex)) + { + unsigned varNum = compiler->lvaTrackedToVarNum[varIndex]; + LclVarDsc* varDsc = compiler->lvaTable + varNum; + if (isCandidateVar(varDsc)) + { + Interval* interval = getIntervalForLocalVar(varNum); + RefPosition* pos = + newRefPosition(interval, currentLoc, RefTypeExpUse, nullptr, allRegs(interval->registerType)); + JITDUMP(" V%02u", varNum); + } + } + JITDUMP("\n"); + } + + // Identify the last uses of each variable, except in the case of MinOpts, where all vars + // are kept live everywhere. 
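+ // (When MinOpts is set, setLastUses is skipped entirely, so no RefPosition is marked
+ // as a last use and every variable is treated as live across its whole range.)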
+ + if (!compiler->opts.MinOpts()) + { + setLastUses(block); + } + +#ifdef DEBUG + if (VERBOSE) + { + printf("use: "); + dumpConvertedVarSet(compiler, block->bbVarUse); + printf("\ndef: "); + dumpConvertedVarSet(compiler, block->bbVarDef); + printf("\n"); + } +#endif // DEBUG + + prevBlock = block; + } + + // If we need to KeepAliveAndReportThis, add a dummy exposed use of it at the end + if (compiler->lvaKeepAliveAndReportThis()) + { + unsigned keepAliveVarNum = compiler->info.compThisArg; + assert(compiler->info.compIsStatic == false); + if (isCandidateVar(&compiler->lvaTable[keepAliveVarNum])) + { + JITDUMP("Adding exposed use of this, for lvaKeepAliveAndReportThis\n"); + Interval* interval = getIntervalForLocalVar(keepAliveVarNum); + RefPosition* pos = + newRefPosition(interval, currentLoc, RefTypeExpUse, nullptr, allRegs(interval->registerType)); + } + } + +#ifdef DEBUG + if (getLsraExtendLifeTimes()) + { + LclVarDsc* varDsc; + for (lclNum = 0, varDsc = compiler->lvaTable; lclNum < compiler->lvaCount; lclNum++, varDsc++) + { + if (varDsc->lvLRACandidate) + { + JITDUMP("Adding exposed use of V%02u for LsraExtendLifetimes\n", lclNum); + Interval* interval = getIntervalForLocalVar(lclNum); + RefPosition* pos = + newRefPosition(interval, currentLoc, RefTypeExpUse, nullptr, allRegs(interval->registerType)); + } + } + } +#endif // DEBUG + + // If the last block has successors, create a RefTypeBB to record + // what's live + + if (prevBlock->NumSucc(compiler) > 0) + { + RefPosition* pos = newRefPosition((Interval*)nullptr, currentLoc, RefTypeBB, nullptr, RBM_NONE); + } + +#ifdef DEBUG + // Make sure we don't have any blocks that were not visited + foreach_block(compiler, block) + { + assert(isBlockVisited(block)); + } + + if (VERBOSE) + { + lsraDumpIntervals("BEFORE VALIDATING INTERVALS"); + dumpRefPositions("BEFORE VALIDATING INTERVALS"); + validateIntervals(); + } +#endif // DEBUG +} + +#ifdef DEBUG +void LinearScan::dumpVarRefPositions(const char* title) +{ + printf("\nVAR REFPOSITIONS %s\n", title); + + for (unsigned i = 0; i < compiler->lvaCount; i++) + { + Interval* interval = getIntervalForLocalVar(i); + printf("--- V%02u\n", i); + + for (RefPosition* ref = interval->firstRefPosition; ref != nullptr; ref = ref->nextRefPosition) + { + ref->dump(); + } + } + + printf("\n"); +} + +void LinearScan::validateIntervals() +{ + for (unsigned i = 0; i < compiler->lvaCount; i++) + { + Interval* interval = getIntervalForLocalVar(i); + + bool defined = false; + printf("-----------------\n"); + for (RefPosition* ref = interval->firstRefPosition; ref != nullptr; ref = ref->nextRefPosition) + { + ref->dump(); + RefType refType = ref->refType; + if (!defined && RefTypeIsUse(refType)) + { + if (compiler->info.compMethodName != nullptr) + { + printf("%s: ", compiler->info.compMethodName); + } + printf("LocalVar V%02u: undefined use at %u\n", i, ref->nodeLocation); + } + // Note that there can be multiple last uses if they are on disjoint paths, + // so we can't really check the lastUse flag + if (ref->lastUse) + { + defined = false; + } + if (RefTypeIsDef(refType)) + { + defined = true; + } + } + } +} +#endif // DEBUG + +// Set the default rpFrameType based upon codeGen->isFramePointerRequired() +// This was lifted from the register predictor +// +void LinearScan::setFrameType() +{ + FrameType frameType = FT_NOT_SET; + if (compiler->codeGen->isFramePointerRequired()) + { + frameType = FT_EBP_FRAME; + } + else + { + if (compiler->rpMustCreateEBPCalled == false) + { +#ifdef DEBUG + const char* reason; 
+#endif // DEBUG
+ compiler->rpMustCreateEBPCalled = true;
+ if (compiler->rpMustCreateEBPFrame(INDEBUG(&reason)))
+ {
+ JITDUMP("; Decided to create an EBP based frame for ETW stackwalking (%s)\n", reason);
+ compiler->codeGen->setFrameRequired(true);
+ }
+ }
+
+ if (compiler->codeGen->isFrameRequired())
+ {
+ frameType = FT_EBP_FRAME;
+ }
+ else
+ {
+ frameType = FT_ESP_FRAME;
+ }
+ }
+
+#if DOUBLE_ALIGN
+ // The DOUBLE_ALIGN feature indicates whether the JIT will attempt to double-align the
+ // frame if needed. Note that this feature isn't on for amd64, because the stack is
+ // always double-aligned by default.
+ compiler->codeGen->setDoubleAlign(false);
+
+ // TODO-CQ: Tune this (see regalloc.cpp, in which raCntWtdStkDblStackFP is used to
+ // determine whether to double-align). Note, though, that there is at least one test
+ // (jit\opt\Perf\DoubleAlign\Locals.exe) that depends on double-alignment being set
+ // in certain situations.
+ if (!compiler->opts.MinOpts() && !compiler->codeGen->isFramePointerRequired() && compiler->compFloatingPointUsed)
+ {
+ frameType = FT_DOUBLE_ALIGN_FRAME;
+ }
+#endif // DOUBLE_ALIGN
+
+ switch (frameType)
+ {
+ case FT_ESP_FRAME:
+ noway_assert(!compiler->codeGen->isFramePointerRequired());
+ noway_assert(!compiler->codeGen->isFrameRequired());
+ compiler->codeGen->setFramePointerUsed(false);
+ break;
+ case FT_EBP_FRAME:
+ compiler->codeGen->setFramePointerUsed(true);
+ break;
+#if DOUBLE_ALIGN
+ case FT_DOUBLE_ALIGN_FRAME:
+ noway_assert(!compiler->codeGen->isFramePointerRequired());
+ compiler->codeGen->setFramePointerUsed(false);
+ compiler->codeGen->setDoubleAlign(true);
+ break;
+#endif // DOUBLE_ALIGN
+ default:
+ noway_assert(!"rpFrameType not set correctly!");
+ break;
+ }
+
+ // If we are using FPBASE as the frame register, we cannot also use it for
+ // a local var. Note that we may have already added it to the register masks,
+ // which are computed when the LinearScan class is constructed, and
+ // used during lowering. Luckily, the TreeNodeInfo only stores an index to
+ // the masks stored in the LinearScan class, so we only need to walk the
+ // unique masks and remove FPBASE.
+ if (frameType == FT_EBP_FRAME)
+ {
+ if ((availableIntRegs & RBM_FPBASE) != 0)
+ {
+ RemoveRegisterFromMasks(REG_FPBASE);
+
+ // We know that we're already in "read mode" for availableIntRegs. However,
+ // we need to remove the FPBASE register, so subsequent users (like callers
+ // to allRegs()) get the right thing. The RemoveRegisterFromMasks() code
+ // fixes up everything that already took a dependency on the value that was
+ // previously read, so this completes the picture.
+ availableIntRegs.OverrideAssign(availableIntRegs & ~RBM_FPBASE);
+ }
+ }
+
+ compiler->rpFrameType = frameType;
+}
+
+// Is the copyReg given by this RefPosition still busy at the
+// given location?
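+// A copyReg remains busy through its own use; in addition, because a subsequent
+// RefPosition on the same tree node may still read the value from the copy register,
+// we also treat it as busy through such a reference's end location.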
+bool copyRegInUse(RefPosition* ref, LsraLocation loc) +{ + assert(ref->copyReg); + if (ref->getRefEndLocation() >= loc) + { + return true; + } + Interval* interval = ref->getInterval(); + RefPosition* nextRef = interval->getNextRefPosition(); + if (nextRef != nullptr && nextRef->treeNode == ref->treeNode && nextRef->getRefEndLocation() >= loc) + { + return true; + } + return false; +} + +// Determine whether the register represented by "physRegRecord" is available at least +// at the "currentLoc", and if so, return the next location at which it is in use in +// "nextRefLocationPtr" +// +bool LinearScan::registerIsAvailable(RegRecord* physRegRecord, + LsraLocation currentLoc, + LsraLocation* nextRefLocationPtr, + RegisterType regType) +{ + *nextRefLocationPtr = MaxLocation; + LsraLocation nextRefLocation = MaxLocation; + regMaskTP regMask = genRegMask(physRegRecord->regNum); + if (physRegRecord->isBusyUntilNextKill) + { + return false; + } + + RefPosition* nextPhysReference = physRegRecord->getNextRefPosition(); + if (nextPhysReference != nullptr) + { + nextRefLocation = nextPhysReference->nodeLocation; + // if (nextPhysReference->refType == RefTypeFixedReg) nextRefLocation--; + } + else if (!physRegRecord->isCalleeSave) + { + nextRefLocation = MaxLocation - 1; + } + + Interval* assignedInterval = physRegRecord->assignedInterval; + + if (assignedInterval != nullptr) + { + RefPosition* recentReference = assignedInterval->recentRefPosition; + + // The only case where we have an assignedInterval, but recentReference is null + // is where this interval is live at procedure entry (i.e. an arg register), in which + // case it's still live and its assigned register is not available + // (Note that the ParamDef will be recorded as a recentReference when we encounter + // it, but we will be allocating registers, potentially to other incoming parameters, + // as we process the ParamDefs.) + + if (recentReference == nullptr) + { + return false; + } + + // Is this a copyReg? It is if the register assignment doesn't match. + // (the recentReference may not be a copyReg, because we could have seen another + // reference since the copyReg) + + if (!assignedInterval->isAssignedTo(physRegRecord->regNum)) + { + // Don't reassign it if it's still in use + if (recentReference->copyReg && copyRegInUse(recentReference, currentLoc)) + { + return false; + } + } + else if (!assignedInterval->isActive && assignedInterval->isConstant) + { + // Treat this as unassigned, i.e. do nothing. + // TODO-CQ: Consider adjusting the heuristics (probably in the caller of this method) + // to avoid reusing these registers. + } + // If this interval isn't active, it's available if it isn't referenced + // at this location (or the previous location, if the recent RefPosition + // is a delayRegFree). 
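+ // (A recent RefTypeExpUse is also safe to treat as available: an exposed use only
+ // records liveness at a block boundary and does not occupy the register here.)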
+ else if (!assignedInterval->isActive &&
+ (recentReference->refType == RefTypeExpUse || recentReference->getRefEndLocation() < currentLoc))
+ {
+ // This interval must have a next reference (otherwise it wouldn't be assigned to this register)
+ RefPosition* nextReference = recentReference->nextRefPosition;
+ if (nextReference != nullptr)
+ {
+ if (nextReference->nodeLocation < nextRefLocation)
+ {
+ nextRefLocation = nextReference->nodeLocation;
+ }
+ }
+ else
+ {
+ assert(recentReference->copyReg && recentReference->registerAssignment != regMask);
+ }
+ }
+ else
+ {
+ return false;
+ }
+ }
+ if (nextRefLocation < *nextRefLocationPtr)
+ {
+ *nextRefLocationPtr = nextRefLocation;
+ }
+
+#ifdef _TARGET_ARM_
+ if (regType == TYP_DOUBLE)
+ {
+ // Recurse, but check the other half this time (TYP_FLOAT)
+ if (!registerIsAvailable(getRegisterRecord(REG_NEXT(physRegRecord->regNum)), currentLoc, nextRefLocationPtr,
+ TYP_FLOAT))
+ return false;
+ nextRefLocation = *nextRefLocationPtr;
+ }
+#endif // _TARGET_ARM_
+
+ return (nextRefLocation >= currentLoc);
+}
+
+//------------------------------------------------------------------------
+// getRegisterType: Get the RegisterType to use for the given RefPosition
+//
+// Arguments:
+// currentInterval: The interval for the current allocation
+// refPosition: The RefPosition of the current Interval for which a register is being allocated
+//
+// Return Value:
+// The RegisterType that should be allocated for this RefPosition
+//
+// Notes:
+// This will nearly always be identical to the registerType of the interval, except in the case
+// of SIMD types of 8 bytes (currently only Vector2) when they are passed and returned in integer
+// registers, or copied to a return temp.
+// This method need only be called in situations where we may be dealing with the register requirements
+// of a RefTypeUse RefPosition (i.e. not when we are only looking at the type of an interval, nor when
+// we are interested in the "defining" type of the interval). This is because the situation of interest
+// only happens at the use (where it must be copied to an integer register).
+
+RegisterType LinearScan::getRegisterType(Interval* currentInterval, RefPosition* refPosition)
+{
+ assert(refPosition->getInterval() == currentInterval);
+ RegisterType regType = currentInterval->registerType;
+ regMaskTP candidates = refPosition->registerAssignment;
+#if defined(FEATURE_SIMD) && defined(_TARGET_AMD64_)
+ if ((candidates & allRegs(regType)) == RBM_NONE)
+ {
+ assert((regType == TYP_SIMD8) && (refPosition->refType == RefTypeUse) &&
+ ((candidates & allRegs(TYP_INT)) != RBM_NONE));
+ regType = TYP_INT;
+ }
+#else // !(defined(FEATURE_SIMD) && defined(_TARGET_AMD64_))
+ assert((candidates & allRegs(regType)) != RBM_NONE);
+#endif // !(defined(FEATURE_SIMD) && defined(_TARGET_AMD64_))
+ return regType;
+}
+
+//------------------------------------------------------------------------
+// tryAllocateFreeReg: Find a free register that satisfies the requirements for refPosition,
+// and takes into account the preferences for the given Interval
+//
+// Arguments:
+// currentInterval: The interval for the current allocation
+// refPosition: The RefPosition of the current Interval for which a register is being allocated
+//
+// Return Value:
+// The regNumber, if any, allocated to the RefPosition. Returns REG_NA if no free register is found.
+// +// Notes: +// TODO-CQ: Consider whether we need to use a different order for tree temps than for vars, as +// reg predict does + +static const regNumber lsraRegOrder[] = {REG_VAR_ORDER}; +const unsigned lsraRegOrderSize = ArrLen(lsraRegOrder); +static const regNumber lsraRegOrderFlt[] = {REG_VAR_ORDER_FLT}; +const unsigned lsraRegOrderFltSize = ArrLen(lsraRegOrderFlt); + +regNumber LinearScan::tryAllocateFreeReg(Interval* currentInterval, RefPosition* refPosition) +{ + regNumber foundReg = REG_NA; + + RegisterType regType = getRegisterType(currentInterval, refPosition); + const regNumber* regOrder; + unsigned regOrderSize; + if (useFloatReg(regType)) + { + regOrder = lsraRegOrderFlt; + regOrderSize = lsraRegOrderFltSize; + } + else + { + regOrder = lsraRegOrder; + regOrderSize = lsraRegOrderSize; + } + + LsraLocation currentLocation = refPosition->nodeLocation; + RefPosition* nextRefPos = refPosition->nextRefPosition; + LsraLocation nextLocation = (nextRefPos == nullptr) ? currentLocation : nextRefPos->nodeLocation; + regMaskTP candidates = refPosition->registerAssignment; + regMaskTP preferences = currentInterval->registerPreferences; + + if (RefTypeIsDef(refPosition->refType)) + { + if (currentInterval->hasConflictingDefUse) + { + resolveConflictingDefAndUse(currentInterval, refPosition); + candidates = refPosition->registerAssignment; + } + // Otherwise, check for the case of a fixed-reg def of a reg that will be killed before the + // use, or interferes at the point of use (which shouldn't happen, but Lower doesn't mark + // the contained nodes as interfering). + // Note that we may have a ParamDef RefPosition that is marked isFixedRegRef, but which + // has had its registerAssignment changed to no longer be a single register. + else if (refPosition->isFixedRegRef && nextRefPos != nullptr && RefTypeIsUse(nextRefPos->refType) && + !nextRefPos->isFixedRegRef && genMaxOneBit(refPosition->registerAssignment)) + { + regNumber defReg = refPosition->assignedReg(); + RegRecord* defRegRecord = getRegisterRecord(defReg); + + RefPosition* currFixedRegRefPosition = defRegRecord->recentRefPosition; + assert(currFixedRegRefPosition != nullptr && + currFixedRegRefPosition->nodeLocation == refPosition->nodeLocation); + + // If there is another fixed reference to this register before the use, change the candidates + // on this RefPosition to include that of nextRefPos. + if (currFixedRegRefPosition->nextRefPosition != nullptr && + currFixedRegRefPosition->nextRefPosition->nodeLocation <= nextRefPos->getRefEndLocation()) + { + candidates |= nextRefPos->registerAssignment; + if (preferences == refPosition->registerAssignment) + { + preferences = candidates; + } + } + } + } + + preferences &= candidates; + if (preferences == RBM_NONE) + { + preferences = candidates; + } + regMaskTP relatedPreferences = RBM_NONE; + +#ifdef DEBUG + candidates = stressLimitRegs(refPosition, candidates); +#endif + bool mustAssignARegister = true; + assert(candidates != RBM_NONE); + + // If the related interval has no further references, it is possible that it is a source of the + // node that produces this interval. However, we don't want to use the relatedInterval for preferencing + // if its next reference is not a new definition (as it either is or will become live). 
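+ // For example, if this interval feeds a copy to a lclVar ("v1 = t0"), preferencing
+ // t0 toward v1's eventual register can make the copy a no-op; but if v1's next
+ // reference is a use rather than a def, it is (or will become) live, and sharing
+ // its register could conflict.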
+ Interval* relatedInterval = currentInterval->relatedInterval; + if (relatedInterval != nullptr) + { + RefPosition* nextRelatedRefPosition = relatedInterval->getNextRefPosition(); + if (nextRelatedRefPosition != nullptr) + { + // Don't use the relatedInterval for preferencing if its next reference is not a new definition. + if (!RefTypeIsDef(nextRelatedRefPosition->refType)) + { + relatedInterval = nullptr; + } + // Is the relatedInterval simply a copy to another relatedInterval? + else if ((relatedInterval->relatedInterval != nullptr) && + (nextRelatedRefPosition->nextRefPosition != nullptr) && + (nextRelatedRefPosition->nextRefPosition->nextRefPosition == nullptr) && + (nextRelatedRefPosition->nextRefPosition->nodeLocation < + relatedInterval->relatedInterval->getNextRefLocation())) + { + // The current relatedInterval has only two remaining RefPositions, both of which + // occur prior to the next RefPosition for its relatedInterval. + // It is likely a copy. + relatedInterval = relatedInterval->relatedInterval; + } + } + } + + if (relatedInterval != nullptr) + { + // If the related interval already has an assigned register, then use that + // as the related preference. We'll take the related + // interval preferences into account in the loop over all the registers. + + if (relatedInterval->assignedReg != nullptr) + { + relatedPreferences = genRegMask(relatedInterval->assignedReg->regNum); + } + else + { + relatedPreferences = relatedInterval->registerPreferences; + } + } + + bool preferCalleeSave = currentInterval->preferCalleeSave; + + // For floating point, we want to be less aggressive about using callee-save registers. + // So in that case, we just need to ensure that the current RefPosition is covered. + RefPosition* rangeEndRefPosition; + RefPosition* lastRefPosition = currentInterval->lastRefPosition; + if (useFloatReg(currentInterval->registerType)) + { + rangeEndRefPosition = refPosition; + } + else + { + rangeEndRefPosition = currentInterval->lastRefPosition; + // If we have a relatedInterval that is not currently occupying a register, + // and whose lifetime begins after this one ends, + // we want to try to select a register that will cover its lifetime. + if ((relatedInterval != nullptr) && (relatedInterval->assignedReg == nullptr) && + (relatedInterval->getNextRefLocation() >= rangeEndRefPosition->nodeLocation)) + { + lastRefPosition = relatedInterval->lastRefPosition; + preferCalleeSave = relatedInterval->preferCalleeSave; + } + } + + // If this has a delayed use (due to being used in a rmw position of a + // non-commutative operator), its endLocation is delayed until the "def" + // position, which is one location past the use (getRefEndLocation() takes care of this). + LsraLocation rangeEndLocation = rangeEndRefPosition->getRefEndLocation(); + LsraLocation lastLocation = lastRefPosition->getRefEndLocation(); + regNumber prevReg = REG_NA; + + if (currentInterval->assignedReg) + { + bool useAssignedReg = false; + // This was an interval that was previously allocated to the given + // physical register, and we should try to allocate it to that register + // again, if possible and reasonable. + // Use it preemptively (i.e. before checking other available regs) + // only if it is preferred and available. + + RegRecord* regRec = currentInterval->assignedReg; + prevReg = regRec->regNum; + regMaskTP prevRegBit = genRegMask(prevReg); + + // Is it in the preferred set of regs? + if ((prevRegBit & preferences) != RBM_NONE) + { + // Is it currently available? 
+ LsraLocation nextPhysRefLoc; + if (registerIsAvailable(regRec, currentLocation, &nextPhysRefLoc, currentInterval->registerType)) + { + // If the register is next referenced at this location, only use it if + // this has a fixed reg requirement (i.e. this is the reference that caused + // the FixedReg ref to be created) + + if (!regRec->conflictingFixedRegReference(refPosition)) + { + useAssignedReg = true; + } + } + } + if (useAssignedReg) + { + regNumber foundReg = prevReg; + assignPhysReg(regRec, currentInterval); + refPosition->registerAssignment = genRegMask(foundReg); + return foundReg; + } + else + { + // Don't keep trying to allocate to this register + currentInterval->assignedReg = nullptr; + } + } + + RegRecord* availablePhysRegInterval = nullptr; + Interval* intervalToUnassign = nullptr; + + // Each register will receive a score which is the sum of the scoring criteria below. + // These were selected on the assumption that they will have an impact on the "goodness" + // of a register selection, and have been tuned to a certain extent by observing the impact + // of the ordering on asmDiffs. However, there is probably much more room for tuning, + // and perhaps additional criteria. + // + // These are FLAGS (bits) so that we can easily order them and add them together. + // If the scores are equal, but one covers more of the current interval's range, + // then it wins. Otherwise, the one encountered earlier in the regOrder wins. + + enum RegisterScore + { + VALUE_AVAILABLE = 0x40, // It is a constant value that is already in an acceptable register. + COVERS = 0x20, // It is in the interval's preference set and it covers the entire lifetime. + OWN_PREFERENCE = 0x10, // It is in the preference set of this interval. + COVERS_RELATED = 0x08, // It is in the preference set of the related interval and covers the entire lifetime. + RELATED_PREFERENCE = 0x04, // It is in the preference set of the related interval. + CALLER_CALLEE = 0x02, // It is in the right "set" for the interval (caller or callee-save). + UNASSIGNED = 0x01, // It is not currently assigned to an inactive interval. + }; + + int bestScore = 0; + + // Compute the best possible score so we can stop looping early if we find it. + // TODO-Throughput: At some point we may want to short-circuit the computation of each score, but + // probably not until we've tuned the order of these criteria. At that point, + // we'll need to avoid the short-circuit if we've got a stress option to reverse + // the selection. 
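+ // With the flag values above, the base best score is COVERS (0x20) + OWN_PREFERENCE (0x10)
+ // + CALLER_CALLEE (0x02) + UNASSIGNED (0x01) = 0x33; when there are related preferences,
+ // it becomes 0x33 | (COVERS_RELATED (0x08) + RELATED_PREFERENCE (0x04)) = 0x3F.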
+ int bestPossibleScore = COVERS + UNASSIGNED + OWN_PREFERENCE + CALLER_CALLEE; + if (relatedPreferences != RBM_NONE) + { + bestPossibleScore |= RELATED_PREFERENCE + COVERS_RELATED; + } + + LsraLocation bestLocation = MinLocation; + + // In non-debug builds, this will simply get optimized away + bool reverseSelect = false; +#ifdef DEBUG + reverseSelect = doReverseSelect(); +#endif // DEBUG + + // An optimization for the common case where there is only one candidate - + // avoid looping over all the other registers + + regNumber singleReg = REG_NA; + + if (genMaxOneBit(candidates)) + { + regOrderSize = 1; + singleReg = genRegNumFromMask(candidates); + regOrder = &singleReg; + } + + for (unsigned i = 0; i < regOrderSize && (candidates != RBM_NONE); i++) + { + regNumber regNum = regOrder[i]; + regMaskTP candidateBit = genRegMask(regNum); + + if (!(candidates & candidateBit)) + { + continue; + } + + candidates &= ~candidateBit; + + RegRecord* physRegRecord = getRegisterRecord(regNum); + + int score = 0; + LsraLocation nextPhysRefLocation = MaxLocation; + + // By chance, is this register already holding this interval, as a copyReg or having + // been restored as inactive after a kill? + if (physRegRecord->assignedInterval == currentInterval) + { + availablePhysRegInterval = physRegRecord; + intervalToUnassign = nullptr; + break; + } + + // Find the next RefPosition of the physical register + if (!registerIsAvailable(physRegRecord, currentLocation, &nextPhysRefLocation, regType)) + { + continue; + } + + // If the register is next referenced at this location, only use it if + // this has a fixed reg requirement (i.e. this is the reference that caused + // the FixedReg ref to be created) + + if (physRegRecord->conflictingFixedRegReference(refPosition)) + { + continue; + } + + // If this is a definition of a constant interval, check to see if its value is already in this register. + if (currentInterval->isConstant && RefTypeIsDef(refPosition->refType) && + (physRegRecord->assignedInterval != nullptr) && physRegRecord->assignedInterval->isConstant) + { + noway_assert(refPosition->treeNode != nullptr); + GenTree* otherTreeNode = physRegRecord->assignedInterval->firstRefPosition->treeNode; + noway_assert(otherTreeNode != nullptr); + + if (refPosition->treeNode->OperGet() == otherTreeNode->OperGet()) + { + switch (otherTreeNode->OperGet()) + { + case GT_CNS_INT: + if ((refPosition->treeNode->AsIntCon()->IconValue() == + otherTreeNode->AsIntCon()->IconValue()) && + (varTypeGCtype(refPosition->treeNode) == varTypeGCtype(otherTreeNode))) + { +#ifdef _TARGET_64BIT_ + // If the constant is negative, only reuse registers of the same type. + // This is because, on a 64-bit system, we do not sign-extend immediates in registers to + // 64-bits unless they are actually longs, as this requires a longer instruction. + // This doesn't apply to a 32-bit system, on which long values occupy multiple registers. + // (We could sign-extend, but we would have to always sign-extend, because if we reuse more + // than once, we won't have access to the instruction that originally defines the constant). + if ((refPosition->treeNode->TypeGet() == otherTreeNode->TypeGet()) || + (refPosition->treeNode->AsIntCon()->IconValue() >= 0)) +#endif // _TARGET_64BIT_ + { + score |= VALUE_AVAILABLE; + } + } + break; + case GT_CNS_DBL: + { + // For floating point constants, the values must be identical, not simply compare + // equal. So we compare the bits. 
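+ // (For example, +0.0 and -0.0 compare equal but have different bit patterns, and
+ // a NaN does not compare equal to itself even when the bits are identical.)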
+ if (refPosition->treeNode->AsDblCon()->isBitwiseEqual(otherTreeNode->AsDblCon()) && + (refPosition->treeNode->TypeGet() == otherTreeNode->TypeGet())) + { + score |= VALUE_AVAILABLE; + } + break; + } + default: + // for all other 'otherTreeNode->OperGet()' kinds, we leave 'score' unchanged + break; + } + } + } + + // If the nextPhysRefLocation is a fixedRef for the rangeEndRefPosition, increment it so that + // we don't think it isn't covering the live range. + // This doesn't handle the case where earlier RefPositions for this Interval are also + // FixedRefs of this regNum, but at least those are only interesting in the case where those + // are "local last uses" of the Interval - otherwise the liveRange would interfere with the reg. + if (nextPhysRefLocation == rangeEndLocation && rangeEndRefPosition->isFixedRefOfReg(regNum)) + { + INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_INCREMENT_RANGE_END, currentInterval, regNum)); + nextPhysRefLocation++; + } + + if ((candidateBit & preferences) != RBM_NONE) + { + score |= OWN_PREFERENCE; + if (nextPhysRefLocation > rangeEndLocation) + { + score |= COVERS; + } + } + if (relatedInterval != nullptr && (candidateBit & relatedPreferences) != RBM_NONE) + { + score |= RELATED_PREFERENCE; + if (nextPhysRefLocation > relatedInterval->lastRefPosition->nodeLocation) + { + score |= COVERS_RELATED; + } + } + + // If we had a fixed-reg def of a reg that will be killed before the use, prefer it to any other registers + // with the same score. (Note that we haven't changed the original registerAssignment on the RefPosition). + // Overload the RELATED_PREFERENCE value. + else if (candidateBit == refPosition->registerAssignment) + { + score |= RELATED_PREFERENCE; + } + + if ((preferCalleeSave && physRegRecord->isCalleeSave) || (!preferCalleeSave && !physRegRecord->isCalleeSave)) + { + score |= CALLER_CALLEE; + } + + // The register is considered unassigned if it has no assignedInterval, OR + // if its next reference is beyond the range of this interval. + if (physRegRecord->assignedInterval == nullptr || + physRegRecord->assignedInterval->getNextRefLocation() > lastLocation) + { + score |= UNASSIGNED; + } + + bool foundBetterCandidate = false; + + if (score > bestScore) + { + foundBetterCandidate = true; + } + else if (score == bestScore) + { + // Prefer a register that covers the range. + if (bestLocation <= lastLocation) + { + if (nextPhysRefLocation > bestLocation) + { + foundBetterCandidate = true; + } + } + // If both cover the range, prefer a register that is killed sooner (leaving the longer range register + // available). If both cover the range and also getting killed at the same location, prefer the one which + // is same as previous assignment. 
+ else if (nextPhysRefLocation > lastLocation)
+ {
+ if (nextPhysRefLocation < bestLocation)
+ {
+ foundBetterCandidate = true;
+ }
+ else if (nextPhysRefLocation == bestLocation && prevReg == regNum)
+ {
+ foundBetterCandidate = true;
+ }
+ }
+ }
+ }
+
+#ifdef DEBUG
+ if (doReverseSelect() && bestScore != 0)
+ {
+ foundBetterCandidate = !foundBetterCandidate;
+ }
+#endif // DEBUG
+
+ if (foundBetterCandidate)
+ {
+ bestLocation = nextPhysRefLocation;
+ availablePhysRegInterval = physRegRecord;
+ intervalToUnassign = physRegRecord->assignedInterval;
+ bestScore = score;
+ }
+
+ // There is no way we can get a better score, so break out.
+ if (!reverseSelect && score == bestPossibleScore && bestLocation == rangeEndLocation + 1)
+ {
+ break;
+ }
+ }
+
+ if (availablePhysRegInterval != nullptr)
+ {
+ if (intervalToUnassign != nullptr)
+ {
+ unassignPhysReg(availablePhysRegInterval, intervalToUnassign->recentRefPosition);
+ if (bestScore & VALUE_AVAILABLE)
+ {
+ assert(intervalToUnassign->isConstant);
+ refPosition->treeNode->SetReuseRegVal();
+ refPosition->treeNode->SetInReg();
+ }
+ // If we considered this "unassigned" because this interval's lifetime ends before
+ // the next ref, remember it.
+ else if ((bestScore & UNASSIGNED) != 0 && intervalToUnassign != nullptr)
+ {
+ availablePhysRegInterval->previousInterval = intervalToUnassign;
+ }
+ }
+ else
+ {
+ assert((bestScore & VALUE_AVAILABLE) == 0);
+ }
+ assignPhysReg(availablePhysRegInterval, currentInterval);
+ foundReg = availablePhysRegInterval->regNum;
+ regMaskTP foundRegMask = genRegMask(foundReg);
+ refPosition->registerAssignment = foundRegMask;
+ if (relatedInterval != nullptr)
+ {
+ relatedInterval->updateRegisterPreferences(foundRegMask);
+ }
+ }
+
+ return foundReg;
+}
+
+//------------------------------------------------------------------------
+// allocateBusyReg: Find a busy register that satisfies the requirements for refPosition,
+// and that can be spilled.
+//
+// Arguments:
+// current The interval for the current allocation
+// refPosition The RefPosition of the current Interval for which a register is being allocated
+// allocateIfProfitable If true, a reg may not be allocated if all other ref positions currently
+// occupying registers are more important than the 'refPosition'.
+//
+// Return Value:
+// The regNumber allocated to the RefPosition. Returns REG_NA if no free register is found.
+//
+// Note: Currently this routine uses weight and farthest distance of next reference
+// to select a ref position for spilling.
+// a) if allocateIfProfitable = false
+// The ref position chosen for spilling will be the one with the lowest weight
+// of all, and if there is more than one ref position with the
+// same lowest weight, the one with the farthest
+// distance to its next reference is chosen among them.
+//
+// b) if allocateIfProfitable = true
+// The ref position chosen for spilling will not only have the lowest weight
+// of all, but will also have a weight lower than 'refPosition'. If there is
+// no such ref position, no reg will be allocated.
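+// For example, when allocateIfProfitable = true (a reg-optional reference), if every
+// interval currently occupying a candidate register is at least as heavily weighted as
+// 'refPosition', no register is allocated and the reference stays in memory, since
+// spilling one of those intervals would cost more than it saves.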
+regNumber LinearScan::allocateBusyReg(Interval* current, RefPosition* refPosition, bool allocateIfProfitable)
+{
+ regNumber foundReg = REG_NA;
+
+ RegisterType regType = getRegisterType(current, refPosition);
+ regMaskTP candidates = refPosition->registerAssignment;
+ regMaskTP preferences = (current->registerPreferences & candidates);
+ if (preferences == RBM_NONE)
+ {
+ preferences = candidates;
+ }
+ if (candidates == RBM_NONE)
+ {
+ // This assumes only integer and floating point register types
+ // if we target a processor with additional register types,
+ // this would have to change
+ candidates = allRegs(regType);
+ }
+
+#ifdef DEBUG
+ candidates = stressLimitRegs(refPosition, candidates);
+#endif // DEBUG
+
+ // TODO-CQ: Determine whether/how to take preferences into account in addition to
+ // preferring the one with the farthest ref position when considering
+ // a candidate to spill
+ RegRecord* farthestRefPhysRegRecord = nullptr;
+ LsraLocation farthestLocation = MinLocation;
+ LsraLocation refLocation = refPosition->nodeLocation;
+ unsigned farthestRefPosWeight;
+ if (allocateIfProfitable)
+ {
+ // If allocating a reg is optional, we will consider those ref positions
+ // whose weight is less than 'refPosition' for spilling.
+ farthestRefPosWeight = getWeight(refPosition);
+ }
+ else
+ {
+ // If allocating a reg is a must, we start off with max weight so
+ // that the first spill candidate will be selected based on
+ // farthest distance alone. Since we start off with farthestLocation
+ // initialized to MinLocation, the first available ref position
+ // will be selected as spill candidate and its weight as the
+ // farthestRefPosWeight.
+ farthestRefPosWeight = BB_MAX_WEIGHT;
+ }
+
+ for (regNumber regNum : Registers(regType))
+ {
+ regMaskTP candidateBit = genRegMask(regNum);
+ if (!(candidates & candidateBit))
+ {
+ continue;
+ }
+ RegRecord* physRegRecord = getRegisterRecord(regNum);
+
+ if (physRegRecord->isBusyUntilNextKill)
+ {
+ continue;
+ }
+ Interval* assignedInterval = physRegRecord->assignedInterval;
+
+ // If there is a fixed reference at the same location (and it's not due to this reference),
+ // don't use it.
+
+ if (physRegRecord->conflictingFixedRegReference(refPosition))
+ {
+ assert(candidates != candidateBit);
+ continue;
+ }
+
+ LsraLocation physRegNextLocation = MaxLocation;
+ if (refPosition->isFixedRefOfRegMask(candidateBit))
+ {
+ // Either there is a fixed reference due to this node, or one associated with a
+ // fixed use fed by a def at this node.
+ // In either case, we must use this register as it's the only candidate
+ // TODO-CQ: At the time we allocate a register to a fixed-reg def, if it's not going
+ // to remain live until the use, we should set the candidates to allRegs(regType)
+ // to avoid a spill - codegen can then insert the copy.
+ assert(candidates == candidateBit);
+ physRegNextLocation = MaxLocation;
+ farthestRefPosWeight = BB_MAX_WEIGHT;
+ }
+ else
+ {
+ physRegNextLocation = physRegRecord->getNextRefLocation();
+
+ // If refPosition requires a fixed register, we should reject all others.
+ // Otherwise, we will still evaluate all physRegs even though their next location is
+ // not better than the farthestLocation found so far.
+ //
+ // TODO: this method should be using an approach similar to tryAllocateFreeReg()
+ // where it uses a regOrder array to avoid iterating over any but the single
+ // fixed candidate.
+ if (refPosition->isFixedRegRef && physRegNextLocation < farthestLocation)
+ {
+ continue;
+ }
+ }
+
+ // If this register is not assigned to an interval, either
+ // - it has a FixedReg reference at the current location that is not this reference, OR
+ // - this is the special case of a fixed loReg, where this interval has a use at the same location
+ // In either case, we cannot use it
+
+ if (assignedInterval == nullptr)
+ {
+ RefPosition* nextPhysRegPosition = physRegRecord->getNextRefPosition();
+
+#ifndef _TARGET_ARM64_
+ // TODO-Cleanup: Revisit this after Issue #3524 is complete
+ // On ARM64 the nodeLocation is not always == refLocation. Disabling this assert for now.
+ assert(nextPhysRegPosition->nodeLocation == refLocation && candidateBit != candidates);
+#endif
+ continue;
+ }
+
+ RefPosition* recentAssignedRef = assignedInterval->recentRefPosition;
+
+ if (!assignedInterval->isActive)
+ {
+ // The assigned interval has a reference at this location - otherwise, we would have found
+ // this in tryAllocateFreeReg().
+ // Note that we may or may not have actually handled the reference yet, so it could either
+ // be recentAssignedRef, or the next reference.
+ assert(recentAssignedRef != nullptr);
+ if (recentAssignedRef->nodeLocation != refLocation)
+ {
+ if (recentAssignedRef->nodeLocation + 1 == refLocation)
+ {
+ assert(recentAssignedRef->delayRegFree);
+ }
+ else
+ {
+ RefPosition* nextAssignedRef = recentAssignedRef->nextRefPosition;
+ assert(nextAssignedRef != nullptr);
+ assert(nextAssignedRef->nodeLocation == refLocation ||
+ (nextAssignedRef->nodeLocation + 1 == refLocation && nextAssignedRef->delayRegFree));
+ }
+ }
+ continue;
+ }
+
+ // If we have a recentAssignedRef, check that it is going to be OK to spill it.
+ //
+ // TODO-Review: Under what conditions could recentAssignedRef be null?
+ unsigned recentAssignedRefWeight = BB_ZERO_WEIGHT;
+ if (recentAssignedRef != nullptr)
+ {
+ if (recentAssignedRef->nodeLocation == refLocation)
+ {
+ // We can't spill a register that's being used at the current location.
+ continue;
+ }
+
+ // If the current position has the candidate register marked to be delayed,
+ // check if the previous location is using this register; if that's the case, we have to skip
+ // it, since we can't spill this register.
+ if (recentAssignedRef->delayRegFree && (refLocation == recentAssignedRef->nodeLocation + 1))
+ {
+ continue;
+ }
+
+ // We don't prefer to spill a register if the weight of recentAssignedRef is greater
+ // than the weight of the spill candidate found so far. We would consider spilling a
+ // ref position of greater weight only if the refPosition being allocated must have a reg.
+ recentAssignedRefWeight = getWeight(recentAssignedRef);
+ if (recentAssignedRefWeight > farthestRefPosWeight)
+ {
+ continue;
+ }
+ }
+
+ LsraLocation nextLocation = assignedInterval->getNextRefLocation();
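+ // Illustrative aside (not part of the allocator): the selection below is a two-key
+ // ordering over spill candidates. A hypothetical comparator capturing it:
+ //
+ //     // Lower weight spills first; on equal weights, the farther next use wins.
+ //     bool isBetterSpillCandidate(unsigned weightA, LsraLocation nextUseA,
+ //                                 unsigned weightB, LsraLocation nextUseB)
+ //     {
+ //         return (weightA < weightB) || ((weightA == weightB) && (nextUseA > nextUseB));
+ //     }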
+ // We should never spill a register that's occupied by an Interval with its next use at the current location.
+ // Normally this won't occur (unless we actually had more uses in a single node than there are registers),
+ // because we'll always find something with a later nextLocation, but it can happen in stress when
+ // we have LSRA_SELECT_NEAREST.
+ if ((nextLocation == refLocation) && !refPosition->isFixedRegRef)
+ {
+ continue;
+ }
+
+ if (nextLocation > physRegNextLocation)
+ {
+ nextLocation = physRegNextLocation;
+ }
+
+ bool isBetterLocation;
+
+#ifdef DEBUG
+ if (doSelectNearest() && farthestRefPhysRegRecord != nullptr)
+ {
+ isBetterLocation = (nextLocation <= farthestLocation);
+ }
+ else
+#endif
+ // This if-stmt is associated with the above else
+ if (recentAssignedRefWeight < farthestRefPosWeight)
+ {
+ isBetterLocation = true;
+ }
+ else
+ {
+ // This would mean the weight of the spill ref position we found so far is equal
+ // to the weight of the ref position that is being evaluated. In this case
+ // we prefer to spill the ref position whose distance to its next reference is
+ // the farthest.
+ assert(recentAssignedRefWeight == farthestRefPosWeight);
+
+ // If allocateIfProfitable=true, the first spill candidate selected
+ // will be based on weight alone. After we have found a spill
+ // candidate whose weight is less than the 'refPosition', we will
+ // consider farthest distance when there is a tie in weights.
+ // This is to ensure that we don't spill a ref position whose
+ // weight is equal to the weight of 'refPosition'.
+ if (allocateIfProfitable && farthestRefPhysRegRecord == nullptr)
+ {
+ isBetterLocation = false;
+ }
+ else
+ {
+ if (nextLocation > farthestLocation)
+ {
+ isBetterLocation = true;
+ }
+ else if (nextLocation == farthestLocation)
+ {
+ // Both weight and distance are equal.
+ // Prefer the ref position that is marked both reload and
+ // allocate-if-profitable. These ref positions don't need
+ // to be spilled, as they are already in memory and
+ // codegen considers them as contained memory operands.
+ isBetterLocation = (recentAssignedRef != nullptr) && recentAssignedRef->reload &&
+ recentAssignedRef->AllocateIfProfitable();
+ }
+ else
+ {
+ isBetterLocation = false;
+ }
+ }
+ }
+
+ if (isBetterLocation)
+ {
+ farthestLocation = nextLocation;
+ farthestRefPhysRegRecord = physRegRecord;
+ farthestRefPosWeight = recentAssignedRefWeight;
+ }
+ }
+
+#ifdef DEBUG
+ if (allocateIfProfitable)
+ {
+ // There may not be a spill candidate; if one is found,
+ // its weight must be less than the weight of 'refPosition'.
+ assert((farthestRefPhysRegRecord == nullptr) || (farthestRefPosWeight < getWeight(refPosition)));
+ }
+ else
+ {
+ // Must have found a spill candidate.
+ assert((farthestRefPhysRegRecord != nullptr) && (farthestLocation > refLocation || refPosition->isFixedRegRef));
+ }
+#endif
+
+ if (farthestRefPhysRegRecord != nullptr)
+ {
+ foundReg = farthestRefPhysRegRecord->regNum;
+ unassignPhysReg(farthestRefPhysRegRecord, farthestRefPhysRegRecord->assignedInterval->recentRefPosition);
+ assignPhysReg(farthestRefPhysRegRecord, current);
+ refPosition->registerAssignment = genRegMask(foundReg);
+ }
+ else
+ {
+ foundReg = REG_NA;
+ refPosition->registerAssignment = RBM_NONE;
+ }
+
+ return foundReg;
+}
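+// Illustrative aside (not part of the allocator): a compact model of allocateBusyReg's
+// two policies, using hypothetical names (SpillCandidate, pickSpillCandidate) and assuming
+// <vector> is available:
+//
+//     struct SpillCandidate
+//     {
+//         unsigned     weight;  // weight of the occupying RefPosition
+//         LsraLocation nextUse; // distance to its next reference
+//     };
+//
+//     SpillCandidate* pickSpillCandidate(std::vector<SpillCandidate>& all,
+//                                        unsigned requestWeight, bool profitableOnly)
+//     {
+//         SpillCandidate* best = nullptr;
+//         for (SpillCandidate& c : all)
+//         {
+//             if (profitableOnly && (c.weight >= requestWeight))
+//             {
+//                 continue; // never displace an occupant at least as important
+//             }
+//             if ((best == nullptr) || (c.weight < best->weight) ||
+//                 ((c.weight == best->weight) && (c.nextUse > best->nextUse)))
+//             {
+//                 best = &c;
+//             }
+//         }
+//         return best; // may be nullptr when only profitable spills are allowed
+//     }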
+// Grab a register to use to copy and then immediately use.
+// This is called only for localVar intervals that already have a register
+// assignment that is not compatible with the current RefPosition.
+// This is not like regular assignment, because we don't want to change
+// any preferences or existing register assignments.
+// Prefer a free register that's got the earliest next use.
+// Otherwise, spill something with the farthest next use.
+//
+regNumber LinearScan::assignCopyReg(RefPosition* refPosition)
+{
+ Interval* currentInterval = refPosition->getInterval();
+ assert(currentInterval != nullptr);
+ assert(currentInterval->isActive);
+
+ bool foundFreeReg = false;
+ RegRecord* bestPhysReg = nullptr;
+ LsraLocation bestLocation = MinLocation;
+ regMaskTP candidates = refPosition->registerAssignment;
+
+ // Save the relatedInterval, if any, so that it doesn't get modified during allocation.
+ Interval* savedRelatedInterval = currentInterval->relatedInterval;
+ currentInterval->relatedInterval = nullptr;
+
+ // We don't really want to change the default assignment,
+ // so 1) pretend this isn't active, and 2) remember the old reg.
+ regNumber oldPhysReg = currentInterval->physReg;
+ RegRecord* oldRegRecord = currentInterval->assignedReg;
+ assert(oldRegRecord->regNum == oldPhysReg);
+ currentInterval->isActive = false;
+
+ regNumber allocatedReg = tryAllocateFreeReg(currentInterval, refPosition);
+ if (allocatedReg == REG_NA)
+ {
+ allocatedReg = allocateBusyReg(currentInterval, refPosition, false);
+ }
+
+ // Now restore the old info.
+ currentInterval->relatedInterval = savedRelatedInterval;
+ currentInterval->physReg = oldPhysReg;
+ currentInterval->assignedReg = oldRegRecord;
+ currentInterval->isActive = true;
+
+ refPosition->copyReg = true;
+ return allocatedReg;
+}
+
+// If the given RegRecord is already assigned to a different interval, unassign it;
+// then set its assignedInterval to 'interval'.
+//
+void LinearScan::checkAndAssignInterval(RegRecord* regRec, Interval* interval)
+{
+ if (regRec->assignedInterval != nullptr && regRec->assignedInterval != interval)
+ {
+ // This is allocated to another interval. Either it is inactive, or it was allocated as a
+ // copyReg and is therefore not the "assignedReg" of the other interval. In the latter case,
+ // we simply unassign it - in the former case we need to set the physReg on the interval to
+ // REG_NA to indicate that it is no longer in that register.
+ // The lack of checking for this case resulted in an assert in the retail version of System.dll,
+ // in method SerialStream.GetDcbFlag.
+ // Note that we can't check for the copyReg case, because we may have seen a more recent
+ // RefPosition for the Interval that was NOT a copyReg.
+ if (regRec->assignedInterval->assignedReg == regRec)
+ {
+ assert(regRec->assignedInterval->isActive == false);
+ regRec->assignedInterval->physReg = REG_NA;
+ }
+ unassignPhysReg(regRec->regNum);
+ }
+
+ regRec->assignedInterval = interval;
+}
+
+// Assign the given physical register record to the given interval.
+void LinearScan::assignPhysReg(RegRecord* regRec, Interval* interval)
+{
+ regMaskTP assignedRegMask = genRegMask(regRec->regNum);
+ compiler->codeGen->regSet.rsSetRegsModified(assignedRegMask DEBUGARG(dumpTerse));
+
+ checkAndAssignInterval(regRec, interval);
+ interval->assignedReg = regRec;
+
+#ifdef _TARGET_ARM_
+ if ((interval->registerType == TYP_DOUBLE) && isFloatRegType(regRec->registerType))
+ {
+ regNumber nextRegNum = REG_NEXT(regRec->regNum);
+ RegRecord* nextRegRec = getRegisterRecord(nextRegNum);
+
+ checkAndAssignInterval(nextRegRec, interval);
+ }
+#endif // _TARGET_ARM_
+
+ interval->physReg = regRec->regNum;
+ interval->isActive = true;
+ if (interval->isLocalVar)
+ {
+ // Prefer this register for future references.
+ interval->updateRegisterPreferences(assignedRegMask);
+ }
+}
+
+//------------------------------------------------------------------------
+// spillInterval: Spill the given Interval between "fromRefPosition" and "toRefPosition"
+//
+// Arguments:
+// fromRefPosition - The RefPosition at which the Interval is to be spilled
+// toRefPosition - The RefPosition at which it must be reloaded
+//
+// Return Value:
+// None.
+//
+// Assumptions:
+// fromRefPosition and toRefPosition must not be null
+//
+void LinearScan::spillInterval(Interval* interval, RefPosition* fromRefPosition, RefPosition* toRefPosition)
+{
+ assert(fromRefPosition != nullptr && toRefPosition != nullptr);
+ assert(fromRefPosition->getInterval() == interval && toRefPosition->getInterval() == interval);
+ assert(fromRefPosition->nextRefPosition == toRefPosition);
+
+ if (!fromRefPosition->lastUse)
+ {
+ // If no register was allocated, a reg-optional ref position that is not an actual
+ // lclVar def/use can simply have its register assignment cleared; lclVar def/use
+ // ref positions, even if reg-optional, must be marked as spillAfter.
+ if (!fromRefPosition->RequiresRegister() && !(interval->isLocalVar && fromRefPosition->IsActualRef()))
+ {
+ fromRefPosition->registerAssignment = RBM_NONE;
+ }
+ else
+ {
+ fromRefPosition->spillAfter = true;
+ }
+ }
+
+#ifdef DEBUG
+ if (VERBOSE)
+ {
+ dumpLsraAllocationEvent(LSRA_EVENT_SPILL, interval);
+ }
+#endif // DEBUG
+
+ interval->isActive = false;
+ interval->isSpilled = true;
+
+ // If fromRefPosition occurs before the beginning of this block, mark this as living on the stack
+ // on entry to this block.
+ if (fromRefPosition->nodeLocation <= curBBStartLocation)
+ {
+ // This must be a lclVar interval.
+ assert(interval->isLocalVar);
+ setInVarRegForBB(curBBNum, interval->varNum, REG_STK);
+ }
+}
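+// Illustrative aside (not part of the allocator): on ARM, a TYP_DOUBLE interval occupies
+// an even/odd pair of float registers, which is why assignPhysReg above also claims
+// REG_NEXT of the assigned register. A minimal sketch of that pairing, with the
+// hypothetical helper claim() standing in for the real RegRecord bookkeeping:
+//
+//     void claimRegPair(regNumber reg, bool isDoubleInterval)
+//     {
+//         claim(reg);
+//         if (isDoubleInterval)
+//         {
+//             claim(REG_NEXT(reg)); // the odd half of the even/odd float pair
+//         }
+//     }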
+//------------------------------------------------------------------------
+// unassignPhysRegNoSpill: Unassign the given physical register record from
+// an active interval, without spilling.
+//
+// Arguments:
+// regRec - the RegRecord to be unassigned
+//
+// Return Value:
+// None.
+//
+// Assumptions:
+// The assignedInterval must not be null, and must be active.
+//
+// Notes:
+// This method is used to unassign a register when an interval needs to be moved to a
+// different register, but not (yet) spilled.
+
+void LinearScan::unassignPhysRegNoSpill(RegRecord* regRec)
+{
+ Interval* assignedInterval = regRec->assignedInterval;
+ assert(assignedInterval != nullptr && assignedInterval->isActive);
+ assignedInterval->isActive = false;
+ unassignPhysReg(regRec, nullptr);
+ assignedInterval->isActive = true;
+}
+
+//------------------------------------------------------------------------
+// checkAndClearInterval: Clear the assignedInterval for the given
+// physical register record
+//
+// Arguments:
+// regRec - the physical RegRecord to be unassigned
+// spillRefPosition - The RefPosition at which the assignedInterval is to be spilled
+// or nullptr if we aren't spilling
+//
+// Return Value:
+// None.
+//
+// Assumptions:
+// see unassignPhysReg
+//
+void LinearScan::checkAndClearInterval(RegRecord* regRec, RefPosition* spillRefPosition)
+{
+ Interval* assignedInterval = regRec->assignedInterval;
+ assert(assignedInterval != nullptr);
+ regNumber thisRegNum = regRec->regNum;
+
+ if (spillRefPosition == nullptr)
+ {
+ // Note that we can't assert for the copyReg case.
+ //
+ if (assignedInterval->physReg == thisRegNum)
+ {
+ assert(assignedInterval->isActive == false);
+ }
+ }
+ else
+ {
+ assert(spillRefPosition->getInterval() == assignedInterval);
+ }
+
+ regRec->assignedInterval = nullptr;
+}
+
+//------------------------------------------------------------------------
+// unassignPhysReg: Unassign the given physical register record, and spill the
+// assignedInterval at the given spillRefPosition, if any.
+//
+// Arguments:
+// regRec - the RegRecord to be unassigned
+// spillRefPosition - The RefPosition at which the assignedInterval is to be spilled
+//
+// Return Value:
+// None.
+//
+// Assumptions:
+// The assignedInterval must not be null.
+// If spillRefPosition is null, the assignedInterval must be inactive, or not currently
+// assigned to this register (e.g. this is a copyReg for that Interval).
+// Otherwise, spillRefPosition must be associated with the assignedInterval.
+//
+void LinearScan::unassignPhysReg(RegRecord* regRec, RefPosition* spillRefPosition)
+{
+ Interval* assignedInterval = regRec->assignedInterval;
+ assert(assignedInterval != nullptr);
+ checkAndClearInterval(regRec, spillRefPosition);
+ regNumber thisRegNum = regRec->regNum;
+
+#ifdef _TARGET_ARM_
+ if ((assignedInterval->registerType == TYP_DOUBLE) && isFloatRegType(regRec->registerType))
+ {
+ regNumber nextRegNum = REG_NEXT(regRec->regNum);
+ RegRecord* nextRegRec = getRegisterRecord(nextRegNum);
+ checkAndClearInterval(nextRegRec, spillRefPosition);
+ }
+#endif // _TARGET_ARM_
+
+#ifdef DEBUG
+ if (VERBOSE && !dumpTerse)
+ {
+ printf("unassigning %s: ", getRegName(regRec->regNum));
+ assignedInterval->dump();
+ printf("\n");
+ }
+#endif // DEBUG
+
+ RefPosition* nextRefPosition = nullptr;
+ if (spillRefPosition != nullptr)
+ {
+ nextRefPosition = spillRefPosition->nextRefPosition;
+ }
+
+ if (assignedInterval->physReg != REG_NA && assignedInterval->physReg != thisRegNum)
+ {
+ // This must have been a temporary copy reg, but we can't assert that because there
+ // may have been intervening RefPositions that were not copyRegs.
+ regRec->assignedInterval = nullptr; + return; + } + + regNumber victimAssignedReg = assignedInterval->physReg; + assignedInterval->physReg = REG_NA; + + bool spill = assignedInterval->isActive && nextRefPosition != nullptr; + if (spill) + { + // If this is an active interval, it must have a recentRefPosition, + // otherwise it would not be active + assert(spillRefPosition != nullptr); + +#if 0 + // TODO-CQ: Enable this and insert an explicit GT_COPY (otherwise there's no way to communicate + // to codegen that we want the copyReg to be the new home location). + // If the last reference was a copyReg, and we're spilling the register + // it was copied from, then make the copyReg the new primary location + // if possible + if (spillRefPosition->copyReg) + { + regNumber copyFromRegNum = victimAssignedReg; + regNumber copyRegNum = genRegNumFromMask(spillRefPosition->registerAssignment); + if (copyFromRegNum == thisRegNum && + getRegisterRecord(copyRegNum)->assignedInterval == assignedInterval) + { + assert(copyRegNum != thisRegNum); + assignedInterval->physReg = copyRegNum; + assignedInterval->assignedReg = this->getRegisterRecord(copyRegNum); + return; + } + } +#endif // 0 +#ifdef DEBUG + // With JitStressRegs == 0x80 (LSRA_EXTEND_LIFETIMES), we may have a RefPosition + // that is not marked lastUse even though the treeNode is a lastUse. In that case + // we must not mark it for spill because the register will have been immediately freed + // after use. While we could conceivably add special handling for this case in codegen, + // it would be messy and undesirably cause the "bleeding" of LSRA stress modes outside + // of LSRA. + if (extendLifetimes() && assignedInterval->isLocalVar && RefTypeIsUse(spillRefPosition->refType) && + spillRefPosition->treeNode != nullptr && (spillRefPosition->treeNode->gtFlags & GTF_VAR_DEATH) != 0) + { + dumpLsraAllocationEvent(LSRA_EVENT_SPILL_EXTENDED_LIFETIME, assignedInterval); + assignedInterval->isActive = false; + spill = false; + // If the spillRefPosition occurs before the beginning of this block, it will have + // been marked as living in this register on entry to this block, but we now need + // to mark this as living on the stack. + if (spillRefPosition->nodeLocation <= curBBStartLocation) + { + setInVarRegForBB(curBBNum, assignedInterval->varNum, REG_STK); + if (spillRefPosition->nextRefPosition != nullptr) + { + assignedInterval->isSpilled = true; + } + } + else + { + // Otherwise, we need to mark spillRefPosition as lastUse, or the interval + // will remain active beyond its allocated range during the resolution phase. + spillRefPosition->lastUse = true; + } + } + else +#endif // DEBUG + { + spillInterval(assignedInterval, spillRefPosition, nextRefPosition); + } + } + // Maintain the association with the interval, if it has more references. + // Or, if we "remembered" an interval assigned to this register, restore it. 
+ if (nextRefPosition != nullptr)
+ {
+ assignedInterval->assignedReg = regRec;
+ }
+ else if (regRec->previousInterval != nullptr && regRec->previousInterval->assignedReg == regRec &&
+ regRec->previousInterval->getNextRefPosition() != nullptr)
+ {
+ regRec->assignedInterval = regRec->previousInterval;
+ regRec->previousInterval = nullptr;
+#ifdef DEBUG
+ if (spill)
+ {
+ dumpLsraAllocationEvent(LSRA_EVENT_RESTORE_PREVIOUS_INTERVAL_AFTER_SPILL, regRec->assignedInterval,
+ thisRegNum);
+ }
+ else
+ {
+ dumpLsraAllocationEvent(LSRA_EVENT_RESTORE_PREVIOUS_INTERVAL, regRec->assignedInterval, thisRegNum);
+ }
+#endif // DEBUG
+ }
+ else
+ {
+ regRec->assignedInterval = nullptr;
+ regRec->previousInterval = nullptr;
+ }
+}
+
+//------------------------------------------------------------------------
+// spillGCRefs: Spill any GC-type intervals that are currently in registers.
+//
+// Arguments:
+// killRefPosition - The RefPosition for the kill
+//
+// Return Value:
+// None.
+//
+void LinearScan::spillGCRefs(RefPosition* killRefPosition)
+{
+ // For each physical register that can hold a GC type,
+ // if it is occupied by an interval of a GC type, spill that interval.
+ regMaskTP candidateRegs = killRefPosition->registerAssignment;
+ while (candidateRegs != RBM_NONE)
+ {
+ regMaskTP nextRegBit = genFindLowestBit(candidateRegs);
+ candidateRegs &= ~nextRegBit;
+ regNumber nextReg = genRegNumFromMask(nextRegBit);
+ RegRecord* regRecord = getRegisterRecord(nextReg);
+ Interval* assignedInterval = regRecord->assignedInterval;
+ if (assignedInterval == nullptr || (assignedInterval->isActive == false) ||
+ !varTypeIsGC(assignedInterval->registerType))
+ {
+ continue;
+ }
+ unassignPhysReg(regRecord, assignedInterval->recentRefPosition);
+ }
+ INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_DONE_KILL_GC_REFS, nullptr, REG_NA, nullptr));
+}
+
+//------------------------------------------------------------------------
+// processBlockEndAllocation: Update var locations after 'currentBlock' has been allocated
+//
+// Arguments:
+// currentBlock - the BasicBlock we have just finished allocating registers for
+//
+// Return Value:
+// None
+//
+// Notes:
+// Calls processBlockEndLocations() to set the outVarToRegMap, then gets the next block,
+// and sets the inVarToRegMap appropriately.
+
+void LinearScan::processBlockEndAllocation(BasicBlock* currentBlock)
+{
+ assert(currentBlock != nullptr);
+ processBlockEndLocations(currentBlock);
+ markBlockVisited(currentBlock);
+
+ // Get the next block to allocate.
+ // When the last block in the method has successors, there will be a final "RefTypeBB" to
+ // ensure that we get the varToRegMap set appropriately, but in that case we don't need
+ // to worry about "nextBlock".
+ BasicBlock* nextBlock = getNextBlock();
+ if (nextBlock != nullptr)
+ {
+ processBlockStartLocations(nextBlock, true);
+ }
+}
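+// Illustrative aside (not part of the allocator): at a block boundary, the allocator seeds
+// the successor's inVarToRegMap from the selected predecessor's outVarToRegMap, as
+// processBlockStartLocations (below) does in its allocation pass. A simplified model,
+// with hypothetical names:
+//
+//     // varToRegMap[i] holds the register (or REG_STK) of tracked variable i.
+//     void seedBlockEntryLocations(const regNumber* predOut, regNumber* succIn, unsigned trackedCount)
+//     {
+//         for (unsigned i = 0; i < trackedCount; i++)
+//         {
+//             succIn[i] = predOut[i]; // start the block where the predecessor left off
+//         }
+//     }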
+//------------------------------------------------------------------------
+// rotateBlockStartLocation: When in the LSRA_BLOCK_BOUNDARY_ROTATE stress mode, attempt to
+// "rotate" the register assignment for a localVar to the next higher
+// register that is available.
+//
+// Arguments:
+// interval - the Interval for the variable whose register is getting rotated
+// targetReg - its register assignment from the predecessor block being used for live-in
+// availableRegs - registers available for use
+//
+// Return Value:
+// The new register to use.
+
+#ifdef DEBUG
+regNumber LinearScan::rotateBlockStartLocation(Interval* interval, regNumber targetReg, regMaskTP availableRegs)
+{
+ if (targetReg != REG_STK && getLsraBlockBoundaryLocations() == LSRA_BLOCK_BOUNDARY_ROTATE)
+ {
+ // If we're rotating the register locations at block boundaries, try to use
+ // the next higher register number of the appropriate register type.
+ regMaskTP candidateRegs = allRegs(interval->registerType) & availableRegs;
+ regNumber firstReg = REG_NA;
+ regNumber newReg = REG_NA;
+ while (candidateRegs != RBM_NONE)
+ {
+ regMaskTP nextRegBit = genFindLowestBit(candidateRegs);
+ candidateRegs &= ~nextRegBit;
+ regNumber nextReg = genRegNumFromMask(nextRegBit);
+ if (nextReg > targetReg)
+ {
+ newReg = nextReg;
+ break;
+ }
+ else if (firstReg == REG_NA)
+ {
+ firstReg = nextReg;
+ }
+ }
+ if (newReg == REG_NA)
+ {
+ assert(firstReg != REG_NA);
+ newReg = firstReg;
+ }
+ targetReg = newReg;
+ }
+ return targetReg;
+}
+#endif // DEBUG
+
+//------------------------------------------------------------------------
+// processBlockStartLocations: Update var locations on entry to 'currentBlock'
+//
+// Arguments:
+// currentBlock - the BasicBlock we are about to allocate registers for (or write back)
+// allocationPass - true if we are currently allocating registers (versus writing them back)
+//
+// Return Value:
+// None
+//
+// Notes:
+// During the allocation pass, we use the outVarToRegMap of the selected predecessor to
+// determine the lclVar locations for the inVarToRegMap.
+// During the resolution (write-back) pass, we only modify the inVarToRegMap in cases where
+// a lclVar was spilled after the block had been completed.
+void LinearScan::processBlockStartLocations(BasicBlock* currentBlock, bool allocationPass)
+{
+ unsigned predBBNum = blockInfo[currentBlock->bbNum].predBBNum;
+ VarToRegMap predVarToRegMap = getOutVarToRegMap(predBBNum);
+ VarToRegMap inVarToRegMap = getInVarToRegMap(currentBlock->bbNum);
+ bool hasCriticalInEdge = blockInfo[currentBlock->bbNum].hasCriticalInEdge;
+
+ VARSET_TP VARSET_INIT_NOCOPY(liveIn, currentBlock->bbLiveIn);
+#ifdef DEBUG
+ if (getLsraExtendLifeTimes())
+ {
+ VarSetOps::AssignNoCopy(compiler, liveIn, compiler->lvaTrackedVars);
+ }
+ // If we are rotating register assignments at block boundaries, we want to make the
+ // inactive registers available for the rotation.
+ regMaskTP inactiveRegs = RBM_NONE;
+#endif // DEBUG
+ regMaskTP liveRegs = RBM_NONE;
+ VARSET_ITER_INIT(compiler, iter, liveIn, varIndex);
+ while (iter.NextElem(compiler, &varIndex))
+ {
+ unsigned varNum = compiler->lvaTrackedToVarNum[varIndex];
+ if (!compiler->lvaTable[varNum].lvLRACandidate)
+ {
+ continue;
+ }
+ regNumber targetReg;
+ Interval* interval = getIntervalForLocalVar(varNum);
+ RefPosition* nextRefPosition = interval->getNextRefPosition();
+ assert(nextRefPosition != nullptr);
+
+ if (allocationPass)
+ {
+ targetReg = predVarToRegMap[varIndex];
+ INDEBUG(targetReg = rotateBlockStartLocation(interval, targetReg, (~liveRegs | inactiveRegs)));
+ inVarToRegMap[varIndex] = targetReg;
+ }
+ else // !allocationPass (i.e. resolution/write-back pass)
+ {
+ targetReg = inVarToRegMap[varIndex];
+ // There are four cases that we need to consider during the resolution pass:
+ // 1. This variable had a register allocated initially, and it was not spilled in the RefPosition
+ // that feeds this block. In this case, predVarToRegMap[varIndex] will match targetReg.
+ // 2. This variable had not been spilled prior to the end of predBB, but was later spilled, so
+ // predVarToRegMap[varIndex] will be REG_STK, while targetReg is its former allocated value.
+ // In this case, we will normally change it to REG_STK. We will update its "spilled" status when we
+ // encounter it in resolveLocalRef().
+ // 2a. If the next RefPosition is marked as a copyReg, we need to retain the allocated register. This is
+ // because the copyReg RefPosition will not have recorded the "home" register, yet downstream
+ // RefPositions rely on the correct "home" register.
+ // 3. This variable was spilled before we reached the end of predBB. In this case, both targetReg and
+ // predVarToRegMap[varIndex] will be REG_STK, and the next RefPosition will have been marked
+ // as reload during allocation time if necessary (note that by the time we actually reach the next
+ // RefPosition, we may be using a different predecessor, in which it is still in a register).
+ // 4. This variable was spilled during the allocation of this block, so targetReg is REG_STK
+ // (because we set inVarToRegMap at the time we spilled it), but predVarToRegMap[varIndex]
+ // is not REG_STK. We retain the REG_STK value in the inVarToRegMap.
+ if (targetReg != REG_STK)
+ {
+ if (predVarToRegMap[varIndex] != REG_STK)
+ {
+ // Case #1 above.
+ assert(predVarToRegMap[varIndex] == targetReg ||
+ getLsraBlockBoundaryLocations() == LSRA_BLOCK_BOUNDARY_ROTATE);
+ }
+ else if (!nextRefPosition->copyReg)
+ {
+ // Case #2 above.
+ inVarToRegMap[varIndex] = REG_STK;
+ targetReg = REG_STK;
+ }
+ // Else case #2a: retain targetReg.
+ }
+ // Else case #3 or #4: we retain targetReg, and there is nothing further to do or assert.
+ }
+ if (interval->physReg == targetReg)
+ {
+ if (interval->isActive)
+ {
+ assert(targetReg != REG_STK);
+ assert(interval->assignedReg != nullptr && interval->assignedReg->regNum == targetReg &&
+ interval->assignedReg->assignedInterval == interval);
+ liveRegs |= genRegMask(targetReg);
+ continue;
+ }
+ }
+ else if (interval->physReg != REG_NA)
+ {
+ // This can happen if we are using the locations from a basic block other than the
+ // immediately preceding one - where the variable was in a different location.
+ if (targetReg != REG_STK)
+ {
+ // Unassign it from the register (it will get a new register below).
+ if (interval->assignedReg != nullptr && interval->assignedReg->assignedInterval == interval)
+ {
+ interval->isActive = false;
+ unassignPhysReg(getRegisterRecord(interval->physReg), nullptr);
+ }
+ else
+ {
+ // This interval was live in this register the last time we saw a reference to it,
+ // but has since been displaced.
+ interval->physReg = REG_NA;
+ }
+ }
+ else if (allocationPass)
+ {
+ // Keep the register assignment - if another var has it, it will get unassigned.
+ // Otherwise, resolution will fix it up later, and it will be more
+ // likely to match other assignments this way.
+ interval->isActive = true;
+ liveRegs |= genRegMask(interval->physReg);
+ INDEBUG(inactiveRegs |= genRegMask(interval->physReg));
+ inVarToRegMap[varIndex] = interval->physReg;
+ }
+ else
+ {
+ interval->physReg = REG_NA;
+ }
+ }
+ if (targetReg != REG_STK)
+ {
+ RegRecord* targetRegRecord = getRegisterRecord(targetReg);
+ liveRegs |= genRegMask(targetReg);
+ if (!interval->isActive)
+ {
+ interval->isActive = true;
+ interval->physReg = targetReg;
+ interval->assignedReg = targetRegRecord;
+ }
+ Interval* assignedInterval = targetRegRecord->assignedInterval;
+ if (assignedInterval != interval)
+ {
+ // Is there another interval currently assigned to this register? If so, unassign it.
+ if (assignedInterval != nullptr)
+ {
+ if (assignedInterval->assignedReg == targetRegRecord)
+ {
+ // If the interval is active, it will be set to active when we reach its new
+ // register assignment (which we must not yet have done, or it wouldn't still be
+ // assigned to this register).
+ assignedInterval->isActive = false;
+ unassignPhysReg(targetRegRecord, nullptr);
+ if (allocationPass && assignedInterval->isLocalVar &&
+ inVarToRegMap[assignedInterval->getVarIndex(compiler)] == targetReg)
+ {
+ inVarToRegMap[assignedInterval->getVarIndex(compiler)] = REG_STK;
+ }
+ }
+ else
+ {
+ // This interval is no longer assigned to this register.
+ targetRegRecord->assignedInterval = nullptr;
+ }
+ }
+ assignPhysReg(targetRegRecord, interval);
+ }
+ if (interval->recentRefPosition != nullptr && !interval->recentRefPosition->copyReg &&
+ interval->recentRefPosition->registerAssignment != genRegMask(targetReg))
+ {
+ interval->getNextRefPosition()->outOfOrder = true;
+ }
+ }
+ }
+
+ // Unassign any registers that are no longer live.
+ for (regNumber reg = REG_FIRST; reg < ACTUAL_REG_COUNT; reg = REG_NEXT(reg))
+ {
+ if ((liveRegs & genRegMask(reg)) == 0)
+ {
+ RegRecord* physRegRecord = getRegisterRecord(reg);
+ Interval* assignedInterval = physRegRecord->assignedInterval;
+
+ if (assignedInterval != nullptr)
+ {
+ assert(assignedInterval->isLocalVar || assignedInterval->isConstant);
+ if (!assignedInterval->isConstant && assignedInterval->assignedReg == physRegRecord)
+ {
+ assignedInterval->isActive = false;
+ if (assignedInterval->getNextRefPosition() == nullptr)
+ {
+ unassignPhysReg(physRegRecord, nullptr);
+ }
+ inVarToRegMap[assignedInterval->getVarIndex(compiler)] = REG_STK;
+ }
+ else
+ {
+ // This interval may still be active, but was in another register in an
+ // intervening block.
+ physRegRecord->assignedInterval = nullptr;
+ }
+ }
+ }
+ }
+ INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_START_BB, nullptr, REG_NA, currentBlock));
+}
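+// Illustrative aside (not part of the allocator): the register-mask walks used in this file
+// (e.g. in spillGCRefs above and freeRegisters below) all follow the same "peel the lowest
+// set bit" idiom, using the real helpers genFindLowestBit and genRegNumFromMask:
+//
+//     // Visit each register in 'mask', from the lowest bit to the highest.
+//     void forEachReg(regMaskTP mask, void (*visit)(regNumber))
+//     {
+//         while (mask != RBM_NONE)
+//         {
+//             regMaskTP lowBit = genFindLowestBit(mask); // isolate the lowest set bit
+//             mask &= ~lowBit;                           // remove it from the mask
+//             visit(genRegNumFromMask(lowBit));          // map the bit to its regNumber
+//         }
+//     }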
+//------------------------------------------------------------------------
+// processBlockEndLocations: Record the variables occupying registers after completing the current block.
+//
+// Arguments:
+// currentBlock - the block we have just completed.
+//
+// Return Value:
+// None
+//
+// Notes:
+// This must be called both during the allocation and resolution (write-back) phases.
+// This is because we need to have the outVarToRegMap locations in order to set the locations
+// at successor blocks during allocation time, but if lclVars are spilled after a block has been
+// completed, we need to record the REG_STK location for those variables at resolution time.
+
+void LinearScan::processBlockEndLocations(BasicBlock* currentBlock)
+{
+ assert(currentBlock != nullptr && currentBlock->bbNum == curBBNum);
+ VarToRegMap outVarToRegMap = getOutVarToRegMap(curBBNum);
+
+ VARSET_TP VARSET_INIT_NOCOPY(liveOut, currentBlock->bbLiveOut);
+#ifdef DEBUG
+ if (getLsraExtendLifeTimes())
+ {
+ VarSetOps::AssignNoCopy(compiler, liveOut, compiler->lvaTrackedVars);
+ }
+#endif // DEBUG
+ regMaskTP liveRegs = RBM_NONE;
+ VARSET_ITER_INIT(compiler, iter, liveOut, varIndex);
+ while (iter.NextElem(compiler, &varIndex))
+ {
+ unsigned varNum = compiler->lvaTrackedToVarNum[varIndex];
+ Interval* interval = getIntervalForLocalVar(varNum);
+ if (interval->isActive)
+ {
+ assert(interval->physReg != REG_NA && interval->physReg != REG_STK);
+ outVarToRegMap[varIndex] = interval->physReg;
+ }
+ else
+ {
+ outVarToRegMap[varIndex] = REG_STK;
+ }
+ }
+ INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_END_BB));
+}
+
+#ifdef DEBUG
+void LinearScan::dumpRefPositions(const char* str)
+{
+ printf("------------\n");
+ printf("REFPOSITIONS %s: \n", str);
+ printf("------------\n");
+ for (auto& refPos : refPositions)
+ {
+ refPos.dump();
+ }
+}
+#endif // DEBUG
+
+bool LinearScan::registerIsFree(regNumber regNum, RegisterType regType)
+{
+ RegRecord* physRegRecord = getRegisterRecord(regNum);
+
+ bool isFree = physRegRecord->isFree();
+
+#ifdef _TARGET_ARM_
+ if (isFree && regType == TYP_DOUBLE)
+ {
+ isFree = getRegisterRecord(REG_NEXT(regNum))->isFree();
+ }
+#endif // _TARGET_ARM_
+
+ return isFree;
+}
+
+//------------------------------------------------------------------------
+// LinearScan::freeRegister: Make a register available for use
+//
+// Arguments:
+// physRegRecord - the RegRecord for the register to be freed.
+//
+// Return Value:
+// None.
+//
+// Assumptions:
+// None.
+// It may be that the RegRecord has already been freed, e.g. due to a kill,
+// in which case this method has no effect.
+//
+// Notes:
+// If there is currently an Interval assigned to this register, and it has
+// more references (i.e. this is a local last-use, but more uses and/or
+// defs remain), it will remain assigned to the physRegRecord. However, since
+// it is marked inactive, the register will be available, albeit less desirable
+// to allocate.
+void LinearScan::freeRegister(RegRecord* physRegRecord)
+{
+ Interval* assignedInterval = physRegRecord->assignedInterval;
+ // It may have already been freed by a "Kill".
+ if (assignedInterval != nullptr)
+ {
+ assignedInterval->isActive = false;
+ // If this is a constant interval (whose value we may encounter again),
+ // don't unassign it until we need the register.
+ if (!assignedInterval->isConstant)
+ {
+ RefPosition* nextRefPosition = assignedInterval->getNextRefPosition();
+ // Unassign the register only if there are no more RefPositions, or the next
+ // one is a def. Note that the latter condition doesn't actually ensure that
+ // there aren't subsequent uses that could be reached by a def in the assigned
+ // register, but is merely a heuristic to avoid tying up the register (or using
+ // it when it's non-optimal). A better alternative would be to use SSA, so that
+ // we wouldn't unnecessarily link separate live ranges to the same register.
+ if (nextRefPosition == nullptr || RefTypeIsDef(nextRefPosition->refType)) + { + unassignPhysReg(physRegRecord, nullptr); + } + } + } +} + +void LinearScan::freeRegisters(regMaskTP regsToFree) +{ + if (regsToFree == RBM_NONE) + { + return; + } + + INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_FREE_REGS)); + while (regsToFree != RBM_NONE) + { + regMaskTP nextRegBit = genFindLowestBit(regsToFree); + regsToFree &= ~nextRegBit; + regNumber nextReg = genRegNumFromMask(nextRegBit); + freeRegister(getRegisterRecord(nextReg)); + } +} + +// Actual register allocation, accomplished by iterating over all of the previously +// constructed Intervals +// Loosely based on raAssignVars() +// +void LinearScan::allocateRegisters() +{ + JITDUMP("*************** In LinearScan::allocateRegisters()\n"); + DBEXEC(VERBOSE, lsraDumpIntervals("before allocateRegisters")); + + // at start, nothing is active except for register args + for (auto& interval : intervals) + { + Interval* currentInterval = &interval; + currentInterval->recentRefPosition = nullptr; + currentInterval->isActive = false; + if (currentInterval->isLocalVar) + { + LclVarDsc* varDsc = currentInterval->getLocalVar(compiler); + if (varDsc->lvIsRegArg && currentInterval->firstRefPosition != nullptr) + { + currentInterval->isActive = true; + } + } + } + + for (regNumber reg = REG_FIRST; reg < ACTUAL_REG_COUNT; reg = REG_NEXT(reg)) + { + getRegisterRecord(reg)->recentRefPosition = nullptr; + getRegisterRecord(reg)->isActive = false; + } + +#ifdef DEBUG + regNumber lastAllocatedReg = REG_NA; + if (VERBOSE) + { + dumpRefPositions("BEFORE ALLOCATION"); + dumpVarRefPositions("BEFORE ALLOCATION"); + + printf("\n\nAllocating Registers\n" + "--------------------\n"); + if (dumpTerse) + { + dumpRegRecordHeader(); + // Now print an empty indent + printf(indentFormat, ""); + } + } +#endif // DEBUG + + BasicBlock* currentBlock = nullptr; + + LsraLocation prevLocation = MinLocation; + regMaskTP regsToFree = RBM_NONE; + regMaskTP delayRegsToFree = RBM_NONE; + + // This is the most recent RefPosition for which a register was allocated + // - currently only used for DEBUG but maintained in non-debug, for clarity of code + // (and will be optimized away because in non-debug spillAlways() unconditionally returns false) + RefPosition* lastAllocatedRefPosition = nullptr; + + bool handledBlockEnd = false; + + for (auto& refPosition : refPositions) + { + RefPosition* currentRefPosition = &refPosition; + +#ifdef DEBUG + // Set the activeRefPosition to null until we're done with any boundary handling. + activeRefPosition = nullptr; + if (VERBOSE) + { + if (dumpTerse) + { + // We're really dumping the RegRecords "after" the previous RefPosition, but it's more convenient + // to do this here, since there are a number of "continue"s in this loop. 
+ dumpRegRecords(); + } + else + { + printf("\n"); + } + } +#endif // DEBUG + + // This is the previousRefPosition of the current Referent, if any + RefPosition* previousRefPosition = nullptr; + + Interval* currentInterval = nullptr; + Referenceable* currentReferent = nullptr; + bool isInternalRef = false; + RefType refType = currentRefPosition->refType; + + currentReferent = currentRefPosition->referent; + + if (spillAlways() && lastAllocatedRefPosition != nullptr && !lastAllocatedRefPosition->isPhysRegRef && + !lastAllocatedRefPosition->getInterval()->isInternal && + (RefTypeIsDef(lastAllocatedRefPosition->refType) || lastAllocatedRefPosition->getInterval()->isLocalVar)) + { + assert(lastAllocatedRefPosition->registerAssignment != RBM_NONE); + RegRecord* regRecord = lastAllocatedRefPosition->getInterval()->assignedReg; + unassignPhysReg(regRecord, lastAllocatedRefPosition); + // Now set lastAllocatedRefPosition to null, so that we don't try to spill it again + lastAllocatedRefPosition = nullptr; + } + + // We wait to free any registers until we've completed all the + // uses for the current node. + // This avoids reusing registers too soon. + // We free before the last true def (after all the uses & internal + // registers), and then again at the beginning of the next node. + // This is made easier by assigning two LsraLocations per node - one + // for all the uses, internal registers & all but the last def, and + // another for the final def (if any). + + LsraLocation currentLocation = currentRefPosition->nodeLocation; + + if ((regsToFree | delayRegsToFree) != RBM_NONE) + { + bool doFreeRegs = false; + // Free at a new location, or at a basic block boundary + if (currentLocation > prevLocation || refType == RefTypeBB) + { + doFreeRegs = true; + } + + if (doFreeRegs) + { + freeRegisters(regsToFree); + regsToFree = delayRegsToFree; + delayRegsToFree = RBM_NONE; + } + } + prevLocation = currentLocation; + + // get previous refposition, then current refpos is the new previous + if (currentReferent != nullptr) + { + previousRefPosition = currentReferent->recentRefPosition; + currentReferent->recentRefPosition = currentRefPosition; + } + else + { + assert((refType == RefTypeBB) || (refType == RefTypeKillGCRefs)); + } + + // For the purposes of register resolution, we handle the DummyDefs before + // the block boundary - so the RefTypeBB is after all the DummyDefs. + // However, for the purposes of allocation, we want to handle the block + // boundary first, so that we can free any registers occupied by lclVars + // that aren't live in the next block and make them available for the + // DummyDefs. 
+
+ if (!handledBlockEnd && (refType == RefTypeBB || refType == RefTypeDummyDef))
+ {
+ // Free any delayed regs (now in regsToFree) before processing the block boundary.
+ freeRegisters(regsToFree);
+ regsToFree = RBM_NONE;
+ handledBlockEnd = true;
+ curBBStartLocation = currentRefPosition->nodeLocation;
+ if (currentBlock == nullptr)
+ {
+ currentBlock = startBlockSequence();
+ }
+ else
+ {
+ processBlockEndAllocation(currentBlock);
+ currentBlock = moveToNextBlock();
+ }
+#ifdef DEBUG
+ if (VERBOSE && currentBlock != nullptr && !dumpTerse)
+ {
+ currentBlock->dspBlockHeader(compiler);
+ printf("\n");
+ }
+#endif // DEBUG
+ }
+
+#ifdef DEBUG
+ activeRefPosition = currentRefPosition;
+ if (VERBOSE)
+ {
+ if (dumpTerse)
+ {
+ dumpRefPositionShort(currentRefPosition, currentBlock);
+ }
+ else
+ {
+ currentRefPosition->dump();
+ }
+ }
+#endif // DEBUG
+
+ if (refType == RefTypeBB)
+ {
+ handledBlockEnd = false;
+ continue;
+ }
+
+ if (refType == RefTypeKillGCRefs)
+ {
+ spillGCRefs(currentRefPosition);
+ continue;
+ }
+
+ // If this is a FixedReg, disassociate any inactive constant interval from this register.
+ // Otherwise, do nothing.
+ if (refType == RefTypeFixedReg)
+ {
+ RegRecord* regRecord = currentRefPosition->getReg();
+ if (regRecord->assignedInterval != nullptr && !regRecord->assignedInterval->isActive &&
+ regRecord->assignedInterval->isConstant)
+ {
+ regRecord->assignedInterval = nullptr;
+ }
+ INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_FIXED_REG, nullptr, currentRefPosition->assignedReg()));
+ continue;
+ }
+
+ // If this is an exposed use, do nothing - this is merely a placeholder to attempt to
+ // ensure that a register is allocated for the full lifetime. The resolution logic
+ // will take care of moving to the appropriate register if needed.
+
+ if (refType == RefTypeExpUse)
+ {
+ INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_EXP_USE));
+ continue;
+ }
+
+ regNumber assignedRegister = REG_NA;
+
+ if (currentRefPosition->isIntervalRef())
+ {
+ currentInterval = currentRefPosition->getInterval();
+ assignedRegister = currentInterval->physReg;
+#ifdef DEBUG
+ if (VERBOSE && !dumpTerse)
+ {
+ currentInterval->dump();
+ }
+#endif // DEBUG
+
+ // Identify the special cases where we decide up-front not to allocate.
+ bool allocate = true;
+ bool didDump = false;
+
+ if (refType == RefTypeParamDef || refType == RefTypeZeroInit)
+ {
+ // For a ParamDef with a weighted refCount no greater than unity, don't enregister it at entry.
+ // TODO-CQ: Consider doing this only for stack parameters, since otherwise we may be needlessly
+ // inserting a store.
+ LclVarDsc* varDsc = currentInterval->getLocalVar(compiler);
+ assert(varDsc != nullptr);
+ if (refType == RefTypeParamDef && varDsc->lvRefCntWtd <= BB_UNITY_WEIGHT)
+ {
+ INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_NO_ENTRY_REG_ALLOCATED, currentInterval));
+ didDump = true;
+ allocate = false;
+ }
+ // If it has no actual references, mark it as "lastUse"; since such references are not
+ // actually part of any flow, they won't have been marked during dataflow. Otherwise, if
+ // we allocate a register we won't unassign it.
+ else if (currentRefPosition->nextRefPosition == nullptr)
+ {
+ INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_ZERO_REF, currentInterval));
+ currentRefPosition->lastUse = true;
+ }
+ }
+#ifdef FEATURE_SIMD
+ else if (refType == RefTypeUpperVectorSaveDef || refType == RefTypeUpperVectorSaveUse)
+ {
+ Interval* lclVarInterval = currentInterval->relatedInterval;
+ if (lclVarInterval->physReg == REG_NA)
+ {
+ allocate = false;
+ }
+ }
+#endif // FEATURE_SIMD
+
+ if (allocate == false)
+ {
+ if (assignedRegister != REG_NA)
+ {
+ unassignPhysReg(getRegisterRecord(assignedRegister), currentRefPosition);
+ }
+ else if (!didDump)
+ {
+ INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_NO_REG_ALLOCATED, currentInterval));
+ didDump = true;
+ }
+ currentRefPosition->registerAssignment = RBM_NONE;
+ continue;
+ }
+
+ if (currentInterval->isSpecialPutArg)
+ {
+ assert(!currentInterval->isLocalVar);
+ Interval* srcInterval = currentInterval->relatedInterval;
+ assert(srcInterval->isLocalVar);
+ if (refType == RefTypeDef)
+ {
+ assert(srcInterval->recentRefPosition->nodeLocation == currentLocation - 1);
+ RegRecord* physRegRecord = srcInterval->assignedReg;
+
+ // For a putarg_reg to be special, its next use location has to be the same
+ // as the fixed reg's next kill location. Otherwise, if the source lcl var's next use
+ // is after the kill of the fixed reg, but before putarg_reg's next use, the fixed reg's
+ // kill would lead to a spill of the source, but not of the putarg_reg, if it were treated
+ // as special.
+ if (srcInterval->isActive &&
+ genRegMask(srcInterval->physReg) == currentRefPosition->registerAssignment &&
+ currentInterval->getNextRefLocation() == physRegRecord->getNextRefLocation())
+ {
+ assert(physRegRecord->regNum == srcInterval->physReg);
+
+ // A special putarg_reg acts as a pass-thru, since both the source lcl var
+ // and the putarg_reg have the same register allocated. The physical reg
+ // record continues to point to the source lcl var's interval
+ // instead of to putarg_reg's interval. So if the register allocated
+ // to the source lcl var were spilled and reallocated to another
+ // tree node before its use at the call node, that would spill the
+ // lcl var instead of the putarg_reg, since the physical reg record points
+ // to the lcl var's interval. As a result, the arg reg would get trashed, leading
+ // to bad codegen. The assumption here is that the source lcl var of a
+ // special putarg_reg doesn't get spilled and re-allocated prior to
+ // its use at the call node. This is ensured by marking the physical reg
+ // record as busy until the next kill.
+ physRegRecord->isBusyUntilNextKill = true;
+ }
+ else
+ {
+ currentInterval->isSpecialPutArg = false;
+ }
+ }
+ // If this is still a SpecialPutArg, continue.
+ if (currentInterval->isSpecialPutArg)
+ {
+ INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_SPECIAL_PUTARG, currentInterval,
+ currentRefPosition->assignedReg()));
+ continue;
+ }
+ }
+
+ if (assignedRegister == REG_NA && RefTypeIsUse(refType))
+ {
+ currentRefPosition->reload = true;
+ INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_RELOAD, currentInterval, assignedRegister));
+ }
+ }
+
+ regMaskTP assignedRegBit = RBM_NONE;
+ bool isInRegister = false;
+ if (assignedRegister != REG_NA)
+ {
+ isInRegister = true;
+ assignedRegBit = genRegMask(assignedRegister);
+ if (!currentInterval->isActive)
+ {
+ // If this is a use, it must have started the block on the stack, but the register
+ // was available for use, so we kept the association.
+ if (RefTypeIsUse(refType)) + { + assert(inVarToRegMaps[curBBNum][currentInterval->getVarIndex(compiler)] == REG_STK && + previousRefPosition->nodeLocation <= curBBStartLocation); + isInRegister = false; + } + else + { + currentInterval->isActive = true; + } + } + assert(currentInterval->assignedReg != nullptr && + currentInterval->assignedReg->regNum == assignedRegister && + currentInterval->assignedReg->assignedInterval == currentInterval); + } + + // If this is a physical register, we unconditionally assign it to itself! + if (currentRefPosition->isPhysRegRef) + { + RegRecord* currentReg = currentRefPosition->getReg(); + Interval* assignedInterval = currentReg->assignedInterval; + + if (assignedInterval != nullptr) + { + unassignPhysReg(currentReg, assignedInterval->recentRefPosition); + } + currentReg->isActive = true; + assignedRegister = currentReg->regNum; + assignedRegBit = genRegMask(assignedRegister); + if (refType == RefTypeKill) + { + currentReg->isBusyUntilNextKill = false; + } + } + else if (previousRefPosition != nullptr) + { + assert(previousRefPosition->nextRefPosition == currentRefPosition); + assert(assignedRegister == REG_NA || assignedRegBit == previousRefPosition->registerAssignment || + currentRefPosition->outOfOrder || previousRefPosition->copyReg || + previousRefPosition->refType == RefTypeExpUse || currentRefPosition->refType == RefTypeDummyDef); + } + else if (assignedRegister != REG_NA) + { + // Handle the case where this is a preassigned register (i.e. parameter). + // We don't want to actually use the preassigned register if it's not + // going to cover the lifetime - but we had to preallocate it to ensure + // that it remained live. + // TODO-CQ: At some point we may want to refine the analysis here, in case + // it might be beneficial to keep it in this reg for PART of the lifetime + if (currentInterval->isLocalVar) + { + regMaskTP preferences = currentInterval->registerPreferences; + bool keepAssignment = true; + bool matchesPreferences = (preferences & genRegMask(assignedRegister)) != RBM_NONE; + + // Will the assigned register cover the lifetime? If not, does it at least + // meet the preferences for the next RefPosition? + RegRecord* physRegRecord = getRegisterRecord(currentInterval->physReg); + RefPosition* nextPhysRegRefPos = physRegRecord->getNextRefPosition(); + if (nextPhysRegRefPos != nullptr && + nextPhysRegRefPos->nodeLocation <= currentInterval->lastRefPosition->nodeLocation) + { + // Check to see if the existing assignment matches the preferences (e.g. callee save registers) + // and ensure that the next use of this localVar does not occur after the nextPhysRegRefPos + // There must be a next RefPosition, because we know that the Interval extends beyond the + // nextPhysRegRefPos. + RefPosition* nextLclVarRefPos = currentRefPosition->nextRefPosition; + assert(nextLclVarRefPos != nullptr); + if (!matchesPreferences || nextPhysRegRefPos->nodeLocation < nextLclVarRefPos->nodeLocation || + physRegRecord->conflictingFixedRegReference(nextLclVarRefPos)) + { + keepAssignment = false; + } + } + else if (refType == RefTypeParamDef && !matchesPreferences) + { + // Don't use the register, even if available, if it doesn't match the preferences. + // Note that this case is only for ParamDefs, for which we haven't yet taken preferences + // into account (we've just automatically got the initial location). In other cases, + // we would already have put it in a preferenced register, if it was available. 
+ // TODO-CQ: Consider expanding this to check availability - that would duplicate
+ // code here, but otherwise we may wind up in this register anyway.
+ keepAssignment = false;
+ }
+
+ if (keepAssignment == false)
+ {
+ currentRefPosition->registerAssignment = allRegs(currentInterval->registerType);
+ unassignPhysRegNoSpill(physRegRecord);
+
+ // If the preferences are currently set to just this register, reset them to allRegs
+ // of the appropriate type (just as we reset the registerAssignment for this
+ // RefPosition above).
+ // Otherwise, simply remove this register from the preferences, if it's there.
+
+ if (currentInterval->registerPreferences == assignedRegBit)
+ {
+ currentInterval->registerPreferences = currentRefPosition->registerAssignment;
+ }
+ else
+ {
+ currentInterval->registerPreferences &= ~assignedRegBit;
+ }
+
+ assignedRegister = REG_NA;
+ assignedRegBit = RBM_NONE;
+ }
+ }
+ }
+
+ if (assignedRegister != REG_NA)
+ {
+ // If there is a conflicting fixed reference, insert a copy.
+ RegRecord* physRegRecord = getRegisterRecord(assignedRegister);
+ if (physRegRecord->conflictingFixedRegReference(currentRefPosition))
+ {
+ // We may have already reassigned the register to the conflicting reference.
+ // If not, we need to unassign this interval.
+ if (physRegRecord->assignedInterval == currentInterval)
+ {
+ unassignPhysRegNoSpill(physRegRecord);
+ }
+ currentRefPosition->moveReg = true;
+ assignedRegister = REG_NA;
+ INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_MOVE_REG, currentInterval, assignedRegister));
+ }
+ else if ((genRegMask(assignedRegister) & currentRefPosition->registerAssignment) != 0)
+ {
+ currentRefPosition->registerAssignment = assignedRegBit;
+ if (!currentReferent->isActive)
+ {
+ // If we've got an exposed use at the top of a block, the
+ // interval might not have been active. Otherwise if it's a use,
+ // the interval must be active.
+ if (refType == RefTypeDummyDef)
+ {
+ currentReferent->isActive = true;
+ assert(getRegisterRecord(assignedRegister)->assignedInterval == currentInterval);
+ }
+ else
+ {
+ currentRefPosition->reload = true;
+ }
+ }
+ INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_KEPT_ALLOCATION, currentInterval, assignedRegister));
+ }
+ else
+ {
+ // This must be a localVar or a single-reg fixed use or a tree temp with conflicting def & use.
+
+ assert(currentInterval && (currentInterval->isLocalVar || currentRefPosition->isFixedRegRef ||
+ currentInterval->hasConflictingDefUse));
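+ // Illustrative aside (not part of the allocator): a copyReg satisfies a one-off register
+ // requirement without disturbing the interval's home register. Schematically, for a
+ // value living in 'home' that this node needs in 'required' (hypothetical names):
+ //
+ //     mov required, home   ; codegen inserts the copy
+ //     ...                  ; this node uses 'required'
+ //                          ; 'home' remains the interval's location afterwards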
+ // It's already in a register, but not one we need.
+ // If it is a fixed use that is not marked "delayRegFree", there is already a FixedReg to ensure that
+ // the needed reg is not otherwise in use, so we can simply ignore it and codegen will do the copy.
+ // The reason we need special handling for the "delayRegFree" case is that we need to mark the
+ // fixed-reg as in-use and delayed (the FixedReg RefPosition doesn't handle the delay requirement).
+ // Otherwise, if this is a pure use localVar or tree temp, we assign a copyReg, but must free both regs
+ // if it is a last use.
+ if (!currentRefPosition->isFixedRegRef || currentRefPosition->delayRegFree)
+ {
+ if (!RefTypeIsDef(currentRefPosition->refType))
+ {
+ regNumber copyReg = assignCopyReg(currentRefPosition);
+ assert(copyReg != REG_NA);
+ INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_COPY_REG, currentInterval, copyReg));
+ lastAllocatedRefPosition = currentRefPosition;
+ if (currentRefPosition->lastUse)
+ {
+ if (currentRefPosition->delayRegFree)
+ {
+ INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_LAST_USE_DELAYED, currentInterval,
+ assignedRegister));
+ delayRegsToFree |=
+ (genRegMask(assignedRegister) | currentRefPosition->registerAssignment);
+ }
+ else
+ {
+ INDEBUG(
+ dumpLsraAllocationEvent(LSRA_EVENT_LAST_USE, currentInterval, assignedRegister));
+ regsToFree |= (genRegMask(assignedRegister) | currentRefPosition->registerAssignment);
+ }
+ }
+ // If this is a tree temp (non-localVar) interval, we will need an explicit move.
+ if (!currentInterval->isLocalVar)
+ {
+ currentRefPosition->moveReg = true;
+ currentRefPosition->copyReg = false;
+ }
+ continue;
+ }
+ else
+ {
+ INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_NEEDS_NEW_REG, nullptr, assignedRegister));
+ regsToFree |= genRegMask(assignedRegister);
+ // We want a new register, but we don't want this to be considered a spill.
+ assignedRegister = REG_NA;
+ if (physRegRecord->assignedInterval == currentInterval)
+ {
+ unassignPhysRegNoSpill(physRegRecord);
+ }
+ }
+ }
+ else
+ {
+ INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_KEPT_ALLOCATION, nullptr, assignedRegister));
+ }
+ }
+ }
+
+ if (assignedRegister == REG_NA)
+ {
+ bool allocateReg = true;
+
+ if (currentRefPosition->AllocateIfProfitable())
+ {
+ // We can avoid allocating a register if it is the last use requiring a reload.
+ if (currentRefPosition->lastUse && currentRefPosition->reload)
+ {
+ allocateReg = false;
+ }
+
+#ifdef DEBUG
+ // Under stress mode, don't attempt to allocate a reg to
+ // a reg-optional ref position.
+ if (allocateReg && regOptionalNoAlloc())
+ {
+ allocateReg = false;
+ }
+#endif
+ }
+
+ if (allocateReg)
+ {
+ // Try to allocate a register.
+ assignedRegister = tryAllocateFreeReg(currentInterval, currentRefPosition);
+ }
+
+ // If no register was found, and if the currentRefPosition must have a register,
+ // then find a register to spill.
+ if (assignedRegister == REG_NA)
+ {
+#ifdef FEATURE_SIMD
+ if (refType == RefTypeUpperVectorSaveDef)
+ {
+ // TODO-CQ: Determine whether copying to two integer callee-save registers would be profitable.
+ currentRefPosition->registerAssignment = (allRegs(TYP_FLOAT) & RBM_FLT_CALLEE_TRASH);
+ assignedRegister = tryAllocateFreeReg(currentInterval, currentRefPosition);
+ // There MUST be caller-save registers available, because they have all just been killed.
+ assert(assignedRegister != REG_NA);
+ // Now, spill it.
+ // (These will look a bit backward in the dump, but it's a pain to dump the alloc before the spill.)
+ unassignPhysReg(getRegisterRecord(assignedRegister), currentRefPosition);
+ INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_ALLOC_REG, currentInterval, assignedRegister));
+ // Now set assignedRegister to REG_NA again so that we don't re-activate it.
+ assignedRegister = REG_NA; + } + else +#endif // FEATURE_SIMD + if (currentRefPosition->RequiresRegister() || currentRefPosition->AllocateIfProfitable()) + { + if (allocateReg) + { + assignedRegister = allocateBusyReg(currentInterval, currentRefPosition, + currentRefPosition->AllocateIfProfitable()); + } + + if (assignedRegister != REG_NA) + { + INDEBUG( + dumpLsraAllocationEvent(LSRA_EVENT_ALLOC_SPILLED_REG, currentInterval, assignedRegister)); + } + else + { + // This can happen only for those ref positions that are to be allocated + // only if profitable. + noway_assert(currentRefPosition->AllocateIfProfitable()); + + currentRefPosition->registerAssignment = RBM_NONE; + currentRefPosition->reload = false; + + INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_NO_REG_ALLOCATED, currentInterval)); + } + } + else + { + INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_NO_REG_ALLOCATED, currentInterval)); + currentRefPosition->registerAssignment = RBM_NONE; + currentInterval->isActive = false; + } + } +#ifdef DEBUG + else + { + if (VERBOSE) + { + if (currentInterval->isConstant && (currentRefPosition->treeNode != nullptr) && + currentRefPosition->treeNode->IsReuseRegVal()) + { + dumpLsraAllocationEvent(LSRA_EVENT_REUSE_REG, nullptr, assignedRegister, currentBlock); + } + else + { + dumpLsraAllocationEvent(LSRA_EVENT_ALLOC_REG, nullptr, assignedRegister, currentBlock); + } + } + } +#endif // DEBUG + + if (refType == RefTypeDummyDef && assignedRegister != REG_NA) + { + setInVarRegForBB(curBBNum, currentInterval->varNum, assignedRegister); + } + + // If we allocated a register, and this is a use of a spilled value, + // it should have been marked for reload above. + if (assignedRegister != REG_NA && RefTypeIsUse(refType) && !isInRegister) + { + assert(currentRefPosition->reload); + } + } + + // If we allocated a register, record it + if (currentInterval != nullptr && assignedRegister != REG_NA) + { + assignedRegBit = genRegMask(assignedRegister); + currentRefPosition->registerAssignment = assignedRegBit; + currentInterval->physReg = assignedRegister; + regsToFree &= ~assignedRegBit; // we'll set it again later if it's dead + + // If this interval is dead, free the register. + // The interval could be dead if this is a user variable, or if the + // node is being evaluated for side effects, or a call whose result + // is not used, etc. + if (currentRefPosition->lastUse || currentRefPosition->nextRefPosition == nullptr) + { + assert(currentRefPosition->isIntervalRef()); + + if (refType != RefTypeExpUse && currentRefPosition->nextRefPosition == nullptr) + { + if (currentRefPosition->delayRegFree) + { + delayRegsToFree |= assignedRegBit; + INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_LAST_USE_DELAYED)); + } + else + { + regsToFree |= assignedRegBit; + INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_LAST_USE)); + } + } + else + { + currentInterval->isActive = false; + } + } + + lastAllocatedRefPosition = currentRefPosition; + } + } + + // Free registers to clear associated intervals for resolution phase + CLANG_FORMAT_COMMENT_ANCHOR; + +#ifdef DEBUG + if (getLsraExtendLifeTimes()) + { + // If we have extended lifetimes, we need to make sure all the registers are freed. 
+        for (int regNumIndex = 0; regNumIndex <= REG_FP_LAST; regNumIndex++)
+        {
+            RegRecord& regRecord = physRegs[regNumIndex];
+            Interval*  interval  = regRecord.assignedInterval;
+            if (interval != nullptr)
+            {
+                interval->isActive = false;
+                unassignPhysReg(&regRecord, nullptr);
+            }
+        }
+    }
+    else
+#endif // DEBUG
+    {
+        freeRegisters(regsToFree | delayRegsToFree);
+    }
+
+#ifdef DEBUG
+    if (VERBOSE)
+    {
+        if (dumpTerse)
+        {
+            // Dump the RegRecords after the last RefPosition is handled.
+            dumpRegRecords();
+            printf("\n");
+        }
+
+        dumpRefPositions("AFTER ALLOCATION");
+        dumpVarRefPositions("AFTER ALLOCATION");
+
+        // Dump the intervals that remain active
+        printf("Active intervals at end of allocation:\n");
+
+        // We COULD just reuse the intervalIter from above, but ArrayListIterator doesn't
+        // provide a Reset function (!) - we'll probably replace this so don't bother
+        // adding it
+
+        for (auto& interval : intervals)
+        {
+            if (interval.isActive)
+            {
+                printf("Active ");
+                interval.dump();
+            }
+        }
+
+        printf("\n");
+    }
+#endif // DEBUG
+}
+
+// LinearScan::resolveLocalRef
+// Description:
+//      Update the graph for a local reference.
+//      Also, track the register (if any) that is currently occupied.
+// Arguments:
+//      treeNode: The lclVar that's being resolved
+//      currentRefPosition: the RefPosition associated with the treeNode
+//
+// Details:
+//      This method is called for each local reference, during the resolveRegisters
+//      phase of LSRA. It is responsible for keeping the following in sync:
+//      - varDsc->lvRegNum (and lvOtherReg) contain the unique register location.
+//        If it is not in the same register through its lifetime, it is set to REG_STK.
+//      - interval->physReg is set to the assigned register
+//        (i.e. at the code location which is currently being handled by resolveRegisters())
+//      - interval->isActive is true iff the interval is live and occupying a register
+//      - interval->isSpilled is set to true if the interval is EVER spilled
+//      - interval->isSplit is set to true if the interval does not occupy the same
+//        register throughout the method
+//      - RegRecord->assignedInterval points to the interval which currently occupies
+//        the register
+//      - For each lclVar node:
+//        - gtRegNum/gtRegPair is set to the currently allocated register(s)
+//        - GTF_REG_VAL is set if it is a use, and is in a register
+//        - GTF_SPILLED is set on a use if it must be reloaded prior to use (GTF_REG_VAL
+//          must not be set)
+//        - GTF_SPILL is set if it must be spilled after use (GTF_REG_VAL may or may not
+//          be set)
+//
+// A copyReg is an ugly case where the variable must be in a specific (fixed) register,
+// but it currently resides elsewhere. The register allocator must track the use of the
+// fixed register, but it marks the lclVar node with the register it currently lives in
+// and the code generator does the necessary move.
+//
+// Before beginning, the varDsc for each parameter must be set to its initial location.
+//
+// NICE: Consider tracking whether an Interval is always in the same location (register/stack)
+// in which case it will require no resolution.
+//
+void LinearScan::resolveLocalRef(BasicBlock* block, GenTreePtr treeNode, RefPosition* currentRefPosition)
+{
+    assert((block == nullptr) == (treeNode == nullptr));
+
+    // Is this a tracked local? Or just a register allocated for loading
+    // a non-tracked one?
+    Interval* interval = currentRefPosition->getInterval();
+    if (!interval->isLocalVar)
+    {
+        return;
+    }
+    interval->recentRefPosition = currentRefPosition;
+    LclVarDsc* varDsc = interval->getLocalVar(compiler);
+
+    if (currentRefPosition->registerAssignment == RBM_NONE)
+    {
+        assert(!currentRefPosition->RequiresRegister());
+
+        interval->isSpilled = true;
+        varDsc->lvRegNum = REG_STK;
+        if (interval->assignedReg != nullptr && interval->assignedReg->assignedInterval == interval)
+        {
+            interval->assignedReg->assignedInterval = nullptr;
+        }
+        interval->assignedReg = nullptr;
+        interval->physReg = REG_NA;
+
+        return;
+    }
+
+    // In most cases, assigned and home registers will be the same.
+    // The exception is the copyReg case, where we've assigned a register
+    // for a specific purpose, but will be keeping the register assignment.
+    regNumber assignedReg = currentRefPosition->assignedReg();
+    regNumber homeReg = assignedReg;
+
+    // Undo any previous association with a physical register, UNLESS this
+    // is a copyReg
+    if (!currentRefPosition->copyReg)
+    {
+        regNumber oldAssignedReg = interval->physReg;
+        if (oldAssignedReg != REG_NA && assignedReg != oldAssignedReg)
+        {
+            RegRecord* oldRegRecord = getRegisterRecord(oldAssignedReg);
+            if (oldRegRecord->assignedInterval == interval)
+            {
+                oldRegRecord->assignedInterval = nullptr;
+            }
+        }
+    }
+
+    if (currentRefPosition->refType == RefTypeUse && !currentRefPosition->reload)
+    {
+        // Was this spilled after our predecessor was scheduled?
+        if (interval->physReg == REG_NA)
+        {
+            assert(inVarToRegMaps[curBBNum][varDsc->lvVarIndex] == REG_STK);
+            currentRefPosition->reload = true;
+        }
+    }
+
+    bool reload = currentRefPosition->reload;
+    bool spillAfter = currentRefPosition->spillAfter;
+
+    // In the reload case we simply do not set GTF_REG_VAL, and it gets
+    // referenced from the variable's home location.
+    // This is also true for a pure def which is spilled.
+    if (reload && currentRefPosition->refType != RefTypeDef)
+    {
+        varDsc->lvRegNum = REG_STK;
+        if (!spillAfter)
+        {
+            interval->physReg = assignedReg;
+        }
+
+        // If there is no treeNode, this must be a RefTypeExpUse, in
+        // which case we did the reload already
+        if (treeNode != nullptr)
+        {
+            treeNode->gtFlags |= GTF_SPILLED;
+            if (spillAfter)
+            {
+                if (currentRefPosition->AllocateIfProfitable())
+                {
+                    // This is a use of a lclVar that is flagged as reg-optional
+                    // by lower/codegen and marked for both reload and spillAfter.
+                    // In this case we can avoid an unnecessary reload and spill
+                    // by setting the reg on the lclVar to REG_STK and the reg on the
+                    // tree node to REG_NA. Codegen will generate the code by treating
+                    // it as a contained memory operand.
+                    //
+                    // Note that varDsc->lvRegNum is already set to REG_STK above.
+                    interval->physReg = REG_NA;
+                    treeNode->gtRegNum = REG_NA;
+                    treeNode->gtFlags &= ~GTF_SPILLED;
+                }
+                else
+                {
+                    treeNode->gtFlags |= GTF_SPILL;
+                }
+            }
+        }
+        else
+        {
+            assert(currentRefPosition->refType == RefTypeExpUse);
+        }
+
+        // If we have an undefined use, set it as non-reg
+        if (!interval->isSpilled)
+        {
+            if (varDsc->lvIsParam && !varDsc->lvIsRegArg && currentRefPosition == interval->firstRefPosition)
+            {
+                // Parameters are the only thing that can be used before being defined
+            }
+            else
+            {
+                // If we see a use before the def of something else, the zero-init flag better not be set.
+                noway_assert(!compiler->info.compInitMem);
+                // If it is not set, then the behavior is undefined, but we don't want to crash or assert.
+                interval->isSpilled = true;
+            }
+        }
+    }
+    else if (spillAfter && !RefTypeIsUse(currentRefPosition->refType))
+    {
+        // In the case of a pure def, don't bother spilling - just assign it to the
+        // stack. However, we need to remember that it was spilled.
+
+        interval->isSpilled = true;
+        varDsc->lvRegNum = REG_STK;
+        interval->physReg = REG_NA;
+        if (treeNode != nullptr)
+        {
+            treeNode->gtRegNum = REG_NA;
+        }
+    }
+    else
+    {
+        // Not a reload, and not a pure def that's spillAfter.
+
+        if (currentRefPosition->copyReg || currentRefPosition->moveReg)
+        {
+            // For a copyReg or moveReg, we have two cases:
+            //  - In the first case, we have a fixedReg - i.e. a register which the code
+            //    generator is constrained to use.
+            //    The code generator will generate the appropriate move to meet the requirement.
+            //  - In the second case, we were forced to use a different register because of
+            //    interference (or JitStressRegs).
+            //    In this case, we generate a GT_COPY.
+            // In either case, we annotate the treeNode with the register in which the value
+            // currently lives. For moveReg, the homeReg is the new register (as assigned above).
+            // But for copyReg, the homeReg remains unchanged.
+
+            assert(treeNode != nullptr);
+            treeNode->gtRegNum = interval->physReg;
+
+            if (currentRefPosition->copyReg)
+            {
+                homeReg = interval->physReg;
+            }
+            else
+            {
+                interval->physReg = assignedReg;
+            }
+
+            if (!currentRefPosition->isFixedRegRef || currentRefPosition->moveReg)
+            {
+                // This is the second case, where we need to generate a copy
+                insertCopyOrReload(block, treeNode, currentRefPosition->getMultiRegIdx(), currentRefPosition);
+            }
+        }
+        else
+        {
+            interval->physReg = assignedReg;
+
+            if (!interval->isSpilled && !interval->isSplit)
+            {
+                if (varDsc->lvRegNum != REG_STK)
+                {
+                    // If the register assignments don't match, then this interval is split,
+                    // but not spilled (yet).
+                    // However, we don't have a single register assignment now.
+                    if (varDsc->lvRegNum != assignedReg)
+                    {
+                        interval->isSplit = true;
+                        varDsc->lvRegNum = REG_STK;
+                    }
+                }
+                else
+                {
+                    varDsc->lvRegNum = assignedReg;
+                }
+            }
+        }
+        if (spillAfter)
+        {
+            if (treeNode != nullptr)
+            {
+                treeNode->gtFlags |= GTF_SPILL;
+            }
+            interval->isSpilled = true;
+            interval->physReg = REG_NA;
+            varDsc->lvRegNum = REG_STK;
+        }
+
+        // This value is in a register, UNLESS we already saw this treeNode
+        // and marked it for reload
+        if (treeNode != nullptr && !(treeNode->gtFlags & GTF_SPILLED))
+        {
+            treeNode->gtFlags |= GTF_REG_VAL;
+        }
+    }
+
+    // Update the physRegRecord for the register, so that we know what vars are in
+    // regs at the block boundaries
+    RegRecord* physRegRecord = getRegisterRecord(homeReg);
+    if (spillAfter || currentRefPosition->lastUse)
+    {
+        physRegRecord->assignedInterval = nullptr;
+        interval->assignedReg = nullptr;
+        interval->physReg = REG_NA;
+        interval->isActive = false;
+    }
+    else
+    {
+        interval->isActive = true;
+        physRegRecord->assignedInterval = interval;
+        interval->assignedReg = physRegRecord;
+    }
+}
+
+void LinearScan::writeRegisters(RefPosition* currentRefPosition, GenTree* tree)
+{
+    lsraAssignRegToTree(tree, currentRefPosition->assignedReg(), currentRefPosition->getMultiRegIdx());
+}
+
+//------------------------------------------------------------------------
+// insertCopyOrReload: Insert a copy in the case where a tree node value must be moved
+//    to a different register at the point of use
(GT_COPY), or it is reloaded to a different register
+//    than the one it was spilled from (GT_RELOAD).
+//
+// Arguments:
+//    tree        - This is the node to copy or reload.
+//                  Insert the copy or reload node between this node and its parent.
+//    multiRegIdx - register position of the tree node for which the copy or reload is needed.
+//    refPosition - The RefPosition at which the copy or reload will take place.
+//
+// Notes:
+//    The GT_COPY or GT_RELOAD will be inserted in the proper spot in execution order where the reload is to occur.
+//
+// For example, for this tree (numbers are execution order, lower is earlier and higher is later):
+//
+//                                   +---------+----------+
+//                                   |       GT_ADD (3)   |
+//                                   +---------+----------+
+//                                             |
+//                                           /   \
+//                                         /       \
+//                                       /           \
+//                   +-------------------+           +----------------------+
+//                   |         x (1)     | "tree"    |         y (2)        |
+//                   +-------------------+           +----------------------+
+//
+// generate this tree:
+//
+//                                   +---------+----------+
+//                                   |       GT_ADD (4)   |
+//                                   +---------+----------+
+//                                             |
+//                                           /   \
+//                                         /       \
+//                                       /           \
+//                   +-------------------+           +----------------------+
+//                   |   GT_RELOAD (3)   |           |         y (2)        |
+//                   +-------------------+           +----------------------+
+//                             |
+//                   +-------------------+
+//                   |         x (1)     | "tree"
+//                   +-------------------+
+//
+// Note in particular that the GT_RELOAD node gets inserted in execution order immediately before the parent of "tree",
+// which seems a bit weird since normally a node's parent (in this case, the parent of "x", GT_RELOAD in the "after"
+// picture) immediately follows all of its children (that is, normally the execution ordering is postorder).
+// The ordering must be this weird "out of normal order" way because the "x" node is being spilled, probably
+// because the expression in the tree represented above by "y" has high register requirements. We don't want
+// to reload immediately, of course. So we put GT_RELOAD where the reload should actually happen.
+//
+// Note that GT_RELOAD is required when we reload to a different register than the one we spilled to. It can also be
+// used if we reload to the same register. Normally, though, in that case we just mark the node with GTF_SPILLED,
+// and the unspilling code automatically reuses the same register, and does the reload when it notices that flag
+// when considering a node's operands.
+//
+void LinearScan::insertCopyOrReload(BasicBlock* block, GenTreePtr tree, unsigned multiRegIdx, RefPosition* refPosition)
+{
+    LIR::Range& blockRange = LIR::AsRange(block);
+
+    LIR::Use treeUse;
+    bool foundUse = blockRange.TryGetUse(tree, &treeUse);
+    assert(foundUse);
+
+    GenTree* parent = treeUse.User();
+
+    genTreeOps oper;
+    if (refPosition->reload)
+    {
+        oper = GT_RELOAD;
+    }
+    else
+    {
+        oper = GT_COPY;
+    }
+
+    // If the parent is a reload/copy node, then tree must be a multi-reg call node
+    // that has already had one of its registers spilled. This is because a multi-reg
+    // call node is the only node whose RefTypeDef positions get independently
+    // spilled or reloaded. It is possible that one of its RefTypeDef positions got
+    // spilled and the next use of it requires it to be in a different register.
+    //
+    // In this case, set the ith position reg of the reload/copy node to the reg allocated
+    // for the copy/reload refPosition. Essentially, a copy/reload node will have a reg
+    // for each multi-reg position of its child. If there is a valid reg in the ith
+    // position of a GT_COPY or GT_RELOAD node, then the corresponding result of its
+    // child needs to be copied or reloaded to that reg.
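+    //
+    // As an illustration only (a sketch with hypothetical registers, not a dump from
+    // this method): for a two-register call result whose second position was spilled
+    // and must be reloaded into RDX, the bookkeeping described above amounts to:
+    //
+    //     GenTreeCopyOrReload* reload = parent->AsCopyOrReload();
+    //     assert(reload->GetRegNumByIdx(1) == REG_NA); // this position not yet written
+    //     reload->SetRegNumByIdx(REG_RDX, 1);          // second result reg gets the reload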
+ if (parent->IsCopyOrReload()) + { + noway_assert(parent->OperGet() == oper); + noway_assert(tree->IsMultiRegCall()); + GenTreeCall* call = tree->AsCall(); + GenTreeCopyOrReload* copyOrReload = parent->AsCopyOrReload(); + noway_assert(copyOrReload->GetRegNumByIdx(multiRegIdx) == REG_NA); + copyOrReload->SetRegNumByIdx(refPosition->assignedReg(), multiRegIdx); + } + else + { + // Create the new node, with "tree" as its only child. + var_types treeType = tree->TypeGet(); + +#ifdef FEATURE_SIMD + // Check to see whether we need to move to a different register set. + // This currently only happens in the case of SIMD vector types that are small enough (pointer size) + // that they must be passed & returned in integer registers. + // 'treeType' is the type of the register we are moving FROM, + // and refPosition->registerAssignment is the mask for the register we are moving TO. + // If they don't match, we need to reverse the type for the "move" node. + + if ((allRegs(treeType) & refPosition->registerAssignment) == 0) + { + treeType = (useFloatReg(treeType)) ? TYP_I_IMPL : TYP_SIMD8; + } +#endif // FEATURE_SIMD + + GenTreeCopyOrReload* newNode = new (compiler, oper) GenTreeCopyOrReload(oper, treeType, tree); + assert(refPosition->registerAssignment != RBM_NONE); + newNode->SetRegNumByIdx(refPosition->assignedReg(), multiRegIdx); + newNode->gtLsraInfo.isLsraAdded = true; + newNode->gtLsraInfo.isLocalDefUse = false; + if (refPosition->copyReg) + { + // This is a TEMPORARY copy + assert(isCandidateLocalRef(tree)); + newNode->gtFlags |= GTF_VAR_DEATH; + } + + // Insert the copy/reload after the spilled node and replace the use of the original node with a use + // of the copy/reload. + blockRange.InsertAfter(tree, newNode); + treeUse.ReplaceWith(compiler, newNode); + } +} + +#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE +//------------------------------------------------------------------------ +// insertUpperVectorSaveAndReload: Insert code to save and restore the upper half of a vector that lives +// in a callee-save register at the point of a kill (the upper half is +// not preserved). +// +// Arguments: +// tree - This is the node around which we will insert the Save & Reload. +// It will be a call or some node that turns into a call. +// refPosition - The RefTypeUpperVectorSaveDef RefPosition. +// +void LinearScan::insertUpperVectorSaveAndReload(GenTreePtr tree, RefPosition* refPosition, BasicBlock* block) +{ + Interval* lclVarInterval = refPosition->getInterval()->relatedInterval; + assert(lclVarInterval->isLocalVar == true); + LclVarDsc* varDsc = compiler->lvaTable + lclVarInterval->varNum; + assert(varDsc->lvType == LargeVectorType); + regNumber lclVarReg = lclVarInterval->physReg; + if (lclVarReg == REG_NA) + { + return; + } + + assert((genRegMask(lclVarReg) & RBM_FLT_CALLEE_SAVED) != RBM_NONE); + + regNumber spillReg = refPosition->assignedReg(); + bool spillToMem = refPosition->spillAfter; + + LIR::Range& blockRange = LIR::AsRange(block); + + // First, insert the save as an embedded statement before the call. 
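+    //
+    // The IR shape that results (a sketch with a hypothetical local V09 held in xmm6,
+    // not a literal dump) is:
+    //
+    //     GT_LCL_VAR   V09 (in xmm6)                ; saveLcl
+    //     GT_SIMD      UpperSave    -> spillReg     ; GTF_SPILL if spillToMem
+    //     <the call>
+    //     GT_LCL_VAR   V09 (in xmm6)                ; restoreLcl
+    //     GT_SIMD      UpperRestore <- spillReg     ; GTF_SPILLED if spillToMem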
+ + GenTreePtr saveLcl = compiler->gtNewLclvNode(lclVarInterval->varNum, LargeVectorType); + saveLcl->gtLsraInfo.isLsraAdded = true; + saveLcl->gtRegNum = lclVarReg; + saveLcl->gtFlags |= GTF_REG_VAL; + saveLcl->gtLsraInfo.isLocalDefUse = false; + + GenTreeSIMD* simdNode = + new (compiler, GT_SIMD) GenTreeSIMD(LargeVectorSaveType, saveLcl, nullptr, SIMDIntrinsicUpperSave, + varDsc->lvBaseType, genTypeSize(LargeVectorType)); + simdNode->gtLsraInfo.isLsraAdded = true; + simdNode->gtRegNum = spillReg; + if (spillToMem) + { + simdNode->gtFlags |= GTF_SPILL; + } + + blockRange.InsertBefore(tree, LIR::SeqTree(compiler, simdNode)); + + // Now insert the restore after the call. + + GenTreePtr restoreLcl = compiler->gtNewLclvNode(lclVarInterval->varNum, LargeVectorType); + restoreLcl->gtLsraInfo.isLsraAdded = true; + restoreLcl->gtRegNum = lclVarReg; + restoreLcl->gtFlags |= GTF_REG_VAL; + restoreLcl->gtLsraInfo.isLocalDefUse = false; + + simdNode = new (compiler, GT_SIMD) + GenTreeSIMD(LargeVectorType, restoreLcl, nullptr, SIMDIntrinsicUpperRestore, varDsc->lvBaseType, 32); + simdNode->gtLsraInfo.isLsraAdded = true; + simdNode->gtRegNum = spillReg; + if (spillToMem) + { + simdNode->gtFlags |= GTF_SPILLED; + } + + blockRange.InsertAfter(tree, LIR::SeqTree(compiler, simdNode)); +} +#endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE + +//------------------------------------------------------------------------ +// initMaxSpill: Initializes the LinearScan members used to track the max number +// of concurrent spills. This is needed so that we can set the +// fields in Compiler, so that the code generator, in turn can +// allocate the right number of spill locations. +// +// Arguments: +// None. +// +// Return Value: +// None. +// +// Assumptions: +// This is called before any calls to updateMaxSpill(). + +void LinearScan::initMaxSpill() +{ + needDoubleTmpForFPCall = false; + needFloatTmpForFPCall = false; + for (int i = 0; i < TYP_COUNT; i++) + { + maxSpill[i] = 0; + currentSpill[i] = 0; + } +} + +//------------------------------------------------------------------------ +// recordMaxSpill: Sets the fields in Compiler for the max number of concurrent spills. +// (See the comment on initMaxSpill.) +// +// Arguments: +// None. +// +// Return Value: +// None. +// +// Assumptions: +// This is called after updateMaxSpill() has been called for all "real" +// RefPositions. + +void LinearScan::recordMaxSpill() +{ + // Note: due to the temp normalization process (see tmpNormalizeType) + // only a few types should actually be seen here. + JITDUMP("Recording the maximum number of concurrent spills:\n"); +#ifdef _TARGET_X86_ + var_types returnType = compiler->tmpNormalizeType(compiler->info.compRetType); + if (needDoubleTmpForFPCall || (returnType == TYP_DOUBLE)) + { + JITDUMP("Adding a spill temp for moving a double call/return value between xmm reg and x87 stack.\n"); + maxSpill[TYP_DOUBLE] += 1; + } + if (needFloatTmpForFPCall || (returnType == TYP_FLOAT)) + { + JITDUMP("Adding a spill temp for moving a float call/return value between xmm reg and x87 stack.\n"); + maxSpill[TYP_FLOAT] += 1; + } +#endif // _TARGET_X86_ + for (int i = 0; i < TYP_COUNT; i++) + { + if (var_types(i) != compiler->tmpNormalizeType(var_types(i))) + { + // Only normalized types should have anything in the maxSpill array. + // We assume here that if type 'i' does not normalize to itself, then + // nothing else normalizes to 'i', either. 
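+            // (For example - an illustration, assuming the normalization described
+            // above - TYP_BYTE and TYP_SHORT both normalize to TYP_INT, so only
+            // maxSpill[TYP_INT] may be non-zero among the small integer types.)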
+            assert(maxSpill[i] == 0);
+        }
+        JITDUMP("  %s: %d\n", varTypeName(var_types(i)), maxSpill[i]);
+        if (maxSpill[i] != 0)
+        {
+            compiler->tmpPreAllocateTemps(var_types(i), maxSpill[i]);
+        }
+    }
+}
+
+//------------------------------------------------------------------------
+// updateMaxSpill: Update the maximum number of concurrent spills
+//
+// Arguments:
+//    refPosition - the current RefPosition being handled
+//
+// Return Value:
+//    None.
+//
+// Assumptions:
+//    The RefPosition has an associated interval (getInterval() will
+//    otherwise assert).
+//
+// Notes:
+//    This is called for each "real" RefPosition during the writeback
+//    phase of LSRA. It keeps track of how many concurrently-live
+//    spills there are, and the largest number seen so far.
+
+void LinearScan::updateMaxSpill(RefPosition* refPosition)
+{
+    RefType refType = refPosition->refType;
+
+    if (refPosition->spillAfter || refPosition->reload ||
+        (refPosition->AllocateIfProfitable() && refPosition->assignedReg() == REG_NA))
+    {
+        Interval* interval = refPosition->getInterval();
+        if (!interval->isLocalVar)
+        {
+            // The tmp allocation logic 'normalizes' types to a small number of
+            // types that need distinct stack locations from each other.
+            // Those types are currently gc refs, byrefs, <= 4 byte non-GC items,
+            // 8-byte non-GC items, and 16-byte or 32-byte SIMD vectors.
+            // LSRA is agnostic to those choices but needs
+            // to know what they are here.
+            var_types typ;
+
+#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
+            if ((refType == RefTypeUpperVectorSaveDef) || (refType == RefTypeUpperVectorSaveUse))
+            {
+                typ = LargeVectorSaveType;
+            }
+            else
+#endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE
+            {
+                GenTreePtr treeNode = refPosition->treeNode;
+                if (treeNode == nullptr)
+                {
+                    assert(RefTypeIsUse(refType));
+                    treeNode = interval->firstRefPosition->treeNode;
+                }
+                assert(treeNode != nullptr);
+
+                // In case of multi-reg call nodes, we need to use the type
+                // of the return register given by multiRegIdx of the refposition.
+                if (treeNode->IsMultiRegCall())
+                {
+                    ReturnTypeDesc* retTypeDesc = treeNode->AsCall()->GetReturnTypeDesc();
+                    typ = retTypeDesc->GetReturnRegType(refPosition->getMultiRegIdx());
+                }
+                else
+                {
+                    typ = treeNode->TypeGet();
+                }
+                typ = compiler->tmpNormalizeType(typ);
+            }
+
+            if (refPosition->spillAfter && !refPosition->reload)
+            {
+                currentSpill[typ]++;
+                if (currentSpill[typ] > maxSpill[typ])
+                {
+                    maxSpill[typ] = currentSpill[typ];
+                }
+            }
+            else if (refPosition->reload)
+            {
+                assert(currentSpill[typ] > 0);
+                currentSpill[typ]--;
+            }
+            else if (refPosition->AllocateIfProfitable() && refPosition->assignedReg() == REG_NA)
+            {
+                // This is a spill temp that is not being reloaded into a register, because
+                // it is marked allocate-if-profitable and is used directly from its memory
+                // location. To properly account for the max spill of 'typ', we decrement
+                // the spill count.
+                assert(RefTypeIsUse(refType));
+                assert(currentSpill[typ] > 0);
+                currentSpill[typ]--;
+            }
+            JITDUMP("  Max spill for %s is %d\n", varTypeName(typ), maxSpill[typ]);
+        }
+    }
+}
+
+// This is the final phase of register allocation. It writes the register assignments to
+// the tree, and performs resolution across joins and backedges.
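+//
+// In the simplest case, writing back an assignment is just (a sketch; see writeRegisters()):
+//
+//     treeNode->gtRegNum = currentRefPosition->assignedReg();
+//
+// The bulk of the work below handles spills, reloads, copies, and local variable
+// locations at block boundaries.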
+// +void LinearScan::resolveRegisters() +{ + // Iterate over the tree and the RefPositions in lockstep + // - annotate the tree with register assignments by setting gtRegNum or gtRegPair (for longs) + // on the tree node + // - track globally-live var locations + // - add resolution points at split/merge/critical points as needed + + // Need to use the same traversal order as the one that assigns the location numbers. + + // Dummy RefPositions have been added at any split, join or critical edge, at the + // point where resolution may be required. These are located: + // - for a split, at the top of the non-adjacent block + // - for a join, at the bottom of the non-adjacent joining block + // - for a critical edge, at the top of the target block of each critical + // edge. + // Note that a target block may have multiple incoming critical or split edges + // + // These RefPositions record the expected location of the Interval at that point. + // At each branch, we identify the location of each liveOut interval, and check + // against the RefPositions at the target. + + BasicBlock* block; + LsraLocation currentLocation = MinLocation; + + // Clear register assignments - these will be reestablished as lclVar defs (including RefTypeParamDefs) + // are encountered. + for (regNumber reg = REG_FIRST; reg < ACTUAL_REG_COUNT; reg = REG_NEXT(reg)) + { + RegRecord* physRegRecord = getRegisterRecord(reg); + Interval* assignedInterval = physRegRecord->assignedInterval; + if (assignedInterval != nullptr) + { + assignedInterval->assignedReg = nullptr; + assignedInterval->physReg = REG_NA; + } + physRegRecord->assignedInterval = nullptr; + physRegRecord->recentRefPosition = nullptr; + } + + // Clear "recentRefPosition" for lclVar intervals + for (unsigned lclNum = 0; lclNum < compiler->lvaCount; lclNum++) + { + localVarIntervals[lclNum]->recentRefPosition = nullptr; + localVarIntervals[lclNum]->isActive = false; + } + + // handle incoming arguments and special temps + auto currentRefPosition = refPositions.begin(); + + VarToRegMap entryVarToRegMap = inVarToRegMaps[compiler->fgFirstBB->bbNum]; + while (currentRefPosition != refPositions.end() && + (currentRefPosition->refType == RefTypeParamDef || currentRefPosition->refType == RefTypeZeroInit)) + { + Interval* interval = currentRefPosition->getInterval(); + assert(interval != nullptr && interval->isLocalVar); + resolveLocalRef(nullptr, nullptr, currentRefPosition); + regNumber reg = REG_STK; + int varIndex = interval->getVarIndex(compiler); + + if (!currentRefPosition->spillAfter && currentRefPosition->registerAssignment != RBM_NONE) + { + reg = currentRefPosition->assignedReg(); + } + else + { + reg = REG_STK; + interval->isActive = false; + } + entryVarToRegMap[varIndex] = reg; + ++currentRefPosition; + } + + JITDUMP("------------------------\n"); + JITDUMP("WRITING BACK ASSIGNMENTS\n"); + JITDUMP("------------------------\n"); + + BasicBlock* insertionBlock = compiler->fgFirstBB; + GenTreePtr insertionPoint = LIR::AsRange(insertionBlock).FirstNonPhiNode(); + + // write back assignments + for (block = startBlockSequence(); block != nullptr; block = moveToNextBlock()) + { + assert(curBBNum == block->bbNum); + +#ifdef DEBUG + if (VERBOSE) + { + block->dspBlockHeader(compiler); + currentRefPosition->dump(); + } +#endif // DEBUG + + // Record the var locations at the start of this block. 
+ // (If it's fgFirstBB, we've already done that above, see entryVarToRegMap) + + curBBStartLocation = currentRefPosition->nodeLocation; + if (block != compiler->fgFirstBB) + { + processBlockStartLocations(block, false); + } + + // Handle the DummyDefs, updating the incoming var location. + for (; currentRefPosition != refPositions.end() && currentRefPosition->refType == RefTypeDummyDef; + ++currentRefPosition) + { + assert(currentRefPosition->isIntervalRef()); + // Don't mark dummy defs as reload + currentRefPosition->reload = false; + resolveLocalRef(nullptr, nullptr, currentRefPosition); + regNumber reg; + if (currentRefPosition->registerAssignment != RBM_NONE) + { + reg = currentRefPosition->assignedReg(); + } + else + { + reg = REG_STK; + currentRefPosition->getInterval()->isActive = false; + } + setInVarRegForBB(curBBNum, currentRefPosition->getInterval()->varNum, reg); + } + + // The next RefPosition should be for the block. Move past it. + assert(currentRefPosition != refPositions.end()); + assert(currentRefPosition->refType == RefTypeBB); + ++currentRefPosition; + + // Handle the RefPositions for the block + for (; currentRefPosition != refPositions.end() && currentRefPosition->refType != RefTypeBB && + currentRefPosition->refType != RefTypeDummyDef; + ++currentRefPosition) + { + currentLocation = currentRefPosition->nodeLocation; + JITDUMP("current : "); + DBEXEC(VERBOSE, currentRefPosition->dump()); + + // Ensure that the spill & copy info is valid. + // First, if it's reload, it must not be copyReg or moveReg + assert(!currentRefPosition->reload || (!currentRefPosition->copyReg && !currentRefPosition->moveReg)); + // If it's copyReg it must not be moveReg, and vice-versa + assert(!currentRefPosition->copyReg || !currentRefPosition->moveReg); + + switch (currentRefPosition->refType) + { +#ifdef FEATURE_SIMD + case RefTypeUpperVectorSaveUse: + case RefTypeUpperVectorSaveDef: +#endif // FEATURE_SIMD + case RefTypeUse: + case RefTypeDef: + // These are the ones we're interested in + break; + case RefTypeKill: + case RefTypeFixedReg: + // These require no handling at resolution time + assert(currentRefPosition->referent != nullptr); + currentRefPosition->referent->recentRefPosition = currentRefPosition; + continue; + case RefTypeExpUse: + // Ignore the ExpUse cases - a RefTypeExpUse would only exist if the + // variable is dead at the entry to the next block. So we'll mark + // it as in its current location and resolution will take care of any + // mismatch. + assert(getNextBlock() == nullptr || + !VarSetOps::IsMember(compiler, getNextBlock()->bbLiveIn, + currentRefPosition->getInterval()->getVarIndex(compiler))); + currentRefPosition->referent->recentRefPosition = currentRefPosition; + continue; + case RefTypeKillGCRefs: + // No action to take at resolution time, and no interval to update recentRefPosition for. + continue; + case RefTypeDummyDef: + case RefTypeParamDef: + case RefTypeZeroInit: + // Should have handled all of these already + default: + unreached(); + break; + } + updateMaxSpill(currentRefPosition); + GenTree* treeNode = currentRefPosition->treeNode; + +#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE + if (currentRefPosition->refType == RefTypeUpperVectorSaveDef) + { + // The treeNode must be a call, and this must be a RefPosition for a LargeVectorType LocalVar. + // If the LocalVar is in a callee-save register, we are going to spill its upper half around the call. 
+                    // If we have allocated a register to spill it to, we will use that; otherwise, we will spill it
+                    // to the stack. We can use as a temp register any non-arg caller-save register.
+                    noway_assert(treeNode != nullptr);
+                    currentRefPosition->referent->recentRefPosition = currentRefPosition;
+                    insertUpperVectorSaveAndReload(treeNode, currentRefPosition, block);
+                }
+                else if (currentRefPosition->refType == RefTypeUpperVectorSaveUse)
+                {
+                    continue;
+                }
+#endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE
+
+                // Most uses won't actually need to be recorded (they're on the def).
+                // In those cases, treeNode will be nullptr.
+                if (treeNode == nullptr)
+                {
+                    // This is either a use, a dead def, or a field of a struct
+                    Interval* interval = currentRefPosition->getInterval();
+                    assert(currentRefPosition->refType == RefTypeUse ||
+                           currentRefPosition->registerAssignment == RBM_NONE || interval->isStructField);
+
+                    // TODO-Review: Need to handle the case where any of the struct fields
+                    // are reloaded/spilled at this use
+                    assert(!interval->isStructField ||
+                           (currentRefPosition->reload == false && currentRefPosition->spillAfter == false));
+
+                    if (interval->isLocalVar && !interval->isStructField)
+                    {
+                        LclVarDsc* varDsc = interval->getLocalVar(compiler);
+
+                        // This must be a dead definition. We need to mark the lclVar
+                        // so that it's not considered a candidate for lvRegister, as
+                        // this dead def will have to go to the stack.
+                        assert(currentRefPosition->refType == RefTypeDef);
+                        varDsc->lvRegNum = REG_STK;
+                    }
+
+                    JITDUMP("No tree node to write back to\n");
+                    continue;
+                }
+
+                DBEXEC(VERBOSE, lsraDispNode(treeNode, LSRA_DUMP_REFPOS, true));
+                JITDUMP("\n");
+
+                LsraLocation loc = treeNode->gtLsraInfo.loc;
+                JITDUMP("curr = %u mapped = %u", currentLocation, loc);
+                assert(treeNode->IsLocal() || currentLocation == loc || currentLocation == loc + 1);
+
+                if (currentRefPosition->isIntervalRef() && currentRefPosition->getInterval()->isInternal)
+                {
+                    JITDUMP(" internal");
+                    GenTreePtr indNode = nullptr;
+                    if (treeNode->OperIsIndir())
+                    {
+                        indNode = treeNode;
+                        JITDUMP(" allocated at GT_IND");
+                    }
+                    if (indNode != nullptr)
+                    {
+                        GenTreePtr addrNode = indNode->gtOp.gtOp1->gtEffectiveVal();
+                        if (addrNode->OperGet() != GT_ARR_ELEM)
+                        {
+                            addrNode->gtRsvdRegs |= currentRefPosition->registerAssignment;
+                            JITDUMP(", recorded on addr");
+                        }
+                    }
+                    if (treeNode->OperGet() == GT_ARR_ELEM)
+                    {
+                        // TODO-Review: See WORKAROUND ALERT in buildRefPositionsForNode()
+                        GenTreePtr firstIndexTree = treeNode->gtArrElem.gtArrInds[0]->gtEffectiveVal();
+                        assert(firstIndexTree != nullptr);
+                        if (firstIndexTree->IsLocal() && (firstIndexTree->gtFlags & GTF_VAR_DEATH) == 0)
+                        {
+                            // Record the LAST internal interval
+                            // (Yes, this naively just records each one, but the next will replace it;
+                            // I'd fix this if it wasn't just a temporary fix)
+                            if (currentRefPosition->refType == RefTypeDef)
+                            {
+                                JITDUMP(" allocated at GT_ARR_ELEM, recorded on firstIndex V%02u",
+                                        firstIndexTree->AsLclVarCommon()->gtLclNum);
+                                firstIndexTree->gtRsvdRegs = (regMaskSmall)currentRefPosition->registerAssignment;
+                            }
+                        }
+                    }
+                    treeNode->gtRsvdRegs |= currentRefPosition->registerAssignment;
+                }
+                else
+                {
+                    writeRegisters(currentRefPosition, treeNode);
+
+                    if (treeNode->IsLocal() && currentRefPosition->getInterval()->isLocalVar)
+                    {
+                        resolveLocalRef(block, treeNode, currentRefPosition);
+                    }
+
+                    // Mark spill locations on temps
+                    // (local vars are handled in resolveLocalRef, above)
+                    // Note that the tree node will be changed from GTF_SPILL to GTF_SPILLED
+                    // in codegen, taking care of the
"reload" case for temps + else if (currentRefPosition->spillAfter || (currentRefPosition->nextRefPosition != nullptr && + currentRefPosition->nextRefPosition->moveReg)) + { + if (treeNode != nullptr && currentRefPosition->isIntervalRef()) + { + if (currentRefPosition->spillAfter) + { + treeNode->gtFlags |= GTF_SPILL; + + // If this is a constant interval that is reusing a pre-existing value, we actually need + // to generate the value at this point in order to spill it. + if (treeNode->IsReuseRegVal()) + { + treeNode->ResetReuseRegVal(); + } + + // In case of multi-reg call node, also set spill flag on the + // register specified by multi-reg index of current RefPosition. + // Note that the spill flag on treeNode indicates that one or + // more its allocated registers are in that state. + if (treeNode->IsMultiRegCall()) + { + GenTreeCall* call = treeNode->AsCall(); + call->SetRegSpillFlagByIdx(GTF_SPILL, currentRefPosition->getMultiRegIdx()); + } + } + + // If the value is reloaded or moved to a different register, we need to insert + // a node to hold the register to which it should be reloaded + RefPosition* nextRefPosition = currentRefPosition->nextRefPosition; + assert(nextRefPosition != nullptr); + if (INDEBUG(alwaysInsertReload() ||) + nextRefPosition->assignedReg() != currentRefPosition->assignedReg()) + { + if (nextRefPosition->assignedReg() != REG_NA) + { + insertCopyOrReload(block, treeNode, currentRefPosition->getMultiRegIdx(), + nextRefPosition); + } + else + { + assert(nextRefPosition->AllocateIfProfitable()); + + // In case of tree temps, if def is spilled and use didn't + // get a register, set a flag on tree node to be treated as + // contained at the point of its use. + if (currentRefPosition->spillAfter && currentRefPosition->refType == RefTypeDef && + nextRefPosition->refType == RefTypeUse) + { + assert(nextRefPosition->treeNode == nullptr); + treeNode->gtFlags |= GTF_NOREG_AT_USE; + } + } + } + } + + // We should never have to "spill after" a temp use, since + // they're single use + else + { + unreached(); + } + } + } + JITDUMP("\n"); + } + + processBlockEndLocations(block); + } + +#ifdef DEBUG + if (VERBOSE) + { + printf("-----------------------\n"); + printf("RESOLVING BB BOUNDARIES\n"); + printf("-----------------------\n"); + + printf("Prior to Resolution\n"); + foreach_block(compiler, block) + { + printf("\nBB%02u use def in out\n", block->bbNum); + dumpConvertedVarSet(compiler, block->bbVarUse); + printf("\n"); + dumpConvertedVarSet(compiler, block->bbVarDef); + printf("\n"); + dumpConvertedVarSet(compiler, block->bbLiveIn); + printf("\n"); + dumpConvertedVarSet(compiler, block->bbLiveOut); + printf("\n"); + + dumpInVarToRegMap(block); + dumpOutVarToRegMap(block); + } + + printf("\n\n"); + } +#endif // DEBUG + + resolveEdges(); + + // Verify register assignments on variables + unsigned lclNum; + LclVarDsc* varDsc; + for (lclNum = 0, varDsc = compiler->lvaTable; lclNum < compiler->lvaCount; lclNum++, varDsc++) + { + if (!isCandidateVar(varDsc)) + { + varDsc->lvRegNum = REG_STK; + } + else + { + Interval* interval = getIntervalForLocalVar(lclNum); + + // Determine initial position for parameters + + if (varDsc->lvIsParam) + { + regMaskTP initialRegMask = interval->firstRefPosition->registerAssignment; + regNumber initialReg = (initialRegMask == RBM_NONE || interval->firstRefPosition->spillAfter) + ? REG_STK + : genRegNumFromMask(initialRegMask); + regNumber sourceReg = (varDsc->lvIsRegArg) ? 
varDsc->lvArgReg : REG_STK; + +#ifdef _TARGET_ARM_ + if (varTypeIsMultiReg(varDsc)) + { + // TODO-ARM-NYI: Map the hi/lo intervals back to lvRegNum and lvOtherReg (these should NYI before + // this) + assert(!"Multi-reg types not yet supported"); + } + else +#endif // _TARGET_ARM_ + { + varDsc->lvArgInitReg = initialReg; + JITDUMP(" Set V%02u argument initial register to %s\n", lclNum, getRegName(initialReg)); + } + if (!varDsc->lvIsRegArg) + { + // stack arg + if (compiler->lvaIsFieldOfDependentlyPromotedStruct(varDsc)) + { + if (sourceReg != initialReg) + { + // The code generator won't initialize struct + // fields, so we have to do that if it's not already + // where it belongs. + assert(interval->isStructField); + JITDUMP(" Move struct field param V%02u from %s to %s\n", lclNum, getRegName(sourceReg), + getRegName(initialReg)); + insertMove(insertionBlock, insertionPoint, lclNum, sourceReg, initialReg); + } + } + } + } + + // If lvRegNum is REG_STK, that means that either no register + // was assigned, or (more likely) that the same register was not + // used for all references. In that case, codegen gets the register + // from the tree node. + if (varDsc->lvRegNum == REG_STK || interval->isSpilled || interval->isSplit) + { + // For codegen purposes, we'll set lvRegNum to whatever register + // it's currently in as we go. + // However, we never mark an interval as lvRegister if it has either been spilled + // or split. + varDsc->lvRegister = false; + + // Skip any dead defs or exposed uses + // (first use exposed will only occur when there is no explicit initialization) + RefPosition* firstRefPosition = interval->firstRefPosition; + while ((firstRefPosition != nullptr) && (firstRefPosition->refType == RefTypeExpUse)) + { + firstRefPosition = firstRefPosition->nextRefPosition; + } + if (firstRefPosition == nullptr) + { + // Dead interval + varDsc->lvLRACandidate = false; + if (varDsc->lvRefCnt == 0) + { + varDsc->lvOnFrame = false; + } + else + { + // We may encounter cases where a lclVar actually has no references, but + // a non-zero refCnt. For safety (in case this is some "hidden" lclVar that we're + // not correctly recognizing), we'll mark those as needing a stack location. + // TODO-Cleanup: Make this an assert if/when we correct the refCnt + // updating. + varDsc->lvOnFrame = true; + } + } + else + { + // If the interval was not spilled, it doesn't need a stack location. 
+ if (!interval->isSpilled) + { + varDsc->lvOnFrame = false; + } + if (firstRefPosition->registerAssignment == RBM_NONE || firstRefPosition->spillAfter) + { + // Either this RefPosition is spilled, or it is not a "real" def or use + assert(firstRefPosition->spillAfter || + (firstRefPosition->refType != RefTypeDef && firstRefPosition->refType != RefTypeUse)); + varDsc->lvRegNum = REG_STK; + } + else + { + varDsc->lvRegNum = firstRefPosition->assignedReg(); + } + } + } + else + { + { + varDsc->lvRegister = true; + varDsc->lvOnFrame = false; + } +#ifdef DEBUG + regMaskTP registerAssignment = genRegMask(varDsc->lvRegNum); + assert(!interval->isSpilled && !interval->isSplit); + RefPosition* refPosition = interval->firstRefPosition; + assert(refPosition != nullptr); + + while (refPosition != nullptr) + { + // All RefPositions must match, except for dead definitions, + // copyReg/moveReg and RefTypeExpUse positions + if (refPosition->registerAssignment != RBM_NONE && !refPosition->copyReg && !refPosition->moveReg && + refPosition->refType != RefTypeExpUse) + { + assert(refPosition->registerAssignment == registerAssignment); + } + refPosition = refPosition->nextRefPosition; + } +#endif // DEBUG + } + } + } + +#ifdef DEBUG + if (VERBOSE) + { + printf("Trees after linear scan register allocator (LSRA)\n"); + compiler->fgDispBasicBlocks(true); + } + + verifyFinalAllocation(); +#endif // DEBUG + + compiler->raMarkStkVars(); + recordMaxSpill(); + + // TODO-CQ: Review this comment and address as needed. + // Change all unused promoted non-argument struct locals to a non-GC type (in this case TYP_INT) + // so that the gc tracking logic and lvMustInit logic will ignore them. + // Extract the code that does this from raAssignVars, and call it here. + // PRECONDITIONS: Ensure that lvPromoted is set on promoted structs, if and + // only if it is promoted on all paths. + // Call might be something like: + // compiler->BashUnusedStructLocals(); +} + +// +//------------------------------------------------------------------------ +// insertMove: Insert a move of a lclVar with the given lclNum into the given block. +// +// Arguments: +// block - the BasicBlock into which the move will be inserted. +// insertionPoint - the instruction before which to insert the move +// lclNum - the lclNum of the var to be moved +// fromReg - the register from which the var is moving +// toReg - the register to which the var is moving +// +// Return Value: +// None. +// +// Notes: +// If insertionPoint is non-NULL, insert before that instruction; +// otherwise, insert "near" the end (prior to the branch, if any). +// If fromReg or toReg is REG_STK, then move from/to memory, respectively. + +void LinearScan::insertMove( + BasicBlock* block, GenTreePtr insertionPoint, unsigned lclNum, regNumber fromReg, regNumber toReg) +{ + LclVarDsc* varDsc = compiler->lvaTable + lclNum; + // One or both MUST be a register + assert(fromReg != REG_STK || toReg != REG_STK); + // They must not be the same register. 
+ assert(fromReg != toReg); + + // This var can't be marked lvRegister now + varDsc->lvRegNum = REG_STK; + + var_types lclTyp = varDsc->TypeGet(); + if (varDsc->lvNormalizeOnStore()) + { + lclTyp = genActualType(lclTyp); + } + GenTreePtr src = compiler->gtNewLclvNode(lclNum, lclTyp); + src->gtLsraInfo.isLsraAdded = true; + GenTreePtr top; + + // If we are moving from STK to reg, mark the lclVar nodes with GTF_SPILLED + // Otherwise, if we are moving from reg to stack, mark it as GTF_SPILL + // Finally, for a reg-to-reg move, generate a GT_COPY + + top = src; + if (fromReg == REG_STK) + { + src->gtFlags |= GTF_SPILLED; + src->gtRegNum = toReg; + } + else if (toReg == REG_STK) + { + src->gtFlags |= GTF_SPILL; + src->SetInReg(); + src->gtRegNum = fromReg; + } + else + { + top = new (compiler, GT_COPY) GenTreeCopyOrReload(GT_COPY, varDsc->TypeGet(), src); + // This is the new home of the lclVar - indicate that by clearing the GTF_VAR_DEATH flag. + // Note that if src is itself a lastUse, this will have no effect. + top->gtFlags &= ~(GTF_VAR_DEATH); + src->gtRegNum = fromReg; + src->SetInReg(); + top->gtRegNum = toReg; + src->gtNext = top; + top->gtPrev = src; + src->gtLsraInfo.isLocalDefUse = false; + top->gtLsraInfo.isLsraAdded = true; + } + top->gtLsraInfo.isLocalDefUse = true; + + LIR::Range treeRange = LIR::SeqTree(compiler, top); + LIR::Range& blockRange = LIR::AsRange(block); + + if (insertionPoint != nullptr) + { + blockRange.InsertBefore(insertionPoint, std::move(treeRange)); + } + else + { + // Put the copy at the bottom + // If there's a branch, make an embedded statement that executes just prior to the branch + if (block->bbJumpKind == BBJ_COND || block->bbJumpKind == BBJ_SWITCH) + { + noway_assert(!blockRange.IsEmpty()); + + GenTree* branch = blockRange.LastNode(); + assert(branch->OperGet() == GT_JTRUE || branch->OperGet() == GT_SWITCH_TABLE || + branch->OperGet() == GT_SWITCH); + + blockRange.InsertBefore(branch, std::move(treeRange)); + } + else + { + assert(block->bbJumpKind == BBJ_NONE || block->bbJumpKind == BBJ_ALWAYS); + blockRange.InsertAtEnd(std::move(treeRange)); + } + } +} + +void LinearScan::insertSwap( + BasicBlock* block, GenTreePtr insertionPoint, unsigned lclNum1, regNumber reg1, unsigned lclNum2, regNumber reg2) +{ +#ifdef DEBUG + if (VERBOSE) + { + const char* insertionPointString = "top"; + if (insertionPoint == nullptr) + { + insertionPointString = "bottom"; + } + printf(" BB%02u %s: swap V%02u in %s with V%02u in %s\n", block->bbNum, insertionPointString, lclNum1, + getRegName(reg1), lclNum2, getRegName(reg2)); + } +#endif // DEBUG + + LclVarDsc* varDsc1 = compiler->lvaTable + lclNum1; + LclVarDsc* varDsc2 = compiler->lvaTable + lclNum2; + assert(reg1 != REG_STK && reg1 != REG_NA && reg2 != REG_STK && reg2 != REG_NA); + + GenTreePtr lcl1 = compiler->gtNewLclvNode(lclNum1, varDsc1->TypeGet()); + lcl1->gtLsraInfo.isLsraAdded = true; + lcl1->gtLsraInfo.isLocalDefUse = false; + lcl1->SetInReg(); + lcl1->gtRegNum = reg1; + + GenTreePtr lcl2 = compiler->gtNewLclvNode(lclNum2, varDsc2->TypeGet()); + lcl2->gtLsraInfo.isLsraAdded = true; + lcl2->gtLsraInfo.isLocalDefUse = false; + lcl2->SetInReg(); + lcl2->gtRegNum = reg2; + + GenTreePtr swap = compiler->gtNewOperNode(GT_SWAP, TYP_VOID, lcl1, lcl2); + swap->gtLsraInfo.isLsraAdded = true; + swap->gtLsraInfo.isLocalDefUse = false; + swap->gtRegNum = REG_NA; + + lcl1->gtNext = lcl2; + lcl2->gtPrev = lcl1; + lcl2->gtNext = swap; + swap->gtPrev = lcl2; + + LIR::Range swapRange = LIR::SeqTree(compiler, swap); + LIR::Range& 
blockRange = LIR::AsRange(block);
+
+    if (insertionPoint != nullptr)
+    {
+        blockRange.InsertBefore(insertionPoint, std::move(swapRange));
+    }
+    else
+    {
+        // Put the swap at the bottom.
+        // If there's a branch, make an embedded statement that executes just prior to the branch
+        if (block->bbJumpKind == BBJ_COND || block->bbJumpKind == BBJ_SWITCH)
+        {
+            noway_assert(!blockRange.IsEmpty());
+
+            GenTree* branch = blockRange.LastNode();
+            assert(branch->OperGet() == GT_JTRUE || branch->OperGet() == GT_SWITCH_TABLE ||
+                   branch->OperGet() == GT_SWITCH);
+
+            blockRange.InsertBefore(branch, std::move(swapRange));
+        }
+        else
+        {
+            assert(block->bbJumpKind == BBJ_NONE || block->bbJumpKind == BBJ_ALWAYS);
+            blockRange.InsertAtEnd(std::move(swapRange));
+        }
+    }
+}
+
+//------------------------------------------------------------------------
+// getTempRegForResolution: Get a free register to use for resolution code.
+//
+// Arguments:
+//    fromBlock - The "from" block on the edge being resolved.
+//    toBlock   - The "to" block on the edge
+//    type      - the type of register required
+//
+// Return Value:
+//    Returns a register that is free on the given edge, or REG_NA if none is available.
+//
+// Notes:
+//    It is up to the caller to check the return value, and to determine whether a register is
+//    available, and to handle that case appropriately.
+//    It is also up to the caller to cache the return value, as this is not cheap to compute.
+
+regNumber LinearScan::getTempRegForResolution(BasicBlock* fromBlock, BasicBlock* toBlock, var_types type)
+{
+    // TODO-Throughput: This would be much more efficient if we add RegToVarMaps instead of VarToRegMaps
+    // and they would be more space-efficient as well.
+    VarToRegMap fromVarToRegMap = getOutVarToRegMap(fromBlock->bbNum);
+    VarToRegMap toVarToRegMap = getInVarToRegMap(toBlock->bbNum);
+
+    regMaskTP freeRegs = allRegs(type);
+#ifdef DEBUG
+    if (getStressLimitRegs() == LSRA_LIMIT_SMALL_SET)
+    {
+        return REG_NA;
+    }
+#endif // DEBUG
+    INDEBUG(freeRegs = stressLimitRegs(nullptr, freeRegs));
+
+    // We are only interested in the variables that are live-in to the "to" block.
+    VARSET_ITER_INIT(compiler, iter, toBlock->bbLiveIn, varIndex);
+    while (iter.NextElem(compiler, &varIndex) && freeRegs != RBM_NONE)
+    {
+        regNumber fromReg = fromVarToRegMap[varIndex];
+        regNumber toReg = toVarToRegMap[varIndex];
+        assert(fromReg != REG_NA && toReg != REG_NA);
+        if (fromReg != REG_STK)
+        {
+            freeRegs &= ~genRegMask(fromReg);
+        }
+        if (toReg != REG_STK)
+        {
+            freeRegs &= ~genRegMask(toReg);
+        }
+    }
+    if (freeRegs == RBM_NONE)
+    {
+        return REG_NA;
+    }
+    else
+    {
+        regNumber tempReg = genRegNumFromMask(genFindLowestBit(freeRegs));
+        return tempReg;
+    }
+}
+
+//------------------------------------------------------------------------
+// addResolution: Add a resolution move of the given interval
+//
+// Arguments:
+//    block          - the BasicBlock into which the move will be inserted.
+//    insertionPoint - the instruction before which to insert the move
+//    interval       - the interval of the var to be moved
+//    toReg          - the register to which the var is moving
+//    fromReg        - the register from which the var is moving
+//
+// Return Value:
+//    None.
+//
+// Notes:
+//    For joins, we insert at the bottom (indicated by an insertionPoint
+//    of nullptr), while for splits we insert at the top.
+//    This is because for joins 'block' is a pred of the join, while for splits it is a succ.
+//    For critical edges, this function may be called twice - once to move from
+//    the source (fromReg), if any, to the stack, in which case toReg will be
+//    REG_STK, and we insert at the bottom (leave insertionPoint as nullptr).
+//    The next time, we want to move from the stack to the destination (toReg),
+//    in which case fromReg will be REG_STK, and we insert at the top.
+
+void LinearScan::addResolution(
+    BasicBlock* block, GenTreePtr insertionPoint, Interval* interval, regNumber toReg, regNumber fromReg)
+{
+#ifdef DEBUG
+    const char* insertionPointString = "top";
+#endif // DEBUG
+    if (insertionPoint == nullptr)
+    {
+#ifdef DEBUG
+        insertionPointString = "bottom";
+#endif // DEBUG
+    }
+
+    JITDUMP("   BB%02u %s: move V%02u from ", block->bbNum, insertionPointString, interval->varNum);
+    JITDUMP("%s to %s", getRegName(fromReg), getRegName(toReg));
+
+    insertMove(block, insertionPoint, interval->varNum, fromReg, toReg);
+    if (fromReg == REG_STK || toReg == REG_STK)
+    {
+        interval->isSpilled = true;
+    }
+    else
+    {
+        interval->isSplit = true;
+    }
+}
+
+//------------------------------------------------------------------------
+// handleOutgoingCriticalEdges: Performs the necessary resolution on all critical edges that feed out of 'block'
+//
+// Arguments:
+//    block - the block with outgoing critical edges.
+//
+// Return Value:
+//    None.
+//
+// Notes:
+//    For all outgoing critical edges (i.e. any successor of this block which is
+//    a join edge), if there are any conflicts, split the edge by adding a new block,
+//    and generate the resolution code into that block.
+
+void LinearScan::handleOutgoingCriticalEdges(BasicBlock* block)
+{
+    VARSET_TP VARSET_INIT_NOCOPY(sameResolutionSet, VarSetOps::MakeEmpty(compiler));
+    VARSET_TP VARSET_INIT_NOCOPY(sameLivePathsSet, VarSetOps::MakeEmpty(compiler));
+    VARSET_TP VARSET_INIT_NOCOPY(singleTargetSet, VarSetOps::MakeEmpty(compiler));
+    VARSET_TP VARSET_INIT_NOCOPY(diffResolutionSet, VarSetOps::MakeEmpty(compiler));
+
+    // Get the outVarToRegMap for this block
+    VarToRegMap outVarToRegMap = getOutVarToRegMap(block->bbNum);
+    unsigned succCount = block->NumSucc(compiler);
+    assert(succCount > 1);
+    VarToRegMap firstSuccInVarToRegMap = nullptr;
+    BasicBlock* firstSucc = nullptr;
+
+    // First, determine the live regs at the end of this block so that we know what regs are
+    // available to copy into.
+    regMaskTP liveOutRegs = RBM_NONE;
+    VARSET_ITER_INIT(compiler, iter1, block->bbLiveOut, varIndex1);
+    while (iter1.NextElem(compiler, &varIndex1))
+    {
+        unsigned varNum = compiler->lvaTrackedToVarNum[varIndex1];
+        regNumber fromReg = getVarReg(outVarToRegMap, varNum);
+        if (fromReg != REG_STK)
+        {
+            liveOutRegs |= genRegMask(fromReg);
+        }
+    }
+
+    // Next, if this block ends with a switch table, we have to make sure not to copy
+    // into the registers that it uses.
+    regMaskTP switchRegs = RBM_NONE;
+    if (block->bbJumpKind == BBJ_SWITCH)
+    {
+        // At this point, Lowering has transformed any non-switch-table blocks into
+        // cascading ifs.
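+        //
+        // For example (hypothetical registers, purely illustrative): if the switch
+        // table computes its jump target using RAX and RDX, the code below leaves us with
+        //
+        //     switchRegs = switchTable->gtRsvdRegs | genRegMask(REG_RAX) | genRegMask(REG_RDX);
+        //
+        // and any candidate 'sameToReg' that overlaps this mask is rejected further down.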
+ GenTree* switchTable = LIR::AsRange(block).LastNode(); + assert(switchTable != nullptr && switchTable->OperGet() == GT_SWITCH_TABLE); + + switchRegs = switchTable->gtRsvdRegs; + GenTree* op1 = switchTable->gtGetOp1(); + GenTree* op2 = switchTable->gtGetOp2(); + noway_assert(op1 != nullptr && op2 != nullptr); + assert(op1->gtRegNum != REG_NA && op2->gtRegNum != REG_NA); + switchRegs |= genRegMask(op1->gtRegNum); + switchRegs |= genRegMask(op2->gtRegNum); + } + + VarToRegMap sameVarToRegMap = sharedCriticalVarToRegMap; + regMaskTP sameWriteRegs = RBM_NONE; + regMaskTP diffReadRegs = RBM_NONE; + + // For each var, classify them as: + // - in the same register at the end of this block and at each target (no resolution needed) + // - in different registers at different targets (resolve separately): + // diffResolutionSet + // - in the same register at each target at which it's live, but different from the end of + // this block. We may be able to resolve these as if it is "join", but only if they do not + // write to any registers that are read by those in the diffResolutionSet: + // sameResolutionSet + + VARSET_ITER_INIT(compiler, iter, block->bbLiveOut, varIndex); + while (iter.NextElem(compiler, &varIndex)) + { + unsigned varNum = compiler->lvaTrackedToVarNum[varIndex]; + regNumber fromReg = getVarReg(outVarToRegMap, varNum); + bool isMatch = true; + bool isSame = false; + bool maybeSingleTarget = false; + bool maybeSameLivePaths = false; + bool liveOnlyAtSplitEdge = true; + regNumber sameToReg = REG_NA; + for (unsigned succIndex = 0; succIndex < succCount; succIndex++) + { + BasicBlock* succBlock = block->GetSucc(succIndex, compiler); + if (!VarSetOps::IsMember(compiler, succBlock->bbLiveIn, varIndex)) + { + maybeSameLivePaths = true; + continue; + } + else if (liveOnlyAtSplitEdge) + { + // Is the var live only at those target blocks which are connected by a split edge to this block + liveOnlyAtSplitEdge = ((succBlock->bbPreds->flNext == nullptr) && (succBlock != compiler->fgFirstBB)); + } + + regNumber toReg = getVarReg(getInVarToRegMap(succBlock->bbNum), varNum); + if (sameToReg == REG_NA) + { + sameToReg = toReg; + continue; + } + if (toReg == sameToReg) + { + continue; + } + sameToReg = REG_NA; + break; + } + + // Check for the cases where we can't write to a register. + // We only need to check for these cases if sameToReg is an actual register (not REG_STK). + if (sameToReg != REG_NA && sameToReg != REG_STK) + { + // If there's a path on which this var isn't live, it may use the original value in sameToReg. + // In this case, sameToReg will be in the liveOutRegs of this block. + // Similarly, if sameToReg is in sameWriteRegs, it has already been used (i.e. for a lclVar that's + // live only at another target), and we can't copy another lclVar into that reg in this block. + regMaskTP sameToRegMask = genRegMask(sameToReg); + if (maybeSameLivePaths && + (((sameToRegMask & liveOutRegs) != RBM_NONE) || ((sameToRegMask & sameWriteRegs) != RBM_NONE))) + { + sameToReg = REG_NA; + } + // If this register is used by a switch table at the end of the block, we can't do the copy + // in this block (since we can't insert it after the switch). 
+            if ((sameToRegMask & switchRegs) != RBM_NONE)
+            {
+                sameToReg = REG_NA;
+            }
+
+            // If the var is live only at those blocks connected by a split edge and not live-in at some of the
+            // target blocks, we will resolve it the same way as if it were in diffResolutionSet and resolution
+            // will be deferred to the handling of split edges, which means the copy will only be at those target(s).
+            //
+            // Another way to achieve similar resolution for vars live only at split edges is by removing them
+            // from consideration up-front, but it requires that we traverse those edges anyway to account for
+            // the registers that must not be overwritten.
+            if (liveOnlyAtSplitEdge && maybeSameLivePaths)
+            {
+                sameToReg = REG_NA;
+            }
+        }
+
+        if (sameToReg == REG_NA)
+        {
+            VarSetOps::AddElemD(compiler, diffResolutionSet, varIndex);
+            if (fromReg != REG_STK)
+            {
+                diffReadRegs |= genRegMask(fromReg);
+            }
+        }
+        else if (sameToReg != fromReg)
+        {
+            VarSetOps::AddElemD(compiler, sameResolutionSet, varIndex);
+            sameVarToRegMap[varIndex] = sameToReg;
+            if (sameToReg != REG_STK)
+            {
+                sameWriteRegs |= genRegMask(sameToReg);
+            }
+        }
+    }
+
+    if (!VarSetOps::IsEmpty(compiler, sameResolutionSet))
+    {
+        if ((sameWriteRegs & diffReadRegs) != RBM_NONE)
+        {
+            // We cannot split the "same" and "diff" regs if the "same" set writes registers
+            // that must be read by the "diff" set. (Note that when these are done as a "batch"
+            // we carefully order them to ensure all the input regs are read before they are
+            // overwritten.)
+            VarSetOps::UnionD(compiler, diffResolutionSet, sameResolutionSet);
+            VarSetOps::ClearD(compiler, sameResolutionSet);
+        }
+        else
+        {
+            // For any vars in the sameResolutionSet, we can simply add the move at the end of "block".
+            resolveEdge(block, nullptr, ResolveSharedCritical, sameResolutionSet);
+        }
+    }
+    if (!VarSetOps::IsEmpty(compiler, diffResolutionSet))
+    {
+        for (unsigned succIndex = 0; succIndex < succCount; succIndex++)
+        {
+            BasicBlock* succBlock = block->GetSucc(succIndex, compiler);
+
+            // Any "diffResolutionSet" resolution for a block with no other predecessors will be handled later
+            // as split resolution.
+            if ((succBlock->bbPreds->flNext == nullptr) && (succBlock != compiler->fgFirstBB))
+            {
+                continue;
+            }
+
+            // Now collect the resolution set for just this edge, if any.
+            // Check only the vars in diffResolutionSet that are live-in to this successor.
+            bool needsResolution = false;
+            VarToRegMap succInVarToRegMap = getInVarToRegMap(succBlock->bbNum);
+            VARSET_TP VARSET_INIT_NOCOPY(edgeResolutionSet,
+                                         VarSetOps::Intersection(compiler, diffResolutionSet, succBlock->bbLiveIn));
+            VARSET_ITER_INIT(compiler, iter, edgeResolutionSet, varIndex);
+            while (iter.NextElem(compiler, &varIndex))
+            {
+                unsigned varNum = compiler->lvaTrackedToVarNum[varIndex];
+                Interval* interval = getIntervalForLocalVar(varNum);
+                regNumber fromReg = getVarReg(outVarToRegMap, varNum);
+                regNumber toReg = getVarReg(succInVarToRegMap, varNum);
+
+                if (fromReg == toReg)
+                {
+                    VarSetOps::RemoveElemD(compiler, edgeResolutionSet, varIndex);
+                }
+            }
+            if (!VarSetOps::IsEmpty(compiler, edgeResolutionSet))
+            {
+                resolveEdge(block, succBlock, ResolveCritical, edgeResolutionSet);
+            }
+        }
+    }
+}
+
+//------------------------------------------------------------------------
+// resolveEdges: Perform resolution across basic block edges
+//
+// Arguments:
+//    None.
+//
+// Return Value:
+//    None.
+//
+// Notes:
+//    Traverse the basic blocks.
+//    - If this block has a single predecessor that is not the immediately
+//      preceding block, perform any needed 'split' resolution at the beginning of this block.
+//    - Otherwise, if this block has critical incoming edges, handle them.
+//    - If this block has a single successor that has multiple predecessors, perform any needed
+//      'join' resolution at the end of this block.
+//    Note that a block may have both 'split' or 'critical' incoming edge(s) and 'join' outgoing
+//    edges.
+
+void LinearScan::resolveEdges()
+{
+    JITDUMP("RESOLVING EDGES\n");
+
+    BasicBlock *block, *prevBlock = nullptr;
+
+    // Handle all the critical edges first.
+    // We will try to avoid resolution across critical edges in cases where all the critical-edge
+    // targets of a block have the same home.  We will then split the edges only for the
+    // remaining mismatches.  We visit the out-edges, as that allows us to share the moves that are
+    // common among all the targets.
+
+    foreach_block(compiler, block)
+    {
+        if (block->bbNum > bbNumMaxBeforeResolution)
+        {
+            // This is a new block added during resolution - we don't need to visit these now.
+            continue;
+        }
+        if (blockInfo[block->bbNum].hasCriticalOutEdge)
+        {
+            handleOutgoingCriticalEdges(block);
+        }
+        prevBlock = block;
+    }
+
+    prevBlock = nullptr;
+    foreach_block(compiler, block)
+    {
+        if (block->bbNum > bbNumMaxBeforeResolution)
+        {
+            // This is a new block added during resolution - we don't need to visit these now.
+            continue;
+        }
+
+        unsigned    succCount       = block->NumSucc(compiler);
+        flowList*   preds           = block->bbPreds;
+        BasicBlock* uniquePredBlock = block->GetUniquePred(compiler);
+
+        // First, if this block has a single predecessor,
+        // we may need resolution at the beginning of this block.
+        // This may be true even if it's the block we used for starting locations,
+        // if a variable was spilled.
+        if (!VarSetOps::IsEmpty(compiler, block->bbLiveIn))
+        {
+            if (uniquePredBlock != nullptr)
+            {
+                // We may have split edges during critical edge resolution, and in the process split
+                // a non-critical edge as well.
+                // It is unlikely that we would ever have more than one of these in sequence (indeed,
+                // I don't think it's possible), but there's no need to assume that it can't.
+                while (uniquePredBlock->bbNum > bbNumMaxBeforeResolution)
+                {
+                    uniquePredBlock = uniquePredBlock->GetUniquePred(compiler);
+                    noway_assert(uniquePredBlock != nullptr);
+                }
+                resolveEdge(uniquePredBlock, block, ResolveSplit, block->bbLiveIn);
+            }
+        }
+
+        // Finally, if this block has a single successor:
+        //  - and that successor has at least one other predecessor (otherwise we will do the resolution at the
+        //    top of the successor),
+        //  - and that successor is not the target of a critical edge (otherwise we've already handled it),
+        // we may need resolution at the end of this block.
+
+        if (succCount == 1)
+        {
+            BasicBlock* succBlock = block->GetSucc(0, compiler);
+            if (succBlock->GetUniquePred(compiler) == nullptr)
+            {
+                resolveEdge(block, succBlock, ResolveJoin, succBlock->bbLiveIn);
+            }
+        }
+    }
+
+    // Now, fix up the mapping for any blocks that were added for edge splitting.
+    // See the comment prior to the call to fgSplitEdge() in resolveEdge().
+    // Note that we could fold this loop in with the checking code below, but that
+    // would only improve the debug case, and would clutter up the code somewhat.
+    if (compiler->fgBBNumMax > bbNumMaxBeforeResolution)
+    {
+        foreach_block(compiler, block)
+        {
+            if (block->bbNum > bbNumMaxBeforeResolution)
+            {
+                // There may be multiple blocks inserted when we split.  But we must always have exactly
+                // one path (i.e. all blocks must be single-successor and single-predecessor),
+                // and only one block along the path may be non-empty.
+                // Note that we may have a newly-inserted block that is empty, but which connects
+                // two non-resolution blocks.  This happens when fgSplitEdge() has to insert an additional
+                // block to preserve a fall-through edge while splitting another edge (see the comment in
+                // resolveEdge()).
+
+                BasicBlock* succBlock = block;
+                do
+                {
+                    succBlock = succBlock->GetUniqueSucc();
+                    noway_assert(succBlock != nullptr);
+                } while ((succBlock->bbNum > bbNumMaxBeforeResolution) && succBlock->isEmpty());
+
+                BasicBlock* predBlock = block;
+                do
+                {
+                    predBlock = predBlock->GetUniquePred(compiler);
+                    noway_assert(predBlock != nullptr);
+                } while ((predBlock->bbNum > bbNumMaxBeforeResolution) && predBlock->isEmpty());
+
+                unsigned succBBNum = succBlock->bbNum;
+                unsigned predBBNum = predBlock->bbNum;
+                if (block->isEmpty())
+                {
+                    // For the case of the empty block, find the non-resolution block (succ or pred).
+                    if (predBBNum > bbNumMaxBeforeResolution)
+                    {
+                        assert(succBBNum <= bbNumMaxBeforeResolution);
+                        predBBNum = 0;
+                    }
+                    else
+                    {
+                        succBBNum = 0;
+                    }
+                }
+                else
+                {
+                    assert((succBBNum <= bbNumMaxBeforeResolution) && (predBBNum <= bbNumMaxBeforeResolution));
+                }
+                SplitEdgeInfo info = {predBBNum, succBBNum};
+                getSplitBBNumToTargetBBNumMap()->Set(block->bbNum, info);
+            }
+        }
+    }
+
+#ifdef DEBUG
+    // Make sure the varToRegMaps match up on all edges.
+    bool foundMismatch = false;
+    foreach_block(compiler, block)
+    {
+        if (block->isEmpty() && block->bbNum > bbNumMaxBeforeResolution)
+        {
+            continue;
+        }
+        VarToRegMap toVarToRegMap = getInVarToRegMap(block->bbNum);
+        for (flowList* pred = block->bbPreds; pred != nullptr; pred = pred->flNext)
+        {
+            BasicBlock* predBlock       = pred->flBlock;
+            VarToRegMap fromVarToRegMap = getOutVarToRegMap(predBlock->bbNum);
+            VARSET_ITER_INIT(compiler, iter, block->bbLiveIn, varIndex);
+            while (iter.NextElem(compiler, &varIndex))
+            {
+                unsigned  varNum  = compiler->lvaTrackedToVarNum[varIndex];
+                regNumber fromReg = getVarReg(fromVarToRegMap, varNum);
+                regNumber toReg   = getVarReg(toVarToRegMap, varNum);
+                if (fromReg != toReg)
+                {
+                    Interval* interval = getIntervalForLocalVar(varNum);
+                    if (!foundMismatch)
+                    {
+                        foundMismatch = true;
+                        printf("Found mismatched var locations after resolution!\n");
+                    }
+                    printf(" V%02u: BB%02u to BB%02u: ", varNum, predBlock->bbNum, block->bbNum);
+                    printf("%s to %s\n", getRegName(fromReg), getRegName(toReg));
+                }
+            }
+        }
+    }
+    assert(!foundMismatch);
+#endif
+    JITDUMP("\n");
+}
+
+//------------------------------------------------------------------------
+// resolveEdge: Perform the specified type of resolution between two blocks.
+//
+// Arguments:
+//    fromBlock   - the block from which the edge originates
+//    toBlock     - the block at which the edge terminates
+//    resolveType - the type of resolution to be performed
+//    liveSet     - the set of tracked lclVar indices which may require resolution
+//
+// Return Value:
+//    None.
+//
+// Assumptions:
+//    The caller must have performed the analysis to determine the type of the edge.
+//
+// Notes:
+//    This method emits the correctly ordered moves necessary to place variables in the
+//    correct registers across a Split, Join or Critical edge.
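+//    The moves are added at the end of 'fromBlock' for a Join or SharedCritical edge, at the
+//    beginning of 'toBlock' for a Split edge, and in a new block created by fgSplitEdge() for
+//    a Critical edge.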
+//    In order to avoid overwriting register values before they have been moved to their
+//    new home (register/stack), it first does the register-to-stack moves (to free those
+//    registers), then the register-to-register moves, ensuring that the target register
+//    is free before the move, and then finally the stack-to-register moves.
+
+void LinearScan::resolveEdge(BasicBlock*      fromBlock,
+                             BasicBlock*      toBlock,
+                             ResolveType      resolveType,
+                             VARSET_VALARG_TP liveSet)
+{
+    VarToRegMap fromVarToRegMap = getOutVarToRegMap(fromBlock->bbNum);
+    VarToRegMap toVarToRegMap;
+    if (resolveType == ResolveSharedCritical)
+    {
+        toVarToRegMap = sharedCriticalVarToRegMap;
+    }
+    else
+    {
+        toVarToRegMap = getInVarToRegMap(toBlock->bbNum);
+    }
+
+    // The block to which we add the resolution moves depends on the resolveType
+    BasicBlock* block;
+    switch (resolveType)
+    {
+        case ResolveJoin:
+        case ResolveSharedCritical:
+            block = fromBlock;
+            break;
+        case ResolveSplit:
+            block = toBlock;
+            break;
+        case ResolveCritical:
+            // fgSplitEdge may add one or two BasicBlocks.  It returns the block that splits
+            // the edge between 'fromBlock' and 'toBlock', but if it inserts that block right after
+            // a block with a fall-through it will have to create another block to handle that edge.
+            // These new blocks can be mapped to existing blocks in order to correctly handle
+            // the calls to recordVarLocationsAtStartOfBB() from codegen.  That mapping is handled
+            // in resolveEdges(), after all the edge resolution has been done (by calling this
+            // method for each edge).
+            block = compiler->fgSplitEdge(fromBlock, toBlock);
+            break;
+        default:
+            unreached();
+            break;
+    }
+
+#ifndef _TARGET_XARCH_
+    // We record tempregs for beginning and end of each block.
+    // For amd64/x86 we only need a tempReg for float - we'll use xchg for int.
+    // TODO-Throughput: It would be better to determine the tempRegs on demand, but the code below
+    // modifies the varToRegMaps so we don't have all the correct registers at the time
+    // we need to get the tempReg.
+    regNumber tempRegInt =
+        (resolveType == ResolveSharedCritical) ? REG_NA : getTempRegForResolution(fromBlock, toBlock, TYP_INT);
+#endif // !_TARGET_XARCH_
+    regNumber tempRegFlt = REG_NA;
+    if ((compiler->compFloatingPointUsed) && (resolveType != ResolveSharedCritical))
+    {
+        tempRegFlt = getTempRegForResolution(fromBlock, toBlock, TYP_FLOAT);
+    }
+
+    regMaskTP targetRegsToDo      = RBM_NONE;
+    regMaskTP targetRegsReady     = RBM_NONE;
+    regMaskTP targetRegsFromStack = RBM_NONE;
+
+    // The following arrays capture the location of the registers as they are moved:
+    // - location[reg] gives the current location of the var that was originally in 'reg'.
+    //   (Note that a var may be moved more than once.)
+    // - source[reg] gives the original location of the var that needs to be moved to 'reg'.
+    // For example, if a var is in rax and needs to be moved to rsi, then we would start with:
+    //   location[rax] == rax
+    //   source[rsi] == rax     -- this doesn't change
+    // Then, if for some reason we need to move it temporarily to rbx, we would have:
+    //   location[rax] == rbx
+    // Once we have completed the move, we will have:
+    //   location[rax] == REG_NA
+    // This indicates that the var originally in rax is now in its target register.
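+    // As a further (purely illustrative) example, consider an edge that requires both
+    // rax -> rsi and rsi -> rax.  Then:
+    //   source[rsi] == rax, source[rax] == rsi
+    //   location[rax] == rax, location[rsi] == rsi
+    // Neither target register is initially free, so the cycle must be broken: on xarch this is
+    // done with a swap (effectively an xchg of rax and rsi), while on other targets one of the
+    // values is first moved to a temporary register (or spilled to the stack, if no temporary
+    // is available), which makes its old register a ready target for the other move.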
+ + regNumberSmall location[REG_COUNT]; + C_ASSERT(sizeof(char) == sizeof(regNumberSmall)); // for memset to work + memset(location, REG_NA, REG_COUNT); + regNumberSmall source[REG_COUNT]; + memset(source, REG_NA, REG_COUNT); + + // What interval is this register associated with? + // (associated with incoming reg) + Interval* sourceIntervals[REG_COUNT] = {nullptr}; + + // Intervals for vars that need to be loaded from the stack + Interval* stackToRegIntervals[REG_COUNT] = {nullptr}; + + // Get the starting insertion point for the "to" resolution + GenTreePtr insertionPoint = nullptr; + if (resolveType == ResolveSplit || resolveType == ResolveCritical) + { + insertionPoint = LIR::AsRange(block).FirstNonPhiNode(); + } + + // First: + // - Perform all moves from reg to stack (no ordering needed on these) + // - For reg to reg moves, record the current location, associating their + // source location with the target register they need to go into + // - For stack to reg moves (done last, no ordering needed between them) + // record the interval associated with the target reg + // TODO-Throughput: We should be looping over the liveIn and liveOut registers, since + // that will scale better than the live variables + + VARSET_ITER_INIT(compiler, iter, liveSet, varIndex); + while (iter.NextElem(compiler, &varIndex)) + { + unsigned varNum = compiler->lvaTrackedToVarNum[varIndex]; + bool isSpilled = false; + Interval* interval = getIntervalForLocalVar(varNum); + regNumber fromReg = getVarReg(fromVarToRegMap, varNum); + regNumber toReg = getVarReg(toVarToRegMap, varNum); + if (fromReg == toReg) + { + continue; + } + + // For Critical edges, the location will not change on either side of the edge, + // since we'll add a new block to do the move. + if (resolveType == ResolveSplit) + { + toVarToRegMap[varIndex] = fromReg; + } + else if (resolveType == ResolveJoin || resolveType == ResolveSharedCritical) + { + fromVarToRegMap[varIndex] = toReg; + } + + assert(fromReg < UCHAR_MAX && toReg < UCHAR_MAX); + + bool done = false; + + if (fromReg != toReg) + { + if (fromReg == REG_STK) + { + stackToRegIntervals[toReg] = interval; + targetRegsFromStack |= genRegMask(toReg); + } + else if (toReg == REG_STK) + { + // Do the reg to stack moves now + addResolution(block, insertionPoint, interval, REG_STK, fromReg); + JITDUMP(" (%s)\n", resolveTypeName[resolveType]); + } + else + { + location[fromReg] = (regNumberSmall)fromReg; + source[toReg] = (regNumberSmall)fromReg; + sourceIntervals[fromReg] = interval; + targetRegsToDo |= genRegMask(toReg); + } + } + } + + // REGISTER to REGISTER MOVES + + // First, find all the ones that are ready to move now + regMaskTP targetCandidates = targetRegsToDo; + while (targetCandidates != RBM_NONE) + { + regMaskTP targetRegMask = genFindLowestBit(targetCandidates); + targetCandidates &= ~targetRegMask; + regNumber targetReg = genRegNumFromMask(targetRegMask); + if (location[targetReg] == REG_NA) + { + targetRegsReady |= targetRegMask; + } + } + + // Perform reg to reg moves + while (targetRegsToDo != RBM_NONE) + { + while (targetRegsReady != RBM_NONE) + { + regMaskTP targetRegMask = genFindLowestBit(targetRegsReady); + targetRegsToDo &= ~targetRegMask; + targetRegsReady &= ~targetRegMask; + regNumber targetReg = genRegNumFromMask(targetRegMask); + assert(location[targetReg] != targetReg); + regNumber sourceReg = (regNumber)source[targetReg]; + regNumber fromReg = (regNumber)location[sourceReg]; + assert(fromReg < UCHAR_MAX && sourceReg < UCHAR_MAX); + Interval* interval = 
sourceIntervals[sourceReg]; + assert(interval != nullptr); + addResolution(block, insertionPoint, interval, targetReg, fromReg); + JITDUMP(" (%s)\n", resolveTypeName[resolveType]); + sourceIntervals[sourceReg] = nullptr; + location[sourceReg] = REG_NA; + + // Do we have a free targetReg? + if (fromReg == sourceReg && source[fromReg] != REG_NA) + { + regMaskTP fromRegMask = genRegMask(fromReg); + targetRegsReady |= fromRegMask; + } + } + if (targetRegsToDo != RBM_NONE) + { + regMaskTP targetRegMask = genFindLowestBit(targetRegsToDo); + regNumber targetReg = genRegNumFromMask(targetRegMask); + + // Is it already there due to other moves? + // If not, move it to the temp reg, OR swap it with another register + regNumber sourceReg = (regNumber)source[targetReg]; + regNumber fromReg = (regNumber)location[sourceReg]; + if (targetReg == fromReg) + { + targetRegsToDo &= ~targetRegMask; + } + else + { + regNumber tempReg = REG_NA; + bool useSwap = false; + if (emitter::isFloatReg(targetReg)) + { + tempReg = tempRegFlt; + } +#ifdef _TARGET_XARCH_ + else + { + useSwap = true; + } +#else // !_TARGET_XARCH_ + else + { + tempReg = tempRegInt; + } +#endif // !_TARGET_XARCH_ + if (useSwap || tempReg == REG_NA) + { + // First, we have to figure out the destination register for what's currently in fromReg, + // so that we can find its sourceInterval. + regNumber otherTargetReg = REG_NA; + + // By chance, is fromReg going where it belongs? + if (location[source[fromReg]] == targetReg) + { + otherTargetReg = fromReg; + // If we can swap, we will be done with otherTargetReg as well. + // Otherwise, we'll spill it to the stack and reload it later. + if (useSwap) + { + regMaskTP fromRegMask = genRegMask(fromReg); + targetRegsToDo &= ~fromRegMask; + } + } + else + { + // Look at the remaining registers from targetRegsToDo (which we expect to be relatively + // small at this point) to find out what's currently in targetReg. + regMaskTP mask = targetRegsToDo; + while (mask != RBM_NONE && otherTargetReg == REG_NA) + { + regMaskTP nextRegMask = genFindLowestBit(mask); + regNumber nextReg = genRegNumFromMask(nextRegMask); + mask &= ~nextRegMask; + if (location[source[nextReg]] == targetReg) + { + otherTargetReg = nextReg; + } + } + } + assert(otherTargetReg != REG_NA); + + if (useSwap) + { + // Generate a "swap" of fromReg and targetReg + insertSwap(block, insertionPoint, sourceIntervals[source[otherTargetReg]]->varNum, targetReg, + sourceIntervals[sourceReg]->varNum, fromReg); + location[sourceReg] = REG_NA; + location[source[otherTargetReg]] = (regNumberSmall)fromReg; + } + else + { + // Spill "targetReg" to the stack and add its eventual target (otherTargetReg) + // to "targetRegsFromStack", which will be handled below. + // NOTE: This condition is very rare. Setting COMPlus_JitStressRegs=0x203 + // has been known to trigger it in JIT SH. + + // First, spill "otherInterval" from targetReg to the stack. + Interval* otherInterval = sourceIntervals[source[otherTargetReg]]; + addResolution(block, insertionPoint, otherInterval, REG_STK, targetReg); + JITDUMP(" (%s)\n", resolveTypeName[resolveType]); + location[source[otherTargetReg]] = REG_STK; + + // Now, move the interval that is going to targetReg, and add its "fromReg" to + // "targetRegsReady". 
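+                        // (fromReg itself becomes free once this move completes, so any move
+                        // whose target is fromReg is now ready to emit.)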
+                        addResolution(block, insertionPoint, sourceIntervals[sourceReg], targetReg, fromReg);
+                        JITDUMP(" (%s)\n", resolveTypeName[resolveType]);
+                        location[sourceReg] = REG_NA;
+                        targetRegsReady |= genRegMask(fromReg);
+                    }
+                    targetRegsToDo &= ~targetRegMask;
+                }
+                else
+                {
+                    compiler->codeGen->regSet.rsSetRegsModified(genRegMask(tempReg) DEBUGARG(dumpTerse));
+                    assert(sourceIntervals[targetReg] != nullptr);
+                    addResolution(block, insertionPoint, sourceIntervals[targetReg], tempReg, targetReg);
+                    JITDUMP(" (%s)\n", resolveTypeName[resolveType]);
+                    location[targetReg] = (regNumberSmall)tempReg;
+                    targetRegsReady |= targetRegMask;
+                }
+            }
+        }
+    }
+
+    // Finally, perform stack to reg moves
+    // All the target regs will be empty at this point
+    while (targetRegsFromStack != RBM_NONE)
+    {
+        regMaskTP targetRegMask = genFindLowestBit(targetRegsFromStack);
+        targetRegsFromStack &= ~targetRegMask;
+        regNumber targetReg = genRegNumFromMask(targetRegMask);
+
+        Interval* interval = stackToRegIntervals[targetReg];
+        assert(interval != nullptr);
+
+        addResolution(block, insertionPoint, interval, targetReg, REG_STK);
+        JITDUMP(" (%s)\n", resolveTypeName[resolveType]);
+    }
+}
+
+void TreeNodeInfo::Initialize(LinearScan* lsra, GenTree* node, LsraLocation location)
+{
+    regMaskTP dstCandidates;
+
+    // If there is a reg indicated on the tree node, use that for dstCandidates.
+    // The exception is the NOP, which sometimes shows up around late args.
+    // TODO-Cleanup: get rid of those NOPs.
+    if (node->gtRegNum == REG_NA || node->gtOper == GT_NOP)
+    {
+        dstCandidates = lsra->allRegs(node->TypeGet());
+    }
+    else
+    {
+        dstCandidates = genRegMask(node->gtRegNum);
+    }
+
+    internalIntCount      = 0;
+    internalFloatCount    = 0;
+    isLocalDefUse         = false;
+    isHelperCallWithKills = false;
+    isLsraAdded           = false;
+    definesAnyRegisters   = false;
+
+    setDstCandidates(lsra, dstCandidates);
+    srcCandsIndex = dstCandsIndex;
+
+    setInternalCandidates(lsra, lsra->allRegs(TYP_INT));
+
+    loc = location;
+#ifdef DEBUG
+    isInitialized = true;
+#endif
+
+    assert(IsValid(lsra));
+}
+
+regMaskTP TreeNodeInfo::getSrcCandidates(LinearScan* lsra)
+{
+    return lsra->GetRegMaskForIndex(srcCandsIndex);
+}
+
+void TreeNodeInfo::setSrcCandidates(LinearScan* lsra, regMaskTP mask)
+{
+    LinearScan::RegMaskIndex i = lsra->GetIndexForRegMask(mask);
+    assert(FitsIn<unsigned char>(i));
+    srcCandsIndex = (unsigned char)i;
+}
+
+regMaskTP TreeNodeInfo::getDstCandidates(LinearScan* lsra)
+{
+    return lsra->GetRegMaskForIndex(dstCandsIndex);
+}
+
+void TreeNodeInfo::setDstCandidates(LinearScan* lsra, regMaskTP mask)
+{
+    LinearScan::RegMaskIndex i = lsra->GetIndexForRegMask(mask);
+    assert(FitsIn<unsigned char>(i));
+    dstCandsIndex = (unsigned char)i;
+}
+
+regMaskTP TreeNodeInfo::getInternalCandidates(LinearScan* lsra)
+{
+    return lsra->GetRegMaskForIndex(internalCandsIndex);
+}
+
+void TreeNodeInfo::setInternalCandidates(LinearScan* lsra, regMaskTP mask)
+{
+    LinearScan::RegMaskIndex i = lsra->GetIndexForRegMask(mask);
+    assert(FitsIn<unsigned char>(i));
+    internalCandsIndex = (unsigned char)i;
+}
+
+void TreeNodeInfo::addInternalCandidates(LinearScan* lsra, regMaskTP mask)
+{
+    LinearScan::RegMaskIndex i = lsra->GetIndexForRegMask(lsra->GetRegMaskForIndex(internalCandsIndex) | mask);
+    assert(FitsIn<unsigned char>(i));
+    internalCandsIndex = (unsigned char)i;
+}
+
+#ifdef DEBUG
+void dumpRegMask(regMaskTP regs)
+{
+    if (regs == RBM_ALLINT)
+    {
+        printf("[allInt]");
+    }
+    else if (regs == (RBM_ALLINT & ~RBM_FPBASE))
+    {
+        printf("[allIntButFP]");
+    }
+    else if (regs ==
RBM_ALLFLOAT) + { + printf("[allFloat]"); + } + else if (regs == RBM_ALLDOUBLE) + { + printf("[allDouble]"); + } + else + { + dspRegMask(regs); + } +} + +static const char* getRefTypeName(RefType refType) +{ + switch (refType) + { +#define DEF_REFTYPE(memberName, memberValue, shortName) \ + case memberName: \ + return #memberName; +#include "lsra_reftypes.h" +#undef DEF_REFTYPE + default: + return nullptr; + } +} + +static const char* getRefTypeShortName(RefType refType) +{ + switch (refType) + { +#define DEF_REFTYPE(memberName, memberValue, shortName) \ + case memberName: \ + return shortName; +#include "lsra_reftypes.h" +#undef DEF_REFTYPE + default: + return nullptr; + } +} + +void RefPosition::dump() +{ + printf("<RefPosition #%-3u @%-3u", rpNum, nodeLocation); + + if (nextRefPosition) + { + printf(" ->#%-3u", nextRefPosition->rpNum); + } + + printf(" %s ", getRefTypeName(refType)); + + if (this->isPhysRegRef) + { + this->getReg()->tinyDump(); + } + else if (getInterval()) + { + this->getInterval()->tinyDump(); + } + + if (this->treeNode) + { + printf("%s ", treeNode->OpName(treeNode->OperGet())); + } + printf("BB%02u ", this->bbNum); + + printf("regmask="); + dumpRegMask(registerAssignment); + + if (this->lastUse) + { + printf(" last"); + } + if (this->reload) + { + printf(" reload"); + } + if (this->spillAfter) + { + printf(" spillAfter"); + } + if (this->moveReg) + { + printf(" move"); + } + if (this->copyReg) + { + printf(" copy"); + } + if (this->isFixedRegRef) + { + printf(" fixed"); + } + if (this->isLocalDefUse) + { + printf(" local"); + } + if (this->delayRegFree) + { + printf(" delay"); + } + if (this->outOfOrder) + { + printf(" outOfOrder"); + } + printf(">\n"); +} + +void RegRecord::dump() +{ + tinyDump(); +} + +void Interval::dump() +{ + printf("Interval %2u:", intervalIndex); + + if (isLocalVar) + { + printf(" (V%02u)", varNum); + } + if (isInternal) + { + printf(" (INTERNAL)"); + } + if (isSpilled) + { + printf(" (SPILLED)"); + } + if (isSplit) + { + printf(" (SPLIT)"); + } + if (isStructField) + { + printf(" (struct)"); + } + if (isSpecialPutArg) + { + printf(" (specialPutArg)"); + } + if (isConstant) + { + printf(" (constant)"); + } + + printf(" RefPositions {"); + for (RefPosition* refPosition = this->firstRefPosition; refPosition != nullptr; + refPosition = refPosition->nextRefPosition) + { + printf("#%u@%u", refPosition->rpNum, refPosition->nodeLocation); + if (refPosition->nextRefPosition) + { + printf(" "); + } + } + printf("}"); + + // this is not used (yet?) 
+ // printf(" SpillOffset %d", this->spillOffset); + + printf(" physReg:%s", getRegName(physReg)); + + printf(" Preferences="); + dumpRegMask(this->registerPreferences); + + if (relatedInterval) + { + printf(" RelatedInterval "); + relatedInterval->microDump(); + printf("[%p]", dspPtr(relatedInterval)); + } + + printf("\n"); +} + +// print out very concise representation +void Interval::tinyDump() +{ + printf("<Ivl:%u", intervalIndex); + if (isLocalVar) + { + printf(" V%02u", varNum); + } + if (isInternal) + { + printf(" internal"); + } + printf("> "); +} + +// print out extremely concise representation +void Interval::microDump() +{ + char intervalTypeChar = 'I'; + if (isInternal) + { + intervalTypeChar = 'T'; + } + else if (isLocalVar) + { + intervalTypeChar = 'L'; + } + + printf("<%c%u>", intervalTypeChar, intervalIndex); +} + +void RegRecord::tinyDump() +{ + printf("<Reg:%-3s> ", getRegName(regNum)); +} + +void TreeNodeInfo::dump(LinearScan* lsra) +{ + printf("<TreeNodeInfo @ %2u %d=%d %di %df", loc, dstCount, srcCount, internalIntCount, internalFloatCount); + printf(" src="); + dumpRegMask(getSrcCandidates(lsra)); + printf(" int="); + dumpRegMask(getInternalCandidates(lsra)); + printf(" dst="); + dumpRegMask(getDstCandidates(lsra)); + if (isLocalDefUse) + { + printf(" L"); + } + if (isInitialized) + { + printf(" I"); + } + if (isHelperCallWithKills) + { + printf(" H"); + } + if (isLsraAdded) + { + printf(" A"); + } + if (isDelayFree) + { + printf(" D"); + } + if (isTgtPref) + { + printf(" P"); + } + printf(">\n"); +} + +void LinearScan::lsraDumpIntervals(const char* msg) +{ + Interval* interval; + + printf("\nLinear scan intervals %s:\n", msg); + for (auto& interval : intervals) + { + // only dump something if it has references + // if (interval->firstRefPosition) + interval.dump(); + } + + printf("\n"); +} + +// Dumps a tree node as a destination or source operand, with the style +// of dump dependent on the mode +void LinearScan::lsraGetOperandString(GenTreePtr tree, + LsraTupleDumpMode mode, + char* operandString, + unsigned operandStringLength) +{ + const char* lastUseChar = ""; + if ((tree->gtFlags & GTF_VAR_DEATH) != 0) + { + lastUseChar = "*"; + } + switch (mode) + { + case LinearScan::LSRA_DUMP_PRE: + _snprintf_s(operandString, operandStringLength, operandStringLength, "t%d%s", tree->gtSeqNum, lastUseChar); + break; + case LinearScan::LSRA_DUMP_REFPOS: + _snprintf_s(operandString, operandStringLength, operandStringLength, "t%d%s", tree->gtSeqNum, lastUseChar); + break; + case LinearScan::LSRA_DUMP_POST: + { + Compiler* compiler = JitTls::GetCompiler(); + + if (!tree->gtHasReg()) + { + _snprintf_s(operandString, operandStringLength, operandStringLength, "STK%s", lastUseChar); + } + else + { + _snprintf_s(operandString, operandStringLength, operandStringLength, "%s%s", + getRegName(tree->gtRegNum, useFloatReg(tree->TypeGet())), lastUseChar); + } + } + break; + default: + printf("ERROR: INVALID TUPLE DUMP MODE\n"); + break; + } +} +void LinearScan::lsraDispNode(GenTreePtr tree, LsraTupleDumpMode mode, bool hasDest) +{ + Compiler* compiler = JitTls::GetCompiler(); + const unsigned operandStringLength = 16; + char operandString[operandStringLength]; + const char* emptyDestOperand = " "; + char spillChar = ' '; + + if (mode == LinearScan::LSRA_DUMP_POST) + { + if ((tree->gtFlags & GTF_SPILL) != 0) + { + spillChar = 'S'; + } + if (!hasDest && tree->gtHasReg()) + { + // This can be true for the "localDefUse" case - defining a reg, but + // pushing it on the stack + assert(spillChar == 
' ');
+            spillChar = '*';
+            hasDest   = true;
+        }
+    }
+    printf("%c N%03u. ", spillChar, tree->gtSeqNum);
+
+    LclVarDsc* varDsc = nullptr;
+    unsigned   varNum = UINT_MAX;
+    if (tree->IsLocal())
+    {
+        varNum = tree->gtLclVarCommon.gtLclNum;
+        varDsc = &(compiler->lvaTable[varNum]);
+        if (varDsc->lvLRACandidate)
+        {
+            hasDest = false;
+        }
+    }
+    if (hasDest)
+    {
+        if (mode == LinearScan::LSRA_DUMP_POST && tree->gtFlags & GTF_SPILLED)
+        {
+            assert(tree->gtHasReg());
+        }
+        lsraGetOperandString(tree, mode, operandString, operandStringLength);
+        printf("%-15s =", operandString);
+    }
+    else
+    {
+        printf("%-15s ", emptyDestOperand);
+    }
+    if (varDsc != nullptr)
+    {
+        if (varDsc->lvLRACandidate)
+        {
+            if (mode == LSRA_DUMP_REFPOS)
+            {
+                printf(" V%02u(L%d)", varNum, getIntervalForLocalVar(varNum)->intervalIndex);
+            }
+            else
+            {
+                lsraGetOperandString(tree, mode, operandString, operandStringLength);
+                printf(" V%02u(%s)", varNum, operandString);
+                if (mode == LinearScan::LSRA_DUMP_POST && tree->gtFlags & GTF_SPILLED)
+                {
+                    printf("R");
+                }
+            }
+        }
+        else
+        {
+            printf(" V%02u MEM", varNum);
+        }
+    }
+    else if (tree->OperIsAssignment())
+    {
+        assert(!tree->gtHasReg());
+        const char* isRev = "";
+        if ((tree->gtFlags & GTF_REVERSE_OPS) != 0)
+        {
+            isRev = "(Rev)";
+        }
+        printf(" asg%s%s ", GenTree::NodeName(tree->OperGet()), isRev);
+    }
+    else
+    {
+        compiler->gtDispNodeName(tree);
+        if ((tree->gtFlags & GTF_REVERSE_OPS) != 0)
+        {
+            printf("(Rev)");
+        }
+        if (tree->OperKind() & GTK_LEAF)
+        {
+            compiler->gtDispLeaf(tree, nullptr);
+        }
+    }
+}
+
+//------------------------------------------------------------------------
+// DumpOperandDefs: dumps the registers defined by an operand.
+//
+// For most nodes, this is simple:
+//   - Nodes that do not produce values (e.g. stores and other void-typed
+//     nodes) and nodes that immediately use the registers they define
+//     produce no registers
+//   - Nodes that are marked as defining N registers define N registers.
+//
+// For contained nodes, however, things are more complicated: for purposes
+// of bookkeeping, a contained node is treated as producing the transitive
+// closure of the registers produced by its sources, so this method recurses
+// into the operands of a contained node.
+//
+// Arguments:
+//    operand             - The operand whose defs should be dumped.
+//    first               - True if no defs have yet been printed for the current node.
+//    mode                - The tuple dump mode.
+//    operandString       - The buffer used to format each def.
+//    operandStringLength - The length of 'operandString'.
+//
+void LinearScan::DumpOperandDefs(
+    GenTree* operand, bool& first, LsraTupleDumpMode mode, char* operandString, const unsigned operandStringLength)
+{
+    assert(operand != nullptr);
+    assert(operandString != nullptr);
+
+    if (ComputeOperandDstCount(operand) == 0)
+    {
+        return;
+    }
+
+    if (operand->gtLsraInfo.dstCount != 0)
+    {
+        // This operand directly produces registers; print it.
+        for (int i = 0; i < operand->gtLsraInfo.dstCount; i++)
+        {
+            if (!first)
+            {
+                printf(",");
+            }
+
+            lsraGetOperandString(operand, mode, operandString, operandStringLength);
+            printf("%s", operandString);
+
+            first = false;
+        }
+    }
+    else
+    {
+        // This is a contained node. Dump the defs produced by its operands.
+ for (GenTree* op : operand->Operands()) + { + DumpOperandDefs(op, first, mode, operandString, operandStringLength); + } + } +} + +void LinearScan::TupleStyleDump(LsraTupleDumpMode mode) +{ + BasicBlock* block; + LsraLocation currentLoc = 1; // 0 is the entry + const unsigned operandStringLength = 16; + char operandString[operandStringLength]; + + // currentRefPosition is not used for LSRA_DUMP_PRE + // We keep separate iterators for defs, so that we can print them + // on the lhs of the dump + auto currentRefPosition = refPositions.begin(); + + switch (mode) + { + case LSRA_DUMP_PRE: + printf("TUPLE STYLE DUMP BEFORE LSRA\n"); + break; + case LSRA_DUMP_REFPOS: + printf("TUPLE STYLE DUMP WITH REF POSITIONS\n"); + break; + case LSRA_DUMP_POST: + printf("TUPLE STYLE DUMP WITH REGISTER ASSIGNMENTS\n"); + break; + default: + printf("ERROR: INVALID TUPLE DUMP MODE\n"); + return; + } + + if (mode != LSRA_DUMP_PRE) + { + printf("Incoming Parameters: "); + for (; currentRefPosition != refPositions.end() && currentRefPosition->refType != RefTypeBB; + ++currentRefPosition) + { + Interval* interval = currentRefPosition->getInterval(); + assert(interval != nullptr && interval->isLocalVar); + printf(" V%02d", interval->varNum); + if (mode == LSRA_DUMP_POST) + { + regNumber reg; + if (currentRefPosition->registerAssignment == RBM_NONE) + { + reg = REG_STK; + } + else + { + reg = currentRefPosition->assignedReg(); + } + LclVarDsc* varDsc = &(compiler->lvaTable[interval->varNum]); + printf("("); + regNumber assignedReg = varDsc->lvRegNum; + regNumber argReg = (varDsc->lvIsRegArg) ? varDsc->lvArgReg : REG_STK; + + assert(reg == assignedReg || varDsc->lvRegister == false); + if (reg != argReg) + { + printf(getRegName(argReg, isFloatRegType(interval->registerType))); + printf("=>"); + } + printf("%s)", getRegName(reg, isFloatRegType(interval->registerType))); + } + } + printf("\n"); + } + + for (block = startBlockSequence(); block != nullptr; block = moveToNextBlock()) + { + currentLoc += 2; + + if (mode == LSRA_DUMP_REFPOS) + { + bool printedBlockHeader = false; + // We should find the boundary RefPositions in the order of exposed uses, dummy defs, and the blocks + for (; currentRefPosition != refPositions.end() && + (currentRefPosition->refType == RefTypeExpUse || currentRefPosition->refType == RefTypeDummyDef || + (currentRefPosition->refType == RefTypeBB && !printedBlockHeader)); + ++currentRefPosition) + { + Interval* interval = nullptr; + if (currentRefPosition->isIntervalRef()) + { + interval = currentRefPosition->getInterval(); + } + switch (currentRefPosition->refType) + { + case RefTypeExpUse: + assert(interval != nullptr); + assert(interval->isLocalVar); + printf(" Exposed use of V%02u at #%d\n", interval->varNum, currentRefPosition->rpNum); + break; + case RefTypeDummyDef: + assert(interval != nullptr); + assert(interval->isLocalVar); + printf(" Dummy def of V%02u at #%d\n", interval->varNum, currentRefPosition->rpNum); + break; + case RefTypeBB: + block->dspBlockHeader(compiler); + printedBlockHeader = true; + printf("=====\n"); + break; + default: + printf("Unexpected RefPosition type at #%d\n", currentRefPosition->rpNum); + break; + } + } + } + else + { + block->dspBlockHeader(compiler); + printf("=====\n"); + } + if (mode == LSRA_DUMP_POST && block != compiler->fgFirstBB && block->bbNum <= bbNumMaxBeforeResolution) + { + printf("Predecessor for variable locations: BB%02u\n", blockInfo[block->bbNum].predBBNum); + dumpInVarToRegMap(block); + } + if (block->bbNum > bbNumMaxBeforeResolution) + { 
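+            // This block was added during resolution; look up the original edge that it was
+            // inserted to split, so that it can be reported.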
+ SplitEdgeInfo splitEdgeInfo; + splitBBNumToTargetBBNumMap->Lookup(block->bbNum, &splitEdgeInfo); + assert(splitEdgeInfo.toBBNum <= bbNumMaxBeforeResolution); + assert(splitEdgeInfo.fromBBNum <= bbNumMaxBeforeResolution); + printf("New block introduced for resolution from BB%02u to BB%02u\n", splitEdgeInfo.fromBBNum, + splitEdgeInfo.toBBNum); + } + + for (GenTree* node : LIR::AsRange(block).NonPhiNodes()) + { + GenTree* tree = node; + + genTreeOps oper = tree->OperGet(); + TreeNodeInfo& info = tree->gtLsraInfo; + if (tree->gtLsraInfo.isLsraAdded) + { + // This must be one of the nodes that we add during LSRA + + if (oper == GT_LCL_VAR) + { + info.srcCount = 0; + info.dstCount = 1; + } + else if (oper == GT_RELOAD || oper == GT_COPY) + { + info.srcCount = 1; + info.dstCount = 1; + } +#ifdef FEATURE_SIMD + else if (oper == GT_SIMD) + { + if (tree->gtSIMD.gtSIMDIntrinsicID == SIMDIntrinsicUpperSave) + { + info.srcCount = 1; + info.dstCount = 1; + } + else + { + assert(tree->gtSIMD.gtSIMDIntrinsicID == SIMDIntrinsicUpperRestore); + info.srcCount = 2; + info.dstCount = 0; + } + } +#endif // FEATURE_SIMD + else + { + assert(oper == GT_SWAP); + info.srcCount = 2; + info.dstCount = 0; + } + info.internalIntCount = 0; + info.internalFloatCount = 0; + } + + int consume = info.srcCount; + int produce = info.dstCount; + regMaskTP killMask = RBM_NONE; + regMaskTP fixedMask = RBM_NONE; + + lsraDispNode(tree, mode, produce != 0 && mode != LSRA_DUMP_REFPOS); + + if (mode != LSRA_DUMP_REFPOS) + { + if (consume > 0) + { + printf("; "); + + bool first = true; + for (GenTree* operand : tree->Operands()) + { + DumpOperandDefs(operand, first, mode, operandString, operandStringLength); + } + } + } + else + { + // Print each RefPosition on a new line, but + // printing all the kills for each node on a single line + // and combining the fixed regs with their associated def or use + bool killPrinted = false; + RefPosition* lastFixedRegRefPos = nullptr; + for (; currentRefPosition != refPositions.end() && + (currentRefPosition->refType == RefTypeUse || currentRefPosition->refType == RefTypeFixedReg || + currentRefPosition->refType == RefTypeKill || currentRefPosition->refType == RefTypeDef) && + (currentRefPosition->nodeLocation == tree->gtSeqNum || + currentRefPosition->nodeLocation == tree->gtSeqNum + 1); + ++currentRefPosition) + { + Interval* interval = nullptr; + if (currentRefPosition->isIntervalRef()) + { + interval = currentRefPosition->getInterval(); + } + switch (currentRefPosition->refType) + { + case RefTypeUse: + if (currentRefPosition->isPhysRegRef) + { + printf("\n Use:R%d(#%d)", + currentRefPosition->getReg()->regNum, currentRefPosition->rpNum); + } + else + { + assert(interval != nullptr); + printf("\n Use:"); + interval->microDump(); + printf("(#%d)", currentRefPosition->rpNum); + if (currentRefPosition->isFixedRegRef) + { + assert(genMaxOneBit(currentRefPosition->registerAssignment)); + assert(lastFixedRegRefPos != nullptr); + printf(" Fixed:%s(#%d)", getRegName(currentRefPosition->assignedReg(), + isFloatRegType(interval->registerType)), + lastFixedRegRefPos->rpNum); + lastFixedRegRefPos = nullptr; + } + if (currentRefPosition->isLocalDefUse) + { + printf(" LocalDefUse"); + } + if (currentRefPosition->lastUse) + { + printf(" *"); + } + } + break; + case RefTypeDef: + { + // Print each def on a new line + assert(interval != nullptr); + printf("\n Def:"); + interval->microDump(); + printf("(#%d)", currentRefPosition->rpNum); + if (currentRefPosition->isFixedRegRef) + { + 
assert(genMaxOneBit(currentRefPosition->registerAssignment)); + printf(" %s", getRegName(currentRefPosition->assignedReg(), + isFloatRegType(interval->registerType))); + } + if (currentRefPosition->isLocalDefUse) + { + printf(" LocalDefUse"); + } + if (currentRefPosition->lastUse) + { + printf(" *"); + } + if (interval->relatedInterval != nullptr) + { + printf(" Pref:"); + interval->relatedInterval->microDump(); + } + } + break; + case RefTypeKill: + if (!killPrinted) + { + printf("\n Kill: "); + killPrinted = true; + } + printf(getRegName(currentRefPosition->assignedReg(), + isFloatRegType(currentRefPosition->getReg()->registerType))); + printf(" "); + break; + case RefTypeFixedReg: + lastFixedRegRefPos = currentRefPosition; + break; + default: + printf("Unexpected RefPosition type at #%d\n", currentRefPosition->rpNum); + break; + } + } + } + printf("\n"); + if (info.internalIntCount != 0 && mode != LSRA_DUMP_REFPOS) + { + printf("\tinternal (%d):\t", info.internalIntCount); + if (mode == LSRA_DUMP_POST) + { + dumpRegMask(tree->gtRsvdRegs); + } + else if ((info.getInternalCandidates(this) & allRegs(TYP_INT)) != allRegs(TYP_INT)) + { + dumpRegMask(info.getInternalCandidates(this) & allRegs(TYP_INT)); + } + printf("\n"); + } + if (info.internalFloatCount != 0 && mode != LSRA_DUMP_REFPOS) + { + printf("\tinternal (%d):\t", info.internalFloatCount); + if (mode == LSRA_DUMP_POST) + { + dumpRegMask(tree->gtRsvdRegs); + } + else if ((info.getInternalCandidates(this) & allRegs(TYP_INT)) != allRegs(TYP_INT)) + { + dumpRegMask(info.getInternalCandidates(this) & allRegs(TYP_INT)); + } + printf("\n"); + } + } + if (mode == LSRA_DUMP_POST) + { + dumpOutVarToRegMap(block); + } + printf("\n"); + } + printf("\n\n"); +} + +void LinearScan::dumpLsraAllocationEvent(LsraDumpEvent event, + Interval* interval, + regNumber reg, + BasicBlock* currentBlock) +{ + if (!(VERBOSE)) + { + return; + } + switch (event) + { + // Conflicting def/use + case LSRA_EVENT_DEFUSE_CONFLICT: + if (!dumpTerse) + { + printf(" Def and Use have conflicting register requirements:"); + } + else + { + printf("DUconflict "); + dumpRegRecords(); + } + break; + case LSRA_EVENT_DEFUSE_FIXED_DELAY_USE: + if (!dumpTerse) + { + printf(" Can't change useAssignment "); + } + break; + case LSRA_EVENT_DEFUSE_CASE1: + if (!dumpTerse) + { + printf(" case #1, use the defRegAssignment\n"); + } + else + { + printf(indentFormat, " case #1 use defRegAssignment"); + dumpRegRecords(); + dumpEmptyRefPosition(); + } + break; + case LSRA_EVENT_DEFUSE_CASE2: + if (!dumpTerse) + { + printf(" case #2, use the useRegAssignment\n"); + } + else + { + printf(indentFormat, " case #2 use useRegAssignment"); + dumpRegRecords(); + dumpEmptyRefPosition(); + } + break; + case LSRA_EVENT_DEFUSE_CASE3: + if (!dumpTerse) + { + printf(" case #3, change the defRegAssignment to the use regs\n"); + } + else + { + printf(indentFormat, " case #3 use useRegAssignment"); + dumpRegRecords(); + dumpEmptyRefPosition(); + } + break; + case LSRA_EVENT_DEFUSE_CASE4: + if (!dumpTerse) + { + printf(" case #4, change the useRegAssignment to the def regs\n"); + } + else + { + printf(indentFormat, " case #4 use defRegAssignment"); + dumpRegRecords(); + dumpEmptyRefPosition(); + } + break; + case LSRA_EVENT_DEFUSE_CASE5: + if (!dumpTerse) + { + printf(" case #5, Conflicting Def and Use single-register requirements require copies - set def to all " + "regs of the appropriate type\n"); + } + else + { + printf(indentFormat, " case #5 set def to all regs"); + dumpRegRecords(); + 
dumpEmptyRefPosition(); + } + break; + case LSRA_EVENT_DEFUSE_CASE6: + if (!dumpTerse) + { + printf(" case #6, Conflicting Def and Use register requirements require a copy\n"); + } + else + { + printf(indentFormat, " case #6 need a copy"); + dumpRegRecords(); + dumpEmptyRefPosition(); + } + break; + + case LSRA_EVENT_SPILL: + if (!dumpTerse) + { + printf("Spilled:\n"); + interval->dump(); + } + else + { + assert(interval != nullptr && interval->assignedReg != nullptr); + printf("Spill %-4s ", getRegName(interval->assignedReg->regNum)); + dumpRegRecords(); + dumpEmptyRefPosition(); + } + break; + case LSRA_EVENT_SPILL_EXTENDED_LIFETIME: + if (!dumpTerse) + { + printf(" Spilled extended lifetime var V%02u at last use; not marked for actual spill.", + interval->intervalIndex); + } + break; + + // Restoring the previous register + case LSRA_EVENT_RESTORE_PREVIOUS_INTERVAL_AFTER_SPILL: + assert(interval != nullptr); + if (!dumpTerse) + { + printf(" Assign register %s to previous interval Ivl:%d after spill\n", getRegName(reg), + interval->intervalIndex); + } + else + { + // If we spilled, then the dump is already pre-indented, but we need to pre-indent for the subsequent + // allocation + // with a dumpEmptyRefPosition(). + printf("SRstr %-4s ", getRegName(reg)); + dumpRegRecords(); + dumpEmptyRefPosition(); + } + break; + case LSRA_EVENT_RESTORE_PREVIOUS_INTERVAL: + assert(interval != nullptr); + if (!dumpTerse) + { + printf(" Assign register %s to previous interval Ivl:%d\n", getRegName(reg), interval->intervalIndex); + } + else + { + if (activeRefPosition == nullptr) + { + printf(emptyRefPositionFormat, ""); + } + printf("Restr %-4s ", getRegName(reg)); + dumpRegRecords(); + if (activeRefPosition != nullptr) + { + printf(emptyRefPositionFormat, ""); + } + } + break; + + // Done with GC Kills + case LSRA_EVENT_DONE_KILL_GC_REFS: + printf("DoneKillGC "); + break; + + // Block boundaries + case LSRA_EVENT_START_BB: + assert(currentBlock != nullptr); + if (!dumpTerse) + { + printf("\n\n Live Vars(Regs) at start of BB%02u (from pred BB%02u):", currentBlock->bbNum, + blockInfo[currentBlock->bbNum].predBBNum); + dumpVarToRegMap(inVarToRegMaps[currentBlock->bbNum]); + } + break; + case LSRA_EVENT_END_BB: + if (!dumpTerse) + { + printf("\n\n Live Vars(Regs) after BB%02u:", currentBlock->bbNum); + dumpVarToRegMap(outVarToRegMaps[currentBlock->bbNum]); + } + break; + + case LSRA_EVENT_FREE_REGS: + if (!dumpTerse) + { + printf("Freeing registers:\n"); + } + break; + + // Characteristics of the current RefPosition + case LSRA_EVENT_INCREMENT_RANGE_END: + if (!dumpTerse) + { + printf(" Incrementing nextPhysRegLocation for %s\n", getRegName(reg)); + } + // else ??? 
+ break; + case LSRA_EVENT_LAST_USE: + if (!dumpTerse) + { + printf(" Last use, marked to be freed\n"); + } + break; + case LSRA_EVENT_LAST_USE_DELAYED: + if (!dumpTerse) + { + printf(" Last use, marked to be freed (delayed)\n"); + } + break; + case LSRA_EVENT_NEEDS_NEW_REG: + if (!dumpTerse) + { + printf(" Needs new register; mark %s to be freed\n", getRegName(reg)); + } + else + { + printf("Free %-4s ", getRegName(reg)); + dumpRegRecords(); + dumpEmptyRefPosition(); + } + break; + + // Allocation decisions + case LSRA_EVENT_FIXED_REG: + case LSRA_EVENT_EXP_USE: + if (!dumpTerse) + { + printf("No allocation\n"); + } + else + { + printf("Keep %-4s ", getRegName(reg)); + } + break; + case LSRA_EVENT_ZERO_REF: + assert(interval != nullptr && interval->isLocalVar); + if (!dumpTerse) + { + printf("Marking V%02u as last use there are no actual references\n", interval->varNum); + } + else + { + printf("NoRef "); + dumpRegRecords(); + dumpEmptyRefPosition(); + } + break; + case LSRA_EVENT_KEPT_ALLOCATION: + if (!dumpTerse) + { + printf("already allocated %4s\n", getRegName(reg)); + } + else + { + printf("Keep %-4s ", getRegName(reg)); + } + break; + case LSRA_EVENT_COPY_REG: + assert(interval != nullptr && interval->recentRefPosition != nullptr); + if (!dumpTerse) + { + printf("allocated %s as copyReg\n\n", getRegName(reg)); + } + else + { + printf("Copy %-4s ", getRegName(reg)); + } + break; + case LSRA_EVENT_MOVE_REG: + assert(interval != nullptr && interval->recentRefPosition != nullptr); + if (!dumpTerse) + { + printf(" needs a new register; marked as moveReg\n"); + } + else + { + printf("Move %-4s ", getRegName(reg)); + dumpRegRecords(); + dumpEmptyRefPosition(); + } + break; + case LSRA_EVENT_ALLOC_REG: + if (!dumpTerse) + { + printf("allocated %s\n", getRegName(reg)); + } + else + { + printf("Alloc %-4s ", getRegName(reg)); + } + break; + case LSRA_EVENT_REUSE_REG: + if (!dumpTerse) + { + printf("reused constant in %s\n", getRegName(reg)); + } + else + { + printf("Reuse %-4s ", getRegName(reg)); + } + break; + case LSRA_EVENT_ALLOC_SPILLED_REG: + if (!dumpTerse) + { + printf("allocated spilled register %s\n", getRegName(reg)); + } + else + { + printf("Steal %-4s ", getRegName(reg)); + } + break; + case LSRA_EVENT_NO_ENTRY_REG_ALLOCATED: + assert(interval != nullptr && interval->isLocalVar); + if (!dumpTerse) + { + printf("Not allocating an entry register for V%02u due to low ref count\n", interval->varNum); + } + else + { + printf("LoRef "); + } + break; + case LSRA_EVENT_NO_REG_ALLOCATED: + if (!dumpTerse) + { + printf("no register allocated\n"); + } + else + { + printf("NoReg "); + } + break; + case LSRA_EVENT_RELOAD: + if (!dumpTerse) + { + printf(" Marked for reload\n"); + } + else + { + printf("ReLod %-4s ", getRegName(reg)); + dumpRegRecords(); + dumpEmptyRefPosition(); + } + break; + case LSRA_EVENT_SPECIAL_PUTARG: + if (!dumpTerse) + { + printf(" Special case of putArg - using lclVar that's in the expected reg\n"); + } + else + { + printf("PtArg %-4s ", getRegName(reg)); + } + break; + default: + break; + } +} + +//------------------------------------------------------------------------ +// dumpRegRecordHeader: Dump the header for a column-based dump of the register state. +// +// Arguments: +// None. +// +// Return Value: +// None. 
+//
+// Assumptions:
+//    Reg names fit in 4 characters (minimum width of the columns)
+//
+// Notes:
+//    In order to make the table as dense as possible (for ease of reading the dumps),
+//    we determine the minimum regColumnWidth width required to represent:
+//      regs, by name (e.g. eax or xmm0) - this is fixed at 4 characters.
+//      intervals, as Vnn for lclVar intervals, or as I<num> for other intervals.
+//    The table is indented by the amount needed for dumpRefPositionShort, which is
+//    captured in shortRefPositionDumpWidth.
+//
+void LinearScan::dumpRegRecordHeader()
+{
+    printf("The following table has one or more rows for each RefPosition that is handled during allocation.\n"
+           "The first column provides the basic information about the RefPosition, with its type (e.g. Def,\n"
+           "Use, Fixd) followed by a '*' if it is a last use, and a 'D' if it is delayRegFree, and then the\n"
+           "action taken during allocation (e.g. Alloc a new register, or Keep an existing one).\n"
+           "The subsequent columns show the Interval occupying each register, if any, followed by 'a' if it is\n"
+           "active, and 'i' if it is inactive.  Columns are only printed up to the last modified register, which\n"
+           "may increase during allocation, in which case additional columns will appear.  Registers which are\n"
+           "not marked modified have ---- in their column.\n\n");
+
+    // First, determine the width of each register column (which holds a reg name in the
+    // header, and an interval name in each subsequent row).
+    int intervalNumberWidth = (int)log10((double)intervals.size()) + 1;
+    // The regColumnWidth includes the identifying character (I or V) and an 'i' or 'a' (inactive or active)
+    regColumnWidth = intervalNumberWidth + 2;
+    if (regColumnWidth < 4)
+    {
+        regColumnWidth = 4;
+    }
+    sprintf_s(intervalNameFormat, MAX_FORMAT_CHARS, "%%c%%-%dd", regColumnWidth - 2);
+    sprintf_s(regNameFormat, MAX_FORMAT_CHARS, "%%-%ds", regColumnWidth);
+
+    // Next, determine the width of the short RefPosition (see dumpRefPositionShort()).
+    // This is in the form:
+    //   nnn.#mmm NAME TYPEld
+    // Where:
+    //   nnn is the Location, right-justified to the width needed for the highest location.
+    //   mmm is the RefPosition rpNum, left-justified to the width needed for the highest rpNum.
+    //   NAME is dumped by dumpReferentName(), and is "regColumnWidth".
+    //   TYPE is RefTypeNameShort, and is 4 characters
+    //   l is either '*' (if a last use) or ' ' (otherwise)
+    //   d is either 'D' (if a delayed use) or ' ' (otherwise)
+
+    maxNodeLocation = (maxNodeLocation == 0)
+                          ?
1 + : maxNodeLocation; // corner case of a method with an infinite loop without any gentree nodes + assert(maxNodeLocation >= 1); + assert(refPositions.size() >= 1); + int nodeLocationWidth = (int)log10((double)maxNodeLocation) + 1; + int refPositionWidth = (int)log10((double)refPositions.size()) + 1; + int refTypeInfoWidth = 4 /*TYPE*/ + 2 /* last-use and delayed */ + 1 /* space */; + int locationAndRPNumWidth = nodeLocationWidth + 2 /* .# */ + refPositionWidth + 1 /* space */; + int shortRefPositionDumpWidth = locationAndRPNumWidth + regColumnWidth + 1 /* space */ + refTypeInfoWidth; + sprintf_s(shortRefPositionFormat, MAX_FORMAT_CHARS, "%%%dd.#%%-%dd ", nodeLocationWidth, refPositionWidth); + sprintf_s(emptyRefPositionFormat, MAX_FORMAT_CHARS, "%%-%ds", shortRefPositionDumpWidth); + + // The width of the "allocation info" + // - a 5-character allocation decision + // - a space + // - a 4-character register + // - a space + int allocationInfoWidth = 5 + 1 + 4 + 1; + + // Next, determine the width of the legend for each row. This includes: + // - a short RefPosition dump (shortRefPositionDumpWidth), which includes a space + // - the allocation info (allocationInfoWidth), which also includes a space + + regTableIndent = shortRefPositionDumpWidth + allocationInfoWidth; + + // BBnn printed left-justified in the NAME Typeld and allocationInfo space. + int bbDumpWidth = regColumnWidth + 1 + refTypeInfoWidth + allocationInfoWidth; + int bbNumWidth = (int)log10((double)compiler->fgBBNumMax) + 1; + // In the unlikely event that BB numbers overflow the space, we'll simply omit the predBB + int predBBNumDumpSpace = regTableIndent - locationAndRPNumWidth - bbNumWidth - 9; // 'BB' + ' PredBB' + if (predBBNumDumpSpace < bbNumWidth) + { + sprintf_s(bbRefPosFormat, MAX_LEGEND_FORMAT_CHARS, "BB%%-%dd", shortRefPositionDumpWidth - 2); + } + else + { + sprintf_s(bbRefPosFormat, MAX_LEGEND_FORMAT_CHARS, "BB%%-%dd PredBB%%-%dd", bbNumWidth, predBBNumDumpSpace); + } + + if (compiler->shouldDumpASCIITrees()) + { + columnSeparator = "|"; + line = "-"; + leftBox = "+"; + middleBox = "+"; + rightBox = "+"; + } + else + { + columnSeparator = "\xe2\x94\x82"; + line = "\xe2\x94\x80"; + leftBox = "\xe2\x94\x9c"; + middleBox = "\xe2\x94\xbc"; + rightBox = "\xe2\x94\xa4"; + } + sprintf_s(indentFormat, MAX_FORMAT_CHARS, "%%-%ds", regTableIndent); + + // Now, set up the legend format for the RefPosition info + sprintf_s(legendFormat, MAX_LEGEND_FORMAT_CHARS, "%%-%d.%ds%%-%d.%ds%%-%ds%%s", nodeLocationWidth + 1, + nodeLocationWidth + 1, refPositionWidth + 2, refPositionWidth + 2, regColumnWidth + 1); + + // Finally, print a "title row" including the legend and the reg names + dumpRegRecordTitle(); +} + +int LinearScan::getLastUsedRegNumIndex() +{ + int lastUsedRegNumIndex = 0; + regMaskTP usedRegsMask = compiler->codeGen->regSet.rsGetModifiedRegsMask(); + int lastRegNumIndex = compiler->compFloatingPointUsed ? 
REG_FP_LAST : REG_INT_LAST; + for (int regNumIndex = 0; regNumIndex <= lastRegNumIndex; regNumIndex++) + { + if ((usedRegsMask & genRegMask((regNumber)regNumIndex)) != 0) + { + lastUsedRegNumIndex = regNumIndex; + } + } + return lastUsedRegNumIndex; +} + +void LinearScan::dumpRegRecordTitleLines() +{ + for (int i = 0; i < regTableIndent; i++) + { + printf("%s", line); + } + int lastUsedRegNumIndex = getLastUsedRegNumIndex(); + for (int regNumIndex = 0; regNumIndex <= lastUsedRegNumIndex; regNumIndex++) + { + printf("%s", middleBox); + for (int i = 0; i < regColumnWidth; i++) + { + printf("%s", line); + } + } + printf("%s\n", rightBox); +} +void LinearScan::dumpRegRecordTitle() +{ + dumpRegRecordTitleLines(); + + // Print out the legend for the RefPosition info + printf(legendFormat, "Loc ", "RP# ", "Name ", "Type Action Reg "); + + // Print out the register name column headers + char columnFormatArray[MAX_FORMAT_CHARS]; + sprintf_s(columnFormatArray, MAX_FORMAT_CHARS, "%s%%-%d.%ds", columnSeparator, regColumnWidth, regColumnWidth); + int lastUsedRegNumIndex = getLastUsedRegNumIndex(); + for (int regNumIndex = 0; regNumIndex <= lastUsedRegNumIndex; regNumIndex++) + { + regNumber regNum = (regNumber)regNumIndex; + const char* regName = getRegName(regNum); + printf(columnFormatArray, regName); + } + printf("%s\n", columnSeparator); + + rowCountSinceLastTitle = 0; + + dumpRegRecordTitleLines(); +} + +void LinearScan::dumpRegRecords() +{ + static char columnFormatArray[18]; + int lastUsedRegNumIndex = getLastUsedRegNumIndex(); + regMaskTP usedRegsMask = compiler->codeGen->regSet.rsGetModifiedRegsMask(); + + for (int regNumIndex = 0; regNumIndex <= lastUsedRegNumIndex; regNumIndex++) + { + printf("%s", columnSeparator); + RegRecord& regRecord = physRegs[regNumIndex]; + Interval* interval = regRecord.assignedInterval; + if (interval != nullptr) + { + dumpIntervalName(interval); + char activeChar = interval->isActive ? 'a' : 'i'; + printf("%c", activeChar); + } + else if (regRecord.isBusyUntilNextKill) + { + printf(columnFormatArray, "Busy"); + } + else if ((usedRegsMask & genRegMask((regNumber)regNumIndex)) == 0) + { + sprintf_s(columnFormatArray, MAX_FORMAT_CHARS, "%%-%ds", regColumnWidth); + printf(columnFormatArray, "----"); + } + else + { + sprintf_s(columnFormatArray, MAX_FORMAT_CHARS, "%%-%ds", regColumnWidth); + printf(columnFormatArray, ""); + } + } + printf("%s\n", columnSeparator); + + if (rowCountSinceLastTitle > MAX_ROWS_BETWEEN_TITLES) + { + dumpRegRecordTitle(); + } + rowCountSinceLastTitle++; +} + +void LinearScan::dumpIntervalName(Interval* interval) +{ + char intervalChar; + if (interval->isLocalVar) + { + intervalChar = 'V'; + } + else if (interval->isConstant) + { + intervalChar = 'C'; + } + else + { + intervalChar = 'I'; + } + printf(intervalNameFormat, intervalChar, interval->intervalIndex); +} + +void LinearScan::dumpEmptyRefPosition() +{ + printf(emptyRefPositionFormat, ""); +} + +// Note that the size of this dump is computed in dumpRegRecordHeader(). 
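+// A (hypothetical) row produced here might begin "  12.#45   V3   Use *", i.e. location 12,
+// RefPosition #45, and a last use of lclVar interval V3; the allocation action and register,
+// if any, are appended separately by dumpLsraAllocationEvent().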
+// +void LinearScan::dumpRefPositionShort(RefPosition* refPosition, BasicBlock* currentBlock) +{ + BasicBlock* block = currentBlock; + if (refPosition->refType == RefTypeBB) + { + // Always print a title row before a RefTypeBB (except for the first, because we + // will already have printed it before the parameters) + if (refPosition->refType == RefTypeBB && block != compiler->fgFirstBB && block != nullptr) + { + dumpRegRecordTitle(); + } + } + printf(shortRefPositionFormat, refPosition->nodeLocation, refPosition->rpNum); + if (refPosition->refType == RefTypeBB) + { + if (block == nullptr) + { + printf(regNameFormat, "END"); + printf(" "); + printf(regNameFormat, ""); + } + else + { + printf(bbRefPosFormat, block->bbNum, block == compiler->fgFirstBB ? 0 : blockInfo[block->bbNum].predBBNum); + } + } + else if (refPosition->isIntervalRef()) + { + Interval* interval = refPosition->getInterval(); + dumpIntervalName(interval); + char lastUseChar = ' '; + char delayChar = ' '; + if (refPosition->lastUse) + { + lastUseChar = '*'; + if (refPosition->delayRegFree) + { + delayChar = 'D'; + } + } + printf(" %s%c%c ", getRefTypeShortName(refPosition->refType), lastUseChar, delayChar); + } + else if (refPosition->isPhysRegRef) + { + RegRecord* regRecord = refPosition->getReg(); + printf(regNameFormat, getRegName(regRecord->regNum)); + printf(" %s ", getRefTypeShortName(refPosition->refType)); + } + else + { + assert(refPosition->refType == RefTypeKillGCRefs); + // There's no interval or reg name associated with this. + printf(regNameFormat, " "); + printf(" %s ", getRefTypeShortName(refPosition->refType)); + } +} + +//------------------------------------------------------------------------ +// LinearScan::IsResolutionMove: +// Returns true if the given node is a move inserted by LSRA +// resolution. +// +// Arguments: +// node - the node to check. +// +bool LinearScan::IsResolutionMove(GenTree* node) +{ + if (!node->gtLsraInfo.isLsraAdded) + { + return false; + } + + switch (node->OperGet()) + { + case GT_LCL_VAR: + case GT_COPY: + return node->gtLsraInfo.isLocalDefUse; + + case GT_SWAP: + return true; + + default: + return false; + } +} + +//------------------------------------------------------------------------ +// LinearScan::IsResolutionNode: +// Returns true if the given node is either a move inserted by LSRA +// resolution or an operand to such a move. +// +// Arguments: +// containingRange - the range that contains the node to check. +// node - the node to check. +// +bool LinearScan::IsResolutionNode(LIR::Range& containingRange, GenTree* node) +{ + for (;;) + { + if (IsResolutionMove(node)) + { + return true; + } + + if (!node->gtLsraInfo.isLsraAdded || (node->OperGet() != GT_LCL_VAR)) + { + return false; + } + + LIR::Use use; + bool foundUse = containingRange.TryGetUse(node, &use); + assert(foundUse); + + node = use.User(); + } +} + +//------------------------------------------------------------------------ +// verifyFinalAllocation: Traverse the RefPositions and verify various invariants. +// +// Arguments: +// None. +// +// Return Value: +// None. +// +// Notes: +// If verbose is set, this will also dump a table of the final allocations. +void LinearScan::verifyFinalAllocation() +{ + if (VERBOSE) + { + printf("\nFinal allocation\n"); + } + + // Clear register assignments. 
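+    // (The per-register and per-interval assignments are then rebuilt below by replaying
+    // the RefPositions in order.)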
+    for (regNumber reg = REG_FIRST; reg < ACTUAL_REG_COUNT; reg = REG_NEXT(reg))
+    {
+        RegRecord* physRegRecord        = getRegisterRecord(reg);
+        physRegRecord->assignedInterval = nullptr;
+    }
+
+    for (auto& interval : intervals)
+    {
+        interval.assignedReg = nullptr;
+        interval.physReg     = REG_NA;
+    }
+
+    DBEXEC(VERBOSE, dumpRegRecordTitle());
+
+    BasicBlock*  currentBlock                = nullptr;
+    GenTree*     firstBlockEndResolutionNode = nullptr;
+    regMaskTP    regsToFree                  = RBM_NONE;
+    regMaskTP    delayRegsToFree             = RBM_NONE;
+    LsraLocation currentLocation             = MinLocation;
+    for (auto& refPosition : refPositions)
+    {
+        RefPosition* currentRefPosition = &refPosition;
+        Interval*    interval           = nullptr;
+        RegRecord*   regRecord          = nullptr;
+        regNumber    regNum             = REG_NA;
+        if (currentRefPosition->refType == RefTypeBB)
+        {
+            regsToFree |= delayRegsToFree;
+            delayRegsToFree = RBM_NONE;
+            // For BB RefPositions, wait until we dump the "end of block" info before dumping the basic RefPosition
+            // info.
+        }
+        else
+        {
+            // For other RefPosition types, we can dump the basic RefPosition info now.
+            DBEXEC(VERBOSE, dumpRefPositionShort(currentRefPosition, currentBlock));
+
+            if (currentRefPosition->isPhysRegRef)
+            {
+                regRecord                    = currentRefPosition->getReg();
+                regRecord->recentRefPosition = currentRefPosition;
+                regNum                       = regRecord->regNum;
+            }
+            else if (currentRefPosition->isIntervalRef())
+            {
+                interval                    = currentRefPosition->getInterval();
+                interval->recentRefPosition = currentRefPosition;
+                if (currentRefPosition->registerAssignment != RBM_NONE)
+                {
+                    if (!genMaxOneBit(currentRefPosition->registerAssignment))
+                    {
+                        assert(currentRefPosition->refType == RefTypeExpUse ||
+                               currentRefPosition->refType == RefTypeDummyDef);
+                    }
+                    else
+                    {
+                        regNum    = currentRefPosition->assignedReg();
+                        regRecord = getRegisterRecord(regNum);
+                    }
+                }
+            }
+        }
+
+        LsraLocation newLocation = currentRefPosition->nodeLocation;
+
+        if (newLocation > currentLocation)
+        {
+            // Free Registers.
+            // We could use the freeRegisters() method, but we'd have to carefully manage the active intervals.
+            for (regNumber reg = REG_FIRST; reg < ACTUAL_REG_COUNT; reg = REG_NEXT(reg))
+            {
+                regMaskTP regMask = genRegMask(reg);
+                if ((regsToFree & regMask) != RBM_NONE)
+                {
+                    RegRecord* physRegRecord        = getRegisterRecord(reg);
+                    physRegRecord->assignedInterval = nullptr;
+                }
+            }
+            regsToFree      = delayRegsToFree;
+            delayRegsToFree = RBM_NONE;
+        }
+        currentLocation = newLocation;
+
+        switch (currentRefPosition->refType)
+        {
+            case RefTypeBB:
+            {
+                if (currentBlock == nullptr)
+                {
+                    currentBlock = startBlockSequence();
+                }
+                else
+                {
+                    // Verify the resolution moves at the end of the previous block.
+                    for (GenTree* node = firstBlockEndResolutionNode; node != nullptr; node = node->gtNext)
+                    {
+                        // Only verify nodes that are actually moves; don't bother with the nodes that are
+                        // operands to moves.
+                        if (IsResolutionMove(node))
+                        {
+                            verifyResolutionMove(node, currentLocation);
+                        }
+                    }
+
+                    // Validate the locations at the end of the previous block.
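+                    // Each variable live out of the block must be in the register recorded
+                    // in the outVarToRegMap (or on the stack, if the map records REG_STK).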
+ VarToRegMap outVarToRegMap = outVarToRegMaps[currentBlock->bbNum]; + VARSET_ITER_INIT(compiler, iter, currentBlock->bbLiveOut, varIndex); + while (iter.NextElem(compiler, &varIndex)) + { + unsigned varNum = compiler->lvaTrackedToVarNum[varIndex]; + regNumber regNum = getVarReg(outVarToRegMap, varNum); + interval = getIntervalForLocalVar(varNum); + assert(interval->physReg == regNum || (interval->physReg == REG_NA && regNum == REG_STK)); + interval->physReg = REG_NA; + interval->assignedReg = nullptr; + interval->isActive = false; + } + + // Clear register assignments. + for (regNumber reg = REG_FIRST; reg < ACTUAL_REG_COUNT; reg = REG_NEXT(reg)) + { + RegRecord* physRegRecord = getRegisterRecord(reg); + physRegRecord->assignedInterval = nullptr; + } + + // Now, record the locations at the beginning of this block. + currentBlock = moveToNextBlock(); + } + + if (currentBlock != nullptr) + { + VarToRegMap inVarToRegMap = inVarToRegMaps[currentBlock->bbNum]; + VARSET_ITER_INIT(compiler, iter, currentBlock->bbLiveIn, varIndex); + while (iter.NextElem(compiler, &varIndex)) + { + unsigned varNum = compiler->lvaTrackedToVarNum[varIndex]; + regNumber regNum = getVarReg(inVarToRegMap, varNum); + interval = getIntervalForLocalVar(varNum); + interval->physReg = regNum; + interval->assignedReg = &(physRegs[regNum]); + interval->isActive = true; + physRegs[regNum].assignedInterval = interval; + } + + if (VERBOSE) + { + dumpRefPositionShort(currentRefPosition, currentBlock); + dumpRegRecords(); + } + + // Finally, handle the resolution moves, if any, at the beginning of the next block. + firstBlockEndResolutionNode = nullptr; + bool foundNonResolutionNode = false; + + LIR::Range& currentBlockRange = LIR::AsRange(currentBlock); + for (GenTree* node : currentBlockRange.NonPhiNodes()) + { + if (IsResolutionNode(currentBlockRange, node)) + { + if (foundNonResolutionNode) + { + firstBlockEndResolutionNode = node; + break; + } + else if (IsResolutionMove(node)) + { + // Only verify nodes that are actually moves; don't bother with the nodes that are + // operands to moves. 
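+                                // (The operands' registers are checked as part of verifying
+                                // the move itself.)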
+ verifyResolutionMove(node, currentLocation); + } + } + else + { + foundNonResolutionNode = true; + } + } + } + } + + break; + + case RefTypeKill: + assert(regRecord != nullptr); + assert(regRecord->assignedInterval == nullptr); + dumpLsraAllocationEvent(LSRA_EVENT_KEPT_ALLOCATION, nullptr, regRecord->regNum, currentBlock); + break; + case RefTypeFixedReg: + assert(regRecord != nullptr); + dumpLsraAllocationEvent(LSRA_EVENT_KEPT_ALLOCATION, nullptr, regRecord->regNum, currentBlock); + break; + + case RefTypeUpperVectorSaveDef: + case RefTypeUpperVectorSaveUse: + case RefTypeDef: + case RefTypeUse: + case RefTypeParamDef: + case RefTypeZeroInit: + assert(interval != nullptr); + + if (interval->isSpecialPutArg) + { + dumpLsraAllocationEvent(LSRA_EVENT_SPECIAL_PUTARG, interval, regNum); + break; + } + if (currentRefPosition->reload) + { + interval->isActive = true; + assert(regNum != REG_NA); + interval->physReg = regNum; + interval->assignedReg = regRecord; + regRecord->assignedInterval = interval; + dumpLsraAllocationEvent(LSRA_EVENT_RELOAD, nullptr, regRecord->regNum, currentBlock); + } + if (regNum == REG_NA) + { + dumpLsraAllocationEvent(LSRA_EVENT_NO_REG_ALLOCATED, interval); + } + else if (RefTypeIsDef(currentRefPosition->refType)) + { + interval->isActive = true; + if (VERBOSE) + { + if (interval->isConstant && (currentRefPosition->treeNode != nullptr) && + currentRefPosition->treeNode->IsReuseRegVal()) + { + dumpLsraAllocationEvent(LSRA_EVENT_REUSE_REG, nullptr, regRecord->regNum, currentBlock); + } + else + { + dumpLsraAllocationEvent(LSRA_EVENT_ALLOC_REG, nullptr, regRecord->regNum, currentBlock); + } + } + } + else if (currentRefPosition->copyReg) + { + dumpLsraAllocationEvent(LSRA_EVENT_COPY_REG, interval, regRecord->regNum, currentBlock); + } + else if (currentRefPosition->moveReg) + { + assert(interval->assignedReg != nullptr); + interval->assignedReg->assignedInterval = nullptr; + interval->physReg = regNum; + interval->assignedReg = regRecord; + regRecord->assignedInterval = interval; + if (VERBOSE) + { + printf("Move %-4s ", getRegName(regRecord->regNum)); + } + } + else + { + dumpLsraAllocationEvent(LSRA_EVENT_KEPT_ALLOCATION, nullptr, regRecord->regNum, currentBlock); + } + if (currentRefPosition->lastUse || currentRefPosition->spillAfter) + { + interval->isActive = false; + } + if (regNum != REG_NA) + { + if (currentRefPosition->spillAfter) + { + if (VERBOSE) + { + dumpRegRecords(); + dumpEmptyRefPosition(); + printf("Spill %-4s ", getRegName(regNum)); + } + } + else if (currentRefPosition->copyReg) + { + regRecord->assignedInterval = interval; + } + else + { + interval->physReg = regNum; + interval->assignedReg = regRecord; + regRecord->assignedInterval = interval; + } + } + break; + case RefTypeKillGCRefs: + // No action to take. + // However, we will assert that, at resolution time, no registers contain GC refs. + { + DBEXEC(VERBOSE, printf(" ")); + regMaskTP candidateRegs = currentRefPosition->registerAssignment; + while (candidateRegs != RBM_NONE) + { + regMaskTP nextRegBit = genFindLowestBit(candidateRegs); + candidateRegs &= ~nextRegBit; + regNumber nextReg = genRegNumFromMask(nextRegBit); + RegRecord* regRecord = getRegisterRecord(nextReg); + Interval* assignedInterval = regRecord->assignedInterval; + assert(assignedInterval == nullptr || !varTypeIsGC(assignedInterval->registerType)); + } + } + break; + + case RefTypeExpUse: + case RefTypeDummyDef: + // Do nothing; these will be handled by the RefTypeBB. 
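+                // (ExpUse and DummyDef positions only model liveness across block
+                // boundaries; the block boundary maps are verified at the RefTypeBB
+                // positions themselves.)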
+                DBEXEC(VERBOSE, printf(" "));
+                break;
+
+            case RefTypeInvalid:
+                // For these 'currentRefPosition->refType' values, no action to take.
+                break;
+        }
+
+        if (currentRefPosition->refType != RefTypeBB)
+        {
+            DBEXEC(VERBOSE, dumpRegRecords());
+            if (interval != nullptr)
+            {
+                if (currentRefPosition->copyReg)
+                {
+                    assert(interval->physReg != regNum);
+                    regRecord->assignedInterval = nullptr;
+                    assert(interval->assignedReg != nullptr);
+                    regRecord = interval->assignedReg;
+                }
+                if (currentRefPosition->spillAfter || currentRefPosition->lastUse)
+                {
+                    interval->physReg     = REG_NA;
+                    interval->assignedReg = nullptr;
+
+                    // regRecord could be null if the RefPosition is to be allocated a
+                    // reg only if profitable.
+                    if (regRecord != nullptr)
+                    {
+                        regRecord->assignedInterval = nullptr;
+                    }
+                    else
+                    {
+                        assert(currentRefPosition->AllocateIfProfitable());
+                    }
+                }
+            }
+        }
+    }
+
+    // Now, verify the resolution blocks.
+    // Currently these are nearly always at the end of the method, but that may not always be the case.
+    // So, we'll go through all the BBs looking for blocks whose bbNum is greater than bbNumMaxBeforeResolution.
+    for (BasicBlock* currentBlock = compiler->fgFirstBB; currentBlock != nullptr; currentBlock = currentBlock->bbNext)
+    {
+        if (currentBlock->bbNum > bbNumMaxBeforeResolution)
+        {
+            if (VERBOSE)
+            {
+                dumpRegRecordTitle();
+                printf(shortRefPositionFormat, 0, 0);
+                assert(currentBlock->bbPreds != nullptr && currentBlock->bbPreds->flBlock != nullptr);
+                printf(bbRefPosFormat, currentBlock->bbNum, currentBlock->bbPreds->flBlock->bbNum);
+                dumpRegRecords();
+            }
+
+            // Clear register assignments.
+            for (regNumber reg = REG_FIRST; reg < ACTUAL_REG_COUNT; reg = REG_NEXT(reg))
+            {
+                RegRecord* physRegRecord        = getRegisterRecord(reg);
+                physRegRecord->assignedInterval = nullptr;
+            }
+
+            // Set the incoming register assignments
+            VarToRegMap inVarToRegMap = getInVarToRegMap(currentBlock->bbNum);
+            VARSET_ITER_INIT(compiler, iter, currentBlock->bbLiveIn, varIndex);
+            while (iter.NextElem(compiler, &varIndex))
+            {
+                unsigned  varNum   = compiler->lvaTrackedToVarNum[varIndex];
+                regNumber regNum   = getVarReg(inVarToRegMap, varNum);
+                Interval* interval = getIntervalForLocalVar(varNum);
+                interval->physReg     = regNum;
+                interval->assignedReg = &(physRegs[regNum]);
+                interval->isActive    = true;
+                physRegs[regNum].assignedInterval = interval;
+            }
+
+            // Verify the moves in this block
+            LIR::Range& currentBlockRange = LIR::AsRange(currentBlock);
+            for (GenTree* node : currentBlockRange.NonPhiNodes())
+            {
+                assert(IsResolutionNode(currentBlockRange, node));
+                if (IsResolutionMove(node))
+                {
+                    // Only verify nodes that are actually moves; don't bother with the nodes that are
+                    // operands to moves.
+ verifyResolutionMove(node, currentLocation); + } + } + + // Verify the outgoing register assignments + { + VarToRegMap outVarToRegMap = getOutVarToRegMap(currentBlock->bbNum); + VARSET_ITER_INIT(compiler, iter, currentBlock->bbLiveOut, varIndex); + while (iter.NextElem(compiler, &varIndex)) + { + unsigned varNum = compiler->lvaTrackedToVarNum[varIndex]; + regNumber regNum = getVarReg(outVarToRegMap, varNum); + Interval* interval = getIntervalForLocalVar(varNum); + assert(interval->physReg == regNum || (interval->physReg == REG_NA && regNum == REG_STK)); + interval->physReg = REG_NA; + interval->assignedReg = nullptr; + interval->isActive = false; + } + } + } + } + + DBEXEC(VERBOSE, printf("\n")); +} + +//------------------------------------------------------------------------ +// verifyResolutionMove: Verify a resolution statement. Called by verifyFinalAllocation() +// +// Arguments: +// resolutionMove - A GenTree* that must be a resolution move. +// currentLocation - The LsraLocation of the most recent RefPosition that has been verified. +// +// Return Value: +// None. +// +// Notes: +// If verbose is set, this will also dump the moves into the table of final allocations. +void LinearScan::verifyResolutionMove(GenTree* resolutionMove, LsraLocation currentLocation) +{ + GenTree* dst = resolutionMove; + assert(IsResolutionMove(dst)); + + if (dst->OperGet() == GT_SWAP) + { + GenTreeLclVarCommon* left = dst->gtGetOp1()->AsLclVarCommon(); + GenTreeLclVarCommon* right = dst->gtGetOp2()->AsLclVarCommon(); + regNumber leftRegNum = left->gtRegNum; + regNumber rightRegNum = right->gtRegNum; + Interval* leftInterval = getIntervalForLocalVar(left->gtLclNum); + Interval* rightInterval = getIntervalForLocalVar(right->gtLclNum); + assert(leftInterval->physReg == leftRegNum && rightInterval->physReg == rightRegNum); + leftInterval->physReg = rightRegNum; + rightInterval->physReg = leftRegNum; + physRegs[rightRegNum].assignedInterval = leftInterval; + physRegs[leftRegNum].assignedInterval = rightInterval; + if (VERBOSE) + { + printf(shortRefPositionFormat, currentLocation, 0); + dumpIntervalName(leftInterval); + printf(" Swap "); + printf(" %-4s ", getRegName(rightRegNum)); + dumpRegRecords(); + printf(shortRefPositionFormat, currentLocation, 0); + dumpIntervalName(rightInterval); + printf(" \" "); + printf(" %-4s ", getRegName(leftRegNum)); + dumpRegRecords(); + } + return; + } + regNumber dstRegNum = dst->gtRegNum; + regNumber srcRegNum; + GenTreeLclVarCommon* lcl; + if (dst->OperGet() == GT_COPY) + { + lcl = dst->gtGetOp1()->AsLclVarCommon(); + srcRegNum = lcl->gtRegNum; + } + else + { + lcl = dst->AsLclVarCommon(); + if ((lcl->gtFlags & GTF_SPILLED) != 0) + { + srcRegNum = REG_STK; + } + else + { + assert((lcl->gtFlags & GTF_SPILL) != 0); + srcRegNum = dstRegNum; + dstRegNum = REG_STK; + } + } + Interval* interval = getIntervalForLocalVar(lcl->gtLclNum); + assert(interval->physReg == srcRegNum || (srcRegNum == REG_STK && interval->physReg == REG_NA)); + if (srcRegNum != REG_STK) + { + physRegs[srcRegNum].assignedInterval = nullptr; + } + if (dstRegNum != REG_STK) + { + interval->physReg = dstRegNum; + interval->assignedReg = &(physRegs[dstRegNum]); + physRegs[dstRegNum].assignedInterval = interval; + interval->isActive = true; + } + else + { + interval->physReg = REG_NA; + interval->assignedReg = nullptr; + interval->isActive = false; + } + if (VERBOSE) + { + printf(shortRefPositionFormat, currentLocation, 0); + dumpIntervalName(interval); + printf(" Move "); + printf(" %-4s ", 
getRegName(dstRegNum));
+        dumpRegRecords();
+    }
+}
+#endif // DEBUG
+
+#endif // !LEGACY_BACKEND