author | Carol Eidt <carol.eidt@microsoft.com> | 2018-01-28 16:20:51 -0800 |
---|---|---|
committer | Carol Eidt <carol.eidt@microsoft.com> | 2018-02-01 14:12:14 -0800 |
commit | 6b0f3f88535968aaeddcf3da950fe5988ed3d459 (patch) | |
tree | 2cdd92831047cd1a7a75ce14cb6b4f8a47e1e281 /src/jit/lsrabuild.cpp | |
parent | 01c93b958e67a3e02d00be287cb2e15e7e9ea3e3 (diff) | |
Refactor RefPosition and Interval Building
Move code for building `RefPosition`s and `Interval`s out of lsra.cpp into lsrabuild.cpp. Also, move common code from lsraarm*.cpp and lsraxarch.cpp to lsrabuild.cpp.
Maintain the `ListNodePool` on the `LinearScan` object to be used by all the building methods.
Rename `TreeNodeInfoInit` methods to `Build`, to more accurately reflect the next round of changes where they will build the `RefPosition`s directly.
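The `ListNodePool` mentioned above is a standard intrusive free-list allocator: `GetNode` pops a recycled node (or allocates a fresh one on demand), and `ReturnNodes` splices a whole list back onto the free list in constant time. As a rough illustration, here is a minimal, hypothetical sketch of the same pattern; the `Node`/`NodePool` names and the use of `operator new` are stand-ins, since the real `LocationInfoListNodePool` (shown in the diff below) draws its nodes from the compiler's arena via `compGetMem` and never frees them, so returning nodes to the pool is the only form of recycling:

```cpp
// Minimal sketch (not the JIT's actual code) of the intrusive free-list pooling
// pattern that LocationInfoListNodePool implements.
#include <cassert>

struct Node
{
    int   payload;        // stands in for the loc/interval/treeNode fields
    Node* next = nullptr; // intrusive link, doubling as the free-list link
};

class NodePool
{
    Node* m_freeList = nullptr;

public:
    // Pop a recycled node if one is available; otherwise allocate a fresh one.
    // (The real pool allocates from the compiler's arena instead of the heap.)
    Node* GetNode(int payload)
    {
        Node* node = m_freeList;
        if (node == nullptr)
        {
            node = new Node();
        }
        else
        {
            m_freeList = node->next;
        }
        node->payload = payload;
        node->next    = nullptr;
        return node;
    }

    // Splice an entire list (head..tail) back onto the free list in O(1),
    // as ReturnNodes does once a node's uses have been consumed.
    void ReturnNodes(Node* head, Node* tail)
    {
        assert((head != nullptr) && (tail != nullptr));
        tail->next = m_freeList;
        m_freeList = head;
    }
};

int main()
{
    NodePool pool;
    Node* a = pool.GetNode(1);
    Node* b = pool.GetNode(2);
    a->next = b;               // build a two-node list
    pool.ReturnNodes(a, b);    // recycle both nodes at once
    Node* c = pool.GetNode(3); // reuses 'a' without a new allocation
    assert(c == a);
    return 0;
}
```

Because a returned list is spliced back in O(1), the cost of recycling is independent of list length, which matters when a use list is built and returned for every node in the trace.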
Diffstat (limited to 'src/jit/lsrabuild.cpp')
-rw-r--r-- | src/jit/lsrabuild.cpp | 3205 |
1 file changed, 3205 insertions, 0 deletions
diff --git a/src/jit/lsrabuild.cpp b/src/jit/lsrabuild.cpp new file mode 100644 index 0000000000..0d3cccb6b3 --- /dev/null +++ b/src/jit/lsrabuild.cpp @@ -0,0 +1,3205 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XX XX +XX Interval and RefPosition Building XX +XX XX +XX This contains the logic for constructing Intervals and RefPositions that XX +XX is common across architectures. See lsra{arch}.cpp for the architecture- XX +XX specific methods for building. XX +XX XX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +*/ + +#include "jitpch.h" +#ifdef _MSC_VER +#pragma hdrstop +#endif + +#ifndef LEGACY_BACKEND // This file is ONLY used for the RyuJIT backend that uses the linear scan register allocator + +#include "lsra.h" + +//------------------------------------------------------------------------ +// LocationInfoListNodePool::LocationInfoListNodePool: +// Creates a pool of `LocationInfoListNode` values. +// +// Arguments: +// compiler - The compiler context. +// preallocate - The number of nodes to preallocate. +// +LocationInfoListNodePool::LocationInfoListNodePool(Compiler* compiler, unsigned preallocate) : m_compiler(compiler) +{ + if (preallocate > 0) + { + size_t preallocateSize = sizeof(LocationInfoListNode) * preallocate; + LocationInfoListNode* preallocatedNodes = + reinterpret_cast<LocationInfoListNode*>(compiler->compGetMem(preallocateSize, CMK_LSRA)); + + LocationInfoListNode* head = preallocatedNodes; + head->m_next = nullptr; + + for (unsigned i = 1; i < preallocate; i++) + { + LocationInfoListNode* node = &preallocatedNodes[i]; + node->m_next = head; + head = node; + } + + m_freeList = head; + } +} + +//------------------------------------------------------------------------ +// LocationInfoListNodePool::GetNode: Fetches an unused node from the +// pool. +// +// Arguments: +// l - The `LsraLocation` for the `LocationInfo` value. +// i - The interval for the `LocationInfo` value. +// t - The IR node for the `LocationInfo` value. +// regIdx - The register index for the `LocationInfo` value. +// +// Returns: +// A pooled or newly-allocated `LocationInfoListNode`, depending on the +// contents of the pool. +LocationInfoListNode* LocationInfoListNodePool::GetNode(LsraLocation l, Interval* i, GenTree* t, unsigned regIdx) +{ + LocationInfoListNode* head = m_freeList; + if (head == nullptr) + { + head = reinterpret_cast<LocationInfoListNode*>(m_compiler->compGetMem(sizeof(LocationInfoListNode))); + } + else + { + m_freeList = head->m_next; + } + + head->loc = l; + head->interval = i; + head->treeNode = t; + head->m_next = nullptr; + + return head; +} + +//------------------------------------------------------------------------ +// LocationInfoListNodePool::ReturnNodes: Returns a list of nodes to the node +// pool and clears the given list. +// +// Arguments: +// list - The list to return.
+// +void LocationInfoListNodePool::ReturnNodes(LocationInfoList& list) +{ + assert(list.m_head != nullptr); + assert(list.m_tail != nullptr); + + LocationInfoListNode* head = m_freeList; + list.m_tail->m_next = head; + m_freeList = list.m_head; + + list.m_head = nullptr; + list.m_tail = nullptr; +} + +//------------------------------------------------------------------------ +// newInterval: Create a new Interval of the given RegisterType. +// +// Arguments: +// theRegisterType - The type of Interval to create. +// +// TODO-Cleanup: Consider adding an overload that takes a varDsc, and can appropriately +// set such fields as isStructField +// +Interval* LinearScan::newInterval(RegisterType theRegisterType) +{ + intervals.emplace_back(theRegisterType, allRegs(theRegisterType)); + Interval* newInt = &intervals.back(); + +#ifdef DEBUG + newInt->intervalIndex = static_cast<unsigned>(intervals.size() - 1); +#endif // DEBUG + + DBEXEC(VERBOSE, newInt->dump()); + return newInt; +} + +//------------------------------------------------------------------------ +// newRefPositionRaw: Create a new RefPosition +// +// Arguments: +// nodeLocation - The location of the reference. +// treeNode - The GenTree of the reference. +// refType - The type of reference +// +// Notes: +// This is used to create RefPositions for both RegRecords and Intervals, +// so it does only the common initialization. +// +RefPosition* LinearScan::newRefPositionRaw(LsraLocation nodeLocation, GenTree* treeNode, RefType refType) +{ + refPositions.emplace_back(curBBNum, nodeLocation, treeNode, refType); + RefPosition* newRP = &refPositions.back(); +#ifdef DEBUG + newRP->rpNum = static_cast<unsigned>(refPositions.size() - 1); +#endif // DEBUG + return newRP; +} + +//------------------------------------------------------------------------ +// resolveConflictingDefAndUse: Resolve the situation where we have conflicting def and use +// register requirements on a single-def, single-use interval. +// +// Arguments: +// defRefPosition - The interval definition +// useRefPosition - The (sole) interval use +// +// Return Value: +// None. +// +// Assumptions: +// The two RefPositions are for the same interval, which is a tree-temp. +// +// Notes: +// We require some special handling for the case where the use is a "delayRegFree" case of a fixedReg. +// In that case, if we change the registerAssignment on the useRefPosition, we will lose the fact that, +// even if we assign a different register (and rely on codegen to do the copy), that fixedReg also needs +// to remain busy until the Def register has been allocated. In that case, we don't allow Case 1 or Case 4 +// below. +// Here are the cases we consider (in this order): +// 1. If the defRefPosition specifies a single register, and there are no conflicting +// FixedReg uses of it between the def and use, we use that register, and the code generator +// will insert the copy. Note that it cannot be in use because there is a FixedRegRef for the def. +// 2. If the useRefPosition specifies a single register, and it is not in use, and there are no +// conflicting FixedReg uses of it between the def and use, we use that register, and the code generator +// will insert the copy. +// 3.
If the defRefPosition specifies a single register (but there are conflicts, as determined +// in 1.), and there are no conflicts with the useRefPosition register (if it's a single register), +// we set the register requirements on the defRefPosition to the use registers, and the +// code generator will insert a copy on the def. We can't rely on the code generator to put a copy +// on the use if it has multiple possible candidates, as it won't know which one has been allocated. +// 4. If the useRefPosition specifies a single register, and there are no conflicts with the register +// on the defRefPosition, we leave the register requirements on the defRefPosition as-is, and set +// the useRefPosition to the def registers, for similar reasons to case #3. +// 5. If both the defRefPosition and the useRefPosition specify single registers, but both have conflicts, +// we set the candidates on defRefPosition to be all regs of the appropriate type, and since they are +// single registers, codegen can insert the copy. +// 6. Finally, if the RefPositions specify disjoint subsets of the registers (or the use is fixed but +// has a conflict), we must insert a copy. The copy will be inserted before the use if the +// use is not fixed (in the fixed case, the code generator will insert the use). +// +// TODO-CQ: We get bad register allocation in case #3 in the situation where no register is +// available for the lifetime. We end up allocating a register that must be spilled, and it probably +// won't be the register that is actually defined by the target instruction. So, we have to copy it +// and THEN spill it. In this case, we should be using the def requirement. But we need to change +// the interface to this method a bit to make that work (e.g. returning a candidate set to use, but +// leaving the registerAssignment as-is on the def, so that if we find that we need to spill anyway +// we can use the fixed-reg on the def.) +// + +void LinearScan::resolveConflictingDefAndUse(Interval* interval, RefPosition* defRefPosition) +{ + assert(!interval->isLocalVar); + + RefPosition* useRefPosition = defRefPosition->nextRefPosition; + regMaskTP defRegAssignment = defRefPosition->registerAssignment; + regMaskTP useRegAssignment = useRefPosition->registerAssignment; + RegRecord* defRegRecord = nullptr; + RegRecord* useRegRecord = nullptr; + regNumber defReg = REG_NA; + regNumber useReg = REG_NA; + bool defRegConflict = false; + bool useRegConflict = false; + + // If the useRefPosition is a "delayRegFree", we can't change the registerAssignment + // on it, or we will fail to ensure that the fixedReg is busy at the time the target + // (of the node that uses this interval) is allocated. + bool canChangeUseAssignment = !useRefPosition->isFixedRegRef || !useRefPosition->delayRegFree; + + INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_DEFUSE_CONFLICT)); + if (!canChangeUseAssignment) + { + INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_DEFUSE_FIXED_DELAY_USE)); + } + if (defRefPosition->isFixedRegRef) + { + defReg = defRefPosition->assignedReg(); + defRegRecord = getRegisterRecord(defReg); + if (canChangeUseAssignment) + { + RefPosition* currFixedRegRefPosition = defRegRecord->recentRefPosition; + assert(currFixedRegRefPosition != nullptr && + currFixedRegRefPosition->nodeLocation == defRefPosition->nodeLocation); + + if (currFixedRegRefPosition->nextRefPosition == nullptr || + currFixedRegRefPosition->nextRefPosition->nodeLocation > useRefPosition->getRefEndLocation()) + { + // This is case #1.
Use the defRegAssignment + INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_DEFUSE_CASE1)); + useRefPosition->registerAssignment = defRegAssignment; + return; + } + else + { + defRegConflict = true; + } + } + } + if (useRefPosition->isFixedRegRef) + { + useReg = useRefPosition->assignedReg(); + useRegRecord = getRegisterRecord(useReg); + RefPosition* currFixedRegRefPosition = useRegRecord->recentRefPosition; + + // We know that useRefPosition is a fixed use, so the nextRefPosition must not be null. + RefPosition* nextFixedRegRefPosition = useRegRecord->getNextRefPosition(); + assert(nextFixedRegRefPosition != nullptr && + nextFixedRegRefPosition->nodeLocation <= useRefPosition->nodeLocation); + + // First, check to see if there are any conflicting FixedReg references between the def and use. + if (nextFixedRegRefPosition->nodeLocation == useRefPosition->nodeLocation) + { + // OK, no conflicting FixedReg references. + // Now, check to see whether it is currently in use. + if (useRegRecord->assignedInterval != nullptr) + { + RefPosition* possiblyConflictingRef = useRegRecord->assignedInterval->recentRefPosition; + LsraLocation possiblyConflictingRefLocation = possiblyConflictingRef->getRefEndLocation(); + if (possiblyConflictingRefLocation >= defRefPosition->nodeLocation) + { + useRegConflict = true; + } + } + if (!useRegConflict) + { + // This is case #2. Use the useRegAssignment + INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_DEFUSE_CASE2)); + defRefPosition->registerAssignment = useRegAssignment; + return; + } + } + else + { + useRegConflict = true; + } + } + if (defRegRecord != nullptr && !useRegConflict) + { + // This is case #3. + INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_DEFUSE_CASE3)); + defRefPosition->registerAssignment = useRegAssignment; + return; + } + if (useRegRecord != nullptr && !defRegConflict && canChangeUseAssignment) + { + // This is case #4. + INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_DEFUSE_CASE4)); + useRefPosition->registerAssignment = defRegAssignment; + return; + } + if (defRegRecord != nullptr && useRegRecord != nullptr) + { + // This is case #5. + INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_DEFUSE_CASE5)); + RegisterType regType = interval->registerType; + assert((getRegisterType(interval, defRefPosition) == regType) && + (getRegisterType(interval, useRefPosition) == regType)); + regMaskTP candidates = allRegs(regType); + defRefPosition->registerAssignment = candidates; + return; + } + INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_DEFUSE_CASE6)); + return; +} + +//------------------------------------------------------------------------ +// applyCalleeSaveHeuristics: Set register preferences for an interval based on the given RefPosition +// +// Arguments: +// rp - The RefPosition of interest +// +// Notes: +// This is slightly more general than its name implies, and updates preferences not just +// for callee-save registers. +// +void LinearScan::applyCalleeSaveHeuristics(RefPosition* rp) +{ +#ifdef _TARGET_AMD64_ + if (compiler->opts.compDbgEnC) + { + // We only use RSI and RDI for EnC code, so we don't want to favor callee-save regs.
+ return; + } +#endif // _TARGET_AMD64_ + + Interval* theInterval = rp->getInterval(); + +#ifdef DEBUG + if (!doReverseCallerCallee()) +#endif // DEBUG + { + // Set preferences so that this register set will be preferred for earlier refs + theInterval->updateRegisterPreferences(rp->registerAssignment); + } +} + +//------------------------------------------------------------------------ +// checkConflictingDefUse: Ensure that we have consistent def/use on SDSU temps. +// +// Arguments: +// useRP - The use RefPosition of a tree temp (SDSU Interval) +// +// Notes: +// There are a couple of cases where this may over-constrain allocation: +// 1. In the case of a non-commutative rmw def (in which the rmw source must be delay-free), or +// 2. In the case where the defining node requires a temp distinct from the target (also a +// delay-free case). +// In those cases, if we propagate a single-register restriction from the consumer to the producer, +// the delayed uses will not see a fixed reference in the PhysReg at that position, and may +// incorrectly allocate that register. +// TODO-CQ: This means that we may often require a copy at the use of this node's result. +// This case could be moved to BuildRefPositionsForNode, at the point where the def RefPosition is +// created, causing a RefTypeFixedRef to be added at that location. This, however, results in +// more PhysReg RefPositions (a throughput impact), and a large number of diffs that require +// further analysis to determine benefit. +// See Issue #11274. +// +void LinearScan::checkConflictingDefUse(RefPosition* useRP) +{ + assert(useRP->refType == RefTypeUse); + Interval* theInterval = useRP->getInterval(); + assert(!theInterval->isLocalVar); + + RefPosition* defRP = theInterval->firstRefPosition; + + // All defs must have a valid treeNode, but we check it below to be conservative. + assert(defRP->treeNode != nullptr); + regMaskTP prevAssignment = defRP->registerAssignment; + regMaskTP newAssignment = (prevAssignment & useRP->registerAssignment); + if (newAssignment != RBM_NONE) + { + if (!isSingleRegister(newAssignment) || !theInterval->hasInterferingUses) + { + defRP->registerAssignment = newAssignment; + } + } + else + { + theInterval->hasConflictingDefUse = true; + } +} + +//------------------------------------------------------------------------ +// associateRefPosWithInterval: Update the Interval based on the given RefPosition. +// +// Arguments: +// rp - The RefPosition of interest +// +// Notes: +// This is called at the time when 'rp' has just been created, so it becomes +// the nextRefPosition of the recentRefPosition, and both the recentRefPosition +// and lastRefPosition of its referent.
+// +void LinearScan::associateRefPosWithInterval(RefPosition* rp) +{ + Referenceable* theReferent = rp->referent; + + if (theReferent != nullptr) + { + // All RefPositions except the dummy ones at the beginning of blocks + + if (rp->isIntervalRef()) + { + Interval* theInterval = rp->getInterval(); + + applyCalleeSaveHeuristics(rp); + + if (theInterval->isLocalVar) + { + if (RefTypeIsUse(rp->refType)) + { + RefPosition* const prevRP = theInterval->recentRefPosition; + if ((prevRP != nullptr) && (prevRP->bbNum == rp->bbNum)) + { + prevRP->lastUse = false; + } + } + + rp->lastUse = (rp->refType != RefTypeExpUse) && (rp->refType != RefTypeParamDef) && + (rp->refType != RefTypeZeroInit) && !extendLifetimes(); + } + else if (rp->refType == RefTypeUse) + { + checkConflictingDefUse(rp); + rp->lastUse = true; + } + } + + RefPosition* prevRP = theReferent->recentRefPosition; + if (prevRP != nullptr) + { + prevRP->nextRefPosition = rp; + } + else + { + theReferent->firstRefPosition = rp; + } + theReferent->recentRefPosition = rp; + theReferent->lastRefPosition = rp; + } + else + { + assert((rp->refType == RefTypeBB) || (rp->refType == RefTypeKillGCRefs)); + } +} + +//--------------------------------------------------------------------------- +// newRefPosition: allocate and initialize a new RefPosition. +// +// Arguments: +// reg - reg number that identifies RegRecord to be associated +// with this RefPosition +// theLocation - LSRA location of RefPosition +// theRefType - RefPosition type +// theTreeNode - GenTree node for which this RefPosition is created +// mask - Set of valid registers for this RefPosition +// +// Return Value: +// a new RefPosition +// +RefPosition* LinearScan::newRefPosition( regNumber reg, LsraLocation theLocation, RefType theRefType, GenTree* theTreeNode, regMaskTP mask) +{ + RefPosition* newRP = newRefPositionRaw(theLocation, theTreeNode, theRefType); + + newRP->setReg(getRegisterRecord(reg)); + newRP->registerAssignment = mask; + + newRP->setMultiRegIdx(0); + newRP->setAllocateIfProfitable(false); + + associateRefPosWithInterval(newRP); + + DBEXEC(VERBOSE, newRP->dump()); + return newRP; +} + +//--------------------------------------------------------------------------- +// newRefPosition: allocate and initialize a new RefPosition. +// +// Arguments: +// theInterval - interval with which this RefPosition is associated +// theLocation - LSRA location of RefPosition +// theRefType - RefPosition type +// theTreeNode - GenTree node for which this RefPosition is created +// mask - Set of valid registers for this RefPosition +// multiRegIdx - register position if this RefPosition corresponds to a +// multi-reg call node. +// +// Return Value: +// a new RefPosition +// +RefPosition* LinearScan::newRefPosition(Interval* theInterval, + LsraLocation theLocation, + RefType theRefType, + GenTree* theTreeNode, + regMaskTP mask, + unsigned multiRegIdx /* = 0 */) +{ +#ifdef DEBUG + if (theInterval != nullptr && regType(theInterval->registerType) == FloatRegisterType) + { + // In the case where we're using floating point registers, we must make sure + // this flag was previously set in the compiler, since it mandates + // whether LSRA will take FP reg kill sets into consideration.
+ assert(compiler->compFloatingPointUsed || ((mask & RBM_FLT_CALLEE_SAVED) == 0)); + } +#endif // DEBUG + + // If this reference is constrained to a single register (and it's not a dummy + // or Kill reftype already), add a RefTypeFixedReg at this location so that its + // availability can be more accurately determined + + bool isFixedRegister = isSingleRegister(mask); + bool insertFixedRef = false; + if (isFixedRegister) + { + // Insert a RefTypeFixedReg for any normal def or use (not ParamDef or BB) + if (theRefType == RefTypeUse || theRefType == RefTypeDef) + { + insertFixedRef = true; + } + } + + if (insertFixedRef) + { + regNumber physicalReg = genRegNumFromMask(mask); + RefPosition* pos = newRefPosition(physicalReg, theLocation, RefTypeFixedReg, nullptr, mask); + assert(theInterval != nullptr); + assert((allRegs(theInterval->registerType) & mask) != 0); + } + + RefPosition* newRP = newRefPositionRaw(theLocation, theTreeNode, theRefType); + + newRP->setInterval(theInterval); + + // Spill info + newRP->isFixedRegRef = isFixedRegister; + +#ifndef _TARGET_AMD64_ + // We don't need this for AMD64 because the PInvoke method epilog code is explicit + // at register allocation time. + if (theInterval != nullptr && theInterval->isLocalVar && compiler->info.compCallUnmanaged && + theInterval->varNum == compiler->genReturnLocal) + { + mask &= ~(RBM_PINVOKE_TCB | RBM_PINVOKE_FRAME); + noway_assert(mask != RBM_NONE); + } +#endif // !_TARGET_AMD64_ + newRP->registerAssignment = mask; + + newRP->setMultiRegIdx(multiRegIdx); + newRP->setAllocateIfProfitable(false); + + associateRefPosWithInterval(newRP); + + DBEXEC(VERBOSE, newRP->dump()); + return newRP; +} + +//------------------------------------------------------------------------ +// isContainableMemoryOp: Checks whether this is a memory op that can be contained. +// +// Arguments: +// node - the node of interest. +// +// Return value: +// True if this will definitely be a memory reference that could be contained. +// +// Notes: +// This differs from the isMemoryOp() method on GenTree because it checks for +// the case of a doNotEnregister local. This won't include locals that +// for some other reason do not become register candidates, nor those that get +// spilled. +// Also, because we usually call this before we redo dataflow, any new lclVars +// introduced after the last dataflow analysis will not yet be marked lvTracked, +// so we don't use that. +// +bool LinearScan::isContainableMemoryOp(GenTree* node) +{ + if (node->isMemoryOp()) + { + return true; + } + if (node->IsLocal()) + { + if (!enregisterLocalVars) + { + return true; + } + LclVarDsc* varDsc = &compiler->lvaTable[node->AsLclVar()->gtLclNum]; + return varDsc->lvDoNotEnregister; + } + return false; +} + +//------------------------------------------------------------------------ +// addRefsForPhysRegMask: Adds RefPositions of the given type for all the registers in 'mask'. +// +// Arguments: +// mask - the mask (set) of registers.
+// currentLoc - the location at which they should be added +// refType - the type of refposition +// isLastUse - true IFF this is a last use of the register +// +void LinearScan::addRefsForPhysRegMask(regMaskTP mask, LsraLocation currentLoc, RefType refType, bool isLastUse) +{ + for (regNumber reg = REG_FIRST; mask; reg = REG_NEXT(reg), mask >>= 1) + { + if (mask & 1) + { + // This assumes that these are all "special" RefTypes that + // don't need to be recorded on the tree (hence treeNode is nullptr) + RefPosition* pos = newRefPosition(reg, currentLoc, refType, nullptr, + genRegMask(reg)); // This MUST occupy the physical register (obviously) + + if (isLastUse) + { + pos->lastUse = true; + } + } + } +} + +//------------------------------------------------------------------------ +// getKillSetForStoreInd: Determine the liveness kill set for a GT_STOREIND node. +// If the GT_STOREIND will generate a write barrier, determine the specific kill +// set required by the case-specific, platform-specific write barrier. If no +// write barrier is required, the kill set will be RBM_NONE. +// +// Arguments: +// tree - the GT_STOREIND node +// +// Return Value: a register mask of the registers killed +// +regMaskTP LinearScan::getKillSetForStoreInd(GenTreeStoreInd* tree) +{ + assert(tree->OperIs(GT_STOREIND)); + + regMaskTP killMask = RBM_NONE; + + GenTree* data = tree->Data(); + + GCInfo::WriteBarrierForm writeBarrierForm = compiler->codeGen->gcInfo.gcIsWriteBarrierCandidate(tree, data); + if (writeBarrierForm != GCInfo::WBF_NoBarrier) + { + if (compiler->codeGen->genUseOptimizedWriteBarriers(writeBarrierForm)) + { + // We can't determine the exact helper to be used at this point, because it depends on + // the allocated register for the `data` operand. However, all the (x86) optimized + // helpers have the same kill set: EDX. + killMask = RBM_CALLEE_TRASH_NOGC; + } + else + { + // Figure out which helper we're going to use, and then get the kill set for that helper. + CorInfoHelpFunc helper = + compiler->codeGen->genWriteBarrierHelperForWriteBarrierForm(tree, writeBarrierForm); + killMask = compiler->compHelperCallKillSet(helper); + } + } + + return killMask; +} + +//------------------------------------------------------------------------ +// getKillSetForNode: Return the registers killed by the given tree node. +// +// Arguments: +// tree - the tree for which the kill set is needed.
+// +// Return Value: a register mask of the registers killed +// +regMaskTP LinearScan::getKillSetForNode(GenTree* tree) +{ + regMaskTP killMask = RBM_NONE; + switch (tree->OperGet()) + { +#ifdef _TARGET_XARCH_ + case GT_MUL: + // We use the 128-bit multiply when performing an overflow checking unsigned multiply + // + if (((tree->gtFlags & GTF_UNSIGNED) != 0) && tree->gtOverflowEx()) + { + // Both RAX and RDX are killed by the operation + killMask = RBM_RAX | RBM_RDX; + } + break; + + case GT_MULHI: +#if defined(_TARGET_X86_) && !defined(LEGACY_BACKEND) + case GT_MUL_LONG: +#endif + killMask = RBM_RAX | RBM_RDX; + break; + + case GT_MOD: + case GT_DIV: + case GT_UMOD: + case GT_UDIV: + if (!varTypeIsFloating(tree->TypeGet())) + { + // Both RAX and RDX are killed by the operation + killMask = RBM_RAX | RBM_RDX; + } + break; +#endif // _TARGET_XARCH_ + + case GT_STORE_OBJ: + if (tree->OperIsCopyBlkOp()) + { + assert(tree->AsObj()->gtGcPtrCount != 0); + killMask = compiler->compHelperCallKillSet(CORINFO_HELP_ASSIGN_BYREF); + break; + } + __fallthrough; + + case GT_STORE_BLK: + case GT_STORE_DYN_BLK: + { + GenTreeBlk* blkNode = tree->AsBlk(); + bool isCopyBlk = varTypeIsStruct(blkNode->Data()); + switch (blkNode->gtBlkOpKind) + { + case GenTreeBlk::BlkOpKindHelper: + if (isCopyBlk) + { + killMask = compiler->compHelperCallKillSet(CORINFO_HELP_MEMCPY); + } + else + { + killMask = compiler->compHelperCallKillSet(CORINFO_HELP_MEMSET); + } + break; + +#ifdef _TARGET_XARCH_ + case GenTreeBlk::BlkOpKindRepInstr: + if (isCopyBlk) + { + // rep movs kills RCX, RDI and RSI + killMask = RBM_RCX | RBM_RDI | RBM_RSI; + } + else + { + // rep stos kills RCX and RDI. + // (Note that the Data() node, if not constant, will be assigned to + // RCX, but it's fine that this kills it, as the value is not available + // after this node in any case.) + killMask = RBM_RDI | RBM_RCX; + } + break; +#else + case GenTreeBlk::BlkOpKindRepInstr: +#endif + case GenTreeBlk::BlkOpKindUnroll: + case GenTreeBlk::BlkOpKindInvalid: + // for these 'gtBlkOpKind' kinds, we leave 'killMask' = RBM_NONE + break; + } + } + break; + + case GT_RETURNTRAP: + killMask = compiler->compHelperCallKillSet(CORINFO_HELP_STOP_FOR_GC); + break; + case GT_CALL: +#ifdef _TARGET_X86_ + if (compiler->compFloatingPointUsed) + { + if (tree->TypeGet() == TYP_DOUBLE) + { + needDoubleTmpForFPCall = true; + } + else if (tree->TypeGet() == TYP_FLOAT) + { + needFloatTmpForFPCall = true; + } + } +#endif // _TARGET_X86_ +#if defined(_TARGET_X86_) || defined(_TARGET_ARM_) + if (tree->IsHelperCall()) + { + GenTreeCall* call = tree->AsCall(); + CorInfoHelpFunc helpFunc = compiler->eeGetHelperNum(call->gtCallMethHnd); + killMask = compiler->compHelperCallKillSet(helpFunc); + } + else +#endif // defined(_TARGET_X86_) || defined(_TARGET_ARM_) + { + // if there is no FP used, we can ignore the FP kills + if (compiler->compFloatingPointUsed) + { + killMask = RBM_CALLEE_TRASH; + } + else + { + killMask = RBM_INT_CALLEE_TRASH; + } +#ifdef _TARGET_ARM_ + if (tree->AsCall()->IsVirtualStub()) + { + killMask |= compiler->virtualStubParamInfo->GetRegMask(); + } +#else // !_TARGET_ARM_ + // Verify that the special virtual stub call registers are in the kill mask. + // We don't just add them unconditionally to the killMask because for most architectures + // they are already in the RBM_CALLEE_TRASH set, + // and we don't want to introduce extra checks and calls in this hot function.
+ assert(!tree->AsCall()->IsVirtualStub() || ((killMask & compiler->virtualStubParamInfo->GetRegMask()) == + compiler->virtualStubParamInfo->GetRegMask())); +#endif + } + break; + case GT_STOREIND: + killMask = getKillSetForStoreInd(tree->AsStoreInd()); + break; + +#if defined(PROFILING_SUPPORTED) + // If this method requires profiler ELT hook then mark these nodes as killing + // callee trash registers (excluding RAX and XMM0). The reason for this is that + // profiler callback would trash these registers. See vm\amd64\asmhelpers.asm for + // more details. + case GT_RETURN: + if (compiler->compIsProfilerHookNeeded()) + { + killMask = compiler->compHelperCallKillSet(CORINFO_HELP_PROF_FCN_LEAVE); + } + break; + + case GT_PROF_HOOK: + if (compiler->compIsProfilerHookNeeded()) + { + killMask = compiler->compHelperCallKillSet(CORINFO_HELP_PROF_FCN_TAILCALL); + } + break; +#endif // PROFILING_SUPPORTED + + default: + // for all other 'tree->OperGet()' kinds, leave 'killMask' = RBM_NONE + break; + } + return killMask; +} + +//------------------------------------------------------------------------ +// buildKillPositionsForNode: +// Given some tree node add refpositions for all the registers this node kills +// +// Arguments: +// tree - the tree for which kill positions should be generated +// currentLoc - the location at which the kills should be added +// +// Return Value: +// true - kills were inserted +// false - no kills were inserted +// +// Notes: +// The return value is needed because if we have any kills, we need to make sure that +// all defs are located AFTER the kills. On the other hand, if there aren't kills, +// the multiple defs for a regPair are in different locations. +// If we generate any kills, we will mark all currentLiveVars as being preferenced +// to avoid the killed registers. This is somewhat conservative. + +bool LinearScan::buildKillPositionsForNode(GenTree* tree, LsraLocation currentLoc) +{ + regMaskTP killMask = getKillSetForNode(tree); + bool isCallKill = ((killMask == RBM_INT_CALLEE_TRASH) || (killMask == RBM_CALLEE_TRASH)); + if (killMask != RBM_NONE) + { + // The killMask identifies a set of registers that will be used during codegen. + // Mark these as modified here, so when we do final frame layout, we'll know about + // all these registers. This is especially important if killMask contains + // callee-saved registers, which affect the frame size since we need to save/restore them. + // In the case where we have a copyBlk with GC pointers, we may need to call the + // CORINFO_HELP_ASSIGN_BYREF helper, which kills callee-saved RSI and RDI. If + // LSRA doesn't assign RSI/RDI, they wouldn't get marked as modified until codegen, + // which is too late. + compiler->codeGen->regSet.rsSetRegsModified(killMask DEBUGARG(true)); + + addRefsForPhysRegMask(killMask, currentLoc, RefTypeKill, true); + + // TODO-CQ: It appears to be valuable for both fp and int registers to avoid killing the callee + // save regs on infrequently executed paths. However, it results in a large number of asmDiffs, + // many of which appear to be regressions (because there is more spill on the infrequently executed path), + // but are not really because the frequent path becomes smaller. Validating these diffs will need + // to be done before making this change.
+ // if (!blockSequence[curBBSeqNum]->isRunRarely()) + if (enregisterLocalVars) + { + VarSetOps::Iter iter(compiler, currentLiveVars); + unsigned varIndex = 0; + while (iter.NextElem(&varIndex)) + { + unsigned varNum = compiler->lvaTrackedToVarNum[varIndex]; + LclVarDsc* varDsc = compiler->lvaTable + varNum; +#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE + if (varTypeNeedsPartialCalleeSave(varDsc->lvType)) + { + if (!VarSetOps::IsMember(compiler, largeVectorCalleeSaveCandidateVars, varIndex)) + { + continue; + } + } + else +#endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE + if (varTypeIsFloating(varDsc) && + !VarSetOps::IsMember(compiler, fpCalleeSaveCandidateVars, varIndex)) + { + continue; + } + Interval* interval = getIntervalForLocalVar(varIndex); + if (isCallKill) + { + interval->preferCalleeSave = true; + } + regMaskTP newPreferences = allRegs(interval->registerType) & (~killMask); + + if (newPreferences != RBM_NONE) + { + interval->updateRegisterPreferences(newPreferences); + } + else + { + // If there are no callee-saved registers, the call could kill all the registers. + // This is a valid state, so in that case assert should not trigger. The RA will spill in order to + // free a register later. + assert(compiler->opts.compDbgEnC || (calleeSaveRegs(varDsc->lvType)) == RBM_NONE); + } + } + } + + if (compiler->killGCRefs(tree)) + { + RefPosition* pos = newRefPosition((Interval*)nullptr, currentLoc, RefTypeKillGCRefs, tree, + (allRegs(TYP_REF) & ~RBM_ARG_REGS)); + } + return true; + } + + return false; +} + +//---------------------------------------------------------------------------- +// defineNewInternalTemp: Defines a ref position for an internal temp. +// +// Arguments: +// tree - Gentree node requiring an internal register +// regType - Register type +// regMask - register mask of candidates for temp +// +RefPosition* LinearScan::defineNewInternalTemp(GenTree* tree, RegisterType regType, regMaskTP regMask) +{ + Interval* current = newInterval(regType); + current->isInternal = true; + return newRefPosition(current, currentLoc, RefTypeDef, tree, regMask, 0); +} + +//------------------------------------------------------------------------ +// buildInternalRegisterDefsForNode - build Def positions for internal +// registers required for tree node. +// +// Arguments: +// tree - Gentree node that needs internal registers +// info - the TreeNodeInfo for the node +// temps - in-out array which is populated with ref positions +// created for Def of internal registers +// +// Returns: +// The total number of Def positions created for the internal registers of the tree node. +int LinearScan::buildInternalRegisterDefsForNode(GenTree* tree, TreeNodeInfo* info, RefPosition* temps[]) +{ + int count; + int internalIntCount = info->internalIntCount; + regMaskTP internalCands = info->getInternalCandidates(this); + + // If the number of internal integer registers required is the same as the number of candidate integer registers in + // the candidate set, then they must be handled as fixed registers. + // (E.g. for the integer registers that floating point arguments must be copied into for a varargs call.)
+ bool fixedRegs = false; + regMaskTP internalIntCandidates = (internalCands & allRegs(TYP_INT)); + if (((int)genCountBits(internalIntCandidates)) == internalIntCount) + { + fixedRegs = true; + } + + for (count = 0; count < internalIntCount; count++) + { + regMaskTP internalIntCands = (internalCands & allRegs(TYP_INT)); + if (fixedRegs) + { + internalIntCands = genFindLowestBit(internalIntCands); + internalCands &= ~internalIntCands; + } + temps[count] = defineNewInternalTemp(tree, IntRegisterType, internalIntCands); + } + + int internalFloatCount = info->internalFloatCount; + for (int i = 0; i < internalFloatCount; i++) + { + regMaskTP internalFPCands = (internalCands & internalFloatRegCandidates()); + temps[count++] = defineNewInternalTemp(tree, FloatRegisterType, internalFPCands); + } + + assert(count < MaxInternalRegisters); + assert(count == (internalIntCount + internalFloatCount)); + return count; +} + +//------------------------------------------------------------------------ +// buildInternalRegisterUsesForNode - adds Use positions for internal +// registers required for tree node. +// +// Arguments: +// tree - Gentree node that needs internal registers +// info - the TreeNodeInfo for the node +// defs - array containing the Def positions of the internal +// registers. +// total - Total number of Def positions in 'defs' array. +// +// Returns: +// Void. +void LinearScan::buildInternalRegisterUsesForNode(GenTree* tree, TreeNodeInfo* info, RefPosition* defs[], int total) +{ + assert(total < MaxInternalRegisters); + + // defs[] has been populated by buildInternalRegisterDefsForNode + // now just add uses to the defs previously added. + for (int i = 0; i < total; i++) + { + RefPosition* prevRefPosition = defs[i]; + assert(prevRefPosition != nullptr); + regMaskTP mask = prevRefPosition->registerAssignment; + if (prevRefPosition->isPhysRegRef) + { + newRefPosition(defs[i]->getReg()->regNum, currentLoc, RefTypeUse, tree, mask); + } + else + { + RefPosition* newest = newRefPosition(defs[i]->getInterval(), currentLoc, RefTypeUse, tree, mask, 0); + + if (info->isInternalRegDelayFree) + { + newest->delayRegFree = true; + } + } + } +} + +#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE +//------------------------------------------------------------------------ +// buildUpperVectorSaveRefPositions - Create special RefPositions for saving +// the upper half of a set of large vectors. +// +// Arguments: +// tree - The current node being handled +// currentLoc - The location of the current node +// +// Return Value: Returns the set of lclVars that are killed by this node, and therefore +// require RefTypeUpperVectorSaveDef RefPositions. +// +// Notes: The returned set is used by buildUpperVectorRestoreRefPositions. +// +VARSET_VALRET_TP +LinearScan::buildUpperVectorSaveRefPositions(GenTree* tree, LsraLocation currentLoc) +{ + assert(enregisterLocalVars); + VARSET_TP liveLargeVectors(VarSetOps::MakeEmpty(compiler)); + regMaskTP fpCalleeKillSet = RBM_NONE; + if (!VarSetOps::IsEmpty(compiler, largeVectorVars)) + { + // We actually need to find any calls that kill the upper-half of the callee-save vector registers. + // But we will use as a proxy any node that kills floating point registers. + // (Note that some calls are masquerading as other nodes at this point so we can't just check for calls.)
+ fpCalleeKillSet = getKillSetForNode(tree); + if ((fpCalleeKillSet & RBM_FLT_CALLEE_TRASH) != RBM_NONE) + { + VarSetOps::AssignNoCopy(compiler, liveLargeVectors, + VarSetOps::Intersection(compiler, currentLiveVars, largeVectorVars)); + VarSetOps::Iter iter(compiler, liveLargeVectors); + unsigned varIndex = 0; + while (iter.NextElem(&varIndex)) + { + Interval* varInterval = getIntervalForLocalVar(varIndex); + Interval* tempInterval = newInterval(varInterval->registerType); + tempInterval->isInternal = true; + RefPosition* pos = + newRefPosition(tempInterval, currentLoc, RefTypeUpperVectorSaveDef, tree, RBM_FLT_CALLEE_SAVED); + // We are going to save the existing relatedInterval of varInterval on tempInterval, so that we can set + // the tempInterval as the relatedInterval of varInterval, so that we can build the corresponding + // RefTypeUpperVectorSaveUse RefPosition. We will then restore the relatedInterval onto varInterval, + // and set varInterval as the relatedInterval of tempInterval. + tempInterval->relatedInterval = varInterval->relatedInterval; + varInterval->relatedInterval = tempInterval; + } + } + } + return liveLargeVectors; +} + +// buildUpperVectorRestoreRefPositions - Create special RefPositions for restoring +// the upper half of a set of large vectors. +// +// Arguments: +// tree - The current node being handled +// currentLoc - The location of the current node +// liveLargeVectors - The set of lclVars needing restores (returned by buildUpperVectorSaveRefPositions) +// +void LinearScan::buildUpperVectorRestoreRefPositions(GenTree* tree, + LsraLocation currentLoc, + VARSET_VALARG_TP liveLargeVectors) +{ + assert(enregisterLocalVars); + if (!VarSetOps::IsEmpty(compiler, liveLargeVectors)) + { + VarSetOps::Iter iter(compiler, liveLargeVectors); + unsigned varIndex = 0; + while (iter.NextElem(&varIndex)) + { + Interval* varInterval = getIntervalForLocalVar(varIndex); + Interval* tempInterval = varInterval->relatedInterval; + assert(tempInterval->isInternal == true); + RefPosition* pos = + newRefPosition(tempInterval, currentLoc, RefTypeUpperVectorSaveUse, tree, RBM_FLT_CALLEE_SAVED); + // Restore the relatedInterval onto varInterval, and set varInterval as the relatedInterval + // of tempInterval. + varInterval->relatedInterval = tempInterval->relatedInterval; + tempInterval->relatedInterval = varInterval; + } + } +} +#endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE + +#ifdef DEBUG +//------------------------------------------------------------------------ +// ComputeOperandDstCount: computes the number of registers defined by a +// node. +// +// For most nodes, this is simple: +// - Nodes that do not produce values (e.g. stores and other void-typed +// nodes) and nodes that immediately use the registers they define +// produce no registers +// - Nodes that are marked as defining N registers define N registers. +// +// For contained nodes, however, things are more complicated: for purposes +// of bookkeeping, a contained node is treated as producing the transitive +// closure of the registers produced by its sources. +// +// Arguments: +// operand - The operand for which to compute a register count. +// +// Returns: +// The number of registers defined by `operand`. +// +// static +int LinearScan::ComputeOperandDstCount(GenTree* operand) +{ + // GT_ARGPLACE is the only non-LIR node that is currently in the trees at this stage, though + // note that it is not in the linear order. 
It seems best to check for !IsLIR() rather than + // GT_ARGPLACE directly, since it's that characteristic that makes it irrelevant for this method. + if (!operand->IsLIR()) + { + return 0; + } + if (operand->isContained()) + { + int dstCount = 0; + for (GenTree* op : operand->Operands()) + { + dstCount += ComputeOperandDstCount(op); + } + + return dstCount; + } + if (operand->IsUnusedValue()) + { + // Operands that define an unused value do not produce any registers. + return 0; + } + if (operand->IsValue()) + { + // Operands that are values and are not contained consume all of their operands + // and produce one or more registers. + return operand->GetRegisterDstCount(); + } + else + { + // This must be one of the operand types that are neither contained nor produce a value. + // Stores and void-typed operands may be encountered when processing call nodes, which contain + // pointers to argument setup stores. + assert(operand->OperIsStore() || operand->OperIsBlkOp() || operand->OperIsPutArgStk() || + operand->OperIsCompare() || operand->OperIs(GT_CMP) || operand->IsSIMDEqualityOrInequality() || + operand->TypeGet() == TYP_VOID); + return 0; + } +} + +//------------------------------------------------------------------------ +// ComputeAvailableSrcCount: computes the number of registers available as +// sources for a node. +// +// This is simply the sum of the number of registers produced by each +// operand to the node. +// +// Arguments: +// node - The node for which to compute a source count. +// +// Return Value: +// The number of registers available as sources for `node`. +// +// static +int LinearScan::ComputeAvailableSrcCount(GenTree* node) +{ + int numSources = 0; + for (GenTree* operand : node->Operands()) + { + numSources += ComputeOperandDstCount(operand); + } + + return numSources; +} +#endif // DEBUG + +//------------------------------------------------------------------------ +// buildRefPositionsForNode: The main entry point for building the RefPositions +// and "tree temp" Intervals for a given node. +// +// Arguments: +// tree - The node for which we are building RefPositions +// block - The BasicBlock in which the node resides +// currentLoc - The LsraLocation of the given node +// +void LinearScan::buildRefPositionsForNode(GenTree* tree, BasicBlock* block, LsraLocation currentLoc) +{ +#ifdef _TARGET_ARM_ + assert(!isRegPairType(tree->TypeGet())); +#endif // _TARGET_ARM_ + + // The LIR traversal doesn't visit GT_LIST or GT_ARGPLACE nodes. + // GT_CLS_VAR nodes should have been eliminated by rationalizer. + assert(tree->OperGet() != GT_ARGPLACE); + assert(tree->OperGet() != GT_LIST); + assert(tree->OperGet() != GT_CLS_VAR); + + // The LIR traversal visits only the first node in a GT_FIELD_LIST. + assert((tree->OperGet() != GT_FIELD_LIST) || tree->AsFieldList()->IsFieldListHead()); + + // The set of internal temporary registers used by this node are stored in the + // gtRsvdRegs register mask. Clear it out. + tree->gtRsvdRegs = RBM_NONE; + +#ifdef DEBUG + if (VERBOSE) + { + dumpDefList(); + compiler->gtDispTree(tree, nullptr, nullptr, true); + } +#endif // DEBUG + + // If the node produces a value that will be consumed by a parent node, its TreeNodeInfo will + // be allocated in the LocationInfoListNode. Otherwise, we'll just use a local value that will + // be thrown away when we're done. 
+ LocationInfoListNode* locationInfo = nullptr; + TreeNodeInfo tempInfo; + TreeNodeInfo* info = nullptr; + int consume = 0; + int produce = 0; + if (!tree->isContained()) + { + if (tree->IsValue()) + { + locationInfo = listNodePool.GetNode(currentLoc, nullptr, tree); + currentNodeInfo = &locationInfo->info; + } + else + { + currentNodeInfo = &tempInfo; + } + info = currentNodeInfo; + info->Initialize(this, tree); + BuildNode(tree); + assert(info->IsValid(this)); + consume = info->srcCount; + produce = info->dstCount; +#ifdef DEBUG + if (VERBOSE) + { + printf(" +"); + info->dump(this); + tree->dumpLIRFlags(); + printf("\n"); + } +#endif // DEBUG + } + +#ifdef DEBUG + if (VERBOSE) + { + if (tree->isContained()) + { + JITDUMP("Contained\n"); + } + else if (tree->OperIs(GT_LCL_VAR, GT_LCL_FLD) && tree->IsUnusedValue()) + { + JITDUMP("Unused\n"); + } + else + { + JITDUMP(" consume=%d produce=%d\n", consume, produce); + } + } +#endif // DEBUG + + assert(((consume == 0) && (produce == 0)) || (ComputeAvailableSrcCount(tree) == consume)); + + if (tree->OperIs(GT_LCL_VAR, GT_LCL_FLD)) + { + LclVarDsc* const varDsc = &compiler->lvaTable[tree->AsLclVarCommon()->gtLclNum]; + if (isCandidateVar(varDsc)) + { + assert(consume == 0); + + // We handle tracked variables differently from non-tracked ones. If it is tracked, + // we simply add a use or def of the tracked variable. Otherwise, for a use we need + // to actually add the appropriate references for loading or storing the variable. + // + // It won't actually get used or defined until the appropriate ancestor tree node + // is processed, unless this is marked "isLocalDefUse" because it is a stack-based argument + // to a call + + assert(varDsc->lvTracked); + unsigned varIndex = varDsc->lvVarIndex; + + // We have only approximate last-use information at this point. This is because the + // execution order doesn't actually reflect the true order in which the localVars + // are referenced - but the order of the RefPositions will, so we recompute it after + // RefPositions are built. + // Use the old value for setting currentLiveVars - note that we do this with the + // not-quite-correct setting of lastUse. However, this is OK because + // 1) this is only for preferencing, which doesn't require strict correctness, and + // 2) the cases where these out-of-order uses occur should not overlap a kill. + // TODO-Throughput: clean this up once we have the execution order correct. At that point + // we can update currentLiveVars at the same place that we create the RefPosition. 
+ if ((tree->gtFlags & GTF_VAR_DEATH) != 0) + { + VarSetOps::RemoveElemD(compiler, currentLiveVars, varIndex); + } + + if (!tree->IsUnusedValue() && !tree->isContained()) + { + assert(produce != 0); + + locationInfo->interval = getIntervalForLocalVar(varIndex); + defList.Append(locationInfo); + } + return; + } + } + if (tree->isContained()) + { + return; + } + + // Handle the case of local variable assignment + Interval* varDefInterval = nullptr; + + GenTree* defNode = tree; + + // noAdd means the node creates a def but for purposes of map + // management do not add it because data is not flowing up the + // tree + + bool noAdd = info->isLocalDefUse; + RefPosition* prevPos = nullptr; + + bool isSpecialPutArg = false; + + assert(!tree->OperIsAssignment()); + if (tree->OperIsLocalStore()) + { + GenTreeLclVarCommon* const store = tree->AsLclVarCommon(); + assert((consume > 1) || (regType(store->gtOp1->TypeGet()) == regType(store->TypeGet()))); + + LclVarDsc* varDsc = &compiler->lvaTable[store->gtLclNum]; + if (isCandidateVar(varDsc)) + { + // We always push the tracked lclVar intervals + assert(varDsc->lvTracked); + unsigned varIndex = varDsc->lvVarIndex; + varDefInterval = getIntervalForLocalVar(varIndex); + assert((store->gtFlags & GTF_VAR_DEF) != 0); + defNode = tree; + if (produce == 0) + { + produce = 1; + noAdd = true; + } + + assert(consume <= MAX_RET_REG_COUNT); + if (consume == 1) + { + // Get the location info for the register defined by the first operand. + LocationInfoListNode& operandInfo = *(useList.Begin()); + assert(operandInfo.treeNode == tree->gtGetOp1()); + + Interval* srcInterval = operandInfo.interval; + if (srcInterval->relatedInterval == nullptr) + { + // Preference the source to the dest, unless this is a non-last-use localVar. + // Note that the last-use info is not correct, but it is a better approximation than preferencing + // the source to the dest, if the source's lifetime extends beyond the dest. + if (!srcInterval->isLocalVar || (operandInfo.treeNode->gtFlags & GTF_VAR_DEATH) != 0) + { + srcInterval->assignRelatedInterval(varDefInterval); + } + } + else if (!srcInterval->isLocalVar) + { + // Preference the source to dest, if src is not a local var. + srcInterval->assignRelatedInterval(varDefInterval); + } + } + + if ((tree->gtFlags & GTF_VAR_DEATH) == 0) + { + VarSetOps::AddElemD(compiler, currentLiveVars, varIndex); + } + } + else if (store->gtOp1->OperIs(GT_BITCAST)) + { + store->gtType = store->gtOp1->gtType = store->gtOp1->AsUnOp()->gtOp1->TypeGet(); + + // Get the location info for the register defined by the first operand. + LocationInfoListNode& operandInfo = *(useList.Begin()); + assert(operandInfo.treeNode == tree->gtGetOp1()); + + Interval* srcInterval = operandInfo.interval; + srcInterval->registerType = regType(store->TypeGet()); + + RefPosition* srcDefPosition = srcInterval->firstRefPosition; + assert(srcDefPosition != nullptr); + assert(srcDefPosition->refType == RefTypeDef); + assert(srcDefPosition->treeNode == store->gtOp1); + + srcDefPosition->registerAssignment = allRegs(store->TypeGet()); + operandInfo.info.setSrcCandidates(this, allRegs(store->TypeGet())); + } + } + else if (noAdd && produce == 0) + { + // Dead nodes may remain after tree rationalization, decomposition or lowering. + // They should be marked as UnusedValue. + // TODO-Cleanup: Identify and remove these dead nodes prior to register allocation. 
+ assert(!noAdd || (produce != 0)); + } + + Interval* prefSrcInterval = nullptr; + + // If this is a binary operator that will be encoded with 2 operand fields + // (i.e. the target is read-modify-write), preference the dst to op1. + + bool hasDelayFreeSrc = info->hasDelayFreeSrc; + +#if defined(DEBUG) && defined(_TARGET_X86_) + // On x86, `LSRA_LIMIT_CALLER` is too restrictive to allow the use of special put args: this stress mode + // leaves only three registers allocatable--eax, ecx, and edx--of which the latter two are also used for the + // first two integral arguments to a call. This can leave us with too few registers to successfully allocate in + // situations like the following: + // + // t1026 = lclVar ref V52 tmp35 u:3 REG NA <l:$3a1, c:$98d> + // + // /--* t1026 ref + // t1352 = * putarg_reg ref REG NA + // + // t342 = lclVar int V14 loc6 u:4 REG NA $50c + // + // t343 = const int 1 REG NA $41 + // + // /--* t342 int + // +--* t343 int + // t344 = * + int REG NA $495 + // + // t345 = lclVar int V04 arg4 u:2 REG NA $100 + // + // /--* t344 int + // +--* t345 int + // t346 = * % int REG NA $496 + // + // /--* t346 int + // t1353 = * putarg_reg int REG NA + // + // t1354 = lclVar ref V52 tmp35 (last use) REG NA + // + // /--* t1354 ref + // t1355 = * lea(b+0) byref REG NA + // + // Here, the first `putarg_reg` would normally be considered a special put arg, which would remove `ecx` from the + // set of allocatable registers, leaving only `eax` and `edx`. The allocator will then fail to allocate a register + // for the def of `t345` if arg4 is not a register candidate: the corresponding ref position will be constrained to + // { `ecx`, `ebx`, `esi`, `edi` }, which `LSRA_LIMIT_CALLER` will further constrain to `ecx`, which will not be + // available due to the special put arg. + const bool supportsSpecialPutArg = getStressLimitRegs() != LSRA_LIMIT_CALLER; +#else + const bool supportsSpecialPutArg = true; +#endif + + if (supportsSpecialPutArg && tree->OperGet() == GT_PUTARG_REG && isCandidateLocalRef(tree->gtGetOp1()) && + (tree->gtGetOp1()->gtFlags & GTF_VAR_DEATH) == 0) + { + // This is the case for a "pass-through" copy of a lclVar. In the case where it is a non-last-use, + // we don't want the def of the copy to kill the lclVar register, if it is assigned the same register + // (which is actually what we hope will happen). + JITDUMP("Setting putarg_reg as a pass-through of a non-last use lclVar\n"); + + // Get the register information for the first operand of the node. + LocationInfoListNode* operandDef = useList.Begin(); + assert(operandDef->treeNode == tree->gtGetOp1()); + + // Preference the destination to the interval of the first register defined by the first operand. + Interval* srcInterval = operandDef->interval; + assert(srcInterval->isLocalVar); + prefSrcInterval = srcInterval; + isSpecialPutArg = true; + } + + RefPosition* internalRefs[MaxInternalRegisters]; + +#ifdef DEBUG + // If we are constraining the registers for allocation, we will modify all the RefPositions + // we've built for this node after we've created them. In order to do that, we'll remember + // the last RefPosition prior to those created for this node. + RefPositionIterator refPositionMark = refPositions.backPosition(); +#endif // DEBUG + + // Make intervals for all the 'internal' register requirements for this node, + // where internal means additional registers required temporarily. + // Create a RefTypeDef RefPosition for each such interval.
+ int internalCount = buildInternalRegisterDefsForNode(tree, info, internalRefs); + + // Make use RefPositions for all used values. + int consumed = 0; + for (LocationInfoListNode *listNode = useList.Begin(), *end = useList.End(); listNode != end; + listNode = listNode->Next()) + { + LocationInfo& locInfo = *static_cast<LocationInfo*>(listNode); + + // For tree temps, a use is always a last use and the end of the range; + // this is set by default in newRefPosition + GenTree* const useNode = locInfo.treeNode; + assert(useNode != nullptr); + + Interval* srcInterval = locInfo.interval; + TreeNodeInfo& useNodeInfo = locInfo.info; + if (useNodeInfo.isTgtPref) + { + prefSrcInterval = srcInterval; + } + + const bool delayRegFree = (hasDelayFreeSrc && useNodeInfo.isDelayFree); + + regMaskTP candidates = useNodeInfo.getSrcCandidates(this); +#ifdef _TARGET_ARM_ + regMaskTP allCandidates = candidates; + + if (useNode->OperIsPutArgSplit() || useNode->OperIsMultiRegOp()) + { + // get i-th candidate, set bits in useCandidates must be in sequential order. + candidates = genFindLowestReg(allCandidates); + allCandidates &= ~candidates; + } +#endif // _TARGET_ARM_ + + assert((candidates & allRegs(srcInterval->registerType)) != 0); + + // For non-localVar uses we record nothing, as nothing needs to be written back to the tree. + GenTree* const refPosNode = srcInterval->isLocalVar ? useNode : nullptr; + RefPosition* pos = newRefPosition(srcInterval, currentLoc, RefTypeUse, refPosNode, candidates, 0); + if (delayRegFree) + { + pos->delayRegFree = true; + } + + if (useNode->IsRegOptional()) + { + pos->setAllocateIfProfitable(true); + } + consumed++; + + // Create additional use RefPositions for multi-reg nodes. + for (int idx = 1; idx < locInfo.info.dstCount; idx++) + { + noway_assert(srcInterval->relatedInterval != nullptr); + srcInterval = srcInterval->relatedInterval; +#ifdef _TARGET_ARM_ + if (useNode->OperIsPutArgSplit() || + (compiler->opts.compUseSoftFP && (useNode->OperIsPutArgReg() || useNode->OperGet() == GT_BITCAST))) + { + // get first candidate, set bits in useCandidates must be in sequential order. + candidates = genFindLowestReg(allCandidates); + allCandidates &= ~candidates; + } +#endif // _TARGET_ARM_ + RefPosition* pos = newRefPosition(srcInterval, currentLoc, RefTypeUse, refPosNode, candidates, idx); + consumed++; + } + } + + assert(consumed == consume); + if (consume != 0) + { + listNodePool.ReturnNodes(useList); + } + + buildInternalRegisterUsesForNode(tree, info, internalRefs, internalCount); + + RegisterType registerType = getDefType(tree); + regMaskTP candidates = info->getDstCandidates(this); + regMaskTP useCandidates = info->getSrcCandidates(this); + +#ifdef DEBUG + if (VERBOSE && produce) + { + printf("Def candidates "); + dumpRegMask(candidates); + printf(", Use candidates "); + dumpRegMask(useCandidates); + printf("\n"); + } +#endif // DEBUG + +#if defined(_TARGET_AMD64_) + // Multi-reg call node is the only node that could produce multi-reg value + assert(produce <= 1 || (tree->IsMultiRegCall() && produce == MAX_RET_REG_COUNT)); +#endif // _TARGET_xxx_ + + // Add kill positions before adding def positions + buildKillPositionsForNode(tree, currentLoc + 1); + +#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE + VARSET_TP liveLargeVectors(VarSetOps::UninitVal()); + if (enregisterLocalVars && (RBM_FLT_CALLEE_SAVED != RBM_NONE)) + { + // Build RefPositions for saving any live large vectors. + // This must be done after the kills, so that we know which large vectors are still live. 
+        VarSetOps::AssignNoCopy(compiler, liveLargeVectors, buildUpperVectorSaveRefPositions(tree, currentLoc + 1));
+    }
+#endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE
+
+    ReturnTypeDesc* retTypeDesc = nullptr;
+    bool isMultiRegCall = tree->IsMultiRegCall();
+    if (isMultiRegCall)
+    {
+        retTypeDesc = tree->AsCall()->GetReturnTypeDesc();
+        assert((int)genCountBits(candidates) == produce);
+        assert(candidates == retTypeDesc->GetABIReturnRegs());
+    }
+
+    // push defs
+    LocationInfoList locationInfoList;
+    LsraLocation defLocation = currentLoc + 1;
+    Interval* interval = varDefInterval;
+    // For nodes that define multiple registers, subsequent intervals will be linked using the 'relatedInterval' field.
+    // Keep track of the previous interval allocated, for that purpose.
+    Interval* prevInterval = nullptr;
+    for (int i = 0; i < produce; i++)
+    {
+        regMaskTP currCandidates = candidates;
+
+        // In the case of a multi-reg call node, registerType is given by
+        // the type of the i-th return register.
+        if (isMultiRegCall)
+        {
+            registerType = retTypeDesc->GetReturnRegType((unsigned)i);
+            currCandidates = genRegMask(retTypeDesc->GetABIReturnReg(i));
+            useCandidates = allRegs(registerType);
+        }
+
+#ifdef _TARGET_ARM_
+        // For multi-reg nodes, the set bits in useCandidates must be in sequential order.
+        if (tree->OperIsPutArgSplit() || tree->OperIsMultiRegOp())
+        {
+            // Get the i-th candidate.
+            currCandidates = genFindLowestReg(candidates);
+            candidates &= ~currCandidates;
+        }
+#endif // _TARGET_ARM_
+
+        if (interval == nullptr)
+        {
+            // Make a new interval
+            interval = newInterval(registerType);
+            if (hasDelayFreeSrc || info->isInternalRegDelayFree)
+            {
+                interval->hasInterferingUses = true;
+            }
+            else if (tree->OperIsConst())
+            {
+                assert(!tree->IsReuseRegVal());
+                interval->isConstant = true;
+            }
+
+            if ((currCandidates & useCandidates) != RBM_NONE)
+            {
+                interval->updateRegisterPreferences(currCandidates & useCandidates);
+            }
+
+            if (isSpecialPutArg)
+            {
+                interval->isSpecialPutArg = true;
+            }
+        }
+        else
+        {
+            assert(registerTypesEquivalent(interval->registerType, registerType));
+        }
+
+        if (prefSrcInterval != nullptr)
+        {
+            interval->assignRelatedIntervalIfUnassigned(prefSrcInterval);
+        }
+
+        // For assignments, we want to create a RefPosition for the def,
+        // but not push it.
+        if (!noAdd)
+        {
+            if (i == 0)
+            {
+                locationInfo->interval = interval;
+                prevInterval = interval;
+                defList.Append(locationInfo);
+            }
+            else
+            {
+                // This is the 2nd or subsequent register defined by a multi-reg node.
+                // Connect them using 'relatedInterval'.
+                noway_assert(prevInterval != nullptr);
+                prevInterval->relatedInterval = interval;
+                prevInterval = interval;
+                prevInterval->isMultiReg = true;
+                interval->isMultiReg = true;
+            }
+        }
+
+        RefPosition* pos = newRefPosition(interval, defLocation, RefTypeDef, defNode, currCandidates, (unsigned)i);
+        if (info->isLocalDefUse)
+        {
+            // This must be an unused value, OR it is a special node for which we allocate
+            // a target register even though it produces no value.
+            assert(defNode->IsUnusedValue() || (defNode->gtOper == GT_LOCKADD));
+            pos->isLocalDefUse = true;
+            pos->lastUse = true;
+        }
+        interval->updateRegisterPreferences(currCandidates);
+        interval->updateRegisterPreferences(useCandidates);
+        interval = nullptr;
+    }
+
+#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
+    // SaveDef position must be at the same location as Def position of call node.
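+    // The restores are built at defLocation so that the upper halves of the saved
+    // vectors become live again immediately after the call defines its result.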
+    if (enregisterLocalVars)
+    {
+        buildUpperVectorRestoreRefPositions(tree, defLocation, liveLargeVectors);
+    }
+#endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE
+
+#ifdef DEBUG
+    // If we are constraining registers, modify all the RefPositions we've just built to specify the
+    // minimum reg count required.
+    if ((getStressLimitRegs() != LSRA_LIMIT_NONE) || (getSelectionHeuristics() != LSRA_SELECT_DEFAULT))
+    {
+        // The number of registers required for a tree node is the sum of
+        // consume + produce + internalCount. This is the minimum
+        // set of registers that needs to be ensured in the candidate
+        // set of the ref positions created.
+        unsigned minRegCount = consume + produce + info->internalIntCount + info->internalFloatCount;
+
+        for (refPositionMark++; refPositionMark != refPositions.end(); refPositionMark++)
+        {
+            RefPosition* newRefPosition = &(*refPositionMark);
+            unsigned minRegCountForRef = minRegCount;
+            if (RefTypeIsUse(newRefPosition->refType) && newRefPosition->delayRegFree)
+            {
+                // If delayRegFree, then Use will interfere with the destination of the consuming node.
+                // Therefore, we also need to add the kill set of the consuming node to minRegCount.
+                //
+                // For example, consider the following IR on x86, where v01 and v02
+                // are method args coming in ecx and edx respectively.
+                // GT_DIV(v01, v02)
+                //
+                // For GT_DIV, the minRegCount will be 3 without adding the kill set of the GT_DIV node.
+                //
+                // Assume further JitStressRegs=2, which would constrain candidates to callee trashable
+                // regs { eax, ecx, edx } on use positions of v01 and v02. LSRA allocates ecx for v01.
+                // The use position of v02 cannot be allocated a reg since it is marked delay-reg free and
+                // {eax,edx} are getting killed before the def of GT_DIV. For this reason, minRegCount for
+                // the use position of v02 also needs to take into account the kill set of its consuming node.
+                regMaskTP killMask = getKillSetForNode(tree);
+                if (killMask != RBM_NONE)
+                {
+                    minRegCountForRef += genCountBits(killMask);
+                }
+            }
+            newRefPosition->minRegCandidateCount = minRegCountForRef;
+            if (newRefPosition->IsActualRef() && doReverseCallerCallee())
+            {
+                Interval* interval = newRefPosition->getInterval();
+                regMaskTP oldAssignment = newRefPosition->registerAssignment;
+                regMaskTP calleeSaveMask = calleeSaveRegs(interval->registerType);
+                newRefPosition->registerAssignment =
+                    getConstrainedRegMask(oldAssignment, calleeSaveMask, minRegCountForRef);
+                if ((newRefPosition->registerAssignment != oldAssignment) && (newRefPosition->refType == RefTypeUse) &&
+                    !interval->isLocalVar)
+                {
+                    checkConflictingDefUse(newRefPosition);
+                }
+            }
+        }
+    }
+#endif // DEBUG
+    JITDUMP("\n");
+}
+
+//------------------------------------------------------------------------
+// buildPhysRegRecords: Make an interval for each physical register
+//
+void LinearScan::buildPhysRegRecords()
+{
+    RegisterType regType = IntRegisterType;
+    for (regNumber reg = REG_FIRST; reg < ACTUAL_REG_COUNT; reg = REG_NEXT(reg))
+    {
+        RegRecord* curr = &physRegs[reg];
+        curr->init(reg);
+    }
+}
+
+//------------------------------------------------------------------------
+// getNonEmptyBlock: Return the first non-empty block starting with 'block'
+//
+// Arguments:
+//    block - the BasicBlock from which we start looking
+//
+// Return Value:
+//    The first non-empty BasicBlock we find.
+// +BasicBlock* getNonEmptyBlock(BasicBlock* block) +{ + while (block != nullptr && block->bbTreeList == nullptr) + { + BasicBlock* nextBlock = block->bbNext; + // Note that here we use the version of NumSucc that does not take a compiler. + // That way this doesn't have to take a compiler, or be an instance method, e.g. of LinearScan. + // If we have an empty block, it must have jump type BBJ_NONE or BBJ_ALWAYS, in which + // case we don't need the version that takes a compiler. + assert(block->NumSucc() == 1 && ((block->bbJumpKind == BBJ_ALWAYS) || (block->bbJumpKind == BBJ_NONE))); + // sometimes the first block is empty and ends with an uncond branch + // assert( block->GetSucc(0) == nextBlock); + block = nextBlock; + } + assert(block != nullptr && block->bbTreeList != nullptr); + return block; +} + +//------------------------------------------------------------------------ +// insertZeroInitRefPositions: Handle lclVars that are live-in to the first block +// +// Notes: +// Prior to calling this method, 'currentLiveVars' must be set to the set of register +// candidate variables that are liveIn to the first block. +// For each register candidate that is live-in to the first block: +// - If it is a GC ref, or if compInitMem is set, a ZeroInit RefPosition will be created. +// - Otherwise, it will be marked as spilled, since it will not be assigned a register +// on entry and will be loaded from memory on the undefined path. +// Note that, when the compInitMem option is not set, we may encounter these on +// paths that are protected by the same condition as an earlier def. However, since +// we don't do the analysis to determine this - and couldn't rely on always identifying +// such cases even if we tried - we must conservatively treat the undefined path as +// being possible. This is a relatively rare case, so the introduced conservatism is +// not expected to warrant the analysis required to determine the best placement of +// an initialization. +// +void LinearScan::insertZeroInitRefPositions() +{ + assert(enregisterLocalVars); +#ifdef DEBUG + VARSET_TP expectedLiveVars(VarSetOps::Intersection(compiler, registerCandidateVars, compiler->fgFirstBB->bbLiveIn)); + assert(VarSetOps::Equal(compiler, currentLiveVars, expectedLiveVars)); +#endif // DEBUG + + // insert defs for this, then a block boundary + + VarSetOps::Iter iter(compiler, currentLiveVars); + unsigned varIndex = 0; + while (iter.NextElem(&varIndex)) + { + unsigned varNum = compiler->lvaTrackedToVarNum[varIndex]; + LclVarDsc* varDsc = compiler->lvaTable + varNum; + if (!varDsc->lvIsParam && isCandidateVar(varDsc)) + { + JITDUMP("V%02u was live in to first block:", varNum); + Interval* interval = getIntervalForLocalVar(varIndex); + if (compiler->info.compInitMem || varTypeIsGC(varDsc->TypeGet())) + { + JITDUMP(" creating ZeroInit\n"); + GenTree* firstNode = getNonEmptyBlock(compiler->fgFirstBB)->firstNode(); + RefPosition* pos = + newRefPosition(interval, MinLocation, RefTypeZeroInit, firstNode, allRegs(interval->registerType)); + varDsc->lvMustInit = true; + } + else + { + setIntervalAsSpilled(interval); + JITDUMP(" marking as spilled\n"); + } + } + } +} + +#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) +//------------------------------------------------------------------------ +// unixAmd64UpdateRegStateForArg: Sets the register state for an argument of type STRUCT for System V systems. 
+//
+// Arguments:
+//    argDsc - the LclVarDsc for the argument of interest
+//
+// Notes:
+//    See Compiler::raUpdateRegStateForArg(RegState *regState, LclVarDsc *argDsc) in regalloc.cpp
+//    for how the state for an argument is updated for Unix non-structs and Windows AMD64 structs.
+//
+void LinearScan::unixAmd64UpdateRegStateForArg(LclVarDsc* argDsc)
+{
+    assert(varTypeIsStruct(argDsc));
+    RegState* intRegState = &compiler->codeGen->intRegState;
+    RegState* floatRegState = &compiler->codeGen->floatRegState;
+
+    if ((argDsc->lvArgReg != REG_STK) && (argDsc->lvArgReg != REG_NA))
+    {
+        if (genRegMask(argDsc->lvArgReg) & (RBM_ALLFLOAT))
+        {
+            assert(genRegMask(argDsc->lvArgReg) & (RBM_FLTARG_REGS));
+            floatRegState->rsCalleeRegArgMaskLiveIn |= genRegMask(argDsc->lvArgReg);
+        }
+        else
+        {
+            assert(genRegMask(argDsc->lvArgReg) & (RBM_ARG_REGS));
+            intRegState->rsCalleeRegArgMaskLiveIn |= genRegMask(argDsc->lvArgReg);
+        }
+    }
+
+    if ((argDsc->lvOtherArgReg != REG_STK) && (argDsc->lvOtherArgReg != REG_NA))
+    {
+        if (genRegMask(argDsc->lvOtherArgReg) & (RBM_ALLFLOAT))
+        {
+            assert(genRegMask(argDsc->lvOtherArgReg) & (RBM_FLTARG_REGS));
+            floatRegState->rsCalleeRegArgMaskLiveIn |= genRegMask(argDsc->lvOtherArgReg);
+        }
+        else
+        {
+            assert(genRegMask(argDsc->lvOtherArgReg) & (RBM_ARG_REGS));
+            intRegState->rsCalleeRegArgMaskLiveIn |= genRegMask(argDsc->lvOtherArgReg);
+        }
+    }
+}
+
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+
+//------------------------------------------------------------------------
+// updateRegStateForArg: Updates rsCalleeRegArgMaskLiveIn for the appropriate
+//    regState (either compiler->intRegState or compiler->floatRegState),
+//    with the lvArgReg on "argDsc"
+//
+// Arguments:
+//    argDsc - the argument for which the state is to be updated.
+//
+// Return Value: None
+//
+// Assumptions:
+//    The argument is live on entry to the function
+//    (or is untracked and therefore assumed live)
+//
+// Notes:
+//    This relies on a method in regAlloc.cpp that is shared between LSRA
+//    and regAlloc. It is further abstracted here because regState is updated
+//    separately for tracked and untracked variables in LSRA.
+//
+void LinearScan::updateRegStateForArg(LclVarDsc* argDsc)
+{
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+    // For System V AMD64 calls the argDsc can have 2 registers (for structs).
+    // Handle them here.
+    if (varTypeIsStruct(argDsc))
+    {
+        unixAmd64UpdateRegStateForArg(argDsc);
+    }
+    else
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+    {
+        RegState* intRegState = &compiler->codeGen->intRegState;
+        RegState* floatRegState = &compiler->codeGen->floatRegState;
+        // In the case of AMD64 we'll still use the floating point registers
+        // to model the register usage for arguments on vararg calls, so
+        // we will ignore the varargs condition when determining whether we use
+        // XMM registers for setting up the call.
+ bool isFloat = (isFloatRegType(argDsc->lvType) +#ifndef _TARGET_AMD64_ + && !compiler->info.compIsVarArgs +#endif + && !compiler->opts.compUseSoftFP); + + if (argDsc->lvIsHfaRegArg()) + { + isFloat = true; + } + + if (isFloat) + { + JITDUMP("Float arg V%02u in reg %s\n", (argDsc - compiler->lvaTable), getRegName(argDsc->lvArgReg)); + compiler->raUpdateRegStateForArg(floatRegState, argDsc); + } + else + { + JITDUMP("Int arg V%02u in reg %s\n", (argDsc - compiler->lvaTable), getRegName(argDsc->lvArgReg)); +#if FEATURE_MULTIREG_ARGS + if (argDsc->lvOtherArgReg != REG_NA) + { + JITDUMP("(second half) in reg %s\n", getRegName(argDsc->lvOtherArgReg)); + } +#endif // FEATURE_MULTIREG_ARGS + compiler->raUpdateRegStateForArg(intRegState, argDsc); + } + } +} + +//------------------------------------------------------------------------ +// buildIntervals: The main entry point for building the data structures over +// which we will do register allocation. +// +void LinearScan::buildIntervals() +{ + BasicBlock* block; + + JITDUMP("\nbuildIntervals ========\n"); + + // Build (empty) records for all of the physical registers + buildPhysRegRecords(); + +#ifdef DEBUG + if (VERBOSE) + { + printf("\n-----------------\n"); + printf("LIVENESS:\n"); + printf("-----------------\n"); + foreach_block(compiler, block) + { + printf("BB%02u use def in out\n", block->bbNum); + dumpConvertedVarSet(compiler, block->bbVarUse); + printf("\n"); + dumpConvertedVarSet(compiler, block->bbVarDef); + printf("\n"); + dumpConvertedVarSet(compiler, block->bbLiveIn); + printf("\n"); + dumpConvertedVarSet(compiler, block->bbLiveOut); + printf("\n"); + } + } +#endif // DEBUG + +#if DOUBLE_ALIGN + // We will determine whether we should double align the frame during + // identifyCandidates(), but we initially assume that we will not. + doDoubleAlign = false; +#endif + + identifyCandidates(); + + // Figure out if we're going to use a frame pointer. We need to do this before building + // the ref positions, because those objects will embed the frame register in various register masks + // if the frame pointer is not reserved. If we decide to have a frame pointer, setFrameType() will + // remove the frame pointer from the masks. + setFrameType(); + + DBEXEC(VERBOSE, TupleStyleDump(LSRA_DUMP_PRE)); + + // second part: + JITDUMP("\nbuildIntervals second part ========\n"); + currentLoc = 0; + // TODO-Cleanup: This duplicates prior behavior where entry (ParamDef) RefPositions were + // being assigned the bbNum of the last block traversed in the 2nd phase of Lowering. + // Previously, the block sequencing was done for the (formerly separate) Build pass, + // and the curBBNum was left as the last block sequenced. This block was then used to set the + // weight for the entry (ParamDef) RefPositions. It would be logical to set this to the + // normalized entry weight (compiler->fgCalledCount), but that results in a net regression. + if (!blockSequencingDone) + { + setBlockSequence(); + } + curBBNum = blockSequence[bbSeqCount - 1]->bbNum; + + // Next, create ParamDef RefPositions for all the tracked parameters, in order of their varIndex. + // Assign these RefPositions to the (nonexistent) BB0. 
+ curBBNum = 0; + + LclVarDsc* argDsc; + unsigned int lclNum; + + RegState* intRegState = &compiler->codeGen->intRegState; + RegState* floatRegState = &compiler->codeGen->floatRegState; + intRegState->rsCalleeRegArgMaskLiveIn = RBM_NONE; + floatRegState->rsCalleeRegArgMaskLiveIn = RBM_NONE; + + for (unsigned int varIndex = 0; varIndex < compiler->lvaTrackedCount; varIndex++) + { + lclNum = compiler->lvaTrackedToVarNum[varIndex]; + argDsc = &(compiler->lvaTable[lclNum]); + + if (!argDsc->lvIsParam) + { + continue; + } + + // Only reserve a register if the argument is actually used. + // Is it dead on entry? If compJmpOpUsed is true, then the arguments + // have to be kept alive, so we have to consider it as live on entry. + // Use lvRefCnt instead of checking bbLiveIn because if it's volatile we + // won't have done dataflow on it, but it needs to be marked as live-in so + // it will get saved in the prolog. + if (!compiler->compJmpOpUsed && argDsc->lvRefCnt == 0 && !compiler->opts.compDbgCode) + { + continue; + } + + if (argDsc->lvIsRegArg) + { + updateRegStateForArg(argDsc); + } + + if (isCandidateVar(argDsc)) + { + Interval* interval = getIntervalForLocalVar(varIndex); + regMaskTP mask = allRegs(TypeGet(argDsc)); + if (argDsc->lvIsRegArg) + { + // Set this interval as currently assigned to that register + regNumber inArgReg = argDsc->lvArgReg; + assert(inArgReg < REG_COUNT); + mask = genRegMask(inArgReg); + assignPhysReg(inArgReg, interval); + } + RefPosition* pos = newRefPosition(interval, MinLocation, RefTypeParamDef, nullptr, mask); + } + else if (varTypeIsStruct(argDsc->lvType)) + { + for (unsigned fieldVarNum = argDsc->lvFieldLclStart; + fieldVarNum < argDsc->lvFieldLclStart + argDsc->lvFieldCnt; ++fieldVarNum) + { + LclVarDsc* fieldVarDsc = &(compiler->lvaTable[fieldVarNum]); + if (fieldVarDsc->lvLRACandidate) + { + assert(fieldVarDsc->lvTracked); + Interval* interval = getIntervalForLocalVar(fieldVarDsc->lvVarIndex); + RefPosition* pos = + newRefPosition(interval, MinLocation, RefTypeParamDef, nullptr, allRegs(TypeGet(fieldVarDsc))); + } + } + } + else + { + // We can overwrite the register (i.e. codegen saves it on entry) + assert(argDsc->lvRefCnt == 0 || !argDsc->lvIsRegArg || argDsc->lvDoNotEnregister || + !argDsc->lvLRACandidate || (varTypeIsFloating(argDsc->TypeGet()) && compiler->opts.compDbgCode)); + } + } + + // Now set up the reg state for the non-tracked args + // (We do this here because we want to generate the ParamDef RefPositions in tracked + // order, so that loop doesn't hit the non-tracked args) + + for (unsigned argNum = 0; argNum < compiler->info.compArgsCount; argNum++, argDsc++) + { + argDsc = &(compiler->lvaTable[argNum]); + + if (argDsc->lvPromotedStruct()) + { + noway_assert(argDsc->lvFieldCnt == 1); // We only handle one field here + + unsigned fieldVarNum = argDsc->lvFieldLclStart; + argDsc = &(compiler->lvaTable[fieldVarNum]); + } + noway_assert(argDsc->lvIsParam); + if (!argDsc->lvTracked && argDsc->lvIsRegArg) + { + updateRegStateForArg(argDsc); + } + } + + // If there is a secret stub param, it is also live in + if (compiler->info.compPublishStubParam) + { + intRegState->rsCalleeRegArgMaskLiveIn |= RBM_SECRET_STUB_PARAM; + } + + BasicBlock* predBlock = nullptr; + BasicBlock* prevBlock = nullptr; + + // Initialize currentLiveVars to the empty set. We will set it to the current + // live-in at the entry to each block (this will include the incoming args on + // the first block). 
+    VarSetOps::AssignNoCopy(compiler, currentLiveVars, VarSetOps::MakeEmpty(compiler));
+
+    for (block = startBlockSequence(); block != nullptr; block = moveToNextBlock())
+    {
+        JITDUMP("\nNEW BLOCK BB%02u\n", block->bbNum);
+
+        bool predBlockIsAllocated = false;
+        predBlock = findPredBlockForLiveIn(block, prevBlock DEBUGARG(&predBlockIsAllocated));
+        if (predBlock)
+        {
+            JITDUMP("\n\nSetting BB%02u as the predecessor for determining incoming variable registers of BB%02u\n",
+                    block->bbNum, predBlock->bbNum);
+            assert(predBlock->bbNum <= bbNumMaxBeforeResolution);
+            blockInfo[block->bbNum].predBBNum = predBlock->bbNum;
+        }
+
+        if (enregisterLocalVars)
+        {
+            VarSetOps::AssignNoCopy(compiler, currentLiveVars,
+                                    VarSetOps::Intersection(compiler, registerCandidateVars, block->bbLiveIn));
+
+            if (block == compiler->fgFirstBB)
+            {
+                insertZeroInitRefPositions();
+                // The first real location is at 1; 0 is for the entry.
+                currentLoc = 1;
+            }
+
+            // Any lclVars live-in to a block are resolution candidates.
+            VarSetOps::UnionD(compiler, resolutionCandidateVars, currentLiveVars);
+
+            // Determine if we need any DummyDefs.
+            // We need DummyDefs for cases where "predBlock" isn't really a predecessor.
+            // Note that it's possible to have uses of uninitialized variables, in which case even the first
+            // block may require DummyDefs, which we are not currently adding - this means that these variables
+            // will always be considered to be in memory on entry (and reloaded when the use is encountered).
+            // TODO-CQ: Consider how best to tune this. Currently, if we create DummyDefs for uninitialized
+            // variables (which may actually be initialized along the dynamically executed paths, but not
+            // on all static paths), we wind up with excessive liveranges for some of these variables.
+            VARSET_TP newLiveIn(VarSetOps::MakeCopy(compiler, currentLiveVars));
+            if (predBlock)
+            {
+                // Compute set difference: newLiveIn = currentLiveVars - predBlock->bbLiveOut
+                VarSetOps::DiffD(compiler, newLiveIn, predBlock->bbLiveOut);
+            }
+            bool needsDummyDefs = (!VarSetOps::IsEmpty(compiler, newLiveIn) && block != compiler->fgFirstBB);
+
+            // Create dummy def RefPositions
+
+            if (needsDummyDefs)
+            {
+                // If we are using locations from a predecessor, we should never require DummyDefs.
+                assert(!predBlockIsAllocated);
+
+                JITDUMP("Creating dummy definitions\n");
+                VarSetOps::Iter iter(compiler, newLiveIn);
+                unsigned varIndex = 0;
+                while (iter.NextElem(&varIndex))
+                {
+                    unsigned varNum = compiler->lvaTrackedToVarNum[varIndex];
+                    LclVarDsc* varDsc = compiler->lvaTable + varNum;
+                    // Add a dummyDef for any candidate vars that are in the "newLiveIn" set.
+                    // If this is the entry block, don't add any incoming parameters (they're handled with ParamDefs).
+                    if (isCandidateVar(varDsc) && (predBlock != nullptr || !varDsc->lvIsParam))
+                    {
+                        Interval* interval = getIntervalForLocalVar(varIndex);
+                        RefPosition* pos = newRefPosition(interval, currentLoc, RefTypeDummyDef, nullptr,
+                                                          allRegs(interval->registerType));
+                    }
+                }
+                JITDUMP("Finished creating dummy definitions\n\n");
+            }
+        }
+
+        // Add a dummy RefPosition to mark the block boundary.
+        // Note that we do this AFTER adding the exposed uses above, because the
+        // register positions for those exposed uses need to be recorded at
+        // this point.
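+        // (A RefTypeBB RefPosition carries no interval; it simply marks the boundary
+        // at which the allocator records and restores variable-to-register mappings.)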
+
+        RefPosition* pos = newRefPosition((Interval*)nullptr, currentLoc, RefTypeBB, nullptr, RBM_NONE);
+        currentLoc += 2;
+        JITDUMP("\n");
+
+        LIR::Range& blockRange = LIR::AsRange(block);
+        for (GenTree* node : blockRange.NonPhiNodes())
+        {
+            // We increment the number position of each tree node by 2 to simplify the logic for the case of
+            // a tree that implicitly does a dual definition of temps (the long case). In this case it is easier to
+            // already have an idle spot to handle a dual-def instead of making some messy adjustments if we only
+            // increment the number position by one.
+            CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef DEBUG
+            node->gtSeqNum = currentLoc;
+            // In DEBUG, we want to set the gtRegTag to GT_REGTAG_REG, so that subsequent dumps will show the register
+            // value.
+            // Although this looks like a no-op, it sets the tag.
+            node->gtRegNum = node->gtRegNum;
+#endif
+
+            buildRefPositionsForNode(node, block, currentLoc);
+
+#ifdef DEBUG
+            if (currentLoc > maxNodeLocation)
+            {
+                maxNodeLocation = currentLoc;
+            }
+#endif // DEBUG
+            currentLoc += 2;
+        }
+
+        // Note: the visited set is cleared in LinearScan::doLinearScan()
+        markBlockVisited(block);
+        assert(defList.IsEmpty());
+
+        if (enregisterLocalVars)
+        {
+            // Insert exposed uses for a lclVar that is live-out of 'block' but not live-in to the
+            // next block, or any unvisited successors.
+            // This will address lclVars that are live on a backedge, as well as those that are kept
+            // live at a GT_JMP.
+            //
+            // Blocks ending with "jmp method" are marked as BBJ_HAS_JMP,
+            // and a jmp call is represented using a GT_JMP node, which is a leaf node.
+            // The liveness phase keeps all the arguments of the method live till the end of
+            // the block by adding them to the liveout set of the block containing GT_JMP.
+            //
+            // The target of a GT_JMP implicitly uses all the current method arguments, however
+            // there are no actual references to them. This can cause LSRA to assert, because
+            // the variables are live but it sees no references. In order to correctly model the
+            // liveness of these arguments, we add dummy exposed uses, in the same manner as for
+            // backward branches. This will happen automatically via expUseSet.
+            //
+            // Note that a block ending with GT_JMP has no successors, and hence the variables
+            // for which dummy use ref positions are added are arguments of the method.
+
+            VARSET_TP expUseSet(VarSetOps::MakeCopy(compiler, block->bbLiveOut));
+            VarSetOps::IntersectionD(compiler, expUseSet, registerCandidateVars);
+            BasicBlock* nextBlock = getNextBlock();
+            if (nextBlock != nullptr)
+            {
+                VarSetOps::DiffD(compiler, expUseSet, nextBlock->bbLiveIn);
+            }
+            for (BasicBlock* succ : block->GetAllSuccs(compiler))
+            {
+                if (VarSetOps::IsEmpty(compiler, expUseSet))
+                {
+                    break;
+                }
+
+                if (isBlockVisited(succ))
+                {
+                    continue;
+                }
+                VarSetOps::DiffD(compiler, expUseSet, succ->bbLiveIn);
+            }
+
+            if (!VarSetOps::IsEmpty(compiler, expUseSet))
+            {
+                JITDUMP("Exposed uses:");
+                VarSetOps::Iter iter(compiler, expUseSet);
+                unsigned varIndex = 0;
+                while (iter.NextElem(&varIndex))
+                {
+                    unsigned varNum = compiler->lvaTrackedToVarNum[varIndex];
+                    LclVarDsc* varDsc = compiler->lvaTable + varNum;
+                    assert(isCandidateVar(varDsc));
+                    Interval* interval = getIntervalForLocalVar(varIndex);
+                    RefPosition* pos =
+                        newRefPosition(interval, currentLoc, RefTypeExpUse, nullptr, allRegs(interval->registerType));
+                    JITDUMP(" V%02u", varNum);
+                }
+                JITDUMP("\n");
+            }
+
+            // Clear the "last use" flag on any vars that are live-out from this block.
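+            // (A reference that is last within this block is not a true last use when
+            // the variable is live-out, since a successor will reference it again.)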
+ { + VarSetOps::Iter iter(compiler, block->bbLiveOut); + unsigned varIndex = 0; + while (iter.NextElem(&varIndex)) + { + unsigned varNum = compiler->lvaTrackedToVarNum[varIndex]; + LclVarDsc* const varDsc = &compiler->lvaTable[varNum]; + if (isCandidateVar(varDsc)) + { + RefPosition* const lastRP = getIntervalForLocalVar(varIndex)->lastRefPosition; + if ((lastRP != nullptr) && (lastRP->bbNum == block->bbNum)) + { + lastRP->lastUse = false; + } + } + } + } + +#ifdef DEBUG + checkLastUses(block); + + if (VERBOSE) + { + printf("use: "); + dumpConvertedVarSet(compiler, block->bbVarUse); + printf("\ndef: "); + dumpConvertedVarSet(compiler, block->bbVarDef); + printf("\n"); + } +#endif // DEBUG + } + + prevBlock = block; + } + + if (enregisterLocalVars) + { + if (compiler->lvaKeepAliveAndReportThis()) + { + // If we need to KeepAliveAndReportThis, add a dummy exposed use of it at the end + unsigned keepAliveVarNum = compiler->info.compThisArg; + assert(compiler->info.compIsStatic == false); + LclVarDsc* varDsc = compiler->lvaTable + keepAliveVarNum; + if (isCandidateVar(varDsc)) + { + JITDUMP("Adding exposed use of this, for lvaKeepAliveAndReportThis\n"); + Interval* interval = getIntervalForLocalVar(varDsc->lvVarIndex); + RefPosition* pos = + newRefPosition(interval, currentLoc, RefTypeExpUse, nullptr, allRegs(interval->registerType)); + } + } + +#ifdef DEBUG + if (getLsraExtendLifeTimes()) + { + LclVarDsc* varDsc; + for (lclNum = 0, varDsc = compiler->lvaTable; lclNum < compiler->lvaCount; lclNum++, varDsc++) + { + if (varDsc->lvLRACandidate) + { + JITDUMP("Adding exposed use of V%02u for LsraExtendLifetimes\n", lclNum); + Interval* interval = getIntervalForLocalVar(varDsc->lvVarIndex); + RefPosition* pos = + newRefPosition(interval, currentLoc, RefTypeExpUse, nullptr, allRegs(interval->registerType)); + } + } + } +#endif // DEBUG + } + + // If the last block has successors, create a RefTypeBB to record + // what's live + + if (prevBlock->NumSucc(compiler) > 0) + { + RefPosition* pos = newRefPosition((Interval*)nullptr, currentLoc, RefTypeBB, nullptr, RBM_NONE); + } + +#ifdef DEBUG + // Make sure we don't have any blocks that were not visited + foreach_block(compiler, block) + { + assert(isBlockVisited(block)); + } + + if (VERBOSE) + { + lsraDumpIntervals("BEFORE VALIDATING INTERVALS"); + dumpRefPositions("BEFORE VALIDATING INTERVALS"); + validateIntervals(); + } +#endif // DEBUG +} + +#ifdef DEBUG +//------------------------------------------------------------------------ +// validateIntervals: A DEBUG-only method that checks that the lclVar RefPositions +// do not reflect uses of undefined values +// +// Notes: If an undefined use is encountered, it merely prints a message. +// +// TODO-Cleanup: This should probably assert, or at least print the message only +// when doing a JITDUMP. 
+//
+void LinearScan::validateIntervals()
+{
+    if (enregisterLocalVars)
+    {
+        for (unsigned i = 0; i < compiler->lvaTrackedCount; i++)
+        {
+            if (!compiler->lvaTable[compiler->lvaTrackedToVarNum[i]].lvLRACandidate)
+            {
+                continue;
+            }
+            Interval* interval = getIntervalForLocalVar(i);
+
+            bool defined = false;
+            printf("-----------------\n");
+            for (RefPosition* ref = interval->firstRefPosition; ref != nullptr; ref = ref->nextRefPosition)
+            {
+                ref->dump();
+                RefType refType = ref->refType;
+                if (!defined && RefTypeIsUse(refType))
+                {
+                    if (compiler->info.compMethodName != nullptr)
+                    {
+                        printf("%s: ", compiler->info.compMethodName);
+                    }
+                    printf("LocalVar V%02u: undefined use at %u\n", interval->varNum, ref->nodeLocation);
+                }
+                // Note that there can be multiple last uses if they are on disjoint paths,
+                // so we can't really check the lastUse flag
+                if (ref->lastUse)
+                {
+                    defined = false;
+                }
+                if (RefTypeIsDef(refType))
+                {
+                    defined = true;
+                }
+            }
+        }
+    }
+}
+#endif // DEBUG
+
+//------------------------------------------------------------------------
+// GetIndirInfo: Get the source registers for an indirection that might be contained.
+//
+// Arguments:
+//    indirTree - The indirection node of interest
+//
+// Return Value:
+//    The number of source registers used by the *parent* of this node.
+//
+// Notes:
+//    Adds the defining node for each register to the useList.
+//
+int LinearScan::GetIndirInfo(GenTreeIndir* indirTree)
+{
+    GenTree* const addr = indirTree->gtOp1;
+    if (!addr->isContained())
+    {
+        appendLocationInfoToList(addr);
+        return 1;
+    }
+    if (!addr->OperIs(GT_LEA))
+    {
+        return 0;
+    }
+
+    GenTreeAddrMode* const addrMode = addr->AsAddrMode();
+
+    unsigned srcCount = 0;
+    if ((addrMode->Base() != nullptr) && !addrMode->Base()->isContained())
+    {
+        appendLocationInfoToList(addrMode->Base());
+        srcCount++;
+    }
+    if ((addrMode->Index() != nullptr) && !addrMode->Index()->isContained())
+    {
+        appendLocationInfoToList(addrMode->Index());
+        srcCount++;
+    }
+    return srcCount;
+}
+
+//------------------------------------------------------------------------
+// GetOperandInfo: Get the source registers for an operand that might be contained.
+//
+// Arguments:
+//    node - The node of interest
+//
+// Return Value:
+//    The number of source registers used by the *parent* of this node.
+//
+// Notes:
+//    Adds the defining node for each register to the useList member.
+//
+int LinearScan::GetOperandInfo(GenTree* node)
+{
+    if (!node->isContained())
+    {
+        appendLocationInfoToList(node);
+        return 1;
+    }
+
+#if !defined(_TARGET_64BIT_)
+    if (node->OperIs(GT_LONG))
+    {
+        return appendBinaryLocationInfoToList(node->AsOp());
+    }
+#endif // !defined(_TARGET_64BIT_)
+    if (node->OperIsIndir())
+    {
+        const unsigned srcCount = GetIndirInfo(node->AsIndir());
+        return srcCount;
+    }
+
+    return 0;
+}
+
+//------------------------------------------------------------------------
+// GetOperandInfo: Get the source registers for an operand that might be contained.
+//
+// Arguments:
+//    node       - The node of interest
+//    pFirstInfo - An out parameter that returns the first LocationInfoListNode
+//                 appended to the useList for this operand
+//
+// Return Value:
+//    The number of source registers used by the *parent* of this node.
+//
+// Notes:
+//    Adds the defining node for each register to the useList.
+//
+int LinearScan::GetOperandInfo(GenTree* node, LocationInfoListNode** pFirstInfo)
+{
+    LocationInfoListNode* prevLast = useList.Last();
+    int srcCount = GetOperandInfo(node);
+    if (prevLast == nullptr)
+    {
+        *pFirstInfo = useList.Begin();
+    }
+    else
+    {
+        *pFirstInfo = prevLast->Next();
+    }
+    return srcCount;
+}
+
+//------------------------------------------------------------------------
+// TreeNodeInfo::Initialize: Initialize the TreeNodeInfo for a node, setting the
+//    default counts and register candidates.
+//
+// Arguments:
+//    lsra - the LinearScan object
+//    node - the node whose TreeNodeInfo is being initialized
+//
+void TreeNodeInfo::Initialize(LinearScan* lsra, GenTree* node)
+{
+    _dstCount = 0;
+    _srcCount = 0;
+    _internalIntCount = 0;
+    _internalFloatCount = 0;
+
+    isLocalDefUse = false;
+    isDelayFree = false;
+    hasDelayFreeSrc = false;
+    isTgtPref = false;
+    isInternalRegDelayFree = false;
+
+    regMaskTP dstCandidates;
+
+    // If there is a reg indicated on the tree node, use that for dstCandidates;
+    // the exception is the NOP, which sometimes shows up around late args.
+    // TODO-Cleanup: get rid of those NOPs.
+    if (node->gtRegNum == REG_STK)
+    {
+        dstCandidates = RBM_NONE;
+    }
+    else if (node->gtRegNum == REG_NA || node->gtOper == GT_NOP)
+    {
+#ifdef ARM_SOFTFP
+        if (node->OperGet() == GT_PUTARG_REG)
+        {
+            dstCandidates = lsra->allRegs(TYP_INT);
+        }
+        else
+#endif
+        {
+            dstCandidates = lsra->allRegs(node->TypeGet());
+        }
+    }
+    else
+    {
+        dstCandidates = genRegMask(node->gtRegNum);
+    }
+
+    setDstCandidates(lsra, dstCandidates);
+    srcCandsIndex = dstCandsIndex;
+
+    setInternalCandidates(lsra, lsra->allRegs(TYP_INT));
+
+#ifdef DEBUG
+    isInitialized = true;
+#endif
+
+    assert(IsValid(lsra));
+}
+
+//------------------------------------------------------------------------
+// getSrcCandidates: Get the source candidates (candidates for the consumer
+//    of the node) from the TreeNodeInfo
+//
+// Arguments:
+//    lsra - the LinearScan object
+//
+// Return Value:
+//    The set of registers (as a register mask) that are candidates for the
+//    consumer of the node
+//
+// Notes:
+//    The LinearScan object maintains the mapping from the indices kept in the
+//    TreeNodeInfo to the actual register masks.
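+//    Storing a small index rather than a full regMaskTP keeps TreeNodeInfo compact;
+//    the index must fit in an unsigned char (see the FitsIn<unsigned char> asserts
+//    in the setters below).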
+// +regMaskTP TreeNodeInfo::getSrcCandidates(LinearScan* lsra) +{ + return lsra->GetRegMaskForIndex(srcCandsIndex); +} + +//------------------------------------------------------------------------ +// setSrcCandidates: Set the source candidates (candidates for the consumer +// of the node) on the TreeNodeInfo +// +// Arguments: +// lsra - the LinearScan object +// +// Notes: see getSrcCandidates +// +void TreeNodeInfo::setSrcCandidates(LinearScan* lsra, regMaskTP mask) +{ + LinearScan::RegMaskIndex i = lsra->GetIndexForRegMask(mask); + assert(FitsIn<unsigned char>(i)); + srcCandsIndex = (unsigned char)i; +} + +//------------------------------------------------------------------------ +// getDstCandidates: Get the dest candidates (candidates for the definition +// of the node) from the TreeNodeInfo +// +// Arguments: +// lsra - the LinearScan object +// +// Return Value: +// The set of registers (as a register mask) that are candidates for the +// node itself +// +// Notes: see getSrcCandidates +// +regMaskTP TreeNodeInfo::getDstCandidates(LinearScan* lsra) +{ + return lsra->GetRegMaskForIndex(dstCandsIndex); +} + +//------------------------------------------------------------------------ +// setDstCandidates: Set the dest candidates (candidates for the definition +// of the node) on the TreeNodeInfo +// +// Arguments: +// lsra - the LinearScan object +// +// Notes: see getSrcCandidates +// +void TreeNodeInfo::setDstCandidates(LinearScan* lsra, regMaskTP mask) +{ + LinearScan::RegMaskIndex i = lsra->GetIndexForRegMask(mask); + assert(FitsIn<unsigned char>(i)); + dstCandsIndex = (unsigned char)i; +} + +//------------------------------------------------------------------------ +// getInternalCandidates: Get the internal candidates (candidates for the internal +// temporary registers used by a node) from the TreeNodeInfo +// +// Arguments: +// lsra - the LinearScan object +// +// Return Value: +// The set of registers (as a register mask) that are candidates for the +// internal temporary registers. 
+//
+// Notes: see getSrcCandidates
+//
+regMaskTP TreeNodeInfo::getInternalCandidates(LinearScan* lsra)
+{
+    return lsra->GetRegMaskForIndex(internalCandsIndex);
+}
+
+//------------------------------------------------------------------------
+// setInternalCandidates: Set the internal candidates (candidates for the internal
+//    temporary registers used by a node) on the TreeNodeInfo
+//
+// Arguments:
+//    lsra - the LinearScan object
+//
+// Notes: see getSrcCandidates
+//
+void TreeNodeInfo::setInternalCandidates(LinearScan* lsra, regMaskTP mask)
+{
+    LinearScan::RegMaskIndex i = lsra->GetIndexForRegMask(mask);
+    assert(FitsIn<unsigned char>(i));
+    internalCandsIndex = (unsigned char)i;
+}
+
+//------------------------------------------------------------------------
+// addInternalCandidates: Add internal candidates (candidates for the internal
+//    temporary registers used by a node) on the TreeNodeInfo
+//
+// Arguments:
+//    lsra - the LinearScan object
+//
+// Notes: see getSrcCandidates
+//
+void TreeNodeInfo::addInternalCandidates(LinearScan* lsra, regMaskTP mask)
+{
+    LinearScan::RegMaskIndex i = lsra->GetIndexForRegMask(lsra->GetRegMaskForIndex(internalCandsIndex) | mask);
+    assert(FitsIn<unsigned char>(i));
+    internalCandsIndex = (unsigned char)i;
+}
+
+//------------------------------------------------------------------------
+// BuildStoreLoc: Set register requirements for a store of a lclVar
+//
+// Arguments:
+//    storeLoc - the local store (GT_STORE_LCL_FLD or GT_STORE_LCL_VAR)
+//
+// Notes:
+//    This involves:
+//    - Setting the appropriate candidates for a store of a multi-reg call return value.
+//    - Handling of contained immediates.
+//    - Requesting an internal register for SIMD12 stores.
+//
+void LinearScan::BuildStoreLoc(GenTreeLclVarCommon* storeLoc)
+{
+    TreeNodeInfo* info = currentNodeInfo;
+    GenTree* op1 = storeLoc->gtGetOp1();
+
+    assert(info->dstCount == 0);
+
+    if (op1->IsMultiRegCall())
+    {
+        // This is the case of var = call where the call is returning
+        // a value in multiple return registers.
+        // Must be a store lclvar.
+        assert(storeLoc->OperGet() == GT_STORE_LCL_VAR);
+
+        // srcCount = number of registers in which the value is returned by the call
+        GenTreeCall* call = op1->AsCall();
+        ReturnTypeDesc* retTypeDesc = call->GetReturnTypeDesc();
+        unsigned regCount = retTypeDesc->GetReturnRegCount();
+        info->srcCount = regCount;
+
+        // Call node srcCandidates = Bitwise-OR(allregs(GetReturnRegType(i))) for all i=0..RetRegCount-1
+        regMaskTP srcCandidates = allMultiRegCallNodeRegs(call);
+        LocationInfoListNode* locInfo = getLocationInfo(op1);
+        locInfo->info.setSrcCandidates(this, srcCandidates);
+        useList.Append(locInfo);
+    }
+#ifndef _TARGET_64BIT_
+    else if (varTypeIsLong(op1))
+    {
+        if (op1->OperIs(GT_MUL_LONG))
+        {
+#ifdef _TARGET_X86_
+            // This is actually a bug. A GT_MUL_LONG produces two registers, but is modeled as only producing
+            // eax (and killing edx). This only works because it always occurs as var = GT_MUL_LONG (ensured by
+            // DecomposeMul), and therefore edx won't be reused before the store.
+            // TODO-X86-Cleanup: GT_MUL_LONG should be a multireg node on x86, just as on ARM.
+ info->srcCount = 1; +#else + info->srcCount = 2; +#endif + appendLocationInfoToList(op1); + } + else + { + assert(op1->OperIs(GT_LONG)); + assert(op1->isContained() && !op1->gtOp.gtOp1->isContained() && !op1->gtOp.gtOp2->isContained()); + info->srcCount = appendBinaryLocationInfoToList(op1->AsOp()); + assert(info->srcCount == 2); + } + } +#endif // !_TARGET_64BIT_ + else if (op1->isContained()) + { + info->srcCount = 0; + } + else + { + info->srcCount = 1; + appendLocationInfoToList(op1); + } + +#ifdef FEATURE_SIMD + if (varTypeIsSIMD(storeLoc)) + { + if (!op1->isContained() && (storeLoc->TypeGet() == TYP_SIMD12)) + { +// Need an additional register to extract upper 4 bytes of Vector3. +#ifdef _TARGET_XARCH_ + info->internalFloatCount = 1; + info->setInternalCandidates(this, allSIMDRegs()); +#elif defined(_TARGET_ARM64_) + info->internalIntCount = 1; +#else +#error "Unknown target architecture for STORE_LCL_VAR of SIMD12" +#endif + } + } +#endif // FEATURE_SIMD +} + +//------------------------------------------------------------------------ +// BuildSimple: Sets the srcCount for all the trees +// without special handling based on the tree node type. +// +// Arguments: +// tree - The node of interest +// +// Return Value: +// None. +// +void LinearScan::BuildSimple(GenTree* tree) +{ + TreeNodeInfo* info = currentNodeInfo; + unsigned kind = tree->OperKind(); + assert(info->dstCount == (tree->IsValue() ? 1 : 0)); + if (kind & (GTK_CONST | GTK_LEAF)) + { + info->srcCount = 0; + } + else if (kind & (GTK_SMPOP)) + { + info->srcCount = appendBinaryLocationInfoToList(tree->AsOp()); + } + else + { + unreached(); + } +} + +//------------------------------------------------------------------------ +// BuildReturn: Set the NodeInfo for a GT_RETURN. +// +// Arguments: +// tree - The node of interest +// +// Return Value: +// None. +// +void LinearScan::BuildReturn(GenTree* tree) +{ + TreeNodeInfo* info = currentNodeInfo; + assert(info->dstCount == 0); + GenTree* op1 = tree->gtGetOp1(); + +#if !defined(_TARGET_64BIT_) + if (tree->TypeGet() == TYP_LONG) + { + assert((op1->OperGet() == GT_LONG) && op1->isContained()); + GenTree* loVal = op1->gtGetOp1(); + GenTree* hiVal = op1->gtGetOp2(); + info->srcCount = 2; + LocationInfoListNode* loValInfo = getLocationInfo(loVal); + LocationInfoListNode* hiValInfo = getLocationInfo(hiVal); + loValInfo->info.setSrcCandidates(this, RBM_LNGRET_LO); + hiValInfo->info.setSrcCandidates(this, RBM_LNGRET_HI); + useList.Append(loValInfo); + useList.Append(hiValInfo); + } + else +#endif // !defined(_TARGET_64BIT_) + if ((tree->TypeGet() != TYP_VOID) && !op1->isContained()) + { + regMaskTP useCandidates = RBM_NONE; + + info->srcCount = 1; + +#if FEATURE_MULTIREG_RET + if (varTypeIsStruct(tree)) + { + // op1 has to be either an lclvar or a multi-reg returning call + if (op1->OperGet() != GT_LCL_VAR) + { + noway_assert(op1->IsMultiRegCall()); + + ReturnTypeDesc* retTypeDesc = op1->AsCall()->GetReturnTypeDesc(); + info->srcCount = retTypeDesc->GetReturnRegCount(); + useCandidates = retTypeDesc->GetABIReturnRegs(); + } + } + else +#endif // FEATURE_MULTIREG_RET + { + // Non-struct type return - determine useCandidates + switch (tree->TypeGet()) + { + case TYP_VOID: + useCandidates = RBM_NONE; + break; + case TYP_FLOAT: + useCandidates = RBM_FLOATRET; + break; + case TYP_DOUBLE: + // We ONLY want the valid double register in the RBM_DOUBLERET mask. 
+                    useCandidates = (RBM_DOUBLERET & RBM_ALLDOUBLE);
+                    break;
+                case TYP_LONG:
+                    useCandidates = RBM_LNGRET;
+                    break;
+                default:
+                    useCandidates = RBM_INTRET;
+                    break;
+            }
+        }
+
+        LocationInfoListNode* locationInfo = getLocationInfo(op1);
+        if (useCandidates != RBM_NONE)
+        {
+            locationInfo->info.setSrcCandidates(this, useCandidates);
+        }
+        useList.Append(locationInfo);
+    }
+}
+
+//------------------------------------------------------------------------
+// BuildPutArgReg: Set the NodeInfo for a PUTARG_REG.
+//
+// Arguments:
+//    node - The PUTARG_REG node.
+//
+// Return Value:
+//    None.
+//
+void LinearScan::BuildPutArgReg(GenTreeUnOp* node)
+{
+    TreeNodeInfo* info = currentNodeInfo;
+    assert(node != nullptr);
+    assert(node->OperIsPutArgReg());
+    info->srcCount = 1;
+    regNumber argReg = node->gtRegNum;
+    assert(argReg != REG_NA);
+
+    // Set the register requirements for the node.
+    regMaskTP argMask = genRegMask(argReg);
+
+#ifdef _TARGET_ARM_
+    // If the type of the node is `long`, it is actually `double`.
+    // The actual `long` types must have been transformed as a field list with two fields.
+    if (node->TypeGet() == TYP_LONG)
+    {
+        info->srcCount++;
+        info->dstCount = info->srcCount;
+        assert(genRegArgNext(argReg) == REG_NEXT(argReg));
+        argMask |= genRegMask(REG_NEXT(argReg));
+    }
+#endif // _TARGET_ARM_
+    info->setDstCandidates(this, argMask);
+    info->setSrcCandidates(this, argMask);
+
+    // To avoid redundant moves, have the argument operand computed in the
+    // register in which the argument is passed to the call.
+    LocationInfoListNode* op1Info = getLocationInfo(node->gtOp.gtOp1);
+    op1Info->info.setSrcCandidates(this, info->getSrcCandidates(this));
+    op1Info->info.isDelayFree = true;
+    useList.Append(op1Info);
+}
+
+//------------------------------------------------------------------------
+// HandleFloatVarArgs: Handle additional register requirements for a varargs call
+//
+// Arguments:
+//    call    - The call node of interest
+//    argNode - The current argument
+//
+// Return Value:
+//    None.
+//
+// Notes:
+//    In the case of a varargs call, the ABI dictates that if we have floating point args,
+//    we must pass the enregistered arguments in both the integer and floating point registers.
+//    Since the integer register is not associated with the arg node, we will reserve it as
+//    an internal register on the call so that it is not used during the evaluation of the call node
+//    (e.g. for the target).
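+//
+//    For example, on Windows AMD64 a varargs double passed in XMM1 must also be
+//    shadowed in the corresponding integer register (RDX); getCallArgIntRegister
+//    returns that register, which is then added to the call's internal candidates.
+//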
+void LinearScan::HandleFloatVarArgs(GenTreeCall* call, GenTree* argNode, bool* callHasFloatRegArgs) +{ +#if FEATURE_VARARG + TreeNodeInfo* info = currentNodeInfo; + if (call->IsVarargs() && varTypeIsFloating(argNode)) + { + *callHasFloatRegArgs = true; + + regNumber argReg = argNode->gtRegNum; + regNumber targetReg = compiler->getCallArgIntRegister(argReg); + info->setInternalIntCount(info->internalIntCount + 1); + info->addInternalCandidates(this, genRegMask(targetReg)); + } +#endif // FEATURE_VARARG +} + +//------------------------------------------------------------------------ +// BuildGCWriteBarrier: Handle additional register requirements for a GC write barrier +// +// Arguments: +// tree - The STORE_IND for which a write barrier is required +// +void LinearScan::BuildGCWriteBarrier(GenTree* tree) +{ + TreeNodeInfo* info = currentNodeInfo; + GenTree* dst = tree; + GenTree* addr = tree->gtOp.gtOp1; + GenTree* src = tree->gtOp.gtOp2; + LocationInfoListNode* addrInfo = getLocationInfo(addr); + LocationInfoListNode* srcInfo = getLocationInfo(src); + + // In the case where we are doing a helper assignment, even if the dst + // is an indir through an lea, we need to actually instantiate the + // lea in a register + assert(!addr->isContained() && !src->isContained()); + useList.Append(addrInfo); + useList.Append(srcInfo); + info->srcCount = 2; + assert(info->dstCount == 0); + bool customSourceRegs = false; + +#if NOGC_WRITE_BARRIERS + +#if defined(_TARGET_ARM64_) + // For the NOGC JIT Helper calls + // + // the 'addr' goes into x14 (REG_WRITE_BARRIER_DST_BYREF) + // the 'src' goes into x15 (REG_WRITE_BARRIER) + // + addrInfo->info.setSrcCandidates(this, RBM_WRITE_BARRIER_DST_BYREF); + srcInfo->info.setSrcCandidates(this, RBM_WRITE_BARRIER); + customSourceRegs = true; + +#elif defined(_TARGET_X86_) + + bool useOptimizedWriteBarrierHelper = compiler->codeGen->genUseOptimizedWriteBarriers(tree, src); + + if (useOptimizedWriteBarrierHelper) + { + // Special write barrier: + // op1 (addr) goes into REG_WRITE_BARRIER (rdx) and + // op2 (src) goes into any int register. + addrInfo->info.setSrcCandidates(this, RBM_WRITE_BARRIER); + srcInfo->info.setSrcCandidates(this, RBM_WRITE_BARRIER_SRC); + customSourceRegs = true; + } +#else // !defined(_TARGET_X86_) && !defined(_TARGET_ARM64_) +#error "NOGC_WRITE_BARRIERS is not supported" +#endif // !defined(_TARGET_X86_) + +#endif // NOGC_WRITE_BARRIERS + + if (!customSourceRegs) + { + // For the standard JIT Helper calls: + // op1 (addr) goes into REG_ARG_0 and + // op2 (src) goes into REG_ARG_1 + addrInfo->info.setSrcCandidates(this, RBM_ARG_0); + srcInfo->info.setSrcCandidates(this, RBM_ARG_1); + } + + // Both src and dst must reside in a register, which they should since we haven't set + // either of them as contained. + assert(addrInfo->info.dstCount == 1); + assert(srcInfo->info.dstCount == 1); +} + +//------------------------------------------------------------------------ +// BuildCmp: Set the register requirements for a compare. +// +// Arguments: +// tree - The node of interest +// +// Return Value: +// None. +// +void LinearScan::BuildCmp(GenTree* tree) +{ + TreeNodeInfo* info = currentNodeInfo; + assert(tree->OperIsCompare() || tree->OperIs(GT_CMP) || tree->OperIs(GT_JCMP)); + + info->srcCount = 0; + assert((info->dstCount == 1) || (tree->TypeGet() == TYP_VOID)); + +#ifdef _TARGET_X86_ + // If the compare is used by a jump, we just need to set the condition codes. 
If not, then we need
+    // to store the result into the low byte of a register, which requires that the dst be a byteable register.
+    // We always set the dst candidates, though, because if this compare is consumed by a jump, they
+    // won't be used. We might be able to use GTF_RELOP_JMP_USED to determine this case, but it's not clear
+    // that flag is maintained until this location (especially for decomposed long compares).
+    info->setDstCandidates(this, RBM_BYTE_REGS);
+#endif // _TARGET_X86_
+
+    info->srcCount = appendBinaryLocationInfoToList(tree->AsOp());
+}
+
+#endif // !LEGACY_BACKEND