Diffstat (limited to 'src/jit/lsra.cpp')
-rw-r--r-- | src/jit/lsra.cpp | 2688
1 file changed, 2 insertions, 2686 deletions
diff --git a/src/jit/lsra.cpp b/src/jit/lsra.cpp index 641a9a1d11..07ce5503cb 100644 --- a/src/jit/lsra.cpp +++ b/src/jit/lsra.cpp @@ -333,36 +333,6 @@ regMaskTP LinearScan::internalFloatRegCandidates() } /***************************************************************************** - * Register types - *****************************************************************************/ -template <class T> -RegisterType regType(T type) -{ -#ifdef FEATURE_SIMD - if (varTypeIsSIMD(type)) - { - return FloatRegisterType; - } -#endif // FEATURE_SIMD - return varTypeIsFloating(TypeGet(type)) ? FloatRegisterType : IntRegisterType; -} - -bool useFloatReg(var_types type) -{ - return (regType(type) == FloatRegisterType); -} - -bool registerTypesEquivalent(RegisterType a, RegisterType b) -{ - return varTypeIsIntegralOrI(a) == varTypeIsIntegralOrI(b); -} - -bool isSingleRegister(regMaskTP regMask) -{ - return (regMask != RBM_NONE && genMaxOneBit(regMask)); -} - -/***************************************************************************** * Inline functions for RegRecord *****************************************************************************/ @@ -470,198 +440,6 @@ regMaskTP LinearScan::stressLimitRegs(RefPosition* refPosition, regMaskTP mask) } #endif // DEBUG -// TODO-Cleanup: Consider adding an overload that takes a varDsc, and can appropriately -// set such fields as isStructField - -Interval* LinearScan::newInterval(RegisterType theRegisterType) -{ - intervals.emplace_back(theRegisterType, allRegs(theRegisterType)); - Interval* newInt = &intervals.back(); - -#ifdef DEBUG - newInt->intervalIndex = static_cast<unsigned>(intervals.size() - 1); -#endif // DEBUG - - DBEXEC(VERBOSE, newInt->dump()); - return newInt; -} - -RefPosition* LinearScan::newRefPositionRaw(LsraLocation nodeLocation, GenTree* treeNode, RefType refType) -{ - refPositions.emplace_back(curBBNum, nodeLocation, treeNode, refType); - RefPosition* newRP = &refPositions.back(); -#ifdef DEBUG - newRP->rpNum = static_cast<unsigned>(refPositions.size() - 1); -#endif // DEBUG - return newRP; -} - -//------------------------------------------------------------------------ -// resolveConflictingDefAndUse: Resolve the situation where we have conflicting def and use -// register requirements on a single-def, single-use interval. -// -// Arguments: -// defRefPosition - The interval definition -// useRefPosition - The (sole) interval use -// -// Return Value: -// None. -// -// Assumptions: -// The two RefPositions are for the same interval, which is a tree-temp. -// -// Notes: -// We require some special handling for the case where the use is a "delayRegFree" case of a fixedReg. -// In that case, if we change the registerAssignment on the useRefPosition, we will lose the fact that, -// even if we assign a different register (and rely on codegen to do the copy), that fixedReg also needs -// to remain busy until the Def register has been allocated. In that case, we don't allow Case 1 or Case 4 -// below. -// Here are the cases we consider (in this order): -// 1. If The defRefPosition specifies a single register, and there are no conflicting -// FixedReg uses of it between the def and use, we use that register, and the code generator -// will insert the copy. Note that it cannot be in use because there is a FixedRegRef for the def. -// 2. 
If the useRefPosition specifies a single register, and it is not in use, and there are no -// conflicting FixedReg uses of it between the def and use, we use that register, and the code generator -// will insert the copy. -// 3. If the defRefPosition specifies a single register (but there are conflicts, as determined -// in 1.), and there are no conflicts with the useRefPosition register (if it's a single register), -/// we set the register requirements on the defRefPosition to the use registers, and the -// code generator will insert a copy on the def. We can't rely on the code generator to put a copy -// on the use if it has multiple possible candidates, as it won't know which one has been allocated. -// 4. If the useRefPosition specifies a single register, and there are no conflicts with the register -// on the defRefPosition, we leave the register requirements on the defRefPosition as-is, and set -// the useRefPosition to the def registers, for similar reasons to case #3. -// 5. If both the defRefPosition and the useRefPosition specify single registers, but both have conflicts, -// We set the candiates on defRefPosition to be all regs of the appropriate type, and since they are -// single registers, codegen can insert the copy. -// 6. Finally, if the RefPositions specify disjoint subsets of the registers (or the use is fixed but -// has a conflict), we must insert a copy. The copy will be inserted before the use if the -// use is not fixed (in the fixed case, the code generator will insert the use). -// -// TODO-CQ: We get bad register allocation in case #3 in the situation where no register is -// available for the lifetime. We end up allocating a register that must be spilled, and it probably -// won't be the register that is actually defined by the target instruction. So, we have to copy it -// and THEN spill it. In this case, we should be using the def requirement. But we need to change -// the interface to this method a bit to make that work (e.g. returning a candidate set to use, but -// leaving the registerAssignment as-is on the def, so that if we find that we need to spill anyway -// we can use the fixed-reg on the def. -// - -void LinearScan::resolveConflictingDefAndUse(Interval* interval, RefPosition* defRefPosition) -{ - assert(!interval->isLocalVar); - - RefPosition* useRefPosition = defRefPosition->nextRefPosition; - regMaskTP defRegAssignment = defRefPosition->registerAssignment; - regMaskTP useRegAssignment = useRefPosition->registerAssignment; - RegRecord* defRegRecord = nullptr; - RegRecord* useRegRecord = nullptr; - regNumber defReg = REG_NA; - regNumber useReg = REG_NA; - bool defRegConflict = false; - bool useRegConflict = false; - - // If the useRefPosition is a "delayRegFree", we can't change the registerAssignment - // on it, or we will fail to ensure that the fixedReg is busy at the time the target - // (of the node that uses this interval) is allocated. 
- bool canChangeUseAssignment = !useRefPosition->isFixedRegRef || !useRefPosition->delayRegFree; - - INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_DEFUSE_CONFLICT)); - if (!canChangeUseAssignment) - { - INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_DEFUSE_FIXED_DELAY_USE)); - } - if (defRefPosition->isFixedRegRef) - { - defReg = defRefPosition->assignedReg(); - defRegRecord = getRegisterRecord(defReg); - if (canChangeUseAssignment) - { - RefPosition* currFixedRegRefPosition = defRegRecord->recentRefPosition; - assert(currFixedRegRefPosition != nullptr && - currFixedRegRefPosition->nodeLocation == defRefPosition->nodeLocation); - - if (currFixedRegRefPosition->nextRefPosition == nullptr || - currFixedRegRefPosition->nextRefPosition->nodeLocation > useRefPosition->getRefEndLocation()) - { - // This is case #1. Use the defRegAssignment - INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_DEFUSE_CASE1)); - useRefPosition->registerAssignment = defRegAssignment; - return; - } - else - { - defRegConflict = true; - } - } - } - if (useRefPosition->isFixedRegRef) - { - useReg = useRefPosition->assignedReg(); - useRegRecord = getRegisterRecord(useReg); - RefPosition* currFixedRegRefPosition = useRegRecord->recentRefPosition; - - // We know that useRefPosition is a fixed use, so the nextRefPosition must not be null. - RefPosition* nextFixedRegRefPosition = useRegRecord->getNextRefPosition(); - assert(nextFixedRegRefPosition != nullptr && - nextFixedRegRefPosition->nodeLocation <= useRefPosition->nodeLocation); - - // First, check to see if there are any conflicting FixedReg references between the def and use. - if (nextFixedRegRefPosition->nodeLocation == useRefPosition->nodeLocation) - { - // OK, no conflicting FixedReg references. - // Now, check to see whether it is currently in use. - if (useRegRecord->assignedInterval != nullptr) - { - RefPosition* possiblyConflictingRef = useRegRecord->assignedInterval->recentRefPosition; - LsraLocation possiblyConflictingRefLocation = possiblyConflictingRef->getRefEndLocation(); - if (possiblyConflictingRefLocation >= defRefPosition->nodeLocation) - { - useRegConflict = true; - } - } - if (!useRegConflict) - { - // This is case #2. Use the useRegAssignment - INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_DEFUSE_CASE2)); - defRefPosition->registerAssignment = useRegAssignment; - return; - } - } - else - { - useRegConflict = true; - } - } - if (defRegRecord != nullptr && !useRegConflict) - { - // This is case #3. - INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_DEFUSE_CASE3)); - defRefPosition->registerAssignment = useRegAssignment; - return; - } - if (useRegRecord != nullptr && !defRegConflict && canChangeUseAssignment) - { - // This is case #4. - INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_DEFUSE_CASE4)); - useRefPosition->registerAssignment = defRegAssignment; - return; - } - if (defRegRecord != nullptr && useRegRecord != nullptr) - { - // This is case #5. 
- INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_DEFUSE_CASE5)); - RegisterType regType = interval->registerType; - assert((getRegisterType(interval, defRefPosition) == regType) && - (getRegisterType(interval, useRefPosition) == regType)); - regMaskTP candidates = allRegs(regType); - defRefPosition->registerAssignment = candidates; - return; - } - INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_DEFUSE_CASE6)); - return; -} - //------------------------------------------------------------------------ // conflictingFixedRegReference: Determine whether the current RegRecord has a // fixed register use that conflicts with 'refPosition' @@ -708,242 +486,6 @@ bool RegRecord::conflictingFixedRegReference(RefPosition* refPosition) return false; } -void LinearScan::applyCalleeSaveHeuristics(RefPosition* rp) -{ -#ifdef _TARGET_AMD64_ - if (compiler->opts.compDbgEnC) - { - // We only use RSI and RDI for EnC code, so we don't want to favor callee-save regs. - return; - } -#endif // _TARGET_AMD64_ - - Interval* theInterval = rp->getInterval(); - -#ifdef DEBUG - if (!doReverseCallerCallee()) -#endif // DEBUG - { - // Set preferences so that this register set will be preferred for earlier refs - theInterval->updateRegisterPreferences(rp->registerAssignment); - } -} - -//------------------------------------------------------------------------ -// checkConflictingDefUse: Ensure that we have consistent def/use on SDSU temps. -// -// Arguments: -// useRP - The use RefPosition of a tree temp (SDSU Interval) -// -// Notes: -// There are a couple of cases where this may over-constrain allocation: -// 1. In the case of a non-commutative rmw def (in which the rmw source must be delay-free), or -// 2. In the case where the defining node requires a temp distinct from the target (also a -// delay-free case). -// In those cases, if we propagate a single-register restriction from the consumer to the producer -// the delayed uses will not see a fixed reference in the PhysReg at that position, and may -// incorrectly allocate that register. -// TODO-CQ: This means that we may often require a copy at the use of this node's result. -// This case could be moved to BuildRefPositionsForNode, at the point where the def RefPosition is -// created, causing a RefTypeFixedRef to be added at that location. This, however, results in -// more PhysReg RefPositions (a throughput impact), and a large number of diffs that require -// further analysis to determine benefit. -// See Issue #11274. -// -void LinearScan::checkConflictingDefUse(RefPosition* useRP) -{ - assert(useRP->refType == RefTypeUse); - Interval* theInterval = useRP->getInterval(); - assert(!theInterval->isLocalVar); - - RefPosition* defRP = theInterval->firstRefPosition; - - // All defs must have a valid treeNode, but we check it below to be conservative. 
- assert(defRP->treeNode != nullptr); - regMaskTP prevAssignment = defRP->registerAssignment; - regMaskTP newAssignment = (prevAssignment & useRP->registerAssignment); - if (newAssignment != RBM_NONE) - { - if (!isSingleRegister(newAssignment) || !theInterval->hasInterferingUses) - { - defRP->registerAssignment = newAssignment; - } - } - else - { - theInterval->hasConflictingDefUse = true; - } -} - -void LinearScan::associateRefPosWithInterval(RefPosition* rp) -{ - Referenceable* theReferent = rp->referent; - - if (theReferent != nullptr) - { - // All RefPositions except the dummy ones at the beginning of blocks - - if (rp->isIntervalRef()) - { - Interval* theInterval = rp->getInterval(); - - applyCalleeSaveHeuristics(rp); - - if (theInterval->isLocalVar) - { - if (RefTypeIsUse(rp->refType)) - { - RefPosition* const prevRP = theInterval->recentRefPosition; - if ((prevRP != nullptr) && (prevRP->bbNum == rp->bbNum)) - { - prevRP->lastUse = false; - } - } - - rp->lastUse = (rp->refType != RefTypeExpUse) && (rp->refType != RefTypeParamDef) && - (rp->refType != RefTypeZeroInit) && !extendLifetimes(); - } - else if (rp->refType == RefTypeUse) - { - checkConflictingDefUse(rp); - rp->lastUse = true; - } - } - - RefPosition* prevRP = theReferent->recentRefPosition; - if (prevRP != nullptr) - { - prevRP->nextRefPosition = rp; - } - else - { - theReferent->firstRefPosition = rp; - } - theReferent->recentRefPosition = rp; - theReferent->lastRefPosition = rp; - } - else - { - assert((rp->refType == RefTypeBB) || (rp->refType == RefTypeKillGCRefs)); - } -} - -//--------------------------------------------------------------------------- -// newRefPosition: allocate and initialize a new RefPosition. -// -// Arguments: -// reg - reg number that identifies RegRecord to be associated -// with this RefPosition -// theLocation - LSRA location of RefPosition -// theRefType - RefPosition type -// theTreeNode - GenTree node for which this RefPosition is created -// mask - Set of valid registers for this RefPosition -// multiRegIdx - register position if this RefPosition corresponds to a -// multi-reg call node. -// -// Return Value: -// a new RefPosition -// -RefPosition* LinearScan::newRefPosition( - regNumber reg, LsraLocation theLocation, RefType theRefType, GenTree* theTreeNode, regMaskTP mask) -{ - RefPosition* newRP = newRefPositionRaw(theLocation, theTreeNode, theRefType); - - newRP->setReg(getRegisterRecord(reg)); - newRP->registerAssignment = mask; - - newRP->setMultiRegIdx(0); - newRP->setAllocateIfProfitable(false); - - associateRefPosWithInterval(newRP); - - DBEXEC(VERBOSE, newRP->dump()); - return newRP; -} - -//--------------------------------------------------------------------------- -// newRefPosition: allocate and initialize a new RefPosition. -// -// Arguments: -// theInterval - interval to which RefPosition is associated with. -// theLocation - LSRA location of RefPosition -// theRefType - RefPosition type -// theTreeNode - GenTree node for which this RefPosition is created -// mask - Set of valid registers for this RefPosition -// multiRegIdx - register position if this RefPosition corresponds to a -// multi-reg call node. 
-// -// Return Value: -// a new RefPosition -// -RefPosition* LinearScan::newRefPosition(Interval* theInterval, - LsraLocation theLocation, - RefType theRefType, - GenTree* theTreeNode, - regMaskTP mask, - unsigned multiRegIdx /* = 0 */) -{ -#ifdef DEBUG - if (theInterval != nullptr && regType(theInterval->registerType) == FloatRegisterType) - { - // In the case we're using floating point registers we must make sure - // this flag was set previously in the compiler since this will mandate - // whether LSRA will take into consideration FP reg killsets. - assert(compiler->compFloatingPointUsed || ((mask & RBM_FLT_CALLEE_SAVED) == 0)); - } -#endif // DEBUG - - // If this reference is constrained to a single register (and it's not a dummy - // or Kill reftype already), add a RefTypeFixedReg at this location so that its - // availability can be more accurately determined - - bool isFixedRegister = isSingleRegister(mask); - bool insertFixedRef = false; - if (isFixedRegister) - { - // Insert a RefTypeFixedReg for any normal def or use (not ParamDef or BB) - if (theRefType == RefTypeUse || theRefType == RefTypeDef) - { - insertFixedRef = true; - } - } - - if (insertFixedRef) - { - regNumber physicalReg = genRegNumFromMask(mask); - RefPosition* pos = newRefPosition(physicalReg, theLocation, RefTypeFixedReg, nullptr, mask); - assert(theInterval != nullptr); - assert((allRegs(theInterval->registerType) & mask) != 0); - } - - RefPosition* newRP = newRefPositionRaw(theLocation, theTreeNode, theRefType); - - newRP->setInterval(theInterval); - - // Spill info - newRP->isFixedRegRef = isFixedRegister; - -#ifndef _TARGET_AMD64_ - // We don't need this for AMD because the PInvoke method epilog code is explicit - // at register allocation time. - if (theInterval != nullptr && theInterval->isLocalVar && compiler->info.compCallUnmanaged && - theInterval->varNum == compiler->genReturnLocal) - { - mask &= ~(RBM_PINVOKE_TCB | RBM_PINVOKE_FRAME); - noway_assert(mask != RBM_NONE); - } -#endif // !_TARGET_AMD64_ - newRP->registerAssignment = mask; - - newRP->setMultiRegIdx(multiRegIdx); - newRP->setAllocateIfProfitable(false); - - associateRefPosWithInterval(newRP); - - DBEXEC(VERBOSE, newRP->dump()); - return newRP; -} - /***************************************************************************** * Inline functions for Interval *****************************************************************************/ @@ -1122,6 +664,7 @@ LinearScan::LinearScan(Compiler* theCompiler) #endif // MEASURE_MEM_ALLOC , intervals(LinearScanMemoryAllocatorInterval(theCompiler)) , refPositions(LinearScanMemoryAllocatorRefPosition(theCompiler)) + , listNodePool(theCompiler) { #ifdef DEBUG maxNodeLocation = 0; @@ -1949,42 +1492,6 @@ void LinearScan::identifyCandidatesExceptionDataflow() } } -//------------------------------------------------------------------------ -// IsContainableMemoryOp: Checks whether this is a memory op that can be contained. -// -// Arguments: -// node - the node of interest. -// -// Return value: -// True if this will definitely be a memory reference that could be contained. -// -// Notes: -// This differs from the isMemoryOp() method on GenTree because it checks for -// the case of doNotEnregister local. This won't include locals that -// for some other reason do not become register candidates, nor those that get -// spilled. -// Also, because we usually call this before we redo dataflow, any new lclVars -// introduced after the last dataflow analysis will not yet be marked lvTracked, -// so we don't use that. 
-// -bool LinearScan::isContainableMemoryOp(GenTree* node) -{ - if (node->isMemoryOp()) - { - return true; - } - if (node->IsLocal()) - { - if (!enregisterLocalVars) - { - return true; - } - LclVarDsc* varDsc = &compiler->lvaTable[node->AsLclVar()->gtLclNum]; - return varDsc->lvDoNotEnregister; - } - return false; -} - bool LinearScan::isRegCandidate(LclVarDsc* varDsc) { // We shouldn't be called if opt settings do not permit register variables. @@ -2618,24 +2125,6 @@ VarToRegMap LinearScan::setInVarToRegMap(unsigned int bbNum, VarToRegMap srcVarT return inVarToRegMap; } -// given a tree node -RefType refTypeForLocalRefNode(GenTree* node) -{ - assert(node->IsLocal()); - - // We don't support updates - assert((node->gtFlags & GTF_VAR_USEASG) == 0); - - if (node->gtFlags & GTF_VAR_DEF) - { - return RefTypeDef; - } - else - { - return RefTypeUse; - } -} - //------------------------------------------------------------------------ // checkLastUses: Check correctness of last use flags // @@ -2776,1486 +2265,6 @@ void LinearScan::checkLastUses(BasicBlock* block) } #endif // DEBUG -void LinearScan::addRefsForPhysRegMask(regMaskTP mask, LsraLocation currentLoc, RefType refType, bool isLastUse) -{ - for (regNumber reg = REG_FIRST; mask; reg = REG_NEXT(reg), mask >>= 1) - { - if (mask & 1) - { - // This assumes that these are all "special" RefTypes that - // don't need to be recorded on the tree (hence treeNode is nullptr) - RefPosition* pos = newRefPosition(reg, currentLoc, refType, nullptr, - genRegMask(reg)); // This MUST occupy the physical register (obviously) - - if (isLastUse) - { - pos->lastUse = true; - } - } - } -} - -//------------------------------------------------------------------------ -// getKillSetForStoreInd: Determine the liveness kill set for a GT_STOREIND node. -// If the GT_STOREIND will generate a write barrier, determine the specific kill -// set required by the case-specific, platform-specific write barrier. If no -// write barrier is required, the kill set will be RBM_NONE. -// -// Arguments: -// tree - the GT_STOREIND node -// -// Return Value: a register mask of the registers killed -// -regMaskTP LinearScan::getKillSetForStoreInd(GenTreeStoreInd* tree) -{ - assert(tree->OperIs(GT_STOREIND)); - - regMaskTP killMask = RBM_NONE; - - GenTree* data = tree->Data(); - - GCInfo::WriteBarrierForm writeBarrierForm = compiler->codeGen->gcInfo.gcIsWriteBarrierCandidate(tree, data); - if (writeBarrierForm != GCInfo::WBF_NoBarrier) - { - if (compiler->codeGen->genUseOptimizedWriteBarriers(writeBarrierForm)) - { - // We can't determine the exact helper to be used at this point, because it depends on - // the allocated register for the `data` operand. However, all the (x86) optimized - // helpers have the same kill set: EDX. - killMask = RBM_CALLEE_TRASH_NOGC; - } - else - { - // Figure out which helper we're going to use, and then get the kill set for that helper. - CorInfoHelpFunc helper = - compiler->codeGen->genWriteBarrierHelperForWriteBarrierForm(tree, writeBarrierForm); - killMask = compiler->compHelperCallKillSet(helper); - } - } - - return killMask; -} - -//------------------------------------------------------------------------ -// getKillSetForNode: Return the registers killed by the given tree node. -// -// Arguments: -// compiler - the compiler context to use -// tree - the tree for which the kill set is needed. 
-// -// Return Value: a register mask of the registers killed -// -regMaskTP LinearScan::getKillSetForNode(GenTree* tree) -{ - regMaskTP killMask = RBM_NONE; - switch (tree->OperGet()) - { -#ifdef _TARGET_XARCH_ - case GT_MUL: - // We use the 128-bit multiply when performing an overflow checking unsigned multiply - // - if (((tree->gtFlags & GTF_UNSIGNED) != 0) && tree->gtOverflowEx()) - { - // Both RAX and RDX are killed by the operation - killMask = RBM_RAX | RBM_RDX; - } - break; - - case GT_MULHI: -#if defined(_TARGET_X86_) && !defined(LEGACY_BACKEND) - case GT_MUL_LONG: -#endif - killMask = RBM_RAX | RBM_RDX; - break; - - case GT_MOD: - case GT_DIV: - case GT_UMOD: - case GT_UDIV: - if (!varTypeIsFloating(tree->TypeGet())) - { - // Both RAX and RDX are killed by the operation - killMask = RBM_RAX | RBM_RDX; - } - break; -#endif // _TARGET_XARCH_ - - case GT_STORE_OBJ: - if (tree->OperIsCopyBlkOp()) - { - assert(tree->AsObj()->gtGcPtrCount != 0); - killMask = compiler->compHelperCallKillSet(CORINFO_HELP_ASSIGN_BYREF); - break; - } - __fallthrough; - - case GT_STORE_BLK: - case GT_STORE_DYN_BLK: - { - GenTreeBlk* blkNode = tree->AsBlk(); - bool isCopyBlk = varTypeIsStruct(blkNode->Data()); - switch (blkNode->gtBlkOpKind) - { - case GenTreeBlk::BlkOpKindHelper: - if (isCopyBlk) - { - killMask = compiler->compHelperCallKillSet(CORINFO_HELP_MEMCPY); - } - else - { - killMask = compiler->compHelperCallKillSet(CORINFO_HELP_MEMSET); - } - break; - -#ifdef _TARGET_XARCH_ - case GenTreeBlk::BlkOpKindRepInstr: - if (isCopyBlk) - { - // rep movs kills RCX, RDI and RSI - killMask = RBM_RCX | RBM_RDI | RBM_RSI; - } - else - { - // rep stos kills RCX and RDI. - // (Note that the Data() node, if not constant, will be assigned to - // RCX, but it's find that this kills it, as the value is not available - // after this node in any case.) - killMask = RBM_RDI | RBM_RCX; - } - break; -#else - case GenTreeBlk::BlkOpKindRepInstr: -#endif - case GenTreeBlk::BlkOpKindUnroll: - case GenTreeBlk::BlkOpKindInvalid: - // for these 'gtBlkOpKind' kinds, we leave 'killMask' = RBM_NONE - break; - } - } - break; - - case GT_RETURNTRAP: - killMask = compiler->compHelperCallKillSet(CORINFO_HELP_STOP_FOR_GC); - break; - case GT_CALL: -#ifdef _TARGET_X86_ - if (compiler->compFloatingPointUsed) - { - if (tree->TypeGet() == TYP_DOUBLE) - { - needDoubleTmpForFPCall = true; - } - else if (tree->TypeGet() == TYP_FLOAT) - { - needFloatTmpForFPCall = true; - } - } -#endif // _TARGET_X86_ -#if defined(_TARGET_X86_) || defined(_TARGET_ARM_) - if (tree->IsHelperCall()) - { - GenTreeCall* call = tree->AsCall(); - CorInfoHelpFunc helpFunc = compiler->eeGetHelperNum(call->gtCallMethHnd); - killMask = compiler->compHelperCallKillSet(helpFunc); - } - else -#endif // defined(_TARGET_X86_) || defined(_TARGET_ARM_) - { - // if there is no FP used, we can ignore the FP kills - if (compiler->compFloatingPointUsed) - { - killMask = RBM_CALLEE_TRASH; - } - else - { - killMask = RBM_INT_CALLEE_TRASH; - } -#ifdef _TARGET_ARM_ - if (tree->AsCall()->IsVirtualStub()) - { - killMask |= compiler->virtualStubParamInfo->GetRegMask(); - } -#else // !_TARGET_ARM_ - // Verify that the special virtual stub call registers are in the kill mask. - // We don't just add them unconditionally to the killMask because for most architectures - // they are already in the RBM_CALLEE_TRASH set, - // and we don't want to introduce extra checks and calls in this hot function. 
- assert(!tree->AsCall()->IsVirtualStub() || ((killMask & compiler->virtualStubParamInfo->GetRegMask()) == - compiler->virtualStubParamInfo->GetRegMask())); -#endif - } - break; - case GT_STOREIND: - killMask = getKillSetForStoreInd(tree->AsStoreInd()); - break; - -#if defined(PROFILING_SUPPORTED) - // If this method requires profiler ELT hook then mark these nodes as killing - // callee trash registers (excluding RAX and XMM0). The reason for this is that - // profiler callback would trash these registers. See vm\amd64\asmhelpers.asm for - // more details. - case GT_RETURN: - if (compiler->compIsProfilerHookNeeded()) - { - killMask = compiler->compHelperCallKillSet(CORINFO_HELP_PROF_FCN_LEAVE); - } - break; - - case GT_PROF_HOOK: - if (compiler->compIsProfilerHookNeeded()) - { - killMask = compiler->compHelperCallKillSet(CORINFO_HELP_PROF_FCN_TAILCALL); - } - break; -#endif // PROFILING_SUPPORTED - - default: - // for all other 'tree->OperGet()' kinds, leave 'killMask' = RBM_NONE - break; - } - return killMask; -} - -//------------------------------------------------------------------------ -// buildKillPositionsForNode: -// Given some tree node add refpositions for all the registers this node kills -// -// Arguments: -// tree - the tree for which kill positions should be generated -// currentLoc - the location at which the kills should be added -// -// Return Value: -// true - kills were inserted -// false - no kills were inserted -// -// Notes: -// The return value is needed because if we have any kills, we need to make sure that -// all defs are located AFTER the kills. On the other hand, if there aren't kills, -// the multiple defs for a regPair are in different locations. -// If we generate any kills, we will mark all currentLiveVars as being preferenced -// to avoid the killed registers. This is somewhat conservative. - -bool LinearScan::buildKillPositionsForNode(GenTree* tree, LsraLocation currentLoc) -{ - regMaskTP killMask = getKillSetForNode(tree); - bool isCallKill = ((killMask == RBM_INT_CALLEE_TRASH) || (killMask == RBM_CALLEE_TRASH)); - if (killMask != RBM_NONE) - { - // The killMask identifies a set of registers that will be used during codegen. - // Mark these as modified here, so when we do final frame layout, we'll know about - // all these registers. This is especially important if killMask contains - // callee-saved registers, which affect the frame size since we need to save/restore them. - // In the case where we have a copyBlk with GC pointers, can need to call the - // CORINFO_HELP_ASSIGN_BYREF helper, which kills callee-saved RSI and RDI, if - // LSRA doesn't assign RSI/RDI, they wouldn't get marked as modified until codegen, - // which is too late. - compiler->codeGen->regSet.rsSetRegsModified(killMask DEBUGARG(true)); - - addRefsForPhysRegMask(killMask, currentLoc, RefTypeKill, true); - - // TODO-CQ: It appears to be valuable for both fp and int registers to avoid killing the callee - // save regs on infrequently exectued paths. However, it results in a large number of asmDiffs, - // many of which appear to be regressions (because there is more spill on the infrequently path), - // but are not really because the frequent path becomes smaller. Validating these diffs will need - // to be done before making this change. 
- // if (!blockSequence[curBBSeqNum]->isRunRarely()) - if (enregisterLocalVars) - { - VarSetOps::Iter iter(compiler, currentLiveVars); - unsigned varIndex = 0; - while (iter.NextElem(&varIndex)) - { - unsigned varNum = compiler->lvaTrackedToVarNum[varIndex]; - LclVarDsc* varDsc = compiler->lvaTable + varNum; -#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE - if (varTypeNeedsPartialCalleeSave(varDsc->lvType)) - { - if (!VarSetOps::IsMember(compiler, largeVectorCalleeSaveCandidateVars, varIndex)) - { - continue; - } - } - else -#endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE - if (varTypeIsFloating(varDsc) && - !VarSetOps::IsMember(compiler, fpCalleeSaveCandidateVars, varIndex)) - { - continue; - } - Interval* interval = getIntervalForLocalVar(varIndex); - if (isCallKill) - { - interval->preferCalleeSave = true; - } - regMaskTP newPreferences = allRegs(interval->registerType) & (~killMask); - - if (newPreferences != RBM_NONE) - { - interval->updateRegisterPreferences(newPreferences); - } - else - { - // If there are no callee-saved registers, the call could kill all the registers. - // This is a valid state, so in that case assert should not trigger. The RA will spill in order to - // free a register later. - assert(compiler->opts.compDbgEnC || (calleeSaveRegs(varDsc->lvType)) == RBM_NONE); - } - } - } - - if (compiler->killGCRefs(tree)) - { - RefPosition* pos = newRefPosition((Interval*)nullptr, currentLoc, RefTypeKillGCRefs, tree, - (allRegs(TYP_REF) & ~RBM_ARG_REGS)); - } - return true; - } - - return false; -} - -//---------------------------------------------------------------------------- -// defineNewInternalTemp: Defines a ref position for an internal temp. -// -// Arguments: -// tree - Gentree node requiring an internal register -// regType - Register type -// currentLoc - Location of the temp Def position -// regMask - register mask of candidates for temp -// -RefPosition* LinearScan::defineNewInternalTemp(GenTree* tree, RegisterType regType, regMaskTP regMask) -{ - Interval* current = newInterval(regType); - current->isInternal = true; - return newRefPosition(current, currentLoc, RefTypeDef, tree, regMask, 0); -} - -//------------------------------------------------------------------------ -// buildInternalRegisterDefsForNode - build Def positions for internal -// registers required for tree node. -// -// Arguments: -// tree - Gentree node that needs internal registers -// temps - in-out array which is populated with ref positions -// created for Def of internal registers -// -// Returns: -// The total number of Def positions created for internal registers of tree no. -int LinearScan::buildInternalRegisterDefsForNode(GenTree* tree, TreeNodeInfo* info, RefPosition* temps[]) -{ - int count; - int internalIntCount = info->internalIntCount; - regMaskTP internalCands = info->getInternalCandidates(this); - - // If the number of internal integer registers required is the same as the number of candidate integer registers in - // the candidate set, then they must be handled as fixed registers. - // (E.g. for the integer registers that floating point arguments must be copied into for a varargs call.) 
- bool fixedRegs = false; - regMaskTP internalIntCandidates = (internalCands & allRegs(TYP_INT)); - if (((int)genCountBits(internalIntCandidates)) == internalIntCount) - { - fixedRegs = true; - } - - for (count = 0; count < internalIntCount; count++) - { - regMaskTP internalIntCands = (internalCands & allRegs(TYP_INT)); - if (fixedRegs) - { - internalIntCands = genFindLowestBit(internalIntCands); - internalCands &= ~internalIntCands; - } - temps[count] = defineNewInternalTemp(tree, IntRegisterType, internalIntCands); - } - - int internalFloatCount = info->internalFloatCount; - for (int i = 0; i < internalFloatCount; i++) - { - regMaskTP internalFPCands = (internalCands & internalFloatRegCandidates()); - temps[count++] = defineNewInternalTemp(tree, FloatRegisterType, internalFPCands); - } - - assert(count < MaxInternalRegisters); - assert(count == (internalIntCount + internalFloatCount)); - return count; -} - -//------------------------------------------------------------------------ -// buildInternalRegisterUsesForNode - adds Use positions for internal -// registers required for tree node. -// -// Arguments: -// tree - Gentree node that needs internal registers -// defs - int array containing Def positions of internal -// registers. -// total - Total number of Def positions in 'defs' array. -// -// Returns: -// Void. -void LinearScan::buildInternalRegisterUsesForNode(GenTree* tree, TreeNodeInfo* info, RefPosition* defs[], int total) -{ - assert(total < MaxInternalRegisters); - - // defs[] has been populated by buildInternalRegisterDefsForNode - // now just add uses to the defs previously added. - for (int i = 0; i < total; i++) - { - RefPosition* prevRefPosition = defs[i]; - assert(prevRefPosition != nullptr); - regMaskTP mask = prevRefPosition->registerAssignment; - if (prevRefPosition->isPhysRegRef) - { - newRefPosition(defs[i]->getReg()->regNum, currentLoc, RefTypeUse, tree, mask); - } - else - { - RefPosition* newest = newRefPosition(defs[i]->getInterval(), currentLoc, RefTypeUse, tree, mask, 0); - - if (info->isInternalRegDelayFree) - { - newest->delayRegFree = true; - } - } - } -} - -RegisterType LinearScan::getDefType(GenTree* tree) -{ - return tree->TypeGet(); -} - -//------------------------------------------------------------------------ -// LocationInfoListNodePool: manages a pool of `LocationInfoListNode` -// values to decrease overall memory usage -// during `buildIntervals`. -// -// `buildIntervals` involves creating a list of location info values per -// node that either directly produces a set of registers or that is a -// contained node with register-producing sources. However, these lists -// are short-lived: they are destroyed once the use of the corresponding -// node is processed. As such, there is typically only a small number of -// `LocationInfoListNode` values in use at any given time. Pooling these -// values avoids otherwise frequent allocations. -class LocationInfoListNodePool final -{ - LocationInfoListNode* m_freeList; - Compiler* m_compiler; - -public: - //------------------------------------------------------------------------ - // LocationInfoListNodePool::LocationInfoListNodePool: - // Creates a pool of `LocationInfoListNode` values. - // - // Arguments: - // compiler - The compiler context. - // preallocate - The number of nodes to preallocate. 
- // - LocationInfoListNodePool(Compiler* compiler, unsigned preallocate = 0) : m_compiler(compiler) - { - if (preallocate > 0) - { - size_t preallocateSize = sizeof(LocationInfoListNode) * preallocate; - LocationInfoListNode* preallocatedNodes = - reinterpret_cast<LocationInfoListNode*>(compiler->compGetMem(preallocateSize, CMK_LSRA)); - - LocationInfoListNode* head = preallocatedNodes; - head->m_next = nullptr; - - for (unsigned i = 1; i < preallocate; i++) - { - LocationInfoListNode* node = &preallocatedNodes[i]; - node->m_next = head; - head = node; - } - - m_freeList = head; - } - } - - //------------------------------------------------------------------------ - // LocationInfoListNodePool::GetNode: Fetches an unused node from the - // pool. - // - // Arguments: - // l - - The `LsraLocation` for the `LocationInfo` value. - // i - The interval for the `LocationInfo` value. - // t - The IR node for the `LocationInfo` value - // regIdx - The register index for the `LocationInfo` value. - // - // Returns: - // A pooled or newly-allocated `LocationInfoListNode`, depending on the - // contents of the pool. - LocationInfoListNode* GetNode(LsraLocation l, Interval* i, GenTree* t, unsigned regIdx = 0) - { - LocationInfoListNode* head = m_freeList; - if (head == nullptr) - { - head = reinterpret_cast<LocationInfoListNode*>(m_compiler->compGetMem(sizeof(LocationInfoListNode))); - } - else - { - m_freeList = head->m_next; - } - - head->loc = l; - head->interval = i; - head->treeNode = t; - head->m_next = nullptr; - - return head; - } - - //------------------------------------------------------------------------ - // LocationInfoListNodePool::ReturnNodes: Returns a list of nodes to the - // pool. - // - // Arguments: - // list - The list to return. - // - void ReturnNodes(LocationInfoList& list) - { - assert(list.m_head != nullptr); - assert(list.m_tail != nullptr); - - LocationInfoListNode* head = m_freeList; - list.m_tail->m_next = head; - m_freeList = list.m_head; - - list.m_head = nullptr; - list.m_tail = nullptr; - } -}; - -#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE -VARSET_VALRET_TP -LinearScan::buildUpperVectorSaveRefPositions(GenTree* tree, LsraLocation currentLoc) -{ - assert(enregisterLocalVars); - VARSET_TP liveLargeVectors(VarSetOps::MakeEmpty(compiler)); - regMaskTP fpCalleeKillSet = RBM_NONE; - if (!VarSetOps::IsEmpty(compiler, largeVectorVars)) - { - // We actually need to find any calls that kill the upper-half of the callee-save vector registers. - // But we will use as a proxy any node that kills floating point registers. - // (Note that some calls are masquerading as other nodes at this point so we can't just check for calls.) 
- fpCalleeKillSet = getKillSetForNode(tree); - if ((fpCalleeKillSet & RBM_FLT_CALLEE_TRASH) != RBM_NONE) - { - VarSetOps::AssignNoCopy(compiler, liveLargeVectors, - VarSetOps::Intersection(compiler, currentLiveVars, largeVectorVars)); - VarSetOps::Iter iter(compiler, liveLargeVectors); - unsigned varIndex = 0; - while (iter.NextElem(&varIndex)) - { - Interval* varInterval = getIntervalForLocalVar(varIndex); - Interval* tempInterval = newInterval(varInterval->registerType); - tempInterval->isInternal = true; - RefPosition* pos = - newRefPosition(tempInterval, currentLoc, RefTypeUpperVectorSaveDef, tree, RBM_FLT_CALLEE_SAVED); - // We are going to save the existing relatedInterval of varInterval on tempInterval, so that we can set - // the tempInterval as the relatedInterval of varInterval, so that we can build the corresponding - // RefTypeUpperVectorSaveUse RefPosition. We will then restore the relatedInterval onto varInterval, - // and set varInterval as the relatedInterval of tempInterval. - tempInterval->relatedInterval = varInterval->relatedInterval; - varInterval->relatedInterval = tempInterval; - } - } - } - return liveLargeVectors; -} - -void LinearScan::buildUpperVectorRestoreRefPositions(GenTree* tree, - LsraLocation currentLoc, - VARSET_VALARG_TP liveLargeVectors) -{ - assert(enregisterLocalVars); - if (!VarSetOps::IsEmpty(compiler, liveLargeVectors)) - { - VarSetOps::Iter iter(compiler, liveLargeVectors); - unsigned varIndex = 0; - while (iter.NextElem(&varIndex)) - { - Interval* varInterval = getIntervalForLocalVar(varIndex); - Interval* tempInterval = varInterval->relatedInterval; - assert(tempInterval->isInternal == true); - RefPosition* pos = - newRefPosition(tempInterval, currentLoc, RefTypeUpperVectorSaveUse, tree, RBM_FLT_CALLEE_SAVED); - // Restore the relatedInterval onto varInterval, and set varInterval as the relatedInterval - // of tempInterval. - varInterval->relatedInterval = tempInterval->relatedInterval; - tempInterval->relatedInterval = varInterval; - } - } -} -#endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE - -#ifdef DEBUG -//------------------------------------------------------------------------ -// ComputeOperandDstCount: computes the number of registers defined by a -// node. -// -// For most nodes, this is simple: -// - Nodes that do not produce values (e.g. stores and other void-typed -// nodes) and nodes that immediately use the registers they define -// produce no registers -// - Nodes that are marked as defining N registers define N registers. -// -// For contained nodes, however, things are more complicated: for purposes -// of bookkeeping, a contained node is treated as producing the transitive -// closure of the registers produced by its sources. -// -// Arguments: -// operand - The operand for which to compute a register count. -// -// Returns: -// The number of registers defined by `operand`. -// -static int ComputeOperandDstCount(GenTree* operand) -{ - // GT_ARGPLACE is the only non-LIR node that is currently in the trees at this stage, though - // note that it is not in the linear order. It seems best to check for !IsLIR() rather than - // GT_ARGPLACE directly, since it's that characteristic that makes it irrelevant for this method. - if (!operand->IsLIR()) - { - return 0; - } - if (operand->isContained()) - { - int dstCount = 0; - for (GenTree* op : operand->Operands()) - { - dstCount += ComputeOperandDstCount(op); - } - - return dstCount; - } - if (operand->IsUnusedValue()) - { - // Operands that define an unused value do not produce any registers. 
- return 0; - } - if (operand->IsValue()) - { - // Operands that are values and are not contained consume all of their operands - // and produce one or more registers. - return operand->GetRegisterDstCount(); - } - else - { - // This must be one of the operand types that are neither contained nor produce a value. - // Stores and void-typed operands may be encountered when processing call nodes, which contain - // pointers to argument setup stores. - assert(operand->OperIsStore() || operand->OperIsBlkOp() || operand->OperIsPutArgStk() || - operand->OperIsCompare() || operand->OperIs(GT_CMP) || operand->IsSIMDEqualityOrInequality() || - operand->TypeGet() == TYP_VOID); - return 0; - } -} - -//------------------------------------------------------------------------ -// ComputeAvailableSrcCount: computes the number of registers available as -// sources for a node. -// -// This is simply the sum of the number of registers produced by each -// operand to the node. -// -// Arguments: -// node - The node for which to compute a source count. -// -// Retures: -// The number of registers available as sources for `node`. -// -static int ComputeAvailableSrcCount(GenTree* node) -{ - int numSources = 0; - for (GenTree* operand : node->Operands()) - { - numSources += ComputeOperandDstCount(operand); - } - - return numSources; -} -#endif // DEBUG - -void LinearScan::buildRefPositionsForNode(GenTree* tree, - BasicBlock* block, - LocationInfoListNodePool& listNodePool, - LsraLocation currentLoc) -{ -#ifdef _TARGET_ARM_ - assert(!isRegPairType(tree->TypeGet())); -#endif // _TARGET_ARM_ - - // The LIR traversal doesn't visit GT_LIST or GT_ARGPLACE nodes. - // GT_CLS_VAR nodes should have been eliminated by rationalizer. - assert(tree->OperGet() != GT_ARGPLACE); - assert(tree->OperGet() != GT_LIST); - assert(tree->OperGet() != GT_CLS_VAR); - - // The LIR traversal visits only the first node in a GT_FIELD_LIST. - assert((tree->OperGet() != GT_FIELD_LIST) || tree->AsFieldList()->IsFieldListHead()); - - // The set of internal temporary registers used by this node are stored in the - // gtRsvdRegs register mask. Clear it out. - tree->gtRsvdRegs = RBM_NONE; - -#ifdef DEBUG - if (VERBOSE) - { - dumpDefList(); - compiler->gtDispTree(tree, nullptr, nullptr, true); - } -#endif // DEBUG - - // If the node produces a value that will be consumed by a parent node, its TreeNodeInfo will - // be allocated in the LocationInfoListNode. Otherwise, we'll just use a local value that will - // be thrown away when we're done. 
- LocationInfoListNode* locationInfo = nullptr; - TreeNodeInfo tempInfo; - TreeNodeInfo* info = nullptr; - if (!tree->isContained() && tree->IsValue()) - { - locationInfo = listNodePool.GetNode(currentLoc, nullptr, tree); - info = &locationInfo->info; - } - else - { - info = &tempInfo; - } - info->Initialize(this, tree); - TreeNodeInfoInit(tree, info); - -#ifdef DEBUG - if (VERBOSE) - { - printf(" +"); - info->dump(this); - tree->dumpLIRFlags(); - printf("\n"); - } -#endif // DEBUG - - assert(info->IsValid(this)); - int consume = info->srcCount; - int produce = info->dstCount; - -#ifdef DEBUG - if (VERBOSE) - { - if (tree->isContained()) - { - JITDUMP("Contained\n"); - } - else if (tree->OperIs(GT_LCL_VAR, GT_LCL_FLD) && info->isLocalDefUse) - { - JITDUMP("Unused\n"); - } - else - { - JITDUMP(" consume=%d produce=%d\n", consume, produce); - } - } -#endif // DEBUG - - assert(((consume == 0) && (produce == 0)) || (ComputeAvailableSrcCount(tree) == consume)); - - if (tree->OperIs(GT_LCL_VAR, GT_LCL_FLD)) - { - LclVarDsc* const varDsc = &compiler->lvaTable[tree->AsLclVarCommon()->gtLclNum]; - if (isCandidateVar(varDsc)) - { - assert(consume == 0); - - // We handle tracked variables differently from non-tracked ones. If it is tracked, - // we simply add a use or def of the tracked variable. Otherwise, for a use we need - // to actually add the appropriate references for loading or storing the variable. - // - // It won't actually get used or defined until the appropriate ancestor tree node - // is processed, unless this is marked "isLocalDefUse" because it is a stack-based argument - // to a call - - assert(varDsc->lvTracked); - unsigned varIndex = varDsc->lvVarIndex; - - // We have only approximate last-use information at this point. This is because the - // execution order doesn't actually reflect the true order in which the localVars - // are referenced - but the order of the RefPositions will, so we recompute it after - // RefPositions are built. - // Use the old value for setting currentLiveVars - note that we do this with the - // not-quite-correct setting of lastUse. However, this is OK because - // 1) this is only for preferencing, which doesn't require strict correctness, and - // 2) the cases where these out-of-order uses occur should not overlap a kill. - // TODO-Throughput: clean this up once we have the execution order correct. At that point - // we can update currentLiveVars at the same place that we create the RefPosition. 
- if ((tree->gtFlags & GTF_VAR_DEATH) != 0) - { - VarSetOps::RemoveElemD(compiler, currentLiveVars, varIndex); - } - - if (!info->isLocalDefUse && !tree->isContained()) - { - assert(produce != 0); - - locationInfo->interval = getIntervalForLocalVar(varIndex); - defList.Append(locationInfo); - } - return; - } - } - - // Handle the case of local variable assignment - Interval* varDefInterval = nullptr; - RefType defRefType = RefTypeDef; - - GenTree* defNode = tree; - - // noAdd means the node creates a def but for purposes of map - // management do not add it because data is not flowing up the - // tree - - bool noAdd = info->isLocalDefUse; - RefPosition* prevPos = nullptr; - - bool isSpecialPutArg = false; - - assert(!tree->OperIsAssignment()); - if (tree->OperIsLocalStore()) - { - GenTreeLclVarCommon* const store = tree->AsLclVarCommon(); - assert((consume > 1) || (regType(store->gtOp1->TypeGet()) == regType(store->TypeGet()))); - - LclVarDsc* varDsc = &compiler->lvaTable[store->gtLclNum]; - if (isCandidateVar(varDsc)) - { - // We always push the tracked lclVar intervals - assert(varDsc->lvTracked); - unsigned varIndex = varDsc->lvVarIndex; - varDefInterval = getIntervalForLocalVar(varIndex); - defRefType = refTypeForLocalRefNode(tree); - defNode = tree; - if (produce == 0) - { - produce = 1; - noAdd = true; - } - - assert(consume <= MAX_RET_REG_COUNT); - if (consume == 1) - { - // Get the location info for the register defined by the first operand. - LocationInfoListNode& operandInfo = *(useList.Begin()); - assert(operandInfo.treeNode == tree->gtGetOp1()); - - Interval* srcInterval = operandInfo.interval; - if (srcInterval->relatedInterval == nullptr) - { - // Preference the source to the dest, unless this is a non-last-use localVar. - // Note that the last-use info is not correct, but it is a better approximation than preferencing - // the source to the dest, if the source's lifetime extends beyond the dest. - if (!srcInterval->isLocalVar || (operandInfo.treeNode->gtFlags & GTF_VAR_DEATH) != 0) - { - srcInterval->assignRelatedInterval(varDefInterval); - } - } - else if (!srcInterval->isLocalVar) - { - // Preference the source to dest, if src is not a local var. - srcInterval->assignRelatedInterval(varDefInterval); - } - } - - if ((tree->gtFlags & GTF_VAR_DEATH) == 0) - { - VarSetOps::AddElemD(compiler, currentLiveVars, varIndex); - } - } - else if (store->gtOp1->OperIs(GT_BITCAST)) - { - store->gtType = store->gtOp1->gtType = store->gtOp1->AsUnOp()->gtOp1->TypeGet(); - - // Get the location info for the register defined by the first operand. - LocationInfoListNode& operandInfo = *(useList.Begin()); - assert(operandInfo.treeNode == tree->gtGetOp1()); - - Interval* srcInterval = operandInfo.interval; - srcInterval->registerType = regType(store->TypeGet()); - - RefPosition* srcDefPosition = srcInterval->firstRefPosition; - assert(srcDefPosition != nullptr); - assert(srcDefPosition->refType == RefTypeDef); - assert(srcDefPosition->treeNode == store->gtOp1); - - srcDefPosition->registerAssignment = allRegs(store->TypeGet()); - operandInfo.info.setSrcCandidates(this, allRegs(store->TypeGet())); - } - } - else if (noAdd && produce == 0) - { - // Dead nodes may remain after tree rationalization, decomposition or lowering. - // They should be marked as UnusedValue. - // TODO-Cleanup: Identify and remove these dead nodes prior to register allocation. 
- assert(!noAdd || (produce != 0)); - } - - Interval* prefSrcInterval = nullptr; - - // If this is a binary operator that will be encoded with 2 operand fields - // (i.e. the target is read-modify-write), preference the dst to op1. - - bool hasDelayFreeSrc = info->hasDelayFreeSrc; - -#if defined(DEBUG) && defined(_TARGET_X86_) - // On x86, `LSRA_LIMIT_CALLER` is too restrictive to allow the use of special put args: this stress mode - // leaves only three registers allocatable--eax, ecx, and edx--of which the latter two are also used for the - // first two integral arguments to a call. This can leave us with too few registers to succesfully allocate in - // situations like the following: - // - // t1026 = lclVar ref V52 tmp35 u:3 REG NA <l:$3a1, c:$98d> - // - // /--* t1026 ref - // t1352 = * putarg_reg ref REG NA - // - // t342 = lclVar int V14 loc6 u:4 REG NA $50c - // - // t343 = const int 1 REG NA $41 - // - // /--* t342 int - // +--* t343 int - // t344 = * + int REG NA $495 - // - // t345 = lclVar int V04 arg4 u:2 REG NA $100 - // - // /--* t344 int - // +--* t345 int - // t346 = * % int REG NA $496 - // - // /--* t346 int - // t1353 = * putarg_reg int REG NA - // - // t1354 = lclVar ref V52 tmp35 (last use) REG NA - // - // /--* t1354 ref - // t1355 = * lea(b+0) byref REG NA - // - // Here, the first `putarg_reg` would normally be considered a special put arg, which would remove `ecx` from the - // set of allocatable registers, leaving only `eax` and `edx`. The allocator will then fail to allocate a register - // for the def of `t345` if arg4 is not a register candidate: the corresponding ref position will be constrained to - // { `ecx`, `ebx`, `esi`, `edi` }, which `LSRA_LIMIT_CALLER` will further constrain to `ecx`, which will not be - // available due to the special put arg. - const bool supportsSpecialPutArg = getStressLimitRegs() != LSRA_LIMIT_CALLER; -#else - const bool supportsSpecialPutArg = true; -#endif - - if (supportsSpecialPutArg && tree->OperGet() == GT_PUTARG_REG && isCandidateLocalRef(tree->gtGetOp1()) && - (tree->gtGetOp1()->gtFlags & GTF_VAR_DEATH) == 0) - { - // This is the case for a "pass-through" copy of a lclVar. In the case where it is a non-last-use, - // we don't want the def of the copy to kill the lclVar register, if it is assigned the same register - // (which is actually what we hope will happen). - JITDUMP("Setting putarg_reg as a pass-through of a non-last use lclVar\n"); - - // Get the register information for the first operand of the node. - LocationInfoListNode* operandDef = useList.Begin(); - assert(operandDef->treeNode == tree->gtGetOp1()); - - // Preference the destination to the interval of the first register defined by the first operand. - Interval* srcInterval = operandDef->interval; - assert(srcInterval->isLocalVar); - prefSrcInterval = srcInterval; - isSpecialPutArg = true; - } - - RefPosition* internalRefs[MaxInternalRegisters]; - -#ifdef DEBUG - // If we are constraining the registers for allocation, we will modify all the RefPositions - // we've built for this node after we've created them. In order to do that, we'll remember - // the last RefPosition prior to those created for this node. - RefPositionIterator refPositionMark = refPositions.backPosition(); -#endif // DEBUG - - // Make intervals for all the 'internal' register requirements for this node, - // where internal means additional registers required temporarily. - // Create a RefTypeDef RefPosition for each such interval. 
- int internalCount = buildInternalRegisterDefsForNode(tree, info, internalRefs); - - // Make use RefPositions for all used values. - int consumed = 0; - for (LocationInfoListNode *listNode = useList.Begin(), *end = useList.End(); listNode != end; - listNode = listNode->Next()) - { - LocationInfo& locInfo = *static_cast<LocationInfo*>(listNode); - - // For tree temps, a use is always a last use and the end of the range; - // this is set by default in newRefPosition - GenTree* const useNode = locInfo.treeNode; - assert(useNode != nullptr); - - Interval* srcInterval = locInfo.interval; - TreeNodeInfo& useNodeInfo = locInfo.info; - if (useNodeInfo.isTgtPref) - { - prefSrcInterval = srcInterval; - } - - const bool delayRegFree = (hasDelayFreeSrc && useNodeInfo.isDelayFree); - - regMaskTP candidates = useNodeInfo.getSrcCandidates(this); -#ifdef _TARGET_ARM_ - regMaskTP allCandidates = candidates; - - if (useNode->OperIsPutArgSplit() || useNode->OperIsMultiRegOp()) - { - // get i-th candidate, set bits in useCandidates must be in sequential order. - candidates = genFindLowestReg(allCandidates); - allCandidates &= ~candidates; - } -#endif // _TARGET_ARM_ - - assert((candidates & allRegs(srcInterval->registerType)) != 0); - - // For non-localVar uses we record nothing, as nothing needs to be written back to the tree. - GenTree* const refPosNode = srcInterval->isLocalVar ? useNode : nullptr; - RefPosition* pos = newRefPosition(srcInterval, currentLoc, RefTypeUse, refPosNode, candidates, 0); - if (delayRegFree) - { - pos->delayRegFree = true; - } - - if (useNode->IsRegOptional()) - { - pos->setAllocateIfProfitable(true); - } - consumed++; - - // Create additional use RefPositions for multi-reg nodes. - for (int idx = 1; idx < locInfo.info.dstCount; idx++) - { - noway_assert(srcInterval->relatedInterval != nullptr); - srcInterval = srcInterval->relatedInterval; -#ifdef _TARGET_ARM_ - if (useNode->OperIsPutArgSplit() || - (compiler->opts.compUseSoftFP && (useNode->OperIsPutArgReg() || useNode->OperGet() == GT_BITCAST))) - { - // get first candidate, set bits in useCandidates must be in sequential order. - candidates = genFindLowestReg(allCandidates); - allCandidates &= ~candidates; - } -#endif // _TARGET_ARM_ - RefPosition* pos = newRefPosition(srcInterval, currentLoc, RefTypeUse, refPosNode, candidates, idx); - consumed++; - } - } - - assert(consumed == consume); - if (consume != 0) - { - listNodePool.ReturnNodes(useList); - } - - buildInternalRegisterUsesForNode(tree, info, internalRefs, internalCount); - - RegisterType registerType = getDefType(tree); - regMaskTP candidates = info->getDstCandidates(this); - regMaskTP useCandidates = info->getSrcCandidates(this); - -#ifdef DEBUG - if (VERBOSE && produce) - { - printf("Def candidates "); - dumpRegMask(candidates); - printf(", Use candidates "); - dumpRegMask(useCandidates); - printf("\n"); - } -#endif // DEBUG - -#if defined(_TARGET_AMD64_) - // Multi-reg call node is the only node that could produce multi-reg value - assert(produce <= 1 || (tree->IsMultiRegCall() && produce == MAX_RET_REG_COUNT)); -#endif // _TARGET_xxx_ - - // Add kill positions before adding def positions - buildKillPositionsForNode(tree, currentLoc + 1); - -#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE - VARSET_TP liveLargeVectors(VarSetOps::UninitVal()); - if (enregisterLocalVars && (RBM_FLT_CALLEE_SAVED != RBM_NONE)) - { - // Build RefPositions for saving any live large vectors. - // This must be done after the kills, so that we know which large vectors are still live. 
-        VarSetOps::AssignNoCopy(compiler, liveLargeVectors, buildUpperVectorSaveRefPositions(tree, currentLoc + 1));
-    }
-#endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE
-
-    ReturnTypeDesc* retTypeDesc    = nullptr;
-    bool            isMultiRegCall = tree->IsMultiRegCall();
-    if (isMultiRegCall)
-    {
-        retTypeDesc = tree->AsCall()->GetReturnTypeDesc();
-        assert((int)genCountBits(candidates) == produce);
-        assert(candidates == retTypeDesc->GetABIReturnRegs());
-    }
-
-    // push defs
-    LocationInfoList locationInfoList;
-    LsraLocation     defLocation = currentLoc + 1;
-    Interval*        interval    = varDefInterval;
-    // For nodes that define multiple registers, subsequent intervals will be linked using the 'relatedInterval' field.
-    // Keep track of the previous interval allocated, for that purpose.
-    Interval* prevInterval = nullptr;
-    for (int i = 0; i < produce; i++)
-    {
-        regMaskTP currCandidates = candidates;
-
-        // In case of multi-reg call node, registerType is given by
-        // the type of ith position return register.
-        if (isMultiRegCall)
-        {
-            registerType   = retTypeDesc->GetReturnRegType((unsigned)i);
-            currCandidates = genRegMask(retTypeDesc->GetABIReturnReg(i));
-            useCandidates  = allRegs(registerType);
-        }
-
-#ifdef _TARGET_ARM_
-        // If oper is GT_PUTARG_REG, set bits in useCandidates must be in sequential order.
-        if (tree->OperIsPutArgSplit() || tree->OperIsMultiRegOp())
-        {
-            // get i-th candidate
-            currCandidates = genFindLowestReg(candidates);
-            candidates &= ~currCandidates;
-        }
-#endif // _TARGET_ARM_
-
-        if (interval == nullptr)
-        {
-            // Make a new interval
-            interval = newInterval(registerType);
-            if (hasDelayFreeSrc || info->isInternalRegDelayFree)
-            {
-                interval->hasInterferingUses = true;
-            }
-            else if (tree->OperIsConst())
-            {
-                assert(!tree->IsReuseRegVal());
-                interval->isConstant = true;
-            }
-
-            if ((currCandidates & useCandidates) != RBM_NONE)
-            {
-                interval->updateRegisterPreferences(currCandidates & useCandidates);
-            }
-
-            if (isSpecialPutArg)
-            {
-                interval->isSpecialPutArg = true;
-            }
-        }
-        else
-        {
-            assert(registerTypesEquivalent(interval->registerType, registerType));
-        }
-
-        if (prefSrcInterval != nullptr)
-        {
-            interval->assignRelatedIntervalIfUnassigned(prefSrcInterval);
-        }
-
-        // for assignments, we want to create a refposition for the def
-        // but not push it
-        if (!noAdd)
-        {
-            if (i == 0)
-            {
-                locationInfo->interval = interval;
-                prevInterval           = interval;
-                defList.Append(locationInfo);
-            }
-            else
-            {
-                // This is the 2nd or subsequent register defined by a multi-reg node.
-                // Connect them using 'relatedInterval'.
-                noway_assert(prevInterval != nullptr);
-                prevInterval->relatedInterval = interval;
-                prevInterval                  = interval;
-                prevInterval->isMultiReg      = true;
-                interval->isMultiReg          = true;
-            }
-        }
-
-        RefPosition* pos = newRefPosition(interval, defLocation, defRefType, defNode, currCandidates, (unsigned)i);
-        if (info->isLocalDefUse)
-        {
-            // This must be an unused value, OR it is a special node for which we allocate
-            // a target register even though it produces no value.
-            assert(defNode->IsUnusedValue() || (defNode->gtOper == GT_LOCKADD));
-            pos->isLocalDefUse = true;
-            pos->lastUse       = true;
-        }
-        interval->updateRegisterPreferences(currCandidates);
-        interval->updateRegisterPreferences(useCandidates);
-        interval = nullptr;
-    }
-
-#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
-    // SaveDef position must be at the same location as Def position of call node.
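// [Editorial sketch] Defs beyond the first are chained through 'relatedInterval'
// above; a use of the i-th register later walks that chain (see the multi-reg use
// loop earlier in this function). A pared-down stand-in for that walk, with
// MiniInterval a hypothetical reduction of Interval:
struct MiniInterval
{
    MiniInterval* relatedInterval = nullptr; // next register's interval, if multi-reg
};

inline MiniInterval* nthMultiRegInterval(MiniInterval* first, int i)
{
    MiniInterval* cur = first;
    for (int k = 0; k < i; k++)
    {
        cur = cur->relatedInterval; // links were installed when the defs were built
    }
    return cur;
}
// (the removed code resumes below)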
-    if (enregisterLocalVars)
-    {
-        buildUpperVectorRestoreRefPositions(tree, defLocation, liveLargeVectors);
-    }
-#endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE
-
-#ifdef DEBUG
-    // If we are constraining registers, modify all the RefPositions we've just built to specify the
-    // minimum reg count required.
-    if ((getStressLimitRegs() != LSRA_LIMIT_NONE) || (getSelectionHeuristics() != LSRA_SELECT_DEFAULT))
-    {
-        // The number of registers required for a tree node is the sum of
-        // consume + produce + internalCount. This is the minimum
-        // set of registers that needs to be ensured in candidate
-        // set of ref positions created.
-        unsigned minRegCount = consume + produce + info->internalIntCount + info->internalFloatCount;
-
-        for (refPositionMark++; refPositionMark != refPositions.end(); refPositionMark++)
-        {
-            RefPosition* newRefPosition    = &(*refPositionMark);
-            unsigned     minRegCountForRef = minRegCount;
-            if (RefTypeIsUse(newRefPosition->refType) && newRefPosition->delayRegFree)
-            {
-                // If delayRegFree, then Use will interfere with the destination of the consuming node.
-                // Therefore, we also need to add the kill set of the consuming node to minRegCount.
-                //
-                // For example consider the following IR on x86, where v01 and v02
-                // are method args coming in ecx and edx respectively.
-                //   GT_DIV(v01, v02)
-                //
-                // For GT_DIV, the minRegCount will be 3 without adding kill set of GT_DIV node.
-                //
-                // Assume further JitStressRegs=2, which would constrain candidates to callee trashable
-                // regs { eax, ecx, edx } on use positions of v01 and v02. LSRA allocates ecx for v01.
-                // The use position of v02 cannot be allocated a reg since it is marked delay-reg free and
-                // {eax,edx} are getting killed before the def of GT_DIV. For this reason, minRegCount for
-                // the use position of v02 also needs to take into account the kill set of its consuming node.
-                regMaskTP killMask = getKillSetForNode(tree);
-                if (killMask != RBM_NONE)
-                {
-                    minRegCountForRef += genCountBits(killMask);
-                }
-            }
-            newRefPosition->minRegCandidateCount = minRegCountForRef;
-            if (newRefPosition->IsActualRef() && doReverseCallerCallee())
-            {
-                Interval* interval       = newRefPosition->getInterval();
-                regMaskTP oldAssignment  = newRefPosition->registerAssignment;
-                regMaskTP calleeSaveMask = calleeSaveRegs(interval->registerType);
-                newRefPosition->registerAssignment =
-                    getConstrainedRegMask(oldAssignment, calleeSaveMask, minRegCountForRef);
-                if ((newRefPosition->registerAssignment != oldAssignment) && (newRefPosition->refType == RefTypeUse) &&
-                    !interval->isLocalVar)
-                {
-                    checkConflictingDefUse(newRefPosition);
-                }
-            }
-        }
-    }
-#endif // DEBUG
-    JITDUMP("\n");
-}
-
-// make an interval for each physical register
-void LinearScan::buildPhysRegRecords()
-{
-    RegisterType regType = IntRegisterType;
-    for (regNumber reg = REG_FIRST; reg < ACTUAL_REG_COUNT; reg = REG_NEXT(reg))
-    {
-        RegRecord* curr = &physRegs[reg];
-        curr->init(reg);
-    }
-}
-
-BasicBlock* getNonEmptyBlock(BasicBlock* block)
-{
-    while (block != nullptr && block->bbTreeList == nullptr)
-    {
-        BasicBlock* nextBlock = block->bbNext;
-        // Note that here we use the version of NumSucc that does not take a compiler.
-        // That way this doesn't have to take a compiler, or be an instance method, e.g. of LinearScan.
-        // If we have an empty block, it must have jump type BBJ_NONE or BBJ_ALWAYS, in which
-        // case we don't need the version that takes a compiler.
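// [Editorial sketch] The minimum-register-count rule removed above, reduced to its
// arithmetic. Names are illustrative stand-ins for the fields used in that loop:
#include <bit>
#include <cstdint>

inline unsigned computeMinRegCountForRef(unsigned consume, unsigned produce, unsigned internalCount,
                                         bool delayRegFree, std::uint32_t killMask)
{
    unsigned minRegCount = consume + produce + internalCount;
    if (delayRegFree)
    {
        // A delay-free use interferes with the consuming node's def, so the
        // registers that node kills must be available at the same time.
        minRegCount += static_cast<unsigned>(std::popcount(killMask));
    }
    return minRegCount;
}
// For the GT_DIV(v01, v02) example in the comment: consume = 2, produce = 1, no
// internal registers, and a kill mask of {eax, edx}, so the delay-free use of v02
// requires 3 + 2 = 5 candidate registers.
// (the removed code resumes below)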
-        assert(block->NumSucc() == 1 && ((block->bbJumpKind == BBJ_ALWAYS) || (block->bbJumpKind == BBJ_NONE)));
-        // sometimes the first block is empty and ends with an uncond branch
-        // assert( block->GetSucc(0) == nextBlock);
-        block = nextBlock;
-    }
-    assert(block != nullptr && block->bbTreeList != nullptr);
-    return block;
-}
-
-//------------------------------------------------------------------------
-// insertZeroInitRefPositions: Handle lclVars that are live-in to the first block
-//
-// Notes:
-//    Prior to calling this method, 'currentLiveVars' must be set to the set of register
-//    candidate variables that are liveIn to the first block.
-//    For each register candidate that is live-in to the first block:
-//    - If it is a GC ref, or if compInitMem is set, a ZeroInit RefPosition will be created.
-//    - Otherwise, it will be marked as spilled, since it will not be assigned a register
-//      on entry and will be loaded from memory on the undefined path.
-//      Note that, when the compInitMem option is not set, we may encounter these on
-//      paths that are protected by the same condition as an earlier def. However, since
-//      we don't do the analysis to determine this - and couldn't rely on always identifying
-//      such cases even if we tried - we must conservatively treat the undefined path as
-//      being possible. This is a relatively rare case, so the introduced conservatism is
-//      not expected to warrant the analysis required to determine the best placement of
-//      an initialization.
-//
-void LinearScan::insertZeroInitRefPositions()
-{
-    assert(enregisterLocalVars);
-#ifdef DEBUG
-    VARSET_TP expectedLiveVars(VarSetOps::Intersection(compiler, registerCandidateVars, compiler->fgFirstBB->bbLiveIn));
-    assert(VarSetOps::Equal(compiler, currentLiveVars, expectedLiveVars));
-#endif // DEBUG
-
-    // insert defs for this, then a block boundary
-
-    VarSetOps::Iter iter(compiler, currentLiveVars);
-    unsigned        varIndex = 0;
-    while (iter.NextElem(&varIndex))
-    {
-        unsigned   varNum = compiler->lvaTrackedToVarNum[varIndex];
-        LclVarDsc* varDsc = compiler->lvaTable + varNum;
-        if (!varDsc->lvIsParam && isCandidateVar(varDsc))
-        {
-            JITDUMP("V%02u was live in to first block:", varNum);
-            Interval* interval = getIntervalForLocalVar(varIndex);
-            if (compiler->info.compInitMem || varTypeIsGC(varDsc->TypeGet()))
-            {
-                JITDUMP(" creating ZeroInit\n");
-                GenTree*     firstNode = getNonEmptyBlock(compiler->fgFirstBB)->firstNode();
-                RefPosition* pos =
-                    newRefPosition(interval, MinLocation, RefTypeZeroInit, firstNode, allRegs(interval->registerType));
-                varDsc->lvMustInit = true;
-            }
-            else
-            {
-                setIntervalAsSpilled(interval);
-                JITDUMP(" marking as spilled\n");
-            }
-        }
-    }
-}
-
-#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
-// -----------------------------------------------------------------------
-// Sets the register state for an argument of type STRUCT for System V systems.
-//     See Compiler::raUpdateRegStateForArg(RegState *regState, LclVarDsc *argDsc) in regalloc.cpp
-//     for how state for argument is updated for unix non-structs and Windows AMD64 structs.
-void LinearScan::unixAmd64UpdateRegStateForArg(LclVarDsc* argDsc)
-{
-    assert(varTypeIsStruct(argDsc));
-    RegState* intRegState   = &compiler->codeGen->intRegState;
-    RegState* floatRegState = &compiler->codeGen->floatRegState;
-
-    if ((argDsc->lvArgReg != REG_STK) && (argDsc->lvArgReg != REG_NA))
-    {
-        if (genRegMask(argDsc->lvArgReg) & (RBM_ALLFLOAT))
-        {
-            assert(genRegMask(argDsc->lvArgReg) & (RBM_FLTARG_REGS));
-            floatRegState->rsCalleeRegArgMaskLiveIn |= genRegMask(argDsc->lvArgReg);
-        }
-        else
-        {
-            assert(genRegMask(argDsc->lvArgReg) & (RBM_ARG_REGS));
-            intRegState->rsCalleeRegArgMaskLiveIn |= genRegMask(argDsc->lvArgReg);
-        }
-    }
-
-    if ((argDsc->lvOtherArgReg != REG_STK) && (argDsc->lvOtherArgReg != REG_NA))
-    {
-        if (genRegMask(argDsc->lvOtherArgReg) & (RBM_ALLFLOAT))
-        {
-            assert(genRegMask(argDsc->lvOtherArgReg) & (RBM_FLTARG_REGS));
-            floatRegState->rsCalleeRegArgMaskLiveIn |= genRegMask(argDsc->lvOtherArgReg);
-        }
-        else
-        {
-            assert(genRegMask(argDsc->lvOtherArgReg) & (RBM_ARG_REGS));
-            intRegState->rsCalleeRegArgMaskLiveIn |= genRegMask(argDsc->lvOtherArgReg);
-        }
-    }
-}
-
-#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
-
-//------------------------------------------------------------------------
-// updateRegStateForArg: Updates rsCalleeRegArgMaskLiveIn for the appropriate
-//    regState (either compiler->intRegState or compiler->floatRegState),
-//    with the lvArgReg on "argDsc"
-//
-// Arguments:
-//    argDsc - the argument for which the state is to be updated.
-//
-// Return Value: None
-//
-// Assumptions:
-//    The argument is live on entry to the function
-//    (or is untracked and therefore assumed live)
-//
-// Notes:
-//    This relies on a method in regAlloc.cpp that is shared between LSRA
-//    and regAlloc. It is further abstracted here because regState is updated
-//    separately for tracked and untracked variables in LSRA.
-//
-void LinearScan::updateRegStateForArg(LclVarDsc* argDsc)
-{
-#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
-    // For System V AMD64 calls the argDsc can have 2 registers (for structs.)
-    // Handle them here.
-    if (varTypeIsStruct(argDsc))
-    {
-        unixAmd64UpdateRegStateForArg(argDsc);
-    }
-    else
-#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
-    {
-        RegState* intRegState   = &compiler->codeGen->intRegState;
-        RegState* floatRegState = &compiler->codeGen->floatRegState;
-        // In the case of AMD64 we'll still use the floating point registers
-        // to model the register usage for arguments on vararg calls, so
-        // we will ignore the varargs condition to determine whether we use
-        // XMM registers or not for setting up the call.
-        bool isFloat = (isFloatRegType(argDsc->lvType)
-#ifndef _TARGET_AMD64_
-                        && !compiler->info.compIsVarArgs
-#endif
-                        && !compiler->opts.compUseSoftFP);
-
-        if (argDsc->lvIsHfaRegArg())
-        {
-            isFloat = true;
-        }
-
-        if (isFloat)
-        {
-            JITDUMP("Float arg V%02u in reg %s\n", (argDsc - compiler->lvaTable), getRegName(argDsc->lvArgReg));
-            compiler->raUpdateRegStateForArg(floatRegState, argDsc);
-        }
-        else
-        {
-            JITDUMP("Int arg V%02u in reg %s\n", (argDsc - compiler->lvaTable), getRegName(argDsc->lvArgReg));
-#if FEATURE_MULTIREG_ARGS
-            if (argDsc->lvOtherArgReg != REG_NA)
-            {
-                JITDUMP("(second half) in reg %s\n", getRegName(argDsc->lvOtherArgReg));
-            }
-#endif // FEATURE_MULTIREG_ARGS
-            compiler->raUpdateRegStateForArg(intRegState, argDsc);
-        }
-    }
-}
-
 //------------------------------------------------------------------------
 // findPredBlockForLiveIn: Determine which block should be used for the register locations of the live-in variables.
 //
@@ -4371,448 +2380,6 @@ BasicBlock* LinearScan::findPredBlockForLiveIn(BasicBlock* block,
     return predBlock;
 }
 
-void LinearScan::buildIntervals()
-{
-    BasicBlock* block;
-
-    JITDUMP("\nbuildIntervals ========\n");
-
-    // Now build (empty) records for all of the physical registers
-    buildPhysRegRecords();
-
-#ifdef DEBUG
-    if (VERBOSE)
-    {
-        printf("\n-----------------\n");
-        printf("LIVENESS:\n");
-        printf("-----------------\n");
-        foreach_block(compiler, block)
-        {
-            printf("BB%02u use def in out\n", block->bbNum);
-            dumpConvertedVarSet(compiler, block->bbVarUse);
-            printf("\n");
-            dumpConvertedVarSet(compiler, block->bbVarDef);
-            printf("\n");
-            dumpConvertedVarSet(compiler, block->bbLiveIn);
-            printf("\n");
-            dumpConvertedVarSet(compiler, block->bbLiveOut);
-            printf("\n");
-        }
-    }
-#endif // DEBUG
-
-#if DOUBLE_ALIGN
-    // We will determine whether we should double align the frame during
-    // identifyCandidates(), but we initially assume that we will not.
-    doDoubleAlign = false;
-#endif
-
-    identifyCandidates();
-
-    // Figure out if we're going to use a frame pointer. We need to do this before building
-    // the ref positions, because those objects will embed the frame register in various register masks
-    // if the frame pointer is not reserved. If we decide to have a frame pointer, setFrameType() will
-    // remove the frame pointer from the masks.
-    setFrameType();
-
-    DBEXEC(VERBOSE, TupleStyleDump(LSRA_DUMP_PRE));
-
-    // second part:
-    JITDUMP("\nbuildIntervals second part ========\n");
-    currentLoc = 0;
-    // TODO-Cleanup: This duplicates prior behavior where entry (ParamDef) RefPositions were
-    // being assigned the bbNum of the last block traversed in the 2nd phase of Lowering.
-    // Previously, the block sequencing was done for the (formerly separate) TreeNodeInfoInit pass,
-    // and the curBBNum was left as the last block sequenced. This block was then used to set the
-    // weight for the entry (ParamDef) RefPositions. It would be logical to set this to the
-    // normalized entry weight (compiler->fgCalledCount), but that results in a net regression.
-    if (!blockSequencingDone)
-    {
-        setBlockSequence();
-    }
-    curBBNum = blockSequence[bbSeqCount - 1]->bbNum;
-
-    // Next, create ParamDef RefPositions for all the tracked parameters, in order of their varIndex.
-    // Assign these RefPositions to the (nonexistent) BB0.
-    curBBNum = 0;
-
-    LclVarDsc*   argDsc;
-    unsigned int lclNum;
-
-    RegState* intRegState                   = &compiler->codeGen->intRegState;
-    RegState* floatRegState                 = &compiler->codeGen->floatRegState;
-    intRegState->rsCalleeRegArgMaskLiveIn   = RBM_NONE;
-    floatRegState->rsCalleeRegArgMaskLiveIn = RBM_NONE;
-
-    for (unsigned int varIndex = 0; varIndex < compiler->lvaTrackedCount; varIndex++)
-    {
-        lclNum = compiler->lvaTrackedToVarNum[varIndex];
-        argDsc = &(compiler->lvaTable[lclNum]);
-
-        if (!argDsc->lvIsParam)
-        {
-            continue;
-        }
-
-        // Only reserve a register if the argument is actually used.
-        // Is it dead on entry? If compJmpOpUsed is true, then the arguments
-        // have to be kept alive, so we have to consider it as live on entry.
-        // Use lvRefCnt instead of checking bbLiveIn because if it's volatile we
-        // won't have done dataflow on it, but it needs to be marked as live-in so
-        // it will get saved in the prolog.
-        if (!compiler->compJmpOpUsed && argDsc->lvRefCnt == 0 && !compiler->opts.compDbgCode)
-        {
-            continue;
-        }
-
-        if (argDsc->lvIsRegArg)
-        {
-            updateRegStateForArg(argDsc);
-        }
-
-        if (isCandidateVar(argDsc))
-        {
-            Interval* interval = getIntervalForLocalVar(varIndex);
-            regMaskTP mask     = allRegs(TypeGet(argDsc));
-            if (argDsc->lvIsRegArg)
-            {
-                // Set this interval as currently assigned to that register
-                regNumber inArgReg = argDsc->lvArgReg;
-                assert(inArgReg < REG_COUNT);
-                mask = genRegMask(inArgReg);
-                assignPhysReg(inArgReg, interval);
-            }
-            RefPosition* pos = newRefPosition(interval, MinLocation, RefTypeParamDef, nullptr, mask);
-        }
-        else if (varTypeIsStruct(argDsc->lvType))
-        {
-            for (unsigned fieldVarNum = argDsc->lvFieldLclStart;
-                 fieldVarNum < argDsc->lvFieldLclStart + argDsc->lvFieldCnt; ++fieldVarNum)
-            {
-                LclVarDsc* fieldVarDsc = &(compiler->lvaTable[fieldVarNum]);
-                if (fieldVarDsc->lvLRACandidate)
-                {
-                    assert(fieldVarDsc->lvTracked);
-                    Interval*    interval = getIntervalForLocalVar(fieldVarDsc->lvVarIndex);
-                    RefPosition* pos =
-                        newRefPosition(interval, MinLocation, RefTypeParamDef, nullptr, allRegs(TypeGet(fieldVarDsc)));
-                }
-            }
-        }
-        else
-        {
-            // We can overwrite the register (i.e. codegen saves it on entry)
-            assert(argDsc->lvRefCnt == 0 || !argDsc->lvIsRegArg || argDsc->lvDoNotEnregister ||
-                   !argDsc->lvLRACandidate || (varTypeIsFloating(argDsc->TypeGet()) && compiler->opts.compDbgCode));
-        }
-    }
-
-    // Now set up the reg state for the non-tracked args
-    // (We do this here because we want to generate the ParamDef RefPositions in tracked
-    // order, so that loop doesn't hit the non-tracked args)
-
-    for (unsigned argNum = 0; argNum < compiler->info.compArgsCount; argNum++, argDsc++)
-    {
-        argDsc = &(compiler->lvaTable[argNum]);
-
-        if (argDsc->lvPromotedStruct())
-        {
-            noway_assert(argDsc->lvFieldCnt == 1); // We only handle one field here
-
-            unsigned fieldVarNum = argDsc->lvFieldLclStart;
-            argDsc               = &(compiler->lvaTable[fieldVarNum]);
-        }
-        noway_assert(argDsc->lvIsParam);
-        if (!argDsc->lvTracked && argDsc->lvIsRegArg)
-        {
-            updateRegStateForArg(argDsc);
-        }
-    }
-
-    // If there is a secret stub param, it is also live in
-    if (compiler->info.compPublishStubParam)
-    {
-        intRegState->rsCalleeRegArgMaskLiveIn |= RBM_SECRET_STUB_PARAM;
-    }
-
-    LocationInfoListNodePool listNodePool(compiler, 8);
-
-    BasicBlock* predBlock = nullptr;
-    BasicBlock* prevBlock = nullptr;
-
-    // Initialize currentLiveVars to the empty set. We will set it to the current
-    // live-in at the entry to each block (this will include the incoming args on
-    // the first block).
-    VarSetOps::AssignNoCopy(compiler, currentLiveVars, VarSetOps::MakeEmpty(compiler));
-
-    for (block = startBlockSequence(); block != nullptr; block = moveToNextBlock())
-    {
-        JITDUMP("\nNEW BLOCK BB%02u\n", block->bbNum);
-
-        bool predBlockIsAllocated = false;
-        predBlock                 = findPredBlockForLiveIn(block, prevBlock DEBUGARG(&predBlockIsAllocated));
-        if (predBlock)
-        {
-            JITDUMP("\n\nSetting BB%02u as the predecessor for determining incoming variable registers of BB%02u\n",
-                    block->bbNum, predBlock->bbNum);
-            assert(predBlock->bbNum <= bbNumMaxBeforeResolution);
-            blockInfo[block->bbNum].predBBNum = predBlock->bbNum;
-        }
-
-        if (enregisterLocalVars)
-        {
-            VarSetOps::AssignNoCopy(compiler, currentLiveVars,
-                                    VarSetOps::Intersection(compiler, registerCandidateVars, block->bbLiveIn));
-
-            if (block == compiler->fgFirstBB)
-            {
-                insertZeroInitRefPositions();
-                // The first real location is at 1; 0 is for the entry.
-                currentLoc = 1;
-            }
-
-            // Any lclVars live-in to a block are resolution candidates.
-            VarSetOps::UnionD(compiler, resolutionCandidateVars, currentLiveVars);
-
-            // Determine if we need any DummyDefs.
-            // We need DummyDefs for cases where "predBlock" isn't really a predecessor.
-            // Note that it's possible to have uses of uninitialized variables, in which case even the first
-            // block may require DummyDefs, which we are not currently adding - this means that these variables
-            // will always be considered to be in memory on entry (and reloaded when the use is encountered).
-            // TODO-CQ: Consider how best to tune this. Currently, if we create DummyDefs for uninitialized
-            // variables (which may actually be initialized along the dynamically executed paths, but not
-            // on all static paths), we wind up with excessive liveranges for some of these variables.
-            VARSET_TP newLiveIn(VarSetOps::MakeCopy(compiler, currentLiveVars));
-            if (predBlock)
-            {
-                // Compute set difference: newLiveIn = currentLiveVars - predBlock->bbLiveOut
-                VarSetOps::DiffD(compiler, newLiveIn, predBlock->bbLiveOut);
-            }
-            bool needsDummyDefs = (!VarSetOps::IsEmpty(compiler, newLiveIn) && block != compiler->fgFirstBB);
-
-            // Create dummy def RefPositions
-
-            if (needsDummyDefs)
-            {
-                // If we are using locations from a predecessor, we should never require DummyDefs.
-                assert(!predBlockIsAllocated);
-
-                JITDUMP("Creating dummy definitions\n");
-                VarSetOps::Iter iter(compiler, newLiveIn);
-                unsigned        varIndex = 0;
-                while (iter.NextElem(&varIndex))
-                {
-                    unsigned   varNum = compiler->lvaTrackedToVarNum[varIndex];
-                    LclVarDsc* varDsc = compiler->lvaTable + varNum;
-                    // Add a dummyDef for any candidate vars that are in the "newLiveIn" set.
-                    // If this is the entry block, don't add any incoming parameters (they're handled with ParamDefs).
-                    if (isCandidateVar(varDsc) && (predBlock != nullptr || !varDsc->lvIsParam))
-                    {
-                        Interval*    interval = getIntervalForLocalVar(varIndex);
-                        RefPosition* pos      = newRefPosition(interval, currentLoc, RefTypeDummyDef, nullptr,
-                                                          allRegs(interval->registerType));
-                    }
-                }
-                JITDUMP("Finished creating dummy definitions\n\n");
-            }
-        }
-
-        // Add a dummy RefPosition to mark the block boundary.
-        // Note that we do this AFTER adding the exposed uses above, because the
-        // register positions for those exposed uses need to be recorded at
-        // this point.
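// [Editorial sketch] The DummyDef test above is a set difference over liveness
// bitsets. With std::bitset standing in for VARSET_TP (an assumed simplification):
#include <bitset>
#include <cstddef>

template <std::size_t N>
bool needsDummyDefs(const std::bitset<N>& liveIn, const std::bitset<N>& predLiveOut, bool isFirstBlock)
{
    // Variables live into this block that the chosen predecessor does not carry
    // live-out have no recorded register location, so they need a dummy definition.
    std::bitset<N> newLiveIn = liveIn & ~predLiveOut;
    return newLiveIn.any() && !isFirstBlock;
}
// (the removed code resumes below)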
-
-        RefPosition* pos = newRefPosition((Interval*)nullptr, currentLoc, RefTypeBB, nullptr, RBM_NONE);
-        currentLoc += 2;
-        JITDUMP("\n");
-
-        LIR::Range& blockRange = LIR::AsRange(block);
-        for (GenTree* node : blockRange.NonPhiNodes())
-        {
-            // We increment the number position of each tree node by 2 to simplify the logic when there's the case of
-            // a tree that implicitly does a dual-definition of temps (the long case). In this case it is easier to
-            // already have an idle spot to handle a dual-def instead of making some messy adjustments if we only
-            // increment the number position by one.
-            CLANG_FORMAT_COMMENT_ANCHOR;
-
-#ifdef DEBUG
-            node->gtSeqNum = currentLoc;
-            // In DEBUG, we want to set the gtRegTag to GT_REGTAG_REG, so that subsequent dumps will show the register
-            // value.
-            // Although this looks like a no-op it sets the tag.
-            node->gtRegNum = node->gtRegNum;
-#endif
-
-            buildRefPositionsForNode(node, block, listNodePool, currentLoc);
-
-#ifdef DEBUG
-            if (currentLoc > maxNodeLocation)
-            {
-                maxNodeLocation = currentLoc;
-            }
-#endif // DEBUG
-            currentLoc += 2;
-        }
-
-        // Note: the visited set is cleared in LinearScan::doLinearScan()
-        markBlockVisited(block);
-        assert(defList.IsEmpty());
-
-        if (enregisterLocalVars)
-        {
-            // Insert exposed uses for a lclVar that is live-out of 'block' but not live-in to the
-            // next block, or any unvisited successors.
-            // This will address lclVars that are live on a backedge, as well as those that are kept
-            // live at a GT_JMP.
-            //
-            // Blocks ending with "jmp method" are marked as BBJ_HAS_JMP,
-            // and jmp call is represented using GT_JMP node which is a leaf node.
-            // Liveness phase keeps all the arguments of the method live till the end of
-            // block by adding them to liveout set of the block containing GT_JMP.
-            //
-            // The target of a GT_JMP implicitly uses all the current method arguments, however
-            // there are no actual references to them. This can cause LSRA to assert, because
-            // the variables are live but it sees no references. In order to correctly model the
-            // liveness of these arguments, we add dummy exposed uses, in the same manner as for
-            // backward branches. This will happen automatically via expUseSet.
-            //
-            // Note that a block ending with GT_JMP has no successors and hence the variables
-            // for which dummy use ref positions are added are arguments of the method.
-
-            VARSET_TP expUseSet(VarSetOps::MakeCopy(compiler, block->bbLiveOut));
-            VarSetOps::IntersectionD(compiler, expUseSet, registerCandidateVars);
-            BasicBlock* nextBlock = getNextBlock();
-            if (nextBlock != nullptr)
-            {
-                VarSetOps::DiffD(compiler, expUseSet, nextBlock->bbLiveIn);
-            }
-            for (BasicBlock* succ : block->GetAllSuccs(compiler))
-            {
-                if (VarSetOps::IsEmpty(compiler, expUseSet))
-                {
-                    break;
-                }
-
-                if (isBlockVisited(succ))
-                {
-                    continue;
-                }
-                VarSetOps::DiffD(compiler, expUseSet, succ->bbLiveIn);
-            }
-
-            if (!VarSetOps::IsEmpty(compiler, expUseSet))
-            {
-                JITDUMP("Exposed uses:");
-                VarSetOps::Iter iter(compiler, expUseSet);
-                unsigned        varIndex = 0;
-                while (iter.NextElem(&varIndex))
-                {
-                    unsigned   varNum = compiler->lvaTrackedToVarNum[varIndex];
-                    LclVarDsc* varDsc = compiler->lvaTable + varNum;
-                    assert(isCandidateVar(varDsc));
-                    Interval*    interval = getIntervalForLocalVar(varIndex);
-                    RefPosition* pos =
-                        newRefPosition(interval, currentLoc, RefTypeExpUse, nullptr, allRegs(interval->registerType));
-                    JITDUMP(" V%02u", varNum);
-                }
-                JITDUMP("\n");
-            }
-
-            // Clear the "last use" flag on any vars that are live-out from this block.
-            {
-                VarSetOps::Iter iter(compiler, block->bbLiveOut);
-                unsigned        varIndex = 0;
-                while (iter.NextElem(&varIndex))
-                {
-                    unsigned         varNum = compiler->lvaTrackedToVarNum[varIndex];
-                    LclVarDsc* const varDsc = &compiler->lvaTable[varNum];
-                    if (isCandidateVar(varDsc))
-                    {
-                        RefPosition* const lastRP = getIntervalForLocalVar(varIndex)->lastRefPosition;
-                        if ((lastRP != nullptr) && (lastRP->bbNum == block->bbNum))
-                        {
-                            lastRP->lastUse = false;
-                        }
-                    }
-                }
-            }
-
-#ifdef DEBUG
-            checkLastUses(block);
-
-            if (VERBOSE)
-            {
-                printf("use: ");
-                dumpConvertedVarSet(compiler, block->bbVarUse);
-                printf("\ndef: ");
-                dumpConvertedVarSet(compiler, block->bbVarDef);
-                printf("\n");
-            }
-#endif // DEBUG
-        }
-
-        prevBlock = block;
-    }
-
-    if (enregisterLocalVars)
-    {
-        if (compiler->lvaKeepAliveAndReportThis())
-        {
-            // If we need to KeepAliveAndReportThis, add a dummy exposed use of it at the end
-            unsigned keepAliveVarNum = compiler->info.compThisArg;
-            assert(compiler->info.compIsStatic == false);
-            LclVarDsc* varDsc = compiler->lvaTable + keepAliveVarNum;
-            if (isCandidateVar(varDsc))
-            {
-                JITDUMP("Adding exposed use of this, for lvaKeepAliveAndReportThis\n");
-                Interval*    interval = getIntervalForLocalVar(varDsc->lvVarIndex);
-                RefPosition* pos =
-                    newRefPosition(interval, currentLoc, RefTypeExpUse, nullptr, allRegs(interval->registerType));
-            }
-        }
-
-#ifdef DEBUG
-        if (getLsraExtendLifeTimes())
-        {
-            LclVarDsc* varDsc;
-            for (lclNum = 0, varDsc = compiler->lvaTable; lclNum < compiler->lvaCount; lclNum++, varDsc++)
-            {
-                if (varDsc->lvLRACandidate)
-                {
-                    JITDUMP("Adding exposed use of V%02u for LsraExtendLifetimes\n", lclNum);
-                    Interval*    interval = getIntervalForLocalVar(varDsc->lvVarIndex);
-                    RefPosition* pos =
-                        newRefPosition(interval, currentLoc, RefTypeExpUse, nullptr, allRegs(interval->registerType));
-                }
-            }
-        }
-#endif // DEBUG
-    }
-
-    // If the last block has successors, create a RefTypeBB to record
-    // what's live
-
-    if (prevBlock->NumSucc(compiler) > 0)
-    {
-        RefPosition* pos = newRefPosition((Interval*)nullptr, currentLoc, RefTypeBB, nullptr, RBM_NONE);
-    }
-
-#ifdef DEBUG
-    // Make sure we don't have any blocks that were not visited
-    foreach_block(compiler, block)
-    {
-        assert(isBlockVisited(block));
-    }
-
-    if (VERBOSE)
-    {
-        lsraDumpIntervals("BEFORE VALIDATING INTERVALS");
-        dumpRefPositions("BEFORE VALIDATING INTERVALS");
        validateIntervals();
-    }
-#endif // DEBUG
-}
-
 #ifdef DEBUG
 void LinearScan::dumpVarRefPositions(const char* title)
 {
@@ -4838,46 +2405,6 @@ void LinearScan::dumpVarRefPositions(const char* title)
     }
 }
 
-void LinearScan::validateIntervals()
-{
-    if (enregisterLocalVars)
-    {
-        for (unsigned i = 0; i < compiler->lvaTrackedCount; i++)
-        {
-            if (!compiler->lvaTable[compiler->lvaTrackedToVarNum[i]].lvLRACandidate)
-            {
-                continue;
-            }
-            Interval* interval = getIntervalForLocalVar(i);
-
-            bool defined = false;
-            printf("-----------------\n");
-            for (RefPosition* ref = interval->firstRefPosition; ref != nullptr; ref = ref->nextRefPosition)
-            {
-                ref->dump();
-                RefType refType = ref->refType;
-                if (!defined && RefTypeIsUse(refType))
-                {
-                    if (compiler->info.compMethodName != nullptr)
-                    {
-                        printf("%s: ", compiler->info.compMethodName);
-                    }
-                    printf("LocalVar V%02u: undefined use at %u\n", interval->varNum, ref->nodeLocation);
-                }
-                // Note that there can be multiple last uses if they are on disjoint paths,
-                // so we can't really check the lastUse flag
-                if (ref->lastUse)
-                {
-                    defined = false;
-                }
-                if (RefTypeIsDef(refType))
-                {
-                    defined = true;
-                }
-            }
-        }
-    }
-}
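// [Editorial sketch] buildIntervals, removed above, numbers locations on a fixed
// grid: every node gets an even location, its uses are modeled there, and its kills
// and defs land on the following odd location (hence the 'currentLoc + 1' and
// 'currentLoc += 2' seen throughout). Collected as helper functions of assumed
// shape, for illustration only:
inline unsigned useLocation(unsigned nodeLoc)
{
    return nodeLoc; // uses happen "at" the node
}
inline unsigned defLocation(unsigned nodeLoc)
{
    return nodeLoc + 1; // kills and defs follow at the odd slot
}
inline unsigned nextNodeLocation(unsigned nodeLoc)
{
    return nodeLoc + 2; // the skipped slot leaves room for dual defs (the long case)
}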
 #endif // DEBUG
 
 // Set the default rpFrameType based upon codeGen->isFramePointerRequired()
@@ -10847,206 +8374,6 @@ void LinearScan::resolveEdge(BasicBlock* fromBlock,
     }
 }
 
-//------------------------------------------------------------------------
-// GetIndirInfo: Get the source registers for an indirection that might be contained.
-//
-// Arguments:
-//    node      - The node of interest
-//
-// Return Value:
-//    The number of source registers used by the *parent* of this node.
-//
-// Notes:
-//    Adds the defining node for each register to the useList.
-//
-int LinearScan::GetIndirInfo(GenTreeIndir* indirTree)
-{
-    GenTree* const addr = indirTree->gtOp1;
-    if (!addr->isContained())
-    {
-        appendLocationInfoToList(addr);
-        return 1;
-    }
-    if (!addr->OperIs(GT_LEA))
-    {
-        return 0;
-    }
-
-    GenTreeAddrMode* const addrMode = addr->AsAddrMode();
-
-    unsigned srcCount = 0;
-    if ((addrMode->Base() != nullptr) && !addrMode->Base()->isContained())
-    {
-        appendLocationInfoToList(addrMode->Base());
-        srcCount++;
-    }
-    if ((addrMode->Index() != nullptr) && !addrMode->Index()->isContained())
-    {
-        appendLocationInfoToList(addrMode->Index());
-        srcCount++;
-    }
-    return srcCount;
-}
-
-//------------------------------------------------------------------------
-// GetOperandInfo: Get the source registers for an operand that might be contained.
-//
-// Arguments:
-//    node      - The node of interest
-//    useList   - The list of uses for the node that we're currently processing
-//
-// Return Value:
-//    The number of source registers used by the *parent* of this node.
-//
-// Notes:
-//    Adds the defining node for each register to the given useList.
-//
-int LinearScan::GetOperandInfo(GenTree* node)
-{
-    if (!node->isContained())
-    {
-        appendLocationInfoToList(node);
-        return 1;
-    }
-
-#if !defined(_TARGET_64BIT_)
-    if (node->OperIs(GT_LONG))
-    {
-        return appendBinaryLocationInfoToList(node->AsOp());
-    }
-#endif // !defined(_TARGET_64BIT_)
-    if (node->OperIsIndir())
-    {
-        const unsigned srcCount = GetIndirInfo(node->AsIndir());
-        return srcCount;
-    }
-
-    return 0;
-}
-
-//------------------------------------------------------------------------
-// GetOperandInfo: Get the source registers for an operand that might be contained.
-//
-// Arguments:
-//    node      - The node of interest
-//    useList   - The list of uses for the node that we're currently processing
-//
-// Return Value:
-//    The number of source registers used by the *parent* of this node.
-//
-// Notes:
-//    Adds the defining node for each register to the useList.
-//
-int LinearScan::GetOperandInfo(GenTree* node, LocationInfoListNode** pFirstInfo)
-{
-    LocationInfoListNode* prevLast = useList.Last();
-    int                   srcCount = GetOperandInfo(node);
-    if (prevLast == nullptr)
-    {
-        *pFirstInfo = useList.Begin();
-    }
-    else
-    {
-        *pFirstInfo = prevLast->Next();
-    }
-    return srcCount;
-}
-
-void TreeNodeInfo::Initialize(LinearScan* lsra, GenTree* node)
-{
-    _dstCount           = 0;
-    _srcCount           = 0;
-    _internalIntCount   = 0;
-    _internalFloatCount = 0;
-
-    isLocalDefUse          = false;
-    isDelayFree            = false;
-    hasDelayFreeSrc        = false;
-    isTgtPref              = false;
-    isInternalRegDelayFree = false;
-
-    regMaskTP dstCandidates;
-
-    // if there is a reg indicated on the tree node, use that for dstCandidates
-    // the exception is the NOP, which sometimes shows up around late args.
-    // TODO-Cleanup: get rid of those NOPs.
-    if (node->gtRegNum == REG_STK)
-    {
-        dstCandidates = RBM_NONE;
-    }
-    else if (node->gtRegNum == REG_NA || node->gtOper == GT_NOP)
-    {
-#ifdef ARM_SOFTFP
-        if (node->OperGet() == GT_PUTARG_REG)
-        {
-            dstCandidates = lsra->allRegs(TYP_INT);
-        }
-        else
-#endif
-        {
-            dstCandidates = lsra->allRegs(node->TypeGet());
-        }
-    }
-    else
-    {
-        dstCandidates = genRegMask(node->gtRegNum);
-    }
-
-    setDstCandidates(lsra, dstCandidates);
-    srcCandsIndex = dstCandsIndex;
-
-    setInternalCandidates(lsra, lsra->allRegs(TYP_INT));
-
-#ifdef DEBUG
-    isInitialized = true;
-#endif
-
-    assert(IsValid(lsra));
-}
-
-regMaskTP TreeNodeInfo::getSrcCandidates(LinearScan* lsra)
-{
-    return lsra->GetRegMaskForIndex(srcCandsIndex);
-}
-
-void TreeNodeInfo::setSrcCandidates(LinearScan* lsra, regMaskTP mask)
-{
-    LinearScan::RegMaskIndex i = lsra->GetIndexForRegMask(mask);
-    assert(FitsIn<unsigned char>(i));
-    srcCandsIndex = (unsigned char)i;
-}
-
-regMaskTP TreeNodeInfo::getDstCandidates(LinearScan* lsra)
-{
-    return lsra->GetRegMaskForIndex(dstCandsIndex);
-}
-
-void TreeNodeInfo::setDstCandidates(LinearScan* lsra, regMaskTP mask)
-{
-    LinearScan::RegMaskIndex i = lsra->GetIndexForRegMask(mask);
-    assert(FitsIn<unsigned char>(i));
-    dstCandsIndex = (unsigned char)i;
-}
-
-regMaskTP TreeNodeInfo::getInternalCandidates(LinearScan* lsra)
-{
-    return lsra->GetRegMaskForIndex(internalCandsIndex);
-}
-
-void TreeNodeInfo::setInternalCandidates(LinearScan* lsra, regMaskTP mask)
-{
-    LinearScan::RegMaskIndex i = lsra->GetIndexForRegMask(mask);
-    assert(FitsIn<unsigned char>(i));
-    internalCandsIndex = (unsigned char)i;
-}
-
-void TreeNodeInfo::addInternalCandidates(LinearScan* lsra, regMaskTP mask)
-{
-    LinearScan::RegMaskIndex i = lsra->GetIndexForRegMask(lsra->GetRegMaskForIndex(internalCandsIndex) | mask);
-    assert(FitsIn<unsigned char>(i));
-    internalCandsIndex = (unsigned char)i;
-}
-
 #if TRACK_LSRA_STATS
 // ----------------------------------------------------------
 // updateLsraStat: Increment LSRA stat counter.
@@ -11616,18 +8943,7 @@ void LinearScan::lsraDispNode(GenTree* tree, LsraTupleDumpMode mode, bool hasDes
 }
 
 //------------------------------------------------------------------------
-// ComputeOperandDstCount: computes the number of registers defined by a
-// node.
-//
-// For most nodes, this is simple:
-// - Nodes that do not produce values (e.g. stores and other void-typed
-//   nodes) and nodes that immediately use the registers they define
-//   produce no registers
-// - Nodes that are marked as defining N registers define N registers.
-//
-// For contained nodes, however, things are more complicated: for purposes
-// of bookkeeping, a contained node is treated as producing the transitive
-// closure of the registers produced by its sources.
+// DumpOperandDefs: dumps the registers defined by a node.
 //
 // Arguments:
 //    operand - The operand for which to compute a register count.
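// [Editorial sketch] The TreeNodeInfo accessors above compress full register masks
// into single-byte indices via LinearScan::GetIndexForRegMask / GetRegMaskForIndex,
// which is why each setter asserts FitsIn<unsigned char>. A minimal interning table
// of that shape (an assumed simplification; the real table and its lookup strategy
// live in LinearScan):
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <vector>

class RegMaskTable
{
public:
    std::uint8_t indexFor(std::uint32_t mask)
    {
        for (std::size_t i = 0; i < masks_.size(); i++)
        {
            if (masks_[i] == mask)
            {
                return static_cast<std::uint8_t>(i); // mask already interned
            }
        }
        masks_.push_back(mask);
        assert(masks_.size() - 1 <= UINT8_MAX); // must fit the byte-sized index field
        return static_cast<std::uint8_t>(masks_.size() - 1);
    }

    std::uint32_t maskFor(std::uint8_t index) const
    {
        return masks_[index];
    }

private:
    std::vector<std::uint32_t> masks_; // masks_[i] is the mask for index i
};
// Storing one byte per candidate set instead of a full regMaskTP keeps TreeNodeInfo
// small; the trade-off is the table lookup on every get/set, which these accessors
// make explicit.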