diff options
Diffstat (limited to 'src/jit/lsra.cpp')
-rw-r--r-- | src/jit/lsra.cpp | 1051 |
1 files changed, 807 insertions, 244 deletions
diff --git a/src/jit/lsra.cpp b/src/jit/lsra.cpp index 317b976e42..accfd6ee78 100644 --- a/src/jit/lsra.cpp +++ b/src/jit/lsra.cpp @@ -355,6 +355,33 @@ RegRecord* LinearScan::getRegisterRecord(regNumber regNum) } #ifdef DEBUG + +//---------------------------------------------------------------------------- +// getConstrainedRegMask: Returns new regMask which is the intersection of +// regMaskActual and regMaskConstraint if the new regMask has at least +// minRegCount registers, otherwise returns regMaskActual. +// +// Arguments: +// regMaskActual - regMask that needs to be constrained +// regMaskConstraint - regMask constraint that needs to be +// applied to regMaskActual +// minRegCount - Minimum number of regs that should be +// be present in new regMask. +// +// Return Value: +// New regMask that has minRegCount registers after instersection. +// Otherwise returns regMaskActual. +regMaskTP LinearScan::getConstrainedRegMask(regMaskTP regMaskActual, regMaskTP regMaskConstraint, unsigned minRegCount) +{ + regMaskTP newMask = regMaskActual & regMaskConstraint; + if (genCountBits(newMask) >= minRegCount) + { + return newMask; + } + + return regMaskActual; +} + //------------------------------------------------------------------------ // stressLimitRegs: Given a set of registers, expressed as a register mask, reduce // them based on the current stress options. @@ -373,38 +400,46 @@ regMaskTP LinearScan::stressLimitRegs(RefPosition* refPosition, regMaskTP mask) { if (getStressLimitRegs() != LSRA_LIMIT_NONE) { + // The refPosition could be null, for example when called + // by getTempRegForResolution(). + int minRegCount = (refPosition != nullptr) ? refPosition->minRegCandidateCount : 1; + switch (getStressLimitRegs()) { case LSRA_LIMIT_CALLEE: - if (!compiler->opts.compDbgEnC && (mask & RBM_CALLEE_SAVED) != RBM_NONE) + if (!compiler->opts.compDbgEnC) { - mask &= RBM_CALLEE_SAVED; + mask = getConstrainedRegMask(mask, RBM_CALLEE_SAVED, minRegCount); } break; + case LSRA_LIMIT_CALLER: - if ((mask & RBM_CALLEE_TRASH) != RBM_NONE) - { - mask &= RBM_CALLEE_TRASH; - } - break; + { + mask = getConstrainedRegMask(mask, RBM_CALLEE_TRASH, minRegCount); + } + break; + case LSRA_LIMIT_SMALL_SET: if ((mask & LsraLimitSmallIntSet) != RBM_NONE) { - mask &= LsraLimitSmallIntSet; + mask = getConstrainedRegMask(mask, LsraLimitSmallIntSet, minRegCount); } else if ((mask & LsraLimitSmallFPSet) != RBM_NONE) { - mask &= LsraLimitSmallFPSet; + mask = getConstrainedRegMask(mask, LsraLimitSmallFPSet, minRegCount); } break; + default: unreached(); } + if (refPosition != nullptr && refPosition->isFixedRegRef) { mask |= refPosition->registerAssignment; } } + return mask; } #endif // DEBUG @@ -658,16 +693,13 @@ void LinearScan::applyCalleeSaveHeuristics(RefPosition* rp) #endif // _TARGET_AMD64_ Interval* theInterval = rp->getInterval(); + #ifdef DEBUG regMaskTP calleeSaveMask = calleeSaveRegs(getRegisterType(theInterval, rp)); if (doReverseCallerCallee()) { - regMaskTP newAssignment = rp->registerAssignment; - newAssignment &= calleeSaveMask; - if (newAssignment != RBM_NONE) - { - rp->registerAssignment = newAssignment; - } + rp->registerAssignment = + getConstrainedRegMask(rp->registerAssignment, calleeSaveMask, rp->minRegCandidateCount); } else #endif // DEBUG @@ -777,6 +809,9 @@ RefPosition* LinearScan::newRefPosition( // mask - Set of valid registers for this RefPosition // multiRegIdx - register position if this RefPosition corresponds to a // multi-reg call node. +// minRegCount - Minimum number registers that needs to be ensured while +// constraining candidates for this ref position under +// LSRA stress. This is a DEBUG only arg. // // Return Value: // a new RefPosition @@ -786,7 +821,8 @@ RefPosition* LinearScan::newRefPosition(Interval* theInterval, RefType theRefType, GenTree* theTreeNode, regMaskTP mask, - unsigned multiRegIdx /* = 0 */) + unsigned multiRegIdx /* = 0 */ + DEBUGARG(unsigned minRegCandidateCount /* = 1 */)) { #ifdef DEBUG if (theInterval != nullptr && regType(theInterval->registerType) == FloatRegisterType) @@ -843,6 +879,10 @@ RefPosition* LinearScan::newRefPosition(Interval* theInterval, newRP->setMultiRegIdx(multiRegIdx); newRP->setAllocateIfProfitable(0); +#ifdef DEBUG + newRP->minRegCandidateCount = minRegCandidateCount; +#endif // DEBUG + associateRefPosWithInterval(newRP); DBEXEC(VERBOSE, newRP->dump()); @@ -1071,12 +1111,14 @@ LinearScan::LinearScan(Compiler* theCompiler) #endif dumpTerse = (JitConfig.JitDumpTerseLsra() != 0); - #endif // DEBUG + availableIntRegs = (RBM_ALLINT & ~compiler->codeGen->regSet.rsMaskResvd); + #if ETW_EBP_FRAMED availableIntRegs &= ~RBM_FPBASE; #endif // ETW_EBP_FRAMED + availableFloatRegs = RBM_ALLFLOAT; availableDoubleRegs = RBM_ALLDOUBLE; @@ -1272,6 +1314,7 @@ void LinearScan::setBlockSequence() bool addedInternalBlocks = false; verifiedAllBBs = false; + hasCriticalEdges = false; BasicBlock* nextBlock; for (BasicBlock* block = compiler->fgFirstBB; block != nullptr; block = nextBlock) { @@ -1288,6 +1331,13 @@ void LinearScan::setBlockSequence() blockInfo[block->bbNum].hasCriticalOutEdge = false; blockInfo[block->bbNum].weight = block->bbWeight; +#if TRACK_LSRA_STATS + blockInfo[block->bbNum].spillCount = 0; + blockInfo[block->bbNum].copyRegCount = 0; + blockInfo[block->bbNum].resolutionMovCount = 0; + blockInfo[block->bbNum].splitEdgeCount = 0; +#endif // TRACK_LSRA_STATS + if (block->GetUniquePred(compiler) == nullptr) { for (flowList* pred = block->bbPreds; pred != nullptr; pred = pred->flNext) @@ -1296,6 +1346,7 @@ void LinearScan::setBlockSequence() if (predBlock->NumSucc(compiler) > 1) { blockInfo[block->bbNum].hasCriticalInEdge = true; + hasCriticalEdges = true; break; } else if (predBlock->bbJumpKind == BBJ_SWITCH) @@ -1321,6 +1372,7 @@ void LinearScan::setBlockSequence() if (checkForCriticalOutEdge && succ->GetUniquePred(compiler) == nullptr) { blockInfo[block->bbNum].hasCriticalOutEdge = true; + hasCriticalEdges = true; // We can stop checking now. checkForCriticalOutEdge = false; } @@ -1666,11 +1718,6 @@ void LinearScan::doLinearScan() compiler->codeGen->regSet.rsClearRegsModified(); - // Figure out if we're going to use an RSP frame or an RBP frame. We need to do this - // before building the intervals and ref positions, because those objects will embed - // RBP in various register masks (like preferences) if RBP is allowed to be allocated. - setFrameType(); - initMaxSpill(); buildIntervals(); DBEXEC(VERBOSE, TupleStyleDump(LSRA_DUMP_REFPOS)); @@ -1685,6 +1732,17 @@ void LinearScan::doLinearScan() resolveRegisters(); compiler->EndPhase(PHASE_LINEAR_SCAN_RESOLVE); +#if TRACK_LSRA_STATS + if ((JitConfig.DisplayLsraStats() != 0) +#ifdef DEBUG + || VERBOSE +#endif + ) + { + dumpLsraStats(jitstdout); + } +#endif // TRACK_LSRA_STATS + DBEXEC(VERBOSE, TupleStyleDump(LSRA_DUMP_POST)); compiler->compLSRADone = true; @@ -1892,6 +1950,8 @@ void LinearScan::identifyCandidates() // for vectors on Arm64, though the actual value may differ. VarSetOps::AssignNoCopy(compiler, fpCalleeSaveCandidateVars, VarSetOps::MakeEmpty(compiler)); + VarSetOps::AssignNoCopy(compiler, resolutionCandidateVars, VarSetOps::MakeEmpty(compiler)); + VarSetOps::AssignNoCopy(compiler, splitOrSpilledVars, VarSetOps::MakeEmpty(compiler)); VARSET_TP VARSET_INIT_NOCOPY(fpMaybeCandidateVars, VarSetOps::MakeEmpty(compiler)); unsigned int floatVarCount = 0; unsigned int thresholdFPRefCntWtd = 4 * BB_UNITY_WEIGHT; @@ -1902,6 +1962,37 @@ void LinearScan::identifyCandidates() unsigned int largeVectorVarCount = 0; unsigned int thresholdLargeVectorRefCntWtd = 4 * BB_UNITY_WEIGHT; #endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE +#if DOUBLE_ALIGN + unsigned refCntStk = 0; + unsigned refCntReg = 0; + unsigned refCntWtdReg = 0; + unsigned refCntStkParam = 0; // sum of ref counts for all stack based parameters + unsigned refCntWtdStkDbl = 0; // sum of wtd ref counts for stack based doubles + doDoubleAlign = false; + bool checkDoubleAlign = true; + if (compiler->codeGen->isFramePointerRequired() || compiler->opts.MinOpts()) + { + checkDoubleAlign = false; + } + else + { + switch (compiler->getCanDoubleAlign()) + { + case MUST_DOUBLE_ALIGN: + doDoubleAlign = true; + checkDoubleAlign = false; + break; + case CAN_DOUBLE_ALIGN: + break; + case CANT_DOUBLE_ALIGN: + doDoubleAlign = false; + checkDoubleAlign = false; + break; + default: + unreached(); + } + } +#endif // DOUBLE_ALIGN for (lclNum = 0, varDsc = compiler->lvaTable; lclNum < compiler->lvaCount; lclNum++, varDsc++) { @@ -1911,6 +2002,32 @@ void LinearScan::identifyCandidates() Interval* newInt = newInterval(intervalType); newInt->setLocalNumber(lclNum, this); + +#if DOUBLE_ALIGN + if (checkDoubleAlign) + { + if (varDsc->lvIsParam && !varDsc->lvIsRegArg) + { + refCntStkParam += varDsc->lvRefCnt; + } + else if (!isRegCandidate(varDsc) || varDsc->lvDoNotEnregister) + { + refCntStk += varDsc->lvRefCnt; + if ((varDsc->lvType == TYP_DOUBLE) || + ((varTypeIsStruct(varDsc) && varDsc->lvStructDoubleAlign && + (compiler->lvaGetPromotionType(varDsc) != Compiler::PROMOTION_TYPE_INDEPENDENT)))) + { + refCntWtdStkDbl += varDsc->lvRefCntWtd; + } + } + else + { + refCntReg += varDsc->lvRefCnt; + refCntWtdReg += varDsc->lvRefCntWtd; + } + } +#endif // DOUBLE_ALIGN + if (varDsc->lvIsStructField) { newInt->isStructField = true; @@ -2095,6 +2212,24 @@ void LinearScan::identifyCandidates() } } +#if DOUBLE_ALIGN + if (checkDoubleAlign) + { + // TODO-CQ: Fine-tune this: + // In the legacy reg predictor, this runs after allocation, and then demotes any lclVars + // allocated to the frame pointer, which is probably the wrong order. + // However, because it runs after allocation, it can determine the impact of demoting + // the lclVars allocated to the frame pointer. + // => Here, estimate of the EBP refCnt and weighted refCnt is a wild guess. + // + unsigned refCntEBP = refCntReg / 8; + unsigned refCntWtdEBP = refCntWtdReg / 8; + + doDoubleAlign = + compiler->shouldDoubleAlign(refCntStk, refCntEBP, refCntWtdEBP, refCntStkParam, refCntWtdStkDbl); + } +#endif // DOUBLE_ALIGN + // The factors we consider to determine which set of fp vars to use as candidates for callee save // registers current include the number of fp vars, whether there are loops, and whether there are // multiple exits. These have been selected somewhat empirically, but there is probably room for @@ -2510,6 +2645,9 @@ regMaskTP LinearScan::getKillSetForNode(GenTree* tree) break; case GT_MULHI: +#if defined(_TARGET_X86_) && !defined(LEGACY_BACKEND) + case GT_MUL_LONG: +#endif killMask = RBM_RAX | RBM_RDX; break; @@ -2644,7 +2782,7 @@ regMaskTP LinearScan::getKillSetForNode(GenTree* tree) } break; -#if defined(PROFILING_SUPPORTED) && defined(_TARGET_AMD64_) +#if defined(PROFILING_SUPPORTED) // If this method requires profiler ELT hook then mark these nodes as killing // callee trash registers (excluding RAX and XMM0). The reason for this is that // profiler callback would trash these registers. See vm\amd64\asmhelpers.asm for @@ -2660,10 +2798,9 @@ regMaskTP LinearScan::getKillSetForNode(GenTree* tree) if (compiler->compIsProfilerHookNeeded()) { killMask = compiler->compHelperCallKillSet(CORINFO_HELP_PROF_FCN_TAILCALL); - ; } break; -#endif // PROFILING_SUPPORTED && _TARGET_AMD64_ +#endif // PROFILING_SUPPORTED default: // for all other 'tree->OperGet()' kinds, leave 'killMask' = RBM_NONE @@ -2769,19 +2906,46 @@ bool LinearScan::buildKillPositionsForNode(GenTree* tree, LsraLocation currentLo return false; } +//---------------------------------------------------------------------------- +// defineNewInternalTemp: Defines a ref position for an internal temp. +// +// Arguments: +// tree - Gentree node requiring an internal register +// regType - Register type +// currentLoc - Location of the temp Def position +// regMask - register mask of candidates for temp +// minRegCandidateCount - Minimum registers to be ensured in candidate +// set under LSRA stress mode. This is a +// DEBUG only arg. RefPosition* LinearScan::defineNewInternalTemp(GenTree* tree, RegisterType regType, LsraLocation currentLoc, - regMaskTP regMask) + regMaskTP regMask DEBUGARG(unsigned minRegCandidateCount)) { Interval* current = newInterval(regType); current->isInternal = true; - return newRefPosition(current, currentLoc, RefTypeDef, tree, regMask); + return newRefPosition(current, currentLoc, RefTypeDef, tree, regMask, 0 DEBUG_ARG(minRegCandidateCount)); } +//------------------------------------------------------------------------ +// buildInternalRegisterDefsForNode - build Def positions for internal +// registers required for tree node. +// +// Arguments: +// tree - Gentree node that needs internal registers +// currentLoc - Location at which Def positions need to be defined +// temps - in-out array which is populated with ref positions +// created for Def of internal registers +// minRegCandidateCount - Minimum registers to be ensured in candidate +// set of ref positions under LSRA stress. This is +// a DEBUG only arg. +// +// Returns: +// The total number of Def positions created for internal registers of tree node. int LinearScan::buildInternalRegisterDefsForNode(GenTree* tree, LsraLocation currentLoc, - RefPosition* temps[]) // populates + RefPosition* temps[] // populates + DEBUGARG(unsigned minRegCandidateCount)) { int count; int internalIntCount = tree->gtLsraInfo.internalIntCount; @@ -2805,14 +2969,16 @@ int LinearScan::buildInternalRegisterDefsForNode(GenTree* tree, internalIntCands = genFindLowestBit(internalIntCands); internalCands &= ~internalIntCands; } - temps[count] = defineNewInternalTemp(tree, IntRegisterType, currentLoc, internalIntCands); + temps[count] = + defineNewInternalTemp(tree, IntRegisterType, currentLoc, internalIntCands DEBUG_ARG(minRegCandidateCount)); } int internalFloatCount = tree->gtLsraInfo.internalFloatCount; for (int i = 0; i < internalFloatCount; i++) { regMaskTP internalFPCands = (internalCands & internalFloatRegCandidates()); - temps[count++] = defineNewInternalTemp(tree, FloatRegisterType, currentLoc, internalFPCands); + temps[count++] = + defineNewInternalTemp(tree, FloatRegisterType, currentLoc, internalFPCands DEBUG_ARG(minRegCandidateCount)); } noway_assert(count < MaxInternalRegisters); @@ -2820,10 +2986,26 @@ int LinearScan::buildInternalRegisterDefsForNode(GenTree* tree, return count; } +//------------------------------------------------------------------------ +// buildInternalRegisterUsesForNode - adds Use positions for internal +// registers required for tree node. +// +// Arguments: +// tree - Gentree node that needs internal registers +// currentLoc - Location at which Use positions need to be defined +// defs - int array containing Def positions of internal +// registers. +// total - Total number of Def positions in 'defs' array. +// minRegCandidateCount - Minimum registers to be ensured in candidate +// set of ref positions under LSRA stress. This is +// a DEBUG only arg. +// +// Returns: +// Void. void LinearScan::buildInternalRegisterUsesForNode(GenTree* tree, LsraLocation currentLoc, RefPosition* defs[], - int total) + int total DEBUGARG(unsigned minRegCandidateCount)) { assert(total < MaxInternalRegisters); @@ -2840,8 +3022,14 @@ void LinearScan::buildInternalRegisterUsesForNode(GenTree* tree, } else { - RefPosition* newest = newRefPosition(defs[i]->getInterval(), currentLoc, RefTypeUse, tree, mask); - newest->lastUse = true; + RefPosition* newest = newRefPosition(defs[i]->getInterval(), currentLoc, RefTypeUse, tree, mask, + 0 DEBUG_ARG(minRegCandidateCount)); + newest->lastUse = true; + + if (tree->gtLsraInfo.isInternalRegDelayFree) + { + newest->delayRegFree = true; + } } } } @@ -3196,10 +3384,10 @@ static int ComputeOperandDstCount(GenTree* operand) // If an operand has no destination registers but does have source registers, it must be a store // or a compare. assert(operand->OperIsStore() || operand->OperIsBlkOp() || operand->OperIsPutArgStk() || - operand->OperIsCompare()); + operand->OperIsCompare() || operand->IsSIMDEqualityOrInequality()); return 0; } - else if (!operand->OperIsAggregate() && (operand->OperIsStore() || operand->TypeGet() == TYP_VOID)) + else if (!operand->OperIsFieldListHead() && (operand->OperIsStore() || operand->TypeGet() == TYP_VOID)) { // Stores and void-typed operands may be encountered when processing call nodes, which contain // pointers to argument setup stores. @@ -3207,7 +3395,7 @@ static int ComputeOperandDstCount(GenTree* operand) } else { - // If an aggregate or non-void-typed operand is not an unsued value and does not have source registers, + // If a field list or non-void-typed operand is not an unused value and does not have source registers, // that argument is contained within its parent and produces `sum(operand_dst_count)` registers. int dstCount = 0; for (GenTree* op : operand->Operands()) @@ -3254,16 +3442,14 @@ void LinearScan::buildRefPositionsForNode(GenTree* tree, assert(!isRegPairType(tree->TypeGet())); #endif // _TARGET_ARM_ - // The LIR traversal doesn't visit non-aggregate GT_LIST or GT_ARGPLACE nodes + // The LIR traversal doesn't visit GT_LIST or GT_ARGPLACE nodes. + // GT_CLS_VAR nodes should have been eliminated by rationalizer. assert(tree->OperGet() != GT_ARGPLACE); - assert((tree->OperGet() != GT_LIST) || tree->AsArgList()->IsAggregate()); + assert(tree->OperGet() != GT_LIST); + assert(tree->OperGet() != GT_CLS_VAR); - // These nodes are eliminated by the Rationalizer. - if (tree->OperGet() == GT_CLS_VAR) - { - JITDUMP("Unexpected node %s in LSRA.\n", GenTree::NodeName(tree->OperGet())); - assert(!"Unexpected node in LSRA."); - } + // The LIR traversal visits only the first node in a GT_FIELD_LIST. + assert((tree->OperGet() != GT_FIELD_LIST) || tree->AsFieldList()->IsFieldListHead()); // The set of internal temporary registers used by this node are stored in the // gtRsvdRegs register mask. Clear it out. @@ -3409,7 +3595,7 @@ void LinearScan::buildRefPositionsForNode(GenTree* tree, { // Get the location info for the register defined by the first operand. LocationInfoList operandDefs; - bool found = operandToLocationInfoMap.TryGetValue(*(tree->OperandsBegin()), &operandDefs); + bool found = operandToLocationInfoMap.TryGetValue(*(tree->OperandsBegin()), &operandDefs); assert(found); // Since we only expect to consume one register, we should only have a single register to @@ -3503,7 +3689,51 @@ void LinearScan::buildRefPositionsForNode(GenTree* tree, // (i.e. the target is read-modify-write), preference the dst to op1. bool hasDelayFreeSrc = tree->gtLsraInfo.hasDelayFreeSrc; - if (tree->OperGet() == GT_PUTARG_REG && isCandidateLocalRef(tree->gtGetOp1()) && + +#if defined(DEBUG) && defined(_TARGET_X86_) + // On x86, `LSRA_LIMIT_CALLER` is too restrictive to allow the use of special put args: this stress mode + // leaves only three registers allocatable--eax, ecx, and edx--of which the latter two are also used for the + // first two integral arguments to a call. This can leave us with too few registers to succesfully allocate in + // situations like the following: + // + // t1026 = lclVar ref V52 tmp35 u:3 REG NA <l:$3a1, c:$98d> + // + // /--* t1026 ref + // t1352 = * putarg_reg ref REG NA + // + // t342 = lclVar int V14 loc6 u:4 REG NA $50c + // + // t343 = const int 1 REG NA $41 + // + // /--* t342 int + // +--* t343 int + // t344 = * + int REG NA $495 + // + // t345 = lclVar int V04 arg4 u:2 REG NA $100 + // + // /--* t344 int + // +--* t345 int + // t346 = * % int REG NA $496 + // + // /--* t346 int + // t1353 = * putarg_reg int REG NA + // + // t1354 = lclVar ref V52 tmp35 (last use) REG NA + // + // /--* t1354 ref + // t1355 = * lea(b+0) byref REG NA + // + // Here, the first `putarg_reg` would normally be considered a special put arg, which would remove `ecx` from the + // set of allocatable registers, leaving only `eax` and `edx`. The allocator will then fail to allocate a register + // for the def of `t345` if arg4 is not a register candidate: the corresponding ref position will be constrained to + // { `ecx`, `ebx`, `esi`, `edi` }, which `LSRA_LIMIT_CALLER` will further constrain to `ecx`, which will not be + // available due to the special put arg. + const bool supportsSpecialPutArg = getStressLimitRegs() != LSRA_LIMIT_CALLER; +#else + const bool supportsSpecialPutArg = true; +#endif + + if (supportsSpecialPutArg && tree->OperGet() == GT_PUTARG_REG && isCandidateLocalRef(tree->gtGetOp1()) && (tree->gtGetOp1()->gtFlags & GTF_VAR_DEATH) == 0) { // This is the case for a "pass-through" copy of a lclVar. In the case where it is a non-last-use, @@ -3525,9 +3755,17 @@ void LinearScan::buildRefPositionsForNode(GenTree* tree, RefPosition* internalRefs[MaxInternalRegisters]; +#ifdef DEBUG + // Number of registers required for tree node is the sum of + // consume + produce + internalCount. This is the minimum + // set of registers that needs to be ensured in candidate + // set of ref positions created. + unsigned minRegCount = consume + produce + info.internalIntCount + info.internalFloatCount; +#endif // DEBUG + // make intervals for all the 'internal' register requirements for this node // where internal means additional registers required temporarily - int internalCount = buildInternalRegisterDefsForNode(tree, currentLoc, internalRefs); + int internalCount = buildInternalRegisterDefsForNode(tree, currentLoc, internalRefs DEBUG_ARG(minRegCount)); // pop all ref'd tree temps GenTreeOperandIterator iterator = tree->OperandsBegin(); @@ -3632,6 +3870,37 @@ void LinearScan::buildRefPositionsForNode(GenTree* tree, candidates = fixedAssignment; } +#ifdef DEBUG + // If delayRegFree, then Use will interfere with the destination of + // the consuming node. Therefore, we also need add the kill set of + // consuming node to minRegCount. + // + // For example consider the following IR on x86, where v01 and v02 + // are method args coming in ecx and edx respectively. + // GT_DIV(v01, v02) + // + // For GT_DIV minRegCount will be 3 without adding kill set + // of GT_DIV node. + // + // Assume further JitStressRegs=2, which would constrain + // candidates to callee trashable regs { eax, ecx, edx } on + // use positions of v01 and v02. LSRA allocates ecx for v01. + // Use position of v02 cannot be allocated a regs since it + // is marked delay-reg free and {eax,edx} are getting killed + // before the def of GT_DIV. For this reason, minRegCount + // for Use position of v02 also needs to take into account + // of kill set of its consuming node. + unsigned minRegCountForUsePos = minRegCount; + if (delayRegFree) + { + regMaskTP killMask = getKillSetForNode(tree); + if (killMask != RBM_NONE) + { + minRegCountForUsePos += genCountBits(killMask); + } + } +#endif // DEBUG + RefPosition* pos; if ((candidates & allRegs(i->registerType)) == 0) { @@ -3645,13 +3914,16 @@ void LinearScan::buildRefPositionsForNode(GenTree* tree, regNumber physicalReg = genRegNumFromMask(fixedAssignment); RefPosition* pos = newRefPosition(physicalReg, currentLoc, RefTypeFixedReg, nullptr, fixedAssignment); } - pos = newRefPosition(i, currentLoc, RefTypeUse, useNode, allRegs(i->registerType), multiRegIdx); + pos = newRefPosition(i, currentLoc, RefTypeUse, useNode, allRegs(i->registerType), + multiRegIdx DEBUG_ARG(minRegCountForUsePos)); pos->registerAssignment = candidates; } else { - pos = newRefPosition(i, currentLoc, RefTypeUse, useNode, candidates, multiRegIdx); + pos = newRefPosition(i, currentLoc, RefTypeUse, useNode, candidates, + multiRegIdx DEBUG_ARG(minRegCountForUsePos)); } + if (delayRegFree) { hasDelayFreeSrc = true; @@ -3675,7 +3947,7 @@ void LinearScan::buildRefPositionsForNode(GenTree* tree, listNodePool.ReturnNodes(operandDefs); } - buildInternalRegisterUsesForNode(tree, currentLoc, internalRefs, internalCount); + buildInternalRegisterUsesForNode(tree, currentLoc, internalRefs, internalCount DEBUG_ARG(minRegCount)); RegisterType registerType = getDefType(tree); regMaskTP candidates = getDefCandidates(tree); @@ -3708,7 +3980,7 @@ void LinearScan::buildRefPositionsForNode(GenTree* tree, { // Build RefPositions for saving any live large vectors. // This must be done after the kills, so that we know which large vectors are still live. - VarSetOps::AssignNoCopy(compiler, liveLargeVectors, buildUpperVectorSaveRefPositions(tree, currentLoc)); + VarSetOps::AssignNoCopy(compiler, liveLargeVectors, buildUpperVectorSaveRefPositions(tree, currentLoc + 1)); } #endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE @@ -3779,7 +4051,8 @@ void LinearScan::buildRefPositionsForNode(GenTree* tree, locationInfoList.Append(listNodePool.GetNode(defLocation, interval, tree, (unsigned)i)); } - RefPosition* pos = newRefPosition(interval, defLocation, defRefType, defNode, currCandidates, (unsigned)i); + RefPosition* pos = newRefPosition(interval, defLocation, defRefType, defNode, currCandidates, + (unsigned)i DEBUG_ARG(minRegCount)); if (info.isLocalDefUse) { pos->isLocalDefUse = true; @@ -3791,11 +4064,12 @@ void LinearScan::buildRefPositionsForNode(GenTree* tree, } #if FEATURE_PARTIAL_SIMD_CALLEE_SAVE - buildUpperVectorRestoreRefPositions(tree, currentLoc, liveLargeVectors); + // SaveDef position must be at the same location as Def position of call node. + buildUpperVectorRestoreRefPositions(tree, defLocation, liveLargeVectors); #endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE - bool isContainedNode = - !noAdd && consume == 0 && produce == 0 && (tree->OperIsAggregate() || (tree->TypeGet() != TYP_VOID && !tree->OperIsStore())); + bool isContainedNode = !noAdd && consume == 0 && produce == 0 && + (tree->OperIsFieldListHead() || ((tree->TypeGet() != TYP_VOID) && !tree->OperIsStore())); if (isContainedNode) { // Contained nodes map to the concatenated lists of their operands. @@ -3852,6 +4126,22 @@ BasicBlock* getNonEmptyBlock(BasicBlock* block) return block; } +//------------------------------------------------------------------------ +// insertZeroInitRefPositions: Handle lclVars that are live-in to the first block +// +// Notes: +// For each lclVar that is live-in to the first block: +// - If it is a GC ref, or if compInitMem is set, a ZeroInit RefPosition will be created. +// - Otherwise, it will be marked as spilled, since it will not be assigned a register +// on entry and will be loaded from memory on the undefined path. +// Note that, when the compInitMem option is not set, we may encounter these on +// paths that are protected by the same condition as an earlier def. However, since +// we don't do the analysis to determine this - and couldn't rely on always identifying +// such cases even if we tried - we must conservatively treat the undefined path as +// being possible. This is a relatively rare case, so the introduced conservatism is +// not expected to warrant the analysis required to determine the best placement of +// an initialization. +// void LinearScan::insertZeroInitRefPositions() { // insert defs for this, then a block boundary @@ -3861,15 +4151,23 @@ void LinearScan::insertZeroInitRefPositions() { unsigned varNum = compiler->lvaTrackedToVarNum[varIndex]; LclVarDsc* varDsc = compiler->lvaTable + varNum; - if (!varDsc->lvIsParam && isCandidateVar(varDsc) && - (compiler->info.compInitMem || varTypeIsGC(varDsc->TypeGet()))) + if (!varDsc->lvIsParam && isCandidateVar(varDsc)) { - GenTree* firstNode = getNonEmptyBlock(compiler->fgFirstBB)->firstNode(); - JITDUMP("V%02u was live in\n", varNum); - Interval* interval = getIntervalForLocalVar(varNum); - RefPosition* pos = - newRefPosition(interval, MinLocation, RefTypeZeroInit, firstNode, allRegs(interval->registerType)); - varDsc->lvMustInit = true; + JITDUMP("V%02u was live in to first block:", varNum); + Interval* interval = getIntervalForLocalVar(varNum); + if (compiler->info.compInitMem || varTypeIsGC(varDsc->TypeGet())) + { + JITDUMP(" creating ZeroInit\n"); + GenTree* firstNode = getNonEmptyBlock(compiler->fgFirstBB)->firstNode(); + RefPosition* pos = + newRefPosition(interval, MinLocation, RefTypeZeroInit, firstNode, allRegs(interval->registerType)); + varDsc->lvMustInit = true; + } + else + { + setIntervalAsSpilled(interval); + JITDUMP(" marking as spilled\n"); + } } } } @@ -4131,8 +4429,20 @@ void LinearScan::buildIntervals() } #endif // DEBUG +#if DOUBLE_ALIGN + // We will determine whether we should double align the frame during + // identifyCandidates(), but we initially assume that we will not. + doDoubleAlign = false; +#endif + identifyCandidates(); + // Figure out if we're going to use a frame pointer. We need to do this before building + // the ref positions, because those objects will embed the frame register in various register masks + // if the frame pointer is not reserved. If we decide to have a frame pointer, setFrameType() will + // remove the frame pointer from the masks. + setFrameType(); + DBEXEC(VERBOSE, TupleStyleDump(LSRA_DUMP_PRE)); // second part: @@ -4263,6 +4573,9 @@ void LinearScan::buildIntervals() insertZeroInitRefPositions(); } + // Any lclVars live-in to a block are resolution candidates. + VarSetOps::UnionD(compiler, resolutionCandidateVars, block->bbLiveIn); + // Determine if we need any DummyDefs. // We need DummyDefs for cases where "predBlock" isn't really a predecessor. // Note that it's possible to have uses of unitialized variables, in which case even the first @@ -4274,8 +4587,8 @@ void LinearScan::buildIntervals() VARSET_TP VARSET_INIT(compiler, newLiveIn, block->bbLiveIn); if (predBlock) { - JITDUMP("\n\nSetting incoming variable registers of BB%02u to outVarToRegMap of BB%02u\n", block->bbNum, - predBlock->bbNum); + JITDUMP("\n\nSetting BB%02u as the predecessor for determining incoming variable registers of BB%02u\n", + block->bbNum, predBlock->bbNum); assert(predBlock->bbNum <= bbNumMaxBeforeResolution); blockInfo[block->bbNum].predBBNum = predBlock->bbNum; // Compute set difference: newLiveIn = block->bbLiveIn - predBlock->bbLiveOut @@ -4534,7 +4847,16 @@ void LinearScan::validateIntervals() void LinearScan::setFrameType() { FrameType frameType = FT_NOT_SET; - if (compiler->codeGen->isFramePointerRequired()) +#if DOUBLE_ALIGN + compiler->codeGen->setDoubleAlign(false); + if (doDoubleAlign) + { + frameType = FT_DOUBLE_ALIGN_FRAME; + compiler->codeGen->setDoubleAlign(true); + } + else +#endif // DOUBLE_ALIGN + if (compiler->codeGen->isFramePointerRequired()) { frameType = FT_EBP_FRAME; } @@ -4563,22 +4885,6 @@ void LinearScan::setFrameType() } } -#if DOUBLE_ALIGN - // The DOUBLE_ALIGN feature indicates whether the JIT will attempt to double-align the - // frame if needed. Note that this feature isn't on for amd64, because the stack is - // always double-aligned by default. - compiler->codeGen->setDoubleAlign(false); - - // TODO-CQ: Tune this (see regalloc.cpp, in which raCntWtdStkDblStackFP is used to - // determine whether to double-align). Note, though that there is at least one test - // (jit\opt\Perf\DoubleAlign\Locals.exe) that depends on double-alignment being set - // in certain situations. - if (!compiler->opts.MinOpts() && !compiler->codeGen->isFramePointerRequired() && compiler->compFloatingPointUsed) - { - frameType = FT_DOUBLE_ALIGN_FRAME; - } -#endif // DOUBLE_ALIGN - switch (frameType) { case FT_ESP_FRAME: @@ -4593,7 +4899,6 @@ void LinearScan::setFrameType() case FT_DOUBLE_ALIGN_FRAME: noway_assert(!compiler->codeGen->isFramePointerRequired()); compiler->codeGen->setFramePointerUsed(false); - compiler->codeGen->setDoubleAlign(true); break; #endif // DOUBLE_ALIGN default: @@ -4625,11 +4930,11 @@ void LinearScan::setFrameType() compiler->rpFrameType = frameType; } -// Is the copyReg given by this RefPosition still busy at the +// Is the copyReg/moveReg given by this RefPosition still busy at the // given location? -bool copyRegInUse(RefPosition* ref, LsraLocation loc) +bool copyOrMoveRegInUse(RefPosition* ref, LsraLocation loc) { - assert(ref->copyReg); + assert(ref->copyReg || ref->moveReg); if (ref->getRefEndLocation() >= loc) { return true; @@ -4689,14 +4994,15 @@ bool LinearScan::registerIsAvailable(RegRecord* physRegRecord, return false; } - // Is this a copyReg? It is if the register assignment doesn't match. - // (the recentReference may not be a copyReg, because we could have seen another - // reference since the copyReg) + // Is this a copyReg/moveReg? It is if the register assignment doesn't match. + // (the recentReference may not be a copyReg/moveReg, because we could have seen another + // reference since the copyReg/moveReg) if (!assignedInterval->isAssignedTo(physRegRecord->regNum)) { // Don't reassign it if it's still in use - if (recentReference->copyReg && copyRegInUse(recentReference, currentLoc)) + if ((recentReference->copyReg || recentReference->moveReg) && + copyOrMoveRegInUse(recentReference, currentLoc)) { return false; } @@ -5393,8 +5699,17 @@ regNumber LinearScan::allocateBusyReg(Interval* current, RefPosition* refPositio // to remain live until the use, we should set the candidates to allRegs(regType) // to avoid a spill - codegen can then insert the copy. assert(candidates == candidateBit); - physRegNextLocation = MaxLocation; - farthestRefPosWeight = BB_MAX_WEIGHT; + + // If a refPosition has a fixed reg as its candidate and is also marked + // as allocateIfProfitable, we should allocate fixed reg only if the + // weight of this ref position is greater than the weight of the ref + // position to which fixed reg is assigned. Such a case would arise + // on x86 under LSRA stress. + if (!allocateIfProfitable) + { + physRegNextLocation = MaxLocation; + farthestRefPosWeight = BB_MAX_WEIGHT; + } } else { @@ -5487,13 +5802,14 @@ regNumber LinearScan::allocateBusyReg(Interval* current, RefPosition* refPositio } } - LsraLocation nextLocation = assignedInterval->getNextRefLocation(); + RefPosition* nextRefPosition = assignedInterval->getNextRefPosition(); + LsraLocation nextLocation = assignedInterval->getNextRefLocation(); // We should never spill a register that's occupied by an Interval with its next use at the current location. // Normally this won't occur (unless we actually had more uses in a single node than there are registers), // because we'll always find something with a later nextLocation, but it can happen in stress when // we have LSRA_SELECT_NEAREST. - if ((nextLocation == refLocation) && !refPosition->isFixedRegRef) + if ((nextLocation == refLocation) && !refPosition->isFixedRegRef && nextRefPosition->RequiresRegister()) { continue; } @@ -5578,7 +5894,17 @@ regNumber LinearScan::allocateBusyReg(Interval* current, RefPosition* refPositio else { // Must have found a spill candidate. - assert((farthestRefPhysRegRecord != nullptr) && (farthestLocation > refLocation || refPosition->isFixedRegRef)); + assert(farthestRefPhysRegRecord != nullptr); + if ((farthestLocation == refLocation) && !refPosition->isFixedRegRef) + { + Interval* assignedInterval = farthestRefPhysRegRecord->assignedInterval; + RefPosition* nextRefPosition = assignedInterval->getNextRefPosition(); + assert(!nextRefPosition->RequiresRegister()); + } + else + { + assert(farthestLocation > refLocation || refPosition->isFixedRegRef); + } } #endif @@ -5699,6 +6025,70 @@ void LinearScan::assignPhysReg(RegRecord* regRec, Interval* interval) } //------------------------------------------------------------------------ +// setIntervalAsSplit: Set this Interval as being split +// +// Arguments: +// interval - The Interval which is being split +// +// Return Value: +// None. +// +// Notes: +// The given Interval will be marked as split, and it will be added to the +// set of splitOrSpilledVars. +// +// Assumptions: +// "interval" must be a lclVar interval, as tree temps are never split. +// This is asserted in the call to getVarIndex(). +// +void LinearScan::setIntervalAsSplit(Interval* interval) +{ + if (interval->isLocalVar) + { + unsigned varIndex = interval->getVarIndex(compiler); + if (!interval->isSplit) + { + VarSetOps::AddElemD(compiler, splitOrSpilledVars, varIndex); + } + else + { + assert(VarSetOps::IsMember(compiler, splitOrSpilledVars, varIndex)); + } + } + interval->isSplit = true; +} + +//------------------------------------------------------------------------ +// setIntervalAsSpilled: Set this Interval as being spilled +// +// Arguments: +// interval - The Interval which is being spilled +// +// Return Value: +// None. +// +// Notes: +// The given Interval will be marked as spilled, and it will be added +// to the set of splitOrSpilledVars. +// +void LinearScan::setIntervalAsSpilled(Interval* interval) +{ + if (interval->isLocalVar) + { + unsigned varIndex = interval->getVarIndex(compiler); + if (!interval->isSpilled) + { + VarSetOps::AddElemD(compiler, splitOrSpilledVars, varIndex); + } + else + { + assert(VarSetOps::IsMember(compiler, splitOrSpilledVars, varIndex)); + } + } + interval->isSpilled = true; +} + +//------------------------------------------------------------------------ // spill: Spill this Interval between "fromRefPosition" and "toRefPosition" // // Arguments: @@ -5739,8 +6129,10 @@ void LinearScan::spillInterval(Interval* interval, RefPosition* fromRefPosition, } #endif // DEBUG - interval->isActive = false; - interval->isSpilled = true; + INTRACK_STATS(updateLsraStat(LSRA_STAT_SPILL, fromRefPosition->bbNum)); + + interval->isActive = false; + setIntervalAsSpilled(interval); // If fromRefPosition occurs before the beginning of this block, mark this as living in the stack // on entry to this block. @@ -5923,7 +6315,7 @@ void LinearScan::unassignPhysReg(RegRecord* regRec, RefPosition* spillRefPositio setInVarRegForBB(curBBNum, assignedInterval->varNum, REG_STK); if (spillRefPosition->nextRefPosition != nullptr) { - assignedInterval->isSpilled = true; + setIntervalAsSpilled(assignedInterval); } } else @@ -5945,7 +6337,8 @@ void LinearScan::unassignPhysReg(RegRecord* regRec, RefPosition* spillRefPositio { assignedInterval->assignedReg = regRec; } - else if (regRec->previousInterval != nullptr && regRec->previousInterval->assignedReg == regRec && + else if (regRec->previousInterval != nullptr && regRec->previousInterval != assignedInterval && + regRec->previousInterval->assignedReg == regRec && regRec->previousInterval->getNextRefPosition() != nullptr) { regRec->assignedInterval = regRec->previousInterval; @@ -6128,7 +6521,14 @@ void LinearScan::processBlockStartLocations(BasicBlock* currentBlock, bool alloc if (allocationPass) { targetReg = predVarToRegMap[varIndex]; - INDEBUG(targetReg = rotateBlockStartLocation(interval, targetReg, (~liveRegs | inactiveRegs))); +#ifdef DEBUG + regNumber newTargetReg = rotateBlockStartLocation(interval, targetReg, (~liveRegs | inactiveRegs)); + if (newTargetReg != targetReg) + { + targetReg = newTargetReg; + setIntervalAsSplit(interval); + } +#endif // DEBUG inVarToRegMap[varIndex] = targetReg; } else // !allocationPass (i.e. resolution/write-back pass) @@ -6686,6 +7086,7 @@ void LinearScan::allocateRegisters() INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_NO_ENTRY_REG_ALLOCATED, currentInterval)); didDump = true; allocate = false; + setIntervalAsSpilled(currentInterval); } // If it has no actual references, mark it as "lastUse"; since they're not actually part // of any flow they won't have been marked during dataflow. Otherwise, if we allocate a @@ -6912,6 +7313,7 @@ void LinearScan::allocateRegisters() } currentRefPosition->moveReg = true; assignedRegister = REG_NA; + setIntervalAsSplit(currentInterval); INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_MOVE_REG, currentInterval, assignedRegister)); } else if ((genRegMask(assignedRegister) & currentRefPosition->registerAssignment) != 0) @@ -6936,65 +7338,47 @@ void LinearScan::allocateRegisters() } else { - // This must be a localVar or a single-reg fixed use or a tree temp with conflicting def & use. - - assert(currentInterval && (currentInterval->isLocalVar || currentRefPosition->isFixedRegRef || - currentInterval->hasConflictingDefUse)); + assert(currentInterval != nullptr); // It's already in a register, but not one we need. - // If it is a fixed use that is not marked "delayRegFree", there is already a FixedReg to ensure that - // the needed reg is not otherwise in use, so we can simply ignore it and codegen will do the copy. - // The reason we need special handling for the "delayRegFree" case is that we need to mark the - // fixed-reg as in-use and delayed (the FixedReg RefPosition doesn't handle the delay requirement). - // Otherwise, if this is a pure use localVar or tree temp, we assign a copyReg, but must free both regs - // if it is a last use. - if (!currentRefPosition->isFixedRegRef || currentRefPosition->delayRegFree) - { - if (!RefTypeIsDef(currentRefPosition->refType)) + if (!RefTypeIsDef(currentRefPosition->refType)) + { + regNumber copyReg = assignCopyReg(currentRefPosition); + assert(copyReg != REG_NA); + INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_COPY_REG, currentInterval, copyReg)); + lastAllocatedRefPosition = currentRefPosition; + if (currentRefPosition->lastUse) { - regNumber copyReg = assignCopyReg(currentRefPosition); - assert(copyReg != REG_NA); - INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_COPY_REG, currentInterval, copyReg)); - lastAllocatedRefPosition = currentRefPosition; - if (currentRefPosition->lastUse) + if (currentRefPosition->delayRegFree) { - if (currentRefPosition->delayRegFree) - { - INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_LAST_USE_DELAYED, currentInterval, - assignedRegister)); - delayRegsToFree |= - (genRegMask(assignedRegister) | currentRefPosition->registerAssignment); - } - else - { - INDEBUG( - dumpLsraAllocationEvent(LSRA_EVENT_LAST_USE, currentInterval, assignedRegister)); - regsToFree |= (genRegMask(assignedRegister) | currentRefPosition->registerAssignment); - } + INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_LAST_USE_DELAYED, currentInterval, + assignedRegister)); + delayRegsToFree |= (genRegMask(assignedRegister) | currentRefPosition->registerAssignment); } - // If this is a tree temp (non-localVar) interval, we will need an explicit move. - if (!currentInterval->isLocalVar) + else { - currentRefPosition->moveReg = true; - currentRefPosition->copyReg = false; + INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_LAST_USE, currentInterval, assignedRegister)); + regsToFree |= (genRegMask(assignedRegister) | currentRefPosition->registerAssignment); } - continue; } - else + // If this is a tree temp (non-localVar) interval, we will need an explicit move. + if (!currentInterval->isLocalVar) { - INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_NEEDS_NEW_REG, nullptr, assignedRegister)); - regsToFree |= genRegMask(assignedRegister); - // We want a new register, but we don't want this to be considered a spill. - assignedRegister = REG_NA; - if (physRegRecord->assignedInterval == currentInterval) - { - unassignPhysRegNoSpill(physRegRecord); - } + currentRefPosition->moveReg = true; + currentRefPosition->copyReg = false; } + continue; } else { - INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_KEPT_ALLOCATION, nullptr, assignedRegister)); + INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_NEEDS_NEW_REG, nullptr, assignedRegister)); + regsToFree |= genRegMask(assignedRegister); + // We want a new register, but we don't want this to be considered a spill. + assignedRegister = REG_NA; + if (physRegRecord->assignedInterval == currentInterval) + { + unassignPhysRegNoSpill(physRegRecord); + } } } } @@ -7031,23 +7415,39 @@ void LinearScan::allocateRegisters() // then find a register to spill if (assignedRegister == REG_NA) { -#ifdef FEATURE_SIMD +#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE if (refType == RefTypeUpperVectorSaveDef) { // TODO-CQ: Determine whether copying to two integer callee-save registers would be profitable. - currentRefPosition->registerAssignment = (allRegs(TYP_FLOAT) & RBM_FLT_CALLEE_TRASH); - assignedRegister = tryAllocateFreeReg(currentInterval, currentRefPosition); + + // SaveDef position occurs after the Use of args and at the same location as Kill/Def + // positions of a call node. But SaveDef position cannot use any of the arg regs as + // they are needed for call node. + currentRefPosition->registerAssignment = + (allRegs(TYP_FLOAT) & RBM_FLT_CALLEE_TRASH & ~RBM_FLTARG_REGS); + assignedRegister = tryAllocateFreeReg(currentInterval, currentRefPosition); + // There MUST be caller-save registers available, because they have all just been killed. + // Amd64 Windows: xmm4-xmm5 are guaranteed to be available as xmm0-xmm3 are used for passing args. + // Amd64 Unix: xmm8-xmm15 are guaranteed to be avilable as xmm0-xmm7 are used for passing args. + // X86 RyuJIT Windows: xmm4-xmm7 are guanrateed to be available. assert(assignedRegister != REG_NA); + // Now, spill it. - // (These will look a bit backward in the dump, but it's a pain to dump the alloc before the spill). + // Note: + // i) The reason we have to spill is that SaveDef position is allocated after the Kill positions + // of the call node are processed. Since callee-trash registers are killed by call node + // we explicity spill and unassign the register. + // ii) These will look a bit backward in the dump, but it's a pain to dump the alloc before the + // spill). unassignPhysReg(getRegisterRecord(assignedRegister), currentRefPosition); INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_ALLOC_REG, currentInterval, assignedRegister)); + // Now set assignedRegister to REG_NA again so that we don't re-activate it. assignedRegister = REG_NA; } else -#endif // FEATURE_SIMD +#endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE if (currentRefPosition->RequiresRegister() || currentRefPosition->AllocateIfProfitable()) { if (allocateReg) @@ -7069,6 +7469,7 @@ void LinearScan::allocateRegisters() currentRefPosition->registerAssignment = RBM_NONE; currentRefPosition->reload = false; + setIntervalAsSpilled(currentInterval); INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_NO_REG_ALLOCATED, currentInterval)); } @@ -7078,6 +7479,7 @@ void LinearScan::allocateRegisters() INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_NO_REG_ALLOCATED, currentInterval)); currentRefPosition->registerAssignment = RBM_NONE; currentInterval->isActive = false; + setIntervalAsSpilled(currentInterval); } } #ifdef DEBUG @@ -7224,7 +7626,7 @@ void LinearScan::allocateRegisters() // - interval->physReg is set to the assigned register // (i.e. at the code location which is currently being handled by resolveRegisters()) // - interval->isActive is true iff the interval is live and occupying a register -// - interval->isSpilled is set to true if the interval is EVER spilled +// - interval->isSpilled should have already been set to true if the interval is EVER spilled // - interval->isSplit is set to true if the interval does not occupy the same // register throughout the method // - RegRecord->assignedInterval points to the interval which currently occupies @@ -7264,9 +7666,9 @@ void LinearScan::resolveLocalRef(BasicBlock* block, GenTreePtr treeNode, RefPosi if (currentRefPosition->registerAssignment == RBM_NONE) { assert(!currentRefPosition->RequiresRegister()); + assert(interval->isSpilled); - interval->isSpilled = true; - varDsc->lvRegNum = REG_STK; + varDsc->lvRegNum = REG_STK; if (interval->assignedReg != nullptr && interval->assignedReg->assignedInterval == interval) { interval->assignedReg->assignedInterval = nullptr; @@ -7314,8 +7716,10 @@ void LinearScan::resolveLocalRef(BasicBlock* block, GenTreePtr treeNode, RefPosi // In the reload case we simply do not set GTF_REG_VAL, and it gets // referenced from the variable's home location. // This is also true for a pure def which is spilled. - if (reload && currentRefPosition->refType != RefTypeDef) + if (reload) { + assert(currentRefPosition->refType != RefTypeDef); + assert(interval->isSpilled); varDsc->lvRegNum = REG_STK; if (!spillAfter) { @@ -7353,31 +7757,15 @@ void LinearScan::resolveLocalRef(BasicBlock* block, GenTreePtr treeNode, RefPosi { assert(currentRefPosition->refType == RefTypeExpUse); } - - // If we have an undefined use set it as non-reg - if (!interval->isSpilled) - { - if (varDsc->lvIsParam && !varDsc->lvIsRegArg && currentRefPosition == interval->firstRefPosition) - { - // Parameters are the only thing that can be used before defined - } - else - { - // if we see a use before def of something else, the zero init flag better not be set. - noway_assert(!compiler->info.compInitMem); - // if it is not set, then the behavior is undefined but we don't want to crash or assert - interval->isSpilled = true; - } - } } else if (spillAfter && !RefTypeIsUse(currentRefPosition->refType)) { // In the case of a pure def, don't bother spilling - just assign it to the // stack. However, we need to remember that it was spilled. - interval->isSpilled = true; - varDsc->lvRegNum = REG_STK; - interval->physReg = REG_NA; + assert(interval->isSpilled); + varDsc->lvRegNum = REG_STK; + interval->physReg = REG_NA; if (treeNode != nullptr) { treeNode->gtRegNum = REG_NA; @@ -7409,6 +7797,7 @@ void LinearScan::resolveLocalRef(BasicBlock* block, GenTreePtr treeNode, RefPosi } else { + assert(interval->isSplit); interval->physReg = assignedReg; } @@ -7426,13 +7815,11 @@ void LinearScan::resolveLocalRef(BasicBlock* block, GenTreePtr treeNode, RefPosi { if (varDsc->lvRegNum != REG_STK) { - // If the register assignments don't match, then this interval is spilt, - // but not spilled (yet) - // However, we don't have a single register assignment now + // If the register assignments don't match, then this interval is split. if (varDsc->lvRegNum != assignedReg) { - interval->isSplit = TRUE; - varDsc->lvRegNum = REG_STK; + setIntervalAsSplit(interval); + varDsc->lvRegNum = REG_STK; } } else @@ -7447,9 +7834,9 @@ void LinearScan::resolveLocalRef(BasicBlock* block, GenTreePtr treeNode, RefPosi { treeNode->gtFlags |= GTF_SPILL; } - interval->isSpilled = true; - interval->physReg = REG_NA; - varDsc->lvRegNum = REG_STK; + assert(interval->isSpilled); + interval->physReg = REG_NA; + varDsc->lvRegNum = REG_STK; } // This value is in a register, UNLESS we already saw this treeNode @@ -7489,6 +7876,7 @@ void LinearScan::writeRegisters(RefPosition* currentRefPosition, GenTree* tree) // than the one it was spilled from (GT_RELOAD). // // Arguments: +// block - basic block in which GT_COPY/GT_RELOAD is inserted. // tree - This is the node to copy or reload. // Insert copy or reload node between this node and its parent. // multiRegIdx - register position of tree node for which copy or reload is needed. @@ -7557,6 +7945,10 @@ void LinearScan::insertCopyOrReload(BasicBlock* block, GenTreePtr tree, unsigned else { oper = GT_COPY; + +#if TRACK_LSRA_STATS + updateLsraStat(LSRA_STAT_COPY_REG, block->bbNum); +#endif } // If the parent is a reload/copy node, then tree must be a multi-reg call node @@ -8100,7 +8492,7 @@ void LinearScan::resolveRegisters() { JITDUMP(" internal"); GenTreePtr indNode = nullptr; - if (treeNode->OperIsIndir()) + if (treeNode->OperGet() == GT_IND) { indNode = treeNode; JITDUMP(" allocated at GT_IND"); @@ -8223,6 +8615,11 @@ void LinearScan::resolveRegisters() printf("RESOLVING BB BOUNDARIES\n"); printf("-----------------------\n"); + printf("Resolution Candidates: "); + dumpConvertedVarSet(compiler, resolutionCandidateVars); + printf("\n"); + printf("Has %sCritical Edges\n\n", hasCriticalEdges ? "" : "No"); + printf("Prior to Resolution\n"); foreach_block(compiler, block) { @@ -8282,23 +8679,10 @@ void LinearScan::resolveRegisters() varDsc->lvArgInitReg = initialReg; JITDUMP(" Set V%02u argument initial register to %s\n", lclNum, getRegName(initialReg)); } - if (!varDsc->lvIsRegArg) - { - // stack arg - if (compiler->lvaIsFieldOfDependentlyPromotedStruct(varDsc)) - { - if (sourceReg != initialReg) - { - // The code generator won't initialize struct - // fields, so we have to do that if it's not already - // where it belongs. - assert(interval->isStructField); - JITDUMP(" Move struct field param V%02u from %s to %s\n", lclNum, getRegName(sourceReg), - getRegName(initialReg)); - insertMove(insertionBlock, insertionPoint, lclNum, sourceReg, initialReg); - } - } - } + + // Stack args that are part of dependently-promoted structs should never be register candidates (see + // LinearScan::isRegCandidate). + assert(varDsc->lvIsRegArg || !compiler->lvaIsFieldOfDependentlyPromotedStruct(varDsc)); } // If lvRegNum is REG_STK, that means that either no register @@ -8347,8 +8731,8 @@ void LinearScan::resolveRegisters() } if (firstRefPosition->registerAssignment == RBM_NONE || firstRefPosition->spillAfter) { - // Either this RefPosition is spilled, or it is not a "real" def or use - assert(firstRefPosition->spillAfter || + // Either this RefPosition is spilled, or regOptional or it is not a "real" def or use + assert(firstRefPosition->spillAfter || firstRefPosition->AllocateIfProfitable() || (firstRefPosition->refType != RefTypeDef && firstRefPosition->refType != RefTypeUse)); varDsc->lvRegNum = REG_STK; } @@ -8432,6 +8816,8 @@ void LinearScan::insertMove( BasicBlock* block, GenTreePtr insertionPoint, unsigned lclNum, regNumber fromReg, regNumber toReg) { LclVarDsc* varDsc = compiler->lvaTable + lclNum; + // the lclVar must be a register candidate + assert(isRegCandidate(varDsc)); // One or both MUST be a register assert(fromReg != REG_STK || toReg != REG_STK); // They must not be the same register. @@ -8440,20 +8826,22 @@ void LinearScan::insertMove( // This var can't be marked lvRegister now varDsc->lvRegNum = REG_STK; - var_types lclTyp = varDsc->TypeGet(); - if (varDsc->lvNormalizeOnStore()) - { - lclTyp = genActualType(lclTyp); - } - GenTreePtr src = compiler->gtNewLclvNode(lclNum, lclTyp); + GenTreePtr src = compiler->gtNewLclvNode(lclNum, varDsc->TypeGet()); src->gtLsraInfo.isLsraAdded = true; - GenTreePtr top; - // If we are moving from STK to reg, mark the lclVar nodes with GTF_SPILLED - // Otherwise, if we are moving from reg to stack, mark it as GTF_SPILL - // Finally, for a reg-to-reg move, generate a GT_COPY + // There are three cases we need to handle: + // - We are loading a lclVar from the stack. + // - We are storing a lclVar to the stack. + // - We are copying a lclVar between registers. + // + // In the first and second cases, the lclVar node will be marked with GTF_SPILLED and GTF_SPILL, respectively. + // It is up to the code generator to ensure that any necessary normalization is done when loading or storing the + // lclVar's value. + // + // In the third case, we generate GT_COPY(GT_LCL_VAR) and type each node with the normalized type of the lclVar. + // This is safe because a lclVar is always normalized once it is in a register. - top = src; + GenTree* dst = src; if (fromReg == REG_STK) { src->gtFlags |= GTF_SPILLED; @@ -8467,21 +8855,22 @@ void LinearScan::insertMove( } else { - top = new (compiler, GT_COPY) GenTreeCopyOrReload(GT_COPY, varDsc->TypeGet(), src); + var_types movType = genActualType(varDsc->TypeGet()); + src->gtType = movType; + + dst = new (compiler, GT_COPY) GenTreeCopyOrReload(GT_COPY, movType, src); // This is the new home of the lclVar - indicate that by clearing the GTF_VAR_DEATH flag. // Note that if src is itself a lastUse, this will have no effect. - top->gtFlags &= ~(GTF_VAR_DEATH); + dst->gtFlags &= ~(GTF_VAR_DEATH); src->gtRegNum = fromReg; src->SetInReg(); - top->gtRegNum = toReg; - src->gtNext = top; - top->gtPrev = src; + dst->gtRegNum = toReg; src->gtLsraInfo.isLocalDefUse = false; - top->gtLsraInfo.isLsraAdded = true; + dst->gtLsraInfo.isLsraAdded = true; } - top->gtLsraInfo.isLocalDefUse = true; + dst->gtLsraInfo.isLocalDefUse = true; - LIR::Range treeRange = LIR::SeqTree(compiler, top); + LIR::Range treeRange = LIR::SeqTree(compiler, dst); LIR::Range& blockRange = LIR::AsRange(block); if (insertionPoint != nullptr) @@ -8497,7 +8886,7 @@ void LinearScan::insertMove( noway_assert(!blockRange.IsEmpty()); GenTree* branch = blockRange.LastNode(); - assert(branch->OperGet() == GT_JTRUE || branch->OperGet() == GT_SWITCH_TABLE || + assert(branch->OperIsConditionalJump() || branch->OperGet() == GT_SWITCH_TABLE || branch->OperGet() == GT_SWITCH); blockRange.InsertBefore(branch, std::move(treeRange)); @@ -8568,7 +8957,7 @@ void LinearScan::insertSwap( noway_assert(!blockRange.IsEmpty()); GenTree* branch = blockRange.LastNode(); - assert(branch->OperGet() == GT_JTRUE || branch->OperGet() == GT_SWITCH_TABLE || + assert(branch->OperIsConditionalJump() || branch->OperGet() == GT_SWITCH_TABLE || branch->OperGet() == GT_SWITCH); blockRange.InsertBefore(branch, std::move(swapRange)); @@ -8682,12 +9071,15 @@ void LinearScan::addResolution( insertMove(block, insertionPoint, interval->varNum, fromReg, toReg); if (fromReg == REG_STK || toReg == REG_STK) { - interval->isSpilled = true; + assert(interval->isSpilled); } else { - interval->isSplit = true; + // We should have already marked this as spilled or split. + assert((interval->isSpilled) || (interval->isSplit)); } + + INTRACK_STATS(updateLsraStat(LSRA_STAT_RESOLUTION_MOV, block->bbNum)); } //------------------------------------------------------------------------ @@ -8706,6 +9098,12 @@ void LinearScan::addResolution( void LinearScan::handleOutgoingCriticalEdges(BasicBlock* block) { + VARSET_TP VARSET_INIT_NOCOPY(outResolutionSet, + VarSetOps::Intersection(compiler, block->bbLiveOut, resolutionCandidateVars)); + if (VarSetOps::IsEmpty(compiler, outResolutionSet)) + { + return; + } VARSET_TP VARSET_INIT_NOCOPY(sameResolutionSet, VarSetOps::MakeEmpty(compiler)); VARSET_TP VARSET_INIT_NOCOPY(sameLivePathsSet, VarSetOps::MakeEmpty(compiler)); VARSET_TP VARSET_INIT_NOCOPY(singleTargetSet, VarSetOps::MakeEmpty(compiler)); @@ -8720,6 +9118,8 @@ void LinearScan::handleOutgoingCriticalEdges(BasicBlock* block) // First, determine the live regs at the end of this block so that we know what regs are // available to copy into. + // Note that for this purpose we use the full live-out set, because we must ensure that + // even the registers that remain the same across the edge are preserved correctly. regMaskTP liveOutRegs = RBM_NONE; VARSET_ITER_INIT(compiler, iter1, block->bbLiveOut, varIndex1); while (iter1.NextElem(compiler, &varIndex1)) @@ -8755,7 +9155,7 @@ void LinearScan::handleOutgoingCriticalEdges(BasicBlock* block) regMaskTP sameWriteRegs = RBM_NONE; regMaskTP diffReadRegs = RBM_NONE; - // For each var, classify them as: + // For each var that may require resolution, classify them as: // - in the same register at the end of this block and at each target (no resolution needed) // - in different registers at different targets (resolve separately): // diffResolutionSet @@ -8764,7 +9164,7 @@ void LinearScan::handleOutgoingCriticalEdges(BasicBlock* block) // write to any registers that are read by those in the diffResolutionSet: // sameResolutionSet - VARSET_ITER_INIT(compiler, iter, block->bbLiveOut, varIndex); + VARSET_ITER_INIT(compiler, iter, outResolutionSet, varIndex); while (iter.NextElem(compiler, &varIndex)) { unsigned varNum = compiler->lvaTrackedToVarNum[varIndex]; @@ -8936,6 +9336,16 @@ void LinearScan::resolveEdges() { JITDUMP("RESOLVING EDGES\n"); + // The resolutionCandidateVars set was initialized with all the lclVars that are live-in to + // any block. We now intersect that set with any lclVars that ever spilled or split. + // If there are no candidates for resoultion, simply return. + + VarSetOps::IntersectionD(compiler, resolutionCandidateVars, splitOrSpilledVars); + if (VarSetOps::IsEmpty(compiler, resolutionCandidateVars)) + { + return; + } + BasicBlock *block, *prevBlock = nullptr; // Handle all the critical edges first. @@ -8944,18 +9354,21 @@ void LinearScan::resolveEdges() // remaining mismatches. We visit the out-edges, as that allows us to share the moves that are // common among allt he targets. - foreach_block(compiler, block) + if (hasCriticalEdges) { - if (block->bbNum > bbNumMaxBeforeResolution) - { - // This is a new block added during resolution - we don't need to visit these now. - continue; - } - if (blockInfo[block->bbNum].hasCriticalOutEdge) + foreach_block(compiler, block) { - handleOutgoingCriticalEdges(block); + if (block->bbNum > bbNumMaxBeforeResolution) + { + // This is a new block added during resolution - we don't need to visit these now. + continue; + } + if (blockInfo[block->bbNum].hasCriticalOutEdge) + { + handleOutgoingCriticalEdges(block); + } + prevBlock = block; } - prevBlock = block; } prevBlock = nullptr; @@ -8975,7 +9388,9 @@ void LinearScan::resolveEdges() // we may need resolution at the beginning of this block. // This may be true even if it's the block we used for starting locations, // if a variable was spilled. - if (!VarSetOps::IsEmpty(compiler, block->bbLiveIn)) + VARSET_TP VARSET_INIT_NOCOPY(inResolutionSet, + VarSetOps::Intersection(compiler, block->bbLiveIn, resolutionCandidateVars)); + if (!VarSetOps::IsEmpty(compiler, inResolutionSet)) { if (uniquePredBlock != nullptr) { @@ -8988,7 +9403,7 @@ void LinearScan::resolveEdges() uniquePredBlock = uniquePredBlock->GetUniquePred(compiler); noway_assert(uniquePredBlock != nullptr); } - resolveEdge(uniquePredBlock, block, ResolveSplit, block->bbLiveIn); + resolveEdge(uniquePredBlock, block, ResolveSplit, inResolutionSet); } } @@ -9003,7 +9418,12 @@ void LinearScan::resolveEdges() BasicBlock* succBlock = block->GetSucc(0, compiler); if (succBlock->GetUniquePred(compiler) == nullptr) { - resolveEdge(block, succBlock, ResolveJoin, succBlock->bbLiveIn); + VARSET_TP VARSET_INIT_NOCOPY(outResolutionSet, VarSetOps::Intersection(compiler, succBlock->bbLiveIn, + resolutionCandidateVars)); + if (!VarSetOps::IsEmpty(compiler, outResolutionSet)) + { + resolveEdge(block, succBlock, ResolveJoin, outResolutionSet); + } } } } @@ -9161,6 +9581,9 @@ void LinearScan::resolveEdge(BasicBlock* fromBlock, // in resolveEdges(), after all the edge resolution has been done (by calling this // method for each edge). block = compiler->fgSplitEdge(fromBlock, toBlock); + + // Split edges are counted against fromBlock. + INTRACK_STATS(updateLsraStat(LSRA_STAT_SPLIT_EDGE, fromBlock->bbNum)); break; default: unreached(); @@ -9347,11 +9770,13 @@ void LinearScan::resolveEdge(BasicBlock* fromBlock, { useSwap = true; } -#else // !_TARGET_XARCH_ +#else // !_TARGET_XARCH_ + else { tempReg = tempRegInt; } + #endif // !_TARGET_XARCH_ if (useSwap || tempReg == REG_NA) { @@ -9396,6 +9821,8 @@ void LinearScan::resolveEdge(BasicBlock* fromBlock, sourceIntervals[sourceReg]->varNum, fromReg); location[sourceReg] = REG_NA; location[source[otherTargetReg]] = (regNumberSmall)fromReg; + + INTRACK_STATS(updateLsraStat(LSRA_STAT_RESOLUTION_MOV, block->bbNum)); } else { @@ -9406,6 +9833,7 @@ void LinearScan::resolveEdge(BasicBlock* fromBlock, // First, spill "otherInterval" from targetReg to the stack. Interval* otherInterval = sourceIntervals[source[otherTargetReg]]; + setIntervalAsSpilled(otherInterval); addResolution(block, insertionPoint, otherInterval, REG_STK, targetReg); JITDUMP(" (%s)\n", resolveTypeName[resolveType]); location[source[otherTargetReg]] = REG_STK; @@ -9527,6 +9955,126 @@ void TreeNodeInfo::addInternalCandidates(LinearScan* lsra, regMaskTP mask) internalCandsIndex = (unsigned char)i; } +#if TRACK_LSRA_STATS +// ---------------------------------------------------------- +// updateLsraStat: Increment LSRA stat counter. +// +// Arguments: +// stat - LSRA stat enum +// bbNum - Basic block to which LSRA stat needs to be +// associated with. +// +void LinearScan::updateLsraStat(LsraStat stat, unsigned bbNum) +{ + if (bbNum > bbNumMaxBeforeResolution) + { + // This is a newly created basic block as part of resolution. + // These blocks contain resolution moves that are already accounted. + return; + } + + switch (stat) + { + case LSRA_STAT_SPILL: + ++(blockInfo[bbNum].spillCount); + break; + + case LSRA_STAT_COPY_REG: + ++(blockInfo[bbNum].copyRegCount); + break; + + case LSRA_STAT_RESOLUTION_MOV: + ++(blockInfo[bbNum].resolutionMovCount); + break; + + case LSRA_STAT_SPLIT_EDGE: + ++(blockInfo[bbNum].splitEdgeCount); + break; + + default: + break; + } +} + +// ----------------------------------------------------------- +// dumpLsraStats - dumps Lsra stats to given file. +// +// Arguments: +// file - file to which stats are to be written. +// +void LinearScan::dumpLsraStats(FILE* file) +{ + unsigned sumSpillCount = 0; + unsigned sumCopyRegCount = 0; + unsigned sumResolutionMovCount = 0; + unsigned sumSplitEdgeCount = 0; + UINT64 wtdSpillCount = 0; + UINT64 wtdCopyRegCount = 0; + UINT64 wtdResolutionMovCount = 0; + + fprintf(file, "----------\n"); + fprintf(file, "LSRA Stats"); +#ifdef DEBUG + if (!VERBOSE) + { + fprintf(file, " : %s\n", compiler->info.compFullName); + } + else + { + // In verbose mode no need to print full name + // while printing lsra stats. + fprintf(file, "\n"); + } +#else + fprintf(file, " : %s\n", compiler->eeGetMethodFullName(compiler->info.compCompHnd)); +#endif + + fprintf(file, "----------\n"); + + for (BasicBlock* block = compiler->fgFirstBB; block != nullptr; block = block->bbNext) + { + if (block->bbNum > bbNumMaxBeforeResolution) + { + continue; + } + + unsigned spillCount = blockInfo[block->bbNum].spillCount; + unsigned copyRegCount = blockInfo[block->bbNum].copyRegCount; + unsigned resolutionMovCount = blockInfo[block->bbNum].resolutionMovCount; + unsigned splitEdgeCount = blockInfo[block->bbNum].splitEdgeCount; + + if (spillCount != 0 || copyRegCount != 0 || resolutionMovCount != 0 || splitEdgeCount != 0) + { + fprintf(file, "BB%02u [%8d]: ", block->bbNum, block->bbWeight); + fprintf(file, "SpillCount = %d, ResolutionMovs = %d, SplitEdges = %d, CopyReg = %d\n", spillCount, + resolutionMovCount, splitEdgeCount, copyRegCount); + } + + sumSpillCount += spillCount; + sumCopyRegCount += copyRegCount; + sumResolutionMovCount += resolutionMovCount; + sumSplitEdgeCount += splitEdgeCount; + + wtdSpillCount += (UINT64)spillCount * block->bbWeight; + wtdCopyRegCount += (UINT64)copyRegCount * block->bbWeight; + wtdResolutionMovCount += (UINT64)resolutionMovCount * block->bbWeight; + } + + fprintf(file, "Total Spill Count: %d Weighted: %I64u\n", sumSpillCount, wtdSpillCount); + fprintf(file, "Total CopyReg Count: %d Weighted: %I64u\n", sumCopyRegCount, wtdCopyRegCount); + fprintf(file, "Total ResolutionMov Count: %d Weighted: %I64u\n", sumResolutionMovCount, wtdResolutionMovCount); + fprintf(file, "Total number of split edges: %d\n", sumSplitEdgeCount); + + // compute total number of spill temps created + unsigned numSpillTemps = 0; + for (int i = 0; i < TYP_COUNT; i++) + { + numSpillTemps += maxSpill[i]; + } + fprintf(file, "Total Number of spill temps created: %d\n\n", numSpillTemps); +} +#endif // TRACK_LSRA_STATS + #ifdef DEBUG void dumpRegMask(regMaskTP regs) { @@ -9645,6 +10193,11 @@ void RefPosition::dump() { printf(" outOfOrder"); } + + if (this->AllocateIfProfitable()) + { + printf(" regOptional"); + } printf(">\n"); } @@ -11329,9 +11882,18 @@ void LinearScan::verifyFinalAllocation() { if (VERBOSE) { + // If refPos is marked as copyReg, then the reg that is spilled + // is the homeReg of the interval not the reg currently assigned + // to refPos. + regNumber spillReg = regNum; + if (currentRefPosition->copyReg) + { + assert(interval != nullptr); + spillReg = interval->physReg; + } dumpRegRecords(); dumpEmptyRefPosition(); - printf("Spill %-4s ", getRegName(regNum)); + printf("Spill %-4s ", getRegName(spillReg)); } } else if (currentRefPosition->copyReg) @@ -11392,15 +11954,14 @@ void LinearScan::verifyFinalAllocation() interval->physReg = REG_NA; interval->assignedReg = nullptr; - // regRegcord could be null if RefPosition is to be allocated a - // reg only if profitable. + // regRegcord could be null if the RefPosition does not require a register. if (regRecord != nullptr) { regRecord->assignedInterval = nullptr; } else { - assert(currentRefPosition->AllocateIfProfitable()); + assert(!currentRefPosition->RequiresRegister()); } } } @@ -11506,6 +12067,8 @@ void LinearScan::verifyResolutionMove(GenTree* resolutionMove, LsraLocation curr assert(leftInterval->physReg == leftRegNum && rightInterval->physReg == rightRegNum); leftInterval->physReg = rightRegNum; rightInterval->physReg = leftRegNum; + leftInterval->assignedReg = &physRegs[rightRegNum]; + rightInterval->assignedReg = &physRegs[leftRegNum]; physRegs[rightRegNum].assignedInterval = leftInterval; physRegs[leftRegNum].assignedInterval = rightInterval; if (VERBOSE) |