diff options
author | Carol Eidt <carol.eidt@microsoft.com> | 2019-04-01 13:33:12 -0700 |
---|---|---|
committer | GitHub <noreply@github.com> | 2019-04-01 13:33:12 -0700 |
commit | d686be5513cd09426704eb576858411d1e736d0d (patch) | |
tree | 6040ba17d4035d46bb696a82169204dd8dc3ac01 /src/jit | |
parent | 0ec4acb43bd0344e680b3c608517389267a198a2 (diff) | |
download | coreclr-d686be5513cd09426704eb576858411d1e736d0d.tar.gz coreclr-d686be5513cd09426704eb576858411d1e736d0d.tar.bz2 coreclr-d686be5513cd09426704eb576858411d1e736d0d.zip |
LSRA cleanup (#23617)
* LSRA cleanup
These are zero-diff changes. Some cleanup, some in preparation for improvements to save/restore of upper vectors.
Diffstat (limited to 'src/jit')
-rw-r--r-- | src/jit/lsra.cpp | 160 | ||||
-rw-r--r-- | src/jit/lsra.h | 26 | ||||
-rw-r--r-- | src/jit/lsrabuild.cpp | 9 |
3 files changed, 120 insertions, 75 deletions
diff --git a/src/jit/lsra.cpp b/src/jit/lsra.cpp index e46ac751ea..29de16f7cc 100644 --- a/src/jit/lsra.cpp +++ b/src/jit/lsra.cpp @@ -608,6 +608,7 @@ LinearScanInterface* getLinearScanAllocator(Compiler* comp) LinearScan::LinearScan(Compiler* theCompiler) : compiler(theCompiler) , intervals(theCompiler->getAllocator(CMK_LSRA_Interval)) + , allocationPassComplete(false) , refPositions(theCompiler->getAllocator(CMK_LSRA_RefPosition)) , listNodePool(theCompiler) { @@ -1191,6 +1192,7 @@ void LinearScan::doLinearScan() clearVisitedBlocks(); initVarRegMaps(); allocateRegisters(); + allocationPassComplete = true; compiler->EndPhase(PHASE_LINEAR_SCAN_ALLOC); resolveRegisters(); compiler->EndPhase(PHASE_LINEAR_SCAN_RESOLVE); @@ -3604,16 +3606,16 @@ regNumber LinearScan::allocateBusyReg(Interval* current, RefPosition* refPositio // codegen considers them as contained memory operands. CLANG_FORMAT_COMMENT_ANCHOR; #ifdef _TARGET_ARM_ - // TODO-CQ-ARM: Just conservatively "and" two condition. We may implement better condision later. + // TODO-CQ-ARM: Just conservatively "and" two conditions. We may implement a better condition later. 
isBetterLocation = true; if (recentAssignedRef != nullptr) - isBetterLocation &= (recentAssignedRef->reload && recentAssignedRef->AllocateIfProfitable()); + isBetterLocation &= (recentAssignedRef->reload && recentAssignedRef->RegOptional()); if (recentAssignedRef2 != nullptr) - isBetterLocation &= (recentAssignedRef2->reload && recentAssignedRef2->AllocateIfProfitable()); + isBetterLocation &= (recentAssignedRef2->reload && recentAssignedRef2->RegOptional()); #else - isBetterLocation = (recentAssignedRef != nullptr) && recentAssignedRef->reload && - recentAssignedRef->AllocateIfProfitable(); + isBetterLocation = + (recentAssignedRef != nullptr) && recentAssignedRef->reload && recentAssignedRef->RegOptional(); #endif } else @@ -4378,7 +4380,7 @@ void LinearScan::processBlockEndAllocation(BasicBlock* currentBlock) BasicBlock* nextBlock = getNextBlock(); if (nextBlock != nullptr) { - processBlockStartLocations(nextBlock, true); + processBlockStartLocations(nextBlock); } } @@ -4622,11 +4624,11 @@ void LinearScan::unassignIntervalBlockStart(RegRecord* regRecord, VarToRegMap in // determine the lclVar locations for the inVarToRegMap. // During the resolution (write-back) pass, we only modify the inVarToRegMap in cases where // a lclVar was spilled after the block had been completed. -void LinearScan::processBlockStartLocations(BasicBlock* currentBlock, bool allocationPass) +void LinearScan::processBlockStartLocations(BasicBlock* currentBlock) { // If we have no register candidates we should only call this method during allocation. 
- assert(enregisterLocalVars || allocationPass); + assert(enregisterLocalVars || !allocationPassComplete); if (!enregisterLocalVars) { @@ -4642,7 +4644,6 @@ void LinearScan::processBlockStartLocations(BasicBlock* currentBlock, bool alloc physRegRecord->assignedInterval = nullptr; } } - INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_START_BB, nullptr, REG_NA, currentBlock)); return; } @@ -4677,7 +4678,7 @@ void LinearScan::processBlockStartLocations(BasicBlock* currentBlock, bool alloc RefPosition* nextRefPosition = interval->getNextRefPosition(); assert(nextRefPosition != nullptr); - if (allocationPass) + if (!allocationPassComplete) { targetReg = getVarReg(predVarToRegMap, varIndex); #ifdef DEBUG @@ -4690,7 +4691,7 @@ void LinearScan::processBlockStartLocations(BasicBlock* currentBlock, bool alloc #endif // DEBUG setVarReg(inVarToRegMap, varIndex, targetReg); } - else // !allocationPass (i.e. resolution/write-back pass) + else // allocationPassComplete (i.e. resolution/write-back pass) { targetReg = getVarReg(inVarToRegMap, varIndex); // There are four cases that we need to consider during the resolution pass: @@ -4758,7 +4759,7 @@ void LinearScan::processBlockStartLocations(BasicBlock* currentBlock, bool alloc interval->physReg = REG_NA; } } - else if (allocationPass) + else if (!allocationPassComplete) { // Keep the register assignment - if another var has it, it will get unassigned. // Otherwise, resolution will fix it up later, and it will be more @@ -4795,10 +4796,10 @@ void LinearScan::processBlockStartLocations(BasicBlock* currentBlock, bool alloc { assert(genIsValidDoubleReg(targetReg)); unassignIntervalBlockStart(findAnotherHalfRegRec(targetRegRecord), - allocationPass ? inVarToRegMap : nullptr); + allocationPassComplete ? nullptr : inVarToRegMap); } #endif // _TARGET_ARM_ - unassignIntervalBlockStart(targetRegRecord, allocationPass ? inVarToRegMap : nullptr); + unassignIntervalBlockStart(targetRegRecord, allocationPassComplete ? 
nullptr : inVarToRegMap); assignPhysReg(targetRegRecord, interval); } if (interval->recentRefPosition != nullptr && !interval->recentRefPosition->copyReg && @@ -4868,7 +4869,6 @@ void LinearScan::processBlockStartLocations(BasicBlock* currentBlock, bool alloc } #endif // _TARGET_ARM_ } - INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_START_BB, nullptr, REG_NA, currentBlock)); } //------------------------------------------------------------------------ @@ -5159,7 +5159,6 @@ void LinearScan::allocateRegisters() #ifdef DEBUG activeRefPosition = currentRefPosition; -#endif // DEBUG // For the purposes of register resolution, we handle the DummyDefs before // the block boundary - so the RefTypeBB is after all the DummyDefs. @@ -5168,6 +5167,14 @@ void LinearScan::allocateRegisters() // that aren't live in the next block and make them available for the // DummyDefs. + // If we've already handled the BlockEnd, but now we're seeing the RefTypeBB, + // dump it now. + if ((refType == RefTypeBB) && handledBlockEnd) + { + dumpNewBlock(currentBlock, currentRefPosition->nodeLocation); + } +#endif // DEBUG + if (!handledBlockEnd && (refType == RefTypeBB || refType == RefTypeDummyDef)) { // Free any delayed regs (now in regsToFree) before processing the block boundary @@ -5184,6 +5191,7 @@ void LinearScan::allocateRegisters() { processBlockEndAllocation(currentBlock); currentBlock = moveToNextBlock(); + INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_START_BB, nullptr, REG_NA, currentBlock)); } } @@ -5491,8 +5499,9 @@ void LinearScan::allocateRegisters() if (assignedRegister != REG_NA) { - // If there is a conflicting fixed reference, insert a copy. RegRecord* physRegRecord = getRegisterRecord(assignedRegister); + + // If there is a conflicting fixed reference, insert a copy. if (physRegRecord->conflictingFixedRegReference(currentRefPosition)) { // We may have already reassigned the register to the conflicting reference. 
@@ -5577,7 +5586,7 @@ void LinearScan::allocateRegisters() { bool allocateReg = true; - if (currentRefPosition->AllocateIfProfitable()) + if (currentRefPosition->RegOptional()) { // We can avoid allocating a register if it is a the last use requiring a reload. if (currentRefPosition->lastUse && currentRefPosition->reload) @@ -5641,12 +5650,12 @@ void LinearScan::allocateRegisters() } else #endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE - if (currentRefPosition->RequiresRegister() || currentRefPosition->AllocateIfProfitable()) + if (currentRefPosition->RequiresRegister() || currentRefPosition->RegOptional()) { if (allocateReg) { - assignedRegister = allocateBusyReg(currentInterval, currentRefPosition, - currentRefPosition->AllocateIfProfitable()); + assignedRegister = + allocateBusyReg(currentInterval, currentRefPosition, currentRefPosition->RegOptional()); } if (assignedRegister != REG_NA) @@ -5658,7 +5667,7 @@ void LinearScan::allocateRegisters() { // This can happen only for those ref positions that are to be allocated // only if profitable. - noway_assert(currentRefPosition->AllocateIfProfitable()); + noway_assert(currentRefPosition->RegOptional()); currentRefPosition->registerAssignment = RBM_NONE; currentRefPosition->reload = false; @@ -5928,10 +5937,8 @@ void LinearScan::resolveLocalRef(BasicBlock* block, GenTree* treeNode, RefPositi // Is this a tracked local? Or just a register allocated for loading // a non-tracked one? 
Interval* interval = currentRefPosition->getInterval(); - if (!interval->isLocalVar) - { - return; - } + assert(interval->isLocalVar); + interval->recentRefPosition = currentRefPosition; LclVarDsc* varDsc = interval->getLocalVar(compiler); @@ -6025,7 +6032,7 @@ void LinearScan::resolveLocalRef(BasicBlock* block, GenTree* treeNode, RefPositi treeNode->gtFlags |= GTF_SPILLED; if (spillAfter) { - if (currentRefPosition->AllocateIfProfitable()) + if (currentRefPosition->RegOptional()) { // This is a use of lclVar that is flagged as reg-optional // by lower/codegen and marked for both reload and spillAfter. @@ -6064,10 +6071,8 @@ void LinearScan::resolveLocalRef(BasicBlock* block, GenTree* treeNode, RefPositi treeNode->gtRegNum = REG_NA; } } - else + else // Not reload and Not pure-def that's spillAfter { - // Not reload and Not pure-def that's spillAfter - if (currentRefPosition->copyReg || currentRefPosition->moveReg) { // For a copyReg or moveReg, we have two cases: @@ -6446,7 +6451,7 @@ void LinearScan::updateMaxSpill(RefPosition* refPosition) RefType refType = refPosition->refType; if (refPosition->spillAfter || refPosition->reload || - (refPosition->AllocateIfProfitable() && refPosition->assignedReg() == REG_NA)) + (refPosition->RegOptional() && refPosition->assignedReg() == REG_NA)) { Interval* interval = refPosition->getInterval(); if (!interval->isLocalVar) @@ -6517,7 +6522,7 @@ void LinearScan::updateMaxSpill(RefPosition* refPosition) assert(currentSpill[typ] > 0); currentSpill[typ]--; } - else if (refPosition->AllocateIfProfitable() && refPosition->assignedReg() == REG_NA) + else if (refPosition->RegOptional() && refPosition->assignedReg() == REG_NA) { // A spill temp not getting reloaded into a reg because it is // marked as allocate if profitable and getting used from its @@ -6643,7 +6648,7 @@ void LinearScan::resolveRegisters() curBBStartLocation = currentRefPosition->nodeLocation; if (block != compiler->fgFirstBB) { - processBlockStartLocations(block, 
false); + processBlockStartLocations(block); } // Handle the DummyDefs, updating the incoming var location. @@ -6865,7 +6870,7 @@ void LinearScan::resolveRegisters() } else { - assert(nextRefPosition->AllocateIfProfitable()); + assert(nextRefPosition->RegOptional()); // In case of tree temps, if def is spilled and use didn't // get a register, set a flag on tree node to be treated as @@ -7023,7 +7028,7 @@ void LinearScan::resolveRegisters() { // Either this RefPosition is spilled, or regOptional or it is not a "real" def or use assert( - firstRefPosition->spillAfter || firstRefPosition->AllocateIfProfitable() || + firstRefPosition->spillAfter || firstRefPosition->RegOptional() || (firstRefPosition->refType != RefTypeDef && firstRefPosition->refType != RefTypeUse)); varDsc->lvRegNum = REG_STK; } @@ -7169,7 +7174,6 @@ void LinearScan::insertMove( else { // Put the copy at the bottom - // If there's a branch, make an embedded statement that executes just prior to the branch if (block->bbJumpKind == BBJ_COND || block->bbJumpKind == BBJ_SWITCH) { noway_assert(!blockRange.IsEmpty()); @@ -8610,7 +8614,7 @@ void RefPosition::dump() printf(" outOfOrder"); } - if (this->AllocateIfProfitable()) + if (this->RegOptional()) { printf(" regOptional"); } @@ -9372,8 +9376,19 @@ void LinearScan::dumpLsraAllocationEvent(LsraDumpEvent event, // Block boundaries case LSRA_EVENT_START_BB: - assert(currentBlock != nullptr); - dumpRefPositionShort(activeRefPosition, currentBlock); + // The RefTypeBB comes after the RefTypeDummyDefs associated with that block, + // so we may have a RefTypeDummyDef at the time we dump this event. + // In that case we'll have another "EVENT" associated with it, so we need to + // print the full line now. 
+ if (activeRefPosition->refType != RefTypeBB) + { + dumpNewBlock(currentBlock, activeRefPosition->nodeLocation); + dumpRegRecords(); + } + else + { + dumpRefPositionShort(activeRefPosition, currentBlock); + } break; // Allocation decisions @@ -9700,6 +9715,48 @@ void LinearScan::dumpEmptyRefPosition() printf(emptyRefPositionFormat, ""); } +//------------------------------------------------------------------------ +// dumpNewBlock: Dump a line for a new block in a column-based dump of the register state. +// +// Arguments: +// currentBlock - the new block to be dumped +// +void LinearScan::dumpNewBlock(BasicBlock* currentBlock, LsraLocation location) +{ + if (!VERBOSE) + { + return; + } + + // Always print a title row before a RefTypeBB (except for the first, because we + // will already have printed it before the parameters) + if ((currentBlock != compiler->fgFirstBB) && (currentBlock != nullptr)) + { + dumpRegRecordTitle(); + } + // If the activeRefPosition is a DummyDef, then don't print anything further (printing the + // title line makes it clearer that we're "about to" start the next block). + if (activeRefPosition->refType == RefTypeDummyDef) + { + dumpEmptyRefPosition(); + printf("DDefs "); + printf(regNameFormat, ""); + return; + } + printf(shortRefPositionFormat, location, activeRefPosition->rpNum); + if (currentBlock == nullptr) + { + printf(regNameFormat, "END"); + printf(" "); + printf(regNameFormat, ""); + } + else + { + printf(bbRefPosFormat, currentBlock->bbNum, + currentBlock == compiler->fgFirstBB ? 0 : blockInfo[currentBlock->bbNum].predBBNum); + } +} + // Note that the size of this dump is computed in dumpRegRecordHeader(). 
// void LinearScan::dumpRefPositionShort(RefPosition* refPosition, BasicBlock* currentBlock) @@ -9714,29 +9771,11 @@ void LinearScan::dumpRefPositionShort(RefPosition* refPosition, BasicBlock* curr lastPrintedRefPosition = refPosition; if (refPosition->refType == RefTypeBB) { - // Always print a title row before a RefTypeBB (except for the first, because we - // will already have printed it before the parameters) - if (refPosition->refType == RefTypeBB && block != compiler->fgFirstBB && block != nullptr) - { - dumpRegRecordTitle(); - } + dumpNewBlock(currentBlock, refPosition->nodeLocation); + return; } printf(shortRefPositionFormat, refPosition->nodeLocation, refPosition->rpNum); - if (refPosition->refType == RefTypeBB) - { - if (block == nullptr) - { - printf(regNameFormat, "END"); - printf(" "); - // We still need to print this refposition. - lastPrintedRefPosition = nullptr; - } - else - { - printf(bbRefPosFormat, block->bbNum, block == compiler->fgFirstBB ? 0 : blockInfo[block->bbNum].predBBNum); - } - } - else if (refPosition->isIntervalRef()) + if (refPosition->isIntervalRef()) { Interval* interval = refPosition->getInterval(); dumpIntervalName(interval); @@ -10171,6 +10210,7 @@ void LinearScan::verifyFinalAllocation() case RefTypeExpUse: case RefTypeDummyDef: // Do nothing; these will be handled by the RefTypeBB. 
+ DBEXEC(VERBOSE, dumpRefPositionShort(currentRefPosition, currentBlock)); DBEXEC(VERBOSE, printf(" ")); break; diff --git a/src/jit/lsra.h b/src/jit/lsra.h index d1c8697fc9..8b457e5d59 100644 --- a/src/jit/lsra.h +++ b/src/jit/lsra.h @@ -960,7 +960,7 @@ private: void processBlockEndAllocation(BasicBlock* current); // Record variable locations at start/end of block - void processBlockStartLocations(BasicBlock* current, bool allocationPass); + void processBlockStartLocations(BasicBlock* current); void processBlockEndLocations(BasicBlock* current); #ifdef _TARGET_ARM_ @@ -1286,6 +1286,7 @@ private: void dumpRegRecordTitleIfNeeded(); void dumpRegRecordTitleLines(); void dumpRegRecords(); + void dumpNewBlock(BasicBlock* currentBlock, LsraLocation location); // An abbreviated RefPosition dump for printing with column-based register state void dumpRefPositionShort(RefPosition* refPosition, BasicBlock* currentBlock); // Print the number of spaces occupied by a dumpRefPositionShort() @@ -1399,6 +1400,9 @@ private: void removeFromBlockSequenceWorkList(BasicBlockList* listNode, BasicBlockList* prevNode); BasicBlock* getNextCandidateFromWorkList(); + // Indicates whether the allocation pass has been completed. + bool allocationPassComplete; + // The bbNum of the block being currently allocated or resolved. unsigned int curBBNum; // The current location @@ -1452,15 +1456,15 @@ private: #if defined(_TARGET_AMD64_) static bool varTypeNeedsPartialCalleeSave(var_types type) { - return (emitTypeSize(type) == 32); + return (type == TYP_SIMD32); } static const var_types LargeVectorSaveType = TYP_SIMD16; #elif defined(_TARGET_ARM64_) static bool varTypeNeedsPartialCalleeSave(var_types type) { // ARM64 ABI FP Callee save registers only require Callee to save lower 8 Bytes - // For SIMD types longer then 8 bytes Caller is responsible for saving and restoring Upper bytes. 
- return (emitTypeSize(type) == 16); + // For SIMD types longer than 8 bytes Caller is responsible for saving and restoring Upper bytes. + return ((type == TYP_SIMD16) || (type == TYP_SIMD12)); } static const var_types LargeVectorSaveType = TYP_DOUBLE; #else // !defined(_TARGET_AMD64_) && !defined(_TARGET_ARM64_) @@ -1719,7 +1723,7 @@ public: LclVarDsc* getLocalVar(Compiler* comp) { assert(isLocalVar); - return &(comp->lvaTable[this->varNum]); + return comp->lvaGetDesc(this->varNum); } // Get the local tracked variable "index" (lvVarIndex), used in bitmasks. @@ -1897,7 +1901,7 @@ public: // Indicates whether this ref position is to be allocated a reg only if profitable. Currently these are the // ref positions that lower/codegen has indicated as reg optional and is considered a contained memory operand if // no reg is allocated. - unsigned char allocRegIfProfitable : 1; + unsigned char regOptional : 1; // Used by RefTypeDef/Use positions of a multi-reg call node. // Indicates the position of the register that this ref position refers to. @@ -2034,23 +2038,23 @@ public: || refType == RefTypeUpperVectorSaveDef || refType == RefTypeUpperVectorSaveUse #endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE ) && - !AllocateIfProfitable(); + !RegOptional(); } - void setAllocateIfProfitable(bool val) + void setRegOptional(bool val) { - allocRegIfProfitable = val; + regOptional = val; } // Returns true whether this ref position is to be allocated // a reg only if it is profitable. - bool AllocateIfProfitable() + bool RegOptional() { // TODO-CQ: Right now if a ref position is marked as // copyreg or movereg, then it is not treated as // 'allocate if profitable'. This is an implementation // limitation that needs to be addressed. 
- return allocRegIfProfitable && !copyReg && !moveReg; + return regOptional && !copyReg && !moveReg; } void setMultiRegIdx(unsigned idx) diff --git a/src/jit/lsrabuild.cpp b/src/jit/lsrabuild.cpp index aada9b2f75..5d8df339a6 100644 --- a/src/jit/lsrabuild.cpp +++ b/src/jit/lsrabuild.cpp @@ -521,7 +521,7 @@ RefPosition* LinearScan::newRefPosition( newRP->registerAssignment = mask; newRP->setMultiRegIdx(0); - newRP->setAllocateIfProfitable(false); + newRP->setRegOptional(false); // We can't have two RefPositions on a RegRecord at the same location, unless they are different types. assert((regRecord->lastRefPosition == nullptr) || (regRecord->lastRefPosition->nodeLocation < theLocation) || @@ -619,7 +619,7 @@ RefPosition* LinearScan::newRefPosition(Interval* theInterval, newRP->registerAssignment = mask; newRP->setMultiRegIdx(multiRegIdx); - newRP->setAllocateIfProfitable(false); + newRP->setRegOptional(false); associateRefPosWithInterval(newRP); @@ -657,7 +657,7 @@ RefPosition* LinearScan::newUseRefPosition(Interval* theInterval, RefPosition* pos = newRefPosition(theInterval, currentLoc, RefTypeUse, treeNode, mask, multiRegIdx); if (theTreeNode->IsRegOptional()) { - pos->setAllocateIfProfitable(true); + pos->setRegOptional(true); } return pos; } @@ -2576,6 +2576,7 @@ void LinearScan::BuildDefsWithKills(GenTree* tree, int dstCount, regMaskTP dstCa // even if 'enregisterLocalVars' is false, or 'liveLargeVectors' is empty, though currently the allocation // phase will fully (rather than partially) spill those, so we don't need to build the UpperVectorRestore // RefPositions in that case. + // This must be done after the kills, so that we know which large vectors are still live. 
// if ((killMask & RBM_FLT_CALLEE_TRASH) != RBM_NONE) { @@ -2653,7 +2654,7 @@ RefPosition* LinearScan::BuildUse(GenTree* operand, regMaskTP candidates, int mu operand = nullptr; } RefPosition* useRefPos = newRefPosition(interval, currentLoc, RefTypeUse, operand, candidates, multiRegIdx); - useRefPos->setAllocateIfProfitable(regOptional); + useRefPos->setRegOptional(regOptional); return useRefPos; } |