// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

/*****************************************************************************/

#ifndef _LSRA_H_
#define _LSRA_H_

#include "arraylist.h"
#include "nodeinfo.h"

// Minor and forward-reference types
class Interval;
class RefPosition;
class LinearScan;
class RegRecord;
class LinearScanMemoryAllocatorInterval;
class LinearScanMemoryAllocatorRefPosition;

template <class T>
class ArrayStack;

// LsraLocation tracks the linearized order of the nodes.
// Each node is assigned two LsraLocations - one for all the uses and all but the last
// def, and a second location for the last def (if any).

typedef unsigned int LsraLocation;
const unsigned int MinLocation = 0;
const unsigned int MaxLocation = UINT_MAX;

// max number of registers an operation could require internally (in addition to uses and defs)
const unsigned int MaxInternalRegisters = 8;
const unsigned int RegisterTypeCount    = 2;

typedef var_types RegisterType;
#define IntRegisterType TYP_INT
#define FloatRegisterType TYP_FLOAT

inline regMaskTP calleeSaveRegs(RegisterType rt)
{
    return varTypeIsIntegralOrI(rt) ? RBM_INT_CALLEE_SAVED : RBM_FLT_CALLEE_SAVED;
}

struct LocationInfo
{
    LsraLocation loc;
    Interval*    interval;
    GenTree*     treeNode;

    LocationInfo(LsraLocation l, Interval* i, GenTree* t)
        : loc(l), interval(i), treeNode(t)
    {
    }

    // default constructor for data structures
    LocationInfo() {}
};

struct LsraBlockInfo
{
    BasicBlock::weight_t weight;
    // bbNum of the predecessor to use for the register location of live-in variables.
    // 0 for fgFirstBB.
    unsigned int         predBBNum;
    bool                 hasCriticalInEdge;
    bool                 hasCriticalOutEdge;
};

// This is sort of a bit mask.
// The low order 2 bits will be 1 for defs, and 2 for uses.
enum RefType : unsigned char
{
    RefTypeInvalid            = 0x00,
    RefTypeDef                = 0x01,
    RefTypeUse                = 0x02,
    RefTypeKill               = 0x04,
    RefTypeBB                 = 0x08,
    RefTypeFixedReg           = 0x10,
    RefTypeExpUse             = (0x20 | RefTypeUse),
    RefTypeParamDef           = (0x10 | RefTypeDef),
    RefTypeDummyDef           = (0x20 | RefTypeDef),
    RefTypeZeroInit           = (0x30 | RefTypeDef),
    RefTypeUpperVectorSaveDef = (0x40 | RefTypeDef),
    RefTypeUpperVectorSaveUse = (0x40 | RefTypeUse),
    RefTypeKillGCRefs         = 0x80,
    RefTypeBound,
};

// position in a block (for resolution)
enum BlockStartOrEnd
{
    BlockPositionStart = 0,
    BlockPositionEnd   = 1,
    PositionCount      = 2
};

inline bool RefTypeIsUse(RefType refType)
{
    return ((refType & RefTypeUse) == RefTypeUse);
}

inline bool RefTypeIsDef(RefType refType)
{
    return ((refType & RefTypeDef) == RefTypeDef);
}

typedef regNumber* VarToRegMap;

typedef StructArrayList<Interval, /* initial element count */ 32, /* multiplicative chunk size growth factor */ 2, LinearScanMemoryAllocatorInterval> IntervalList;
typedef StructArrayList<RefPosition, /* initial element count */ 64, /* multiplicative chunk size growth factor */ 2, LinearScanMemoryAllocatorRefPosition> RefPositionList;

// Wrapper for norls_allocator
class LinearScanMemoryAllocatorRefPosition
{
public:
    static void* Alloc(void* context, SIZE_T cb);
    static void  Free(void* context, void* pv) {}
};

class LinearScanMemoryAllocatorInterval
{
public:
    static void* Alloc(void* context, SIZE_T cb);
    static void  Free(void* context, void* pv) {}
};

class Referenceable
{
public:
    Referenceable()
    {
        firstRefPosition  = nullptr;
        recentRefPosition = nullptr;
        lastRefPosition   = nullptr;
        isActive          = false;
    }

    // A linked list of RefPositions.  These are only traversed in the forward
    // direction, and are not moved, so they don't need to be doubly linked
    // (see RefPosition).
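    //
    // For illustration only -- a minimal sketch (not necessarily the actual
    // implementation) of how a forward traversal can use these fields, assuming
    // recentRefPosition is advanced as the caller walks forward in location order:
    //
    //     RefPosition* next = (recentRefPosition == nullptr)
    //                             ? firstRefPosition
    //                             : recentRefPosition->nextRefPosition;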
    RefPosition* firstRefPosition;
    RefPosition* recentRefPosition;
    RefPosition* lastRefPosition;
    bool         isActive;

    // Get the position of the next reference which is at or greater than
    // the current location (relies upon recentRefPosition being updated
    // during traversal).
    RefPosition* getNextRefPosition();
    LsraLocation getNextRefLocation();
};

class RegRecord : public Referenceable
{
public:
    RegRecord()
    {
        assignedInterval    = nullptr;
        previousInterval    = nullptr;
        regNum              = REG_NA;
        isCalleeSave        = false;
        registerType        = IntRegisterType;
        isBusyUntilNextKill = false;
    }

    void init(regNumber reg)
    {
#ifdef _TARGET_ARM64_
        // The Zero register, or the SP
        if ((reg == REG_ZR) || (reg == REG_SP))
        {
            // IsGeneralRegister returns false for REG_ZR and REG_SP
            regNum       = reg;
            registerType = IntRegisterType;
        }
        else
#endif
        if (emitter::isFloatReg(reg))
        {
            registerType = FloatRegisterType;
        }
        else
        {
            // The constructor defaults to IntRegisterType
            assert(emitter::isGeneralRegister(reg) && registerType == IntRegisterType);
        }
        regNum       = reg;
        isCalleeSave = ((RBM_CALLEE_SAVED & genRegMask(reg)) != 0);
    }

#ifdef DEBUG
    // print out representation
    void dump();
    // concise representation for embedding
    void tinyDump();
#endif // DEBUG

    bool isFree();

    // RefPosition*  getNextRefPosition();
    // LsraLocation  getNextRefLocation();

    // DATA

    // interval to which this register is currently allocated.
    // If the interval is inactive (isActive == false) then it is not currently live,
    // and the register can be unassigned (i.e. setting assignedInterval to nullptr)
    // without spilling the register.
    Interval* assignedInterval;
    // Interval to which this register was previously allocated, and which was unassigned
    // because it was inactive.  This register will be reassigned to this Interval when
    // assignedInterval becomes inactive.
    Interval* previousInterval;

    regNumber    regNum;
    bool         isCalleeSave;
    RegisterType registerType;
    // This register must be considered busy until the next time it is explicitly killed.
    // This is used so that putarg_reg can avoid killing its lclVar source, while avoiding
    // the problem with the reg becoming free if the last-use is encountered before the call.
    bool isBusyUntilNextKill;

    bool conflictingFixedRegReference(RefPosition* refPosition);
};

inline bool leafInRange(GenTree* leaf, int lower, int upper)
{
    if (!leaf->IsIntCnsFitsInI32())
        return false;
    if (leaf->gtIntCon.gtIconVal < lower)
        return false;
    if (leaf->gtIntCon.gtIconVal > upper)
        return false;

    return true;
}

inline bool leafInRange(GenTree* leaf, int lower, int upper, int multiple)
{
    if (!leafInRange(leaf, lower, upper))
        return false;
    if (leaf->gtIntCon.gtIconVal % multiple)
        return false;

    return true;
}

inline bool leafAddInRange(GenTree* leaf, int lower, int upper, int multiple = 1)
{
    if (leaf->OperGet() != GT_ADD)
        return false;
    return leafInRange(leaf->gtOp.gtOp2, lower, upper, multiple);
}
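// Illustrative (hypothetical) use of the helper above: check whether 'tree' is
// (x + imm) with imm in [0, 252] and a multiple of 4 -- e.g. a candidate for a
// scaled addressing-mode offset:
//
//     if (leafAddInRange(tree, 0, 252, 4))
//     {
//         // the add can be folded into the address mode
//     }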
inline bool isCandidateVar(LclVarDsc* varDsc)
{
    return varDsc->lvLRACandidate;
}

/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XX                                                                           XX
XX                           LinearScan                                      XX
XX                                                                           XX
XX  This is the container for the Linear Scan data structures and methods.  XX
XX                                                                           XX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
*/

// OPTION 1: The algorithm as described in "Optimized Interval Splitting in a
// Linear Scan Register Allocator".  It is driven by iterating over the Interval
// lists.  In this case, we need multiple IntervalLists, and Intervals will be
// moved between them so they must be easily updated.

// OPTION 2: The algorithm is driven by iterating over the RefPositions.  In this
// case, we only need a single IntervalList, and it won't be updated.
// The RefPosition must refer to its Interval, and we need to be able to traverse
// to the next RefPosition in code order.
// THIS IS THE OPTION CURRENTLY BEING PURSUED.

class LinearScan : public LinearScanInterface
{
    friend class LinearScanMemoryAllocatorInterval;
    friend class LinearScanMemoryAllocatorRefPosition;
    friend class RefPosition;
    friend class Interval;
    friend class Lowering;

public:
    // This could use further abstraction.  From Compiler we need the tree,
    // the flowgraph and the allocator.
    LinearScan(Compiler* theCompiler);

    // This is the main driver
    virtual void doLinearScan();

    // TreeNodeInfo contains three register masks: src candidates, dst candidates, and internal candidates.
    // Instead of storing actual register masks, however, which are large, we store a small index into a table
    // of register masks, stored in this class.  We create only as many distinct register masks as are needed.
    // All identical register masks get the same index.  The register mask table contains:
    // 1. A mask containing all eligible integer registers.
    // 2. A mask containing all eligible floating-point registers.
    // 3. A mask for each single register.
    // 4. A mask for each combination of registers, created dynamically as required.
    //
    // Currently, the maximum number of masks allowed is a constant defined by 'numMasks'.  The register mask
    // table is never resized.  It is also limited by the size of the index, currently an unsigned char.

#if defined(_TARGET_ARM64_)
    static const int numMasks = 128;
#else
    static const int numMasks = 64;
#endif

    regMaskTP* regMaskTable;
    int        nextFreeMask;

    typedef int RegMaskIndex;
    // allint is 0, allfloat is 1, all the single-bit masks start at 2
    enum KnownRegIndex
    {
        ALLINT_IDX           = 0,
        ALLFLOAT_IDX         = 1,
        FIRST_SINGLE_REG_IDX = 2
    };

    RegMaskIndex GetIndexForRegMask(regMaskTP mask);
    regMaskTP    GetRegMaskForIndex(RegMaskIndex index);
    void         RemoveRegisterFromMasks(regNumber reg);

#ifdef DEBUG
    void dspRegisterMaskTable();
#endif // DEBUG

    // Initialize the block traversal for LSRA.
    // This resets the bbVisitedSet, and on the first invocation sets the blockSequence array,
    // which determines the order in which blocks will be allocated (currently called during Lowering).
    BasicBlock* startBlockSequence();
    // Move to the next block in sequence, updating the current block information.
    BasicBlock* moveToNextBlock();
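    // Illustrative sketch (not a quote from the implementation) of how the traversal
    // API above is intended to be used:
    //
    //     for (BasicBlock* block = startBlockSequence(); block != nullptr; block = moveToNextBlock())
    //     {
    //         // process 'block' in allocation order
    //     }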
    // Get the next block to be scheduled without changing the current block,
    // but updating the blockSequence during the first iteration if it is not fully computed.
    BasicBlock* getNextBlock();

    // This is called during code generation to update the location of variables
    virtual void recordVarLocationsAtStartOfBB(BasicBlock* bb);

    // This does the dataflow analysis and builds the intervals
    void buildIntervals();

    // This is where the actual assignment is done
    void allocateRegisters();

    // This is the resolution phase, where cross-block mismatches are fixed up
    void resolveRegisters();

    void writeRegisters(RefPosition* currentRefPosition, GenTree* tree);

    // Insert a copy in the case where a tree node value must be moved to a different
    // register at the point of use, or it is reloaded to a different register
    // than the one it was spilled from.
    void insertCopyOrReload(GenTreePtr tree, RefPosition* refPosition);

#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
    // Insert code to save and restore the upper half of a vector that lives
    // in a callee-save register at the point of a call (the upper half is
    // not preserved).
    void insertUpperVectorSaveAndReload(GenTreePtr tree, RefPosition* refPosition, BasicBlock* block);
#endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE

    // resolve along one block-block edge
    enum ResolveType
    {
        ResolveSplit,
        ResolveJoin,
        ResolveCritical,
        ResolveSharedCritical,
        ResolveTypeCount
    };
#ifdef DEBUG
    static const char* resolveTypeName[ResolveTypeCount];
#endif

    enum WhereToInsert
    {
        InsertAtTop,
        InsertAtBottom
    };

    void addResolution(BasicBlock* block, GenTreePtr insertionPoint, Interval* interval, regNumber outReg, regNumber inReg);
    void handleOutgoingCriticalEdges(BasicBlock* block);
    void resolveEdge(BasicBlock* fromBlock, BasicBlock* toBlock, ResolveType resolveType, VARSET_VALARG_TP liveSet);
    void resolveEdges();

    // Finally, the register assignments are written back to the tree nodes.
    void recordRegisterAssignments();

    // Keep track of how many temp locations we'll need for spill
    void initMaxSpill();
    void updateMaxSpill(RefPosition* refPosition);
    void recordMaxSpill();

    // maximum number of simultaneous spill locations used, for each type
    unsigned int maxSpill[TYP_COUNT];
    unsigned int currentSpill[TYP_COUNT];
    bool         needFloatTmpForFPCall;
    bool         needDoubleTmpForFPCall;

#ifdef DEBUG
private:
    //------------------------------------------------------------------------
    // Should we stress lsra?
    // This uses the same COMPLUS variable as rsStressRegs (COMPLUS_JitStressRegs).
    // However, the possible values and their interpretation are entirely different.
    //
    // The mask bits are currently divided into fields in which each non-zero value
    // is a distinct stress option (e.g. 0x3 is not a combination of 0x1 and 0x2).
    // However, subject to possible constraints (to be determined), the different
    // fields can be combined (e.g. 0x7 is a combination of 0x3 and 0x4).
    // Note that the field values are declared in a public enum, but the actual bits are
    // only accessed via accessors.

    unsigned lsraStressMask;

    // This controls the registers available for allocation
    enum LsraStressLimitRegs
    {
        LSRA_LIMIT_NONE      = 0,
        LSRA_LIMIT_CALLEE    = 0x1,
        LSRA_LIMIT_CALLER    = 0x2,
        LSRA_LIMIT_SMALL_SET = 0x3,
        LSRA_LIMIT_MASK      = 0x3
    };
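    // For illustration (hypothetical stress value): with COMPLUS_JitStressRegs=0x103,
    // (lsraStressMask & LSRA_LIMIT_MASK) == LSRA_LIMIT_SMALL_SET (0x3), while the 0x100
    // bit independently selects LSRA_BLOCK_BOUNDARY_LAYOUT (defined further below) --
    // each field decodes on its own, per the combining rules described above.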
    // When LSRA_LIMIT_SMALL_SET is specified, it is desirable to select a "mixed" set of caller- and callee-save
    // registers, so as to get different coverage than limiting to callee or caller.
    // At least for x86 and AMD64, and potentially other architectures that will support SIMD,
    // we need a minimum of 5 fp regs in order to support the InitN intrinsic for Vector4.
    // Hence the "SmallFPSet" has 5 elements.

#if defined(_TARGET_AMD64_)
    static const regMaskTP LsraLimitSmallIntSet = RBM_LOWINT;
    static const regMaskTP LsraLimitSmallFPSet  = (RBM_XMM0 | RBM_XMM1 | RBM_XMM2 | RBM_XMM6 | RBM_XMM7);
#elif defined(_TARGET_ARM_)
    static const regMaskTP LsraLimitSmallIntSet = (RBM_R0 | RBM_R1 | RBM_R2 | RBM_R3 | RBM_R4);
    static const regMaskTP LsraLimitSmallFPSet  = (RBM_F0 | RBM_F1 | RBM_F2 | RBM_F16 | RBM_F17);
#elif defined(_TARGET_ARM64_)
    static const regMaskTP LsraLimitSmallIntSet = (RBM_R0 | RBM_R1 | RBM_R2 | RBM_R19 | RBM_R20);
    static const regMaskTP LsraLimitSmallFPSet  = (RBM_V0 | RBM_V1 | RBM_V2 | RBM_V8 | RBM_V9);
#elif defined(_TARGET_X86_)
    static const regMaskTP LsraLimitSmallIntSet = (RBM_EAX | RBM_ECX | RBM_EDI);
    static const regMaskTP LsraLimitSmallFPSet  = (RBM_XMM0 | RBM_XMM1 | RBM_XMM2 | RBM_XMM6 | RBM_XMM7);
#else
#error Unsupported or unset target architecture
#endif // target

    LsraStressLimitRegs getStressLimitRegs()
    {
        return (LsraStressLimitRegs)(lsraStressMask & LSRA_LIMIT_MASK);
    }
    regMaskTP stressLimitRegs(RefPosition* refPosition, regMaskTP mask);

    // This controls the heuristics used to select registers.
    // These can be combined.
    enum LsraSelect
    {
        LSRA_SELECT_DEFAULT               = 0,
        LSRA_SELECT_REVERSE_HEURISTICS    = 0x04,
        LSRA_SELECT_REVERSE_CALLER_CALLEE = 0x08,
        LSRA_SELECT_NEAREST               = 0x10,
        LSRA_SELECT_MASK                  = 0x1c
    };
    LsraSelect getSelectionHeuristics()
    {
        return (LsraSelect)(lsraStressMask & LSRA_SELECT_MASK);
    }
    bool doReverseSelect()
    {
        return ((lsraStressMask & LSRA_SELECT_REVERSE_HEURISTICS) != 0);
    }
    bool doReverseCallerCallee()
    {
        return ((lsraStressMask & LSRA_SELECT_REVERSE_CALLER_CALLEE) != 0);
    }
    bool doSelectNearest()
    {
        return ((lsraStressMask & LSRA_SELECT_NEAREST) != 0);
    }

    // This controls the order in which basic blocks are visited during allocation
    enum LsraTraversalOrder
    {
        LSRA_TRAVERSE_LAYOUT     = 0x20,
        LSRA_TRAVERSE_PRED_FIRST = 0x40,
        LSRA_TRAVERSE_RANDOM     = 0x60, // NYI
        LSRA_TRAVERSE_DEFAULT    = LSRA_TRAVERSE_PRED_FIRST,
        LSRA_TRAVERSE_MASK       = 0x60
    };
    LsraTraversalOrder getLsraTraversalOrder()
    {
        if ((lsraStressMask & LSRA_TRAVERSE_MASK) == 0)
        {
            return LSRA_TRAVERSE_DEFAULT;
        }
        return (LsraTraversalOrder)(lsraStressMask & LSRA_TRAVERSE_MASK);
    }
    bool isTraversalLayoutOrder()
    {
        return getLsraTraversalOrder() == LSRA_TRAVERSE_LAYOUT;
    }
    bool isTraversalPredFirstOrder()
    {
        return getLsraTraversalOrder() == LSRA_TRAVERSE_PRED_FIRST;
    }

    // This controls whether lifetimes should be extended to the entire method.
    // Note that this has no effect under MinOpts.
    enum LsraExtendLifetimes
    {
        LSRA_DONT_EXTEND           = 0,
        LSRA_EXTEND_LIFETIMES      = 0x80,
        LSRA_EXTEND_LIFETIMES_MASK = 0x80
    };
    LsraExtendLifetimes getLsraExtendLifeTimes()
    {
        return (LsraExtendLifetimes)(lsraStressMask & LSRA_EXTEND_LIFETIMES_MASK);
    }
    bool extendLifetimes()
    {
        return getLsraExtendLifeTimes() == LSRA_EXTEND_LIFETIMES;
    }
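    // For illustration (hypothetical mask value): with lsraStressMask == 0x18, both
    // doReverseCallerCallee() (0x08) and doSelectNearest() (0x10) return true, so the
    // two selection-heuristic fields above are active at once.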
    // This controls whether variable locations should be set to those of the previous block
    // in layout order (LSRA_BLOCK_BOUNDARY_LAYOUT), or to those of the highest-weight
    // predecessor (LSRA_BLOCK_BOUNDARY_PRED - the default), or rotated (LSRA_BLOCK_BOUNDARY_ROTATE).
    enum LsraBlockBoundaryLocations
    {
        LSRA_BLOCK_BOUNDARY_PRED   = 0,
        LSRA_BLOCK_BOUNDARY_LAYOUT = 0x100,
        LSRA_BLOCK_BOUNDARY_ROTATE = 0x200,
        LSRA_BLOCK_BOUNDARY_MASK   = 0x300
    };
    LsraBlockBoundaryLocations getLsraBlockBoundaryLocations()
    {
        return (LsraBlockBoundaryLocations)(lsraStressMask & LSRA_BLOCK_BOUNDARY_MASK);
    }
    regNumber rotateBlockStartLocation(Interval* interval, regNumber targetReg, regMaskTP availableRegs);

    // This controls whether we always insert a GT_RELOAD instruction after a spill.
    // Note that this can be combined with LsraSpillAlways (or not).
    enum LsraReload
    {
        LSRA_NO_RELOAD_IF_SAME    = 0,
        LSRA_ALWAYS_INSERT_RELOAD = 0x400,
        LSRA_RELOAD_MASK          = 0x400
    };
    LsraReload getLsraReload()
    {
        return (LsraReload)(lsraStressMask & LSRA_RELOAD_MASK);
    }
    bool alwaysInsertReload()
    {
        return getLsraReload() == LSRA_ALWAYS_INSERT_RELOAD;
    }

    // This controls whether we spill everywhere
    enum LsraSpill
    {
        LSRA_DONT_SPILL_ALWAYS = 0,
        LSRA_SPILL_ALWAYS      = 0x800,
        LSRA_SPILL_MASK        = 0x800
    };
    LsraSpill getLsraSpill()
    {
        return (LsraSpill)(lsraStressMask & LSRA_SPILL_MASK);
    }
    bool spillAlways()
    {
        return getLsraSpill() == LSRA_SPILL_ALWAYS;
    }

    // Dump support
    void lsraDumpIntervals(const char* msg);
    void dumpRefPositions(const char* msg);
    void dumpVarRefPositions(const char* msg);
    void verifyFinalAllocation();
    void verifyResolutionMove(GenTreeStmt* resolutionStmt, LsraLocation currentLocation);
#else  // !DEBUG
    bool doSelectNearest() { return false; }
    bool extendLifetimes() { return false; }
    bool spillAlways()     { return false; }
    // In a retail build we support only the default traversal order
    bool isTraversalLayoutOrder()    { return false; }
    bool isTraversalPredFirstOrder() { return true; }
    bool getLsraExtendLifeTimes()    { return false; }
#endif // !DEBUG

public:
    // Used by Lowering when considering whether to split Longs, as well as by identifyCandidates().
    bool isRegCandidate(LclVarDsc* varDsc);

private:
    // Determine which locals are candidates for allocation
    void identifyCandidates();

    // determine which locals are used in EH constructs we don't want to deal with
    void identifyCandidatesExceptionDataflow();

    void buildPhysRegRecords();

    void setLastUses(BasicBlock* block);

    void setFrameType();

    // Update allocations at start/end of block
    void processBlockEndAllocation(BasicBlock* current);

    // Record variable locations at start/end of block
    void processBlockStartLocations(BasicBlock* current, bool allocationPass);
    void processBlockEndLocations(BasicBlock* current);

    RefType CheckBlockType(BasicBlock* block, BasicBlock* prevBlock);

    // insert refpositions representing prolog zero-inits which will be added later
    void insertZeroInitRefPositions();

    void AddMapping(GenTree* node, LsraLocation loc);

    // add physreg refpositions for a tree node, based on calling convention and instruction selection predictions
    void addRefsForPhysRegMask(regMaskTP mask, LsraLocation currentLoc, RefType refType, bool isLastUse);

    void resolveConflictingDefAndUse(Interval* interval, RefPosition* defRefPosition);

    void buildRefPositionsForNode(GenTree* tree, BasicBlock* block, ArrayStack<LocationInfo>* stack, LsraLocation loc);

#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
    VARSET_VALRET_TP buildUpperVectorSaveRefPositions(GenTree* tree, LsraLocation currentLoc);
    void buildUpperVectorRestoreRefPositions(GenTree* tree, LsraLocation currentLoc, VARSET_VALARG_TP liveLargeVectors);
#endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE
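    // Conceptual sketch (an assumption about usage, not a quote from the .cpp): as
    // buildRefPositionsForNode() visits nodes in linear order, each def pushes a
    // LocationInfo onto 'stack' and each use pops one, matching a node's operands to
    // the Intervals created for their defs:
    //
    //     stack->Push(LocationInfo(loc, interval, tree)); // at a def
    //     LocationInfo operandInfo = stack->Pop();        // at a use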
#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
    // For AMD64 on SystemV machines.  This method is called as a replacement for
    // raUpdateRegStateForArg, which is used on Windows.  On System V systems a struct
    // can be passed partially using registers from the 2 register files.
    void unixAmd64UpdateRegStateForArg(LclVarDsc* argDsc);
#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)

    // Update reg state for an incoming register argument
    void updateRegStateForArg(LclVarDsc* argDsc);

    inline void setTreeNodeInfo(GenTree* tree, TreeNodeInfo info)
    {
        tree->gtLsraInfo = info;
        tree->gtClearReg(compiler);

        DBEXEC(VERBOSE, info.dump(this));
    }

    inline void clearDstCount(GenTree* tree)
    {
        tree->gtLsraInfo.dstCount = 0;
    }

    inline void clearOperandCounts(GenTree* tree)
    {
        TreeNodeInfo& info = tree->gtLsraInfo;
        info.srcCount = 0;
        info.dstCount = 0;
    }

    inline bool isLocalDefUse(GenTree* tree)
    {
        return tree->gtLsraInfo.isLocalDefUse;
    }

    inline bool isCandidateLocalRef(GenTree* tree)
    {
        if (tree->IsLocal())
        {
            unsigned int lclNum = tree->gtLclVarCommon.gtLclNum;
            assert(lclNum < compiler->lvaCount);
            LclVarDsc* varDsc = compiler->lvaTable + lclNum;

            return isCandidateVar(varDsc);
        }
        return false;
    }

    static Compiler::fgWalkResult markAddrModeOperandsHelperMD(GenTreePtr tree, void* p);

    // Return the registers killed by the given tree node.
    regMaskTP getKillSetForNode(GenTree* tree);

    // Given some tree node add refpositions for all the registers this node kills
    bool buildKillPositionsForNode(GenTree* tree, LsraLocation currentLoc);

    regMaskTP allRegs(RegisterType rt);
    regMaskTP allSIMDRegs();
    regMaskTP internalFloatRegCandidates();

    bool registerIsFree(regNumber regNum, RegisterType regType);
    bool registerIsAvailable(RegRecord* physRegRecord, LsraLocation currentLoc, LsraLocation* nextRefLocationPtr, RegisterType regType);
    void freeRegister(RegRecord* physRegRecord);
    void freeRegisters(regMaskTP regsToFree);

    regMaskTP getUseCandidates(GenTree* useNode);
    regMaskTP getDefCandidates(GenTree* tree);
    var_types getDefType(GenTree* tree);

    RefPosition* defineNewInternalTemp(GenTree* tree, RegisterType regType, LsraLocation currentLoc, regMaskTP regMask);

    int buildInternalRegisterDefsForNode(GenTree* tree, LsraLocation currentLoc, RefPosition* defs[]);

    void buildInternalRegisterUsesForNode(GenTree* tree, LsraLocation currentLoc, RefPosition* defs[], int total);

    void resolveLocalRef(GenTreePtr treeNode, RefPosition* currentRefPosition);

    void insertMove(BasicBlock* block, GenTreePtr insertionPoint, unsigned lclNum, regNumber inReg, regNumber outReg);

    void insertSwap(BasicBlock* block, GenTreePtr insertionPoint, unsigned lclNum1, regNumber reg1, unsigned lclNum2, regNumber reg2);
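    // Illustrative (hypothetical) resolution scenario: if V03 ends one block in a
    // different register than the one expected at the start of its successor,
    // resolution inserts a move on the edge via insertMove(), e.g.:
    //
    //     insertMove(block, insertionPoint, /* lclNum */ 3, inReg, outReg);
    //
    // (the local number and registers here are invented for the example; the parameter
    // names follow the declaration above)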
public:
    // TODO-Cleanup: unused?
    class PhysRegIntervalIterator
    {
    public:
        PhysRegIntervalIterator(LinearScan* theLinearScan)
        {
            nextRegNumber = (regNumber)0;
            linearScan    = theLinearScan;
        }
        RegRecord* GetNext()
        {
            return &linearScan->physRegs[nextRegNumber];
        }

    private:
        // This assumes that the physical registers are contiguous, starting
        // with a register number of 0
        regNumber   nextRegNumber;
        LinearScan* linearScan;
    };

private:
    Interval* newInterval(RegisterType regType);

    Interval* getIntervalForLocalVar(unsigned varNum)
    {
        return localVarIntervals[varNum];
    }
    RegRecord* getRegisterRecord(regNumber regNum);

    RefPosition* newRefPositionRaw();

    RefPosition* newRefPosition(Interval* theInterval, LsraLocation theLocation, RefType theRefType, GenTree* theTreeNode, regMaskTP mask);

    RefPosition* newRefPosition(regNumber reg, LsraLocation theLocation, RefType theRefType, GenTree* theTreeNode, regMaskTP mask);

    void applyCalleeSaveHeuristics(RefPosition* rp);

    void associateRefPosWithInterval(RefPosition* rp);

    void associateRefPosWithRegister(RefPosition* rp);

    /*****************************************************************************
     * Register management
     ****************************************************************************/
    RegisterType getRegisterType(Interval* currentInterval, RefPosition* refPosition);
    regNumber    tryAllocateFreeReg(Interval* current, RefPosition* refPosition);
    RegRecord*   findBestPhysicalReg(RegisterType regType, LsraLocation endLocation, regMaskTP candidates, regMaskTP preferences);
    regNumber    allocateBusyReg(Interval* current, RefPosition* refPosition);
    regNumber    assignCopyReg(RefPosition* refPosition);

    void checkAndAssignInterval(RegRecord* regRec, Interval* interval);
    void assignPhysReg(RegRecord* regRec, Interval* interval);
    void assignPhysReg(regNumber reg, Interval* interval)
    {
        assignPhysReg(getRegisterRecord(reg), interval);
    }

    void checkAndClearInterval(RegRecord* regRec, RefPosition* spillRefPosition);
    void unassignPhysReg(RegRecord* regRec, RefPosition* spillRefPosition);
    void unassignPhysRegNoSpill(RegRecord* reg);
    void unassignPhysReg(regNumber reg)
    {
        unassignPhysReg(getRegisterRecord(reg), nullptr);
    }

    void spillInterval(Interval* interval, RefPosition* fromRefPosition, RefPosition* toRefPosition);

    void spillGCRefs(RefPosition* killRefPosition);
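    // Conceptual allocation flow (a sketch under the assumption that a free register
    // is preferred and a busy one is taken -- spilling its Interval -- only as a
    // fallback; the real control flow has more cases):
    //
    //     regNumber reg = tryAllocateFreeReg(currentInterval, refPosition);
    //     if (reg == REG_NA)
    //     {
    //         reg = allocateBusyReg(currentInterval, refPosition);
    //     }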
    /*****************************************************************************
     * For Resolution phase
     ****************************************************************************/
    // TODO-Throughput: Consider refactoring this so that we keep a map from regs to vars for better scaling
    unsigned int regMapCount;

    // When we split edges, we create new blocks, and instead of expanding the VarToRegMaps, we
    // rely on the property that the "in" map is the same as the "from" block of the edge, and the
    // "out" map is the same as the "to" block of the edge (by construction).
    // So, for any block whose bbNum is greater than bbNumMaxBeforeResolution, we use the
    // splitBBNumToTargetBBNumMap.
    // TODO-Throughput: We may want to look into the cost/benefit tradeoff of doing this vs. expanding
    // the arrays.
    unsigned bbNumMaxBeforeResolution;

    struct SplitEdgeInfo
    {
        unsigned fromBBNum;
        unsigned toBBNum;
    };
    typedef SimplerHashTable<unsigned, SmallPrimitiveKeyFuncs<unsigned>, SplitEdgeInfo, DefaultSimplerHashBehavior> SplitBBNumToTargetBBNumMap;
    SplitBBNumToTargetBBNumMap* splitBBNumToTargetBBNumMap;
    SplitBBNumToTargetBBNumMap* getSplitBBNumToTargetBBNumMap()
    {
        if (splitBBNumToTargetBBNumMap == nullptr)
        {
            splitBBNumToTargetBBNumMap =
                new (getAllocator(compiler)) SplitBBNumToTargetBBNumMap(getAllocator(compiler));
        }
        return splitBBNumToTargetBBNumMap;
    }
    SplitEdgeInfo getSplitEdgeInfo(unsigned int bbNum);

    void initVarRegMaps();
    void setInVarRegForBB(unsigned int bbNum, unsigned int varNum, regNumber reg);
    void setOutVarRegForBB(unsigned int bbNum, unsigned int varNum, regNumber reg);
    VarToRegMap getInVarToRegMap(unsigned int bbNum);
    VarToRegMap getOutVarToRegMap(unsigned int bbNum);
    regNumber   getVarReg(VarToRegMap map, unsigned int varNum);
    // Initialize the incoming VarToRegMap to the given map values (generally a predecessor of
    // the block)
    VarToRegMap setInVarToRegMap(unsigned int bbNum, VarToRegMap srcVarToRegMap);

    regNumber getTempRegForResolution(BasicBlock* fromBlock, BasicBlock* toBlock, var_types type);
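    // Illustrative (hypothetical) lookup using the per-block maps above -- find where
    // a variable lives on entry to a block:
    //
    //     VarToRegMap inMap = getInVarToRegMap(block->bbNum);
    //     regNumber   reg   = getVarReg(inMap, varNum);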
#ifdef DEBUG
    void dumpVarToRegMap(VarToRegMap map);
    void dumpInVarToRegMap(BasicBlock* block);
    void dumpOutVarToRegMap(BasicBlock* block);

    // There are three points at which a tuple-style dump is produced, and each
    // differs slightly:
    //   - In LSRA_DUMP_PRE, it does a simple dump of each node, with indications of what
    //     tree nodes are consumed.
    //   - In LSRA_DUMP_REFPOS, which is after the intervals are built, but before
    //     register allocation, each node is dumped, along with all of the RefPositions.
    //     The Intervals are identified as Lnnn for lclVar intervals, Innn for other
    //     intervals, and Tnnn for internal temps.
    //   - In LSRA_DUMP_POST, which is after register allocation, the registers are
    //     shown.
    enum LsraTupleDumpMode
    {
        LSRA_DUMP_PRE,
        LSRA_DUMP_REFPOS,
        LSRA_DUMP_POST
    };
    void lsraGetOperandString(GenTreePtr tree, LsraTupleDumpMode mode, char* operandString, unsigned operandStringLength);
    void lsraDispNode(GenTreePtr tree, LsraTupleDumpMode mode, bool hasDest);
    void TupleStyleDump(LsraTupleDumpMode mode);

    bool         dumpTerse;
    LsraLocation maxNodeLocation;

    // Width of various fields - used to create a streamlined dump during allocation that shows the
    // state of all the registers in columns.
    int regColumnWidth;
    int regTableIndent;

    const char* columnSeparator;
    const char* line;
    const char* leftBox;
    const char* middleBox;
    const char* rightBox;

    static const int MAX_FORMAT_CHARS = 12;
    char intervalNameFormat[MAX_FORMAT_CHARS];
    char regNameFormat[MAX_FORMAT_CHARS];
    char shortRefPositionFormat[MAX_FORMAT_CHARS];
    char emptyRefPositionFormat[MAX_FORMAT_CHARS];
    char indentFormat[MAX_FORMAT_CHARS];

    static const int MAX_LEGEND_FORMAT_CHARS = 25;
    char bbRefPosFormat[MAX_LEGEND_FORMAT_CHARS];
    char legendFormat[MAX_LEGEND_FORMAT_CHARS];

    // How many rows have we printed since last printing a "title row"?
    static const int MAX_ROWS_BETWEEN_TITLES = 50;
    int rowCountSinceLastTitle;

    void dumpRegRecordHeader();
    void dumpRegRecordTitle();
    void dumpRegRecordTitleLines();
    int  getLastUsedRegNumIndex();
    void dumpRegRecords();
    // An abbreviated RefPosition dump for printing with column-based register state
    void dumpRefPositionShort(RefPosition* refPosition, BasicBlock* currentBlock);
    // Print the number of spaces occupied by a dumpRefPositionShort()
    void dumpEmptyRefPosition();
    // A dump of Referent, in exactly regColumnWidth characters
    void dumpIntervalName(Interval* interval);

    // Events during the allocation phase that cause some dump output, which differs depending
    // upon whether dumpTerse is set:
    enum LsraDumpEvent
    {
        // Conflicting def/use
        LSRA_EVENT_DEFUSE_CONFLICT,
        LSRA_EVENT_DEFUSE_FIXED_DELAY_USE,
        LSRA_EVENT_DEFUSE_CASE1,
        LSRA_EVENT_DEFUSE_CASE2,
        LSRA_EVENT_DEFUSE_CASE3,
        LSRA_EVENT_DEFUSE_CASE4,
        LSRA_EVENT_DEFUSE_CASE5,
        LSRA_EVENT_DEFUSE_CASE6,

        // Spilling
        LSRA_EVENT_SPILL,
        LSRA_EVENT_SPILL_EXTENDED_LIFETIME,
        LSRA_EVENT_RESTORE_PREVIOUS_INTERVAL,
        LSRA_EVENT_RESTORE_PREVIOUS_INTERVAL_AFTER_SPILL,
        LSRA_EVENT_DONE_KILL_GC_REFS,

        // Block boundaries
        LSRA_EVENT_START_BB,
        LSRA_EVENT_END_BB,

        // Miscellaneous
        LSRA_EVENT_FREE_REGS,

        // Characteristics of the current RefPosition
        LSRA_EVENT_INCREMENT_RANGE_END, // ???
        LSRA_EVENT_LAST_USE,
        LSRA_EVENT_LAST_USE_DELAYED,
        LSRA_EVENT_NEEDS_NEW_REG,

        // Allocation decisions
        LSRA_EVENT_FIXED_REG,
        LSRA_EVENT_EXP_USE,
        LSRA_EVENT_ZERO_REF,
        LSRA_EVENT_NO_ENTRY_REG_ALLOCATED,
        LSRA_EVENT_KEPT_ALLOCATION,
        LSRA_EVENT_COPY_REG,
        LSRA_EVENT_MOVE_REG,
        LSRA_EVENT_ALLOC_REG,
        LSRA_EVENT_ALLOC_SPILLED_REG,
        LSRA_EVENT_NO_REG_ALLOCATED,
        LSRA_EVENT_RELOAD,
        LSRA_EVENT_SPECIAL_PUTARG,
        LSRA_EVENT_REUSE_REG,
    };
    void dumpLsraAllocationEvent(LsraDumpEvent event, Interval* interval = nullptr, regNumber reg = REG_NA, BasicBlock* currentBlock = nullptr);

    void dumpBlockHeader(BasicBlock* block);

    void validateIntervals();
#endif // DEBUG

    Compiler* compiler;

private:
#if MEASURE_MEM_ALLOC
    IAllocator* lsraIAllocator;
#endif

    IAllocator* getAllocator(Compiler* comp)
    {
#if MEASURE_MEM_ALLOC
        if (lsraIAllocator == nullptr)
        {
            lsraIAllocator = new (comp, CMK_LSRA) CompAllocator(comp, CMK_LSRA);
        }
        return lsraIAllocator;
#else
        return comp->getAllocator();
#endif
    }

#ifdef DEBUG
    unsigned int intervalCount;
    // This is used for dumping
    RefPosition* activeRefPosition;
#endif // DEBUG

    unsigned int refPositionCount;

    IntervalList intervals;

    RegRecord physRegs[REG_COUNT];

    Interval** localVarIntervals;

    // Set of blocks that have been visited.
    BlockSet bbVisitedSet;
    void markBlockVisited(BasicBlock* block)
    {
        BlockSetOps::AddElemD(compiler, bbVisitedSet, block->bbNum);
    }
    void clearVisitedBlocks()
    {
        BlockSetOps::ClearD(compiler, bbVisitedSet);
    }
    bool isBlockVisited(BasicBlock* block)
    {
        return BlockSetOps::IsMember(compiler, bbVisitedSet, block->bbNum);
    }

    // A map from bbNum to the block information used during register allocation.
    LsraBlockInfo* blockInfo;
    BasicBlock* findPredBlockForLiveIn(BasicBlock* block, BasicBlock* prevBlock DEBUG_ARG(bool* pPredBlockIsAllocated));

    // The order in which the blocks will be allocated.
    // This is an array of BasicBlock*, in the order in which they should be traversed.
    BasicBlock** blockSequence;
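    // Conceptual sketch (an assumption about how setBlockSequence() uses the helpers
    // above, not a quote from it): blocks are marked as they are appended, so any
    // block not yet visited can be detected and appended at the end:
    //
    //     if (!isBlockVisited(block))
    //     {
    //         markBlockVisited(block);
    //         // append 'block' to blockSequence
    //     }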
    // The verifiedAllBBs flag indicates whether we have verified that all BBs have been
    // included in the blockSequence above, during setBlockSequence().
    bool verifiedAllBBs;

    void setBlockSequence();

    BasicBlockList* blockSequenceWorkList;
    bool            blockSequencingDone;
    void addToBlockSequenceWorkList(BasicBlock* block);
    void removeFromBlockSequenceWorkList(BasicBlockList* listNode, BasicBlockList* prevNode);
    BasicBlock* getNextCandidateFromWorkList();

    // The bbNum of the block being currently allocated or resolved.
    unsigned int curBBNum;
    // The ordinal of the block we're on (i.e. this is the curBBSeqNum-th block we've allocated).
    unsigned int curBBSeqNum;
    // The number of blocks that we've sequenced.
    unsigned int bbSeqCount;
    // The Location of the start of the current block.
    LsraLocation curBBStartLocation;

    // Ordered list of RefPositions
    RefPositionList refPositions;

    // Per-block variable location mappings: an array indexed by block number that yields a
    // pointer to an array of regNumber, one per variable.
    VarToRegMap* inVarToRegMaps;
    VarToRegMap* outVarToRegMaps;

    // A temporary VarToRegMap used during the resolution of critical edges.
    VarToRegMap sharedCriticalVarToRegMap;

    PhasedVar<regMaskTP> availableIntRegs;
    PhasedVar<regMaskTP> availableFloatRegs;
    PhasedVar<regMaskTP> availableDoubleRegs;

    // Current set of live tracked vars, used during building of RefPositions to determine
    // whether to preference callee-save registers.
    VARSET_TP currentLiveVars;
    // Set of floating point variables to consider for callee-save registers.
    VARSET_TP fpCalleeSaveCandidateVars;

#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
#if defined(_TARGET_AMD64_)
    static const var_types LargeVectorType     = TYP_SIMD32;
    static const var_types LargeVectorSaveType = TYP_SIMD16;
#elif defined(_TARGET_ARM64_)
    static const var_types LargeVectorType     = TYP_SIMD16;
    static const var_types LargeVectorSaveType = TYP_DOUBLE;
#else  // !defined(_TARGET_AMD64_) && !defined(_TARGET_ARM64_)
#error("Unknown target architecture for FEATURE_SIMD")
#endif // !defined(_TARGET_AMD64_) && !defined(_TARGET_ARM64_)

    // Set of large vector (TYP_SIMD32 on AVX) variables.
    VARSET_TP largeVectorVars;
    // Set of large vector (TYP_SIMD32 on AVX) variables to consider for callee-save registers.
    VARSET_TP largeVectorCalleeSaveCandidateVars;
#endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE
};

/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XX                                                                           XX
XX                           Interval                                        XX
XX                                                                           XX
XX  This is the fundamental data structure for linear scan register         XX
XX  allocation.  It represents the live range(s) for a variable or temp.    XX
XX                                                                           XX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
*/

class Interval : public Referenceable
{
public:
    // Initialize the interval
    void init()
    {
        memset(this, 0, sizeof(Interval));
        physReg = REG_COUNT;
    }

#ifdef DEBUG
    // print out representation
    void dump();
    // concise representation for embedding
    void tinyDump();
    // extremely concise representation
    void microDump();
#endif // DEBUG

    void setLocalNumber(unsigned localNum, LinearScan* l);

    // Fixed registers for which this Interval has a preference
    regMaskTP registerPreferences;

    // The relatedInterval is:
    //  - for any other interval, it is the interval to which this interval
    //    is currently preferenced (e.g. because they are related by a copy)
    Interval* relatedInterval;
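    // Illustrative (hypothetical) scenario: for a copy "v1 = v0", the Interval for v1
    // can be preferenced to v0's Interval via assignRelatedInterval() (below), so the
    // allocator will try to give both the same register and make the copy a no-op.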
    // The assignedReg is the RegRecord for the register to which this interval
    // has been assigned at some point - if the interval is active, this is the
    // register it currently occupies.
    RegRecord* assignedReg;

    // DECIDE: put this in a union or do something w/ inheritance?
    // this is an interval for a physical register, not an allocatable entity
    RegisterType registerType;
    bool isLocalVar : 1;
    // Indicates whether this interval has been assigned to different registers
    bool isSplit : 1;
    // Indicates whether this interval is ever spilled
    bool isSpilled : 1;
    // indicates an interval representing the internal requirements for
    // generating code for a node (temp registers internal to the node)
    // Note that this interval may live beyond a node in the GT_ARR_LENREF/GT_IND
    // case (though never lives beyond a stmt)
    bool isInternal : 1;
    // true if this is a LocalVar for a struct field
    bool isStructField : 1;
    // true iff this is a GT_LDOBJ for a fully promoted (PROMOTION_TYPE_INDEPENDENT) struct
    bool isPromotedStruct : 1;
    // true if this is an SDSU interval for which the def and use have conflicting register
    // requirements
    bool hasConflictingDefUse : 1;
    // true if this interval is defined by a non-commutative 2-operand instruction
    bool hasNonCommutativeRMWDef : 1;
    // True if this interval is defined by a putArg, whose source is a non-last-use lclVar.
    // During allocation, this flag will be cleared if the source is not already in the required register.
    // Otherwise, we will leave the register allocated to the lclVar, but mark the RegRecord as
    // isBusyUntilNextKill, so that it won't be reused if the lclVar goes dead before the call.
    bool isSpecialPutArg : 1;
    // True if this interval interferes with a call.
    bool preferCalleeSave : 1;
    // True if this interval is defined by a constant node that may be reused and/or may be
    // able to reuse a constant that's already in a register.
    bool isConstant : 1;

    // The register to which it is currently assigned.
    regNumber physReg;

#ifdef DEBUG
    unsigned int intervalIndex;
#endif // DEBUG

    unsigned int varNum; // This is the "variable number": the index into the lvaTable array

    LclVarDsc* getLocalVar(Compiler* comp)
    {
        assert(isLocalVar);
        return &(comp->lvaTable[this->varNum]);
    }

    // Get the local tracked variable "index" (lvVarIndex), used in bitmasks.
    unsigned getVarIndex(Compiler* comp)
    {
        LclVarDsc* varDsc = getLocalVar(comp);
        assert(varDsc->lvTracked); // If this isn't true, we shouldn't be calling this function!
        return varDsc->lvVarIndex;
    }

    bool isAssignedTo(regNumber regNum)
    {
        // This uses regMasks to handle the case where a double actually occupies two registers
        // TODO-Throughput: This could/should be done more cheaply.
        return (physReg != REG_NA && (genRegMask(physReg, registerType) & genRegMask(regNum)) != RBM_NONE);
    }

    // Assign the related interval.
    void assignRelatedInterval(Interval* newRelatedInterval)
    {
#ifdef DEBUG
        if (VERBOSE)
        {
            printf("Assigning related ");
            newRelatedInterval->microDump();
            printf(" to ");
            this->microDump();
            printf("\n");
        }
#endif // DEBUG
        relatedInterval = newRelatedInterval;
    }

    // Assign the related interval, but only if it isn't already assigned.
    void assignRelatedIntervalIfUnassigned(Interval* newRelatedInterval)
    {
        if (relatedInterval == nullptr)
        {
            assignRelatedInterval(newRelatedInterval);
        }
        else
        {
#ifdef DEBUG
            if (VERBOSE)
            {
                printf("Interval ");
                this->microDump();
                printf(" already has a related interval\n");
            }
#endif // DEBUG
        }
    }

    // Update the registerPreferences on the interval.
    // If the new preferences intersect the existing ones, narrow the preferences to
    // the intersection; that way maybe we'll get at least one of them.
    // An exception is made in the case where one of the existing or new
    // preferences are all callee-save, in which case we "prefer" the callee-save.
    void updateRegisterPreferences(regMaskTP preferences)
    {
        // We require registerPreferences to have been initialized.
        assert(registerPreferences != RBM_NONE);
        // It is invalid to update with empty preferences
        assert(preferences != RBM_NONE);

        regMaskTP commonPreferences = (registerPreferences & preferences);
        if (commonPreferences != RBM_NONE)
        {
            registerPreferences = commonPreferences;
            return;
        }

        // There are no preferences in common.
        // Preferences need to reflect both cases where a var must occupy a specific register,
        // as well as cases where a var is live when a register is killed.
        // In the former case, we would like to record all such registers, however we don't
        // really want to use any registers that will interfere.
        // To approximate this, we never "or" together multi-reg sets, which are generally kill sets.

        if (!genMaxOneBit(preferences))
        {
            // The new preference value is a multi-reg set, so it's probably a kill.
            // Keep the new value.
            registerPreferences = preferences;
            return;
        }

        if (!genMaxOneBit(registerPreferences))
        {
            // The old preference value is a multi-reg set.
            // Keep the existing preference set, as it probably reflects one or more kills.
            // It may have been a union of multiple individual registers, but we can't
            // distinguish that case without extra cost.
            return;
        }

        // If we reach here, we have two disjoint single-reg sets.
        // Keep only the callee-save preferences, if not empty.
        // Otherwise, take the union of the preferences.

        regMaskTP newPreferences = registerPreferences | preferences;

        if (preferCalleeSave)
        {
            regMaskTP calleeSaveMask = (calleeSaveRegs(this->registerType) & newPreferences);
            if (calleeSaveMask != RBM_NONE)
            {
                newPreferences = calleeSaveMask;
            }
        }
        registerPreferences = newPreferences;
    }
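    // Worked example (illustrative masks): if registerPreferences == (RBM_RAX|RBM_RCX)
    // and the incoming 'preferences' == RBM_RCX, the intersection is non-empty, so the
    // preference narrows to RBM_RCX. If instead the two are disjoint single-reg sets
    // and preferCalleeSave is false, the result is simply their union.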
};

class RefPosition
{
public:
    // A RefPosition refers to either an Interval or a RegRecord. 'referent' points to one
    // of these types. If it refers to a RegRecord, then 'isPhysRegRef' is true. If it
    // refers to an Interval, then 'isPhysRegRef' is false.
    //
    // Q: can 'referent' be NULL?
    Referenceable* referent;

    Interval* getInterval()
    {
        assert(!isPhysRegRef);
        return (Interval*)referent;
    }
    void setInterval(Interval* i)
    {
        referent     = i;
        isPhysRegRef = false;
    }

    RegRecord* getReg()
    {
        assert(isPhysRegRef);
        return (RegRecord*)referent;
    }
    void setReg(RegRecord* r)
    {
        referent           = r;
        isPhysRegRef       = true;
        registerAssignment = genRegMask(r->regNum);
    }

    // nextRefPosition is the next in code order.
    // Note that in either case there is no need for these to be doubly linked, as they
    // are only traversed in the forward direction, and are not moved.
    RefPosition* nextRefPosition;

    // The remaining fields are common to both options
    GenTree*     treeNode;
    unsigned int bbNum;
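    // Note (illustrative): setReg() above also seeds registerAssignment (below) with
    // the single register's mask; e.g., after setReg() on the RegRecord for REG_RCX,
    // registerAssignment == RBM_RCX.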
    LsraLocation nodeLocation;

    // Prior to the allocation pass, registerAssignment captures the valid registers
    // for this RefPosition.  An empty set means that any register is valid.  A non-empty
    // set means that it must be one of the given registers (may be the full set if the
    // only constraint is that it must reside in SOME register).
    // After the allocation pass, this contains the actual assignment.
    regMaskTP registerAssignment;

    regNumber assignedReg()
    {
        return genRegNumFromMask(registerAssignment);
    }

    RefType refType;

    bool RequiresRegister()
    {
        return (refType == RefTypeDef || refType == RefTypeUse
#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
                || refType == RefTypeUpperVectorSaveDef || refType == RefTypeUpperVectorSaveUse
#endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE
                );
    }

    // Last Use - this may be true for multiple RefPositions in the same Interval
    bool lastUse : 1;

    // Spill and Copy info
    //   reload indicates that the value was spilled, and must be reloaded here.
    //   spillAfter indicates that the value is spilled here, so a spill must be added.
    //   copyReg indicates that the value needs to be copied to a specific register,
    //     but that it will also retain its current assigned register.
    //   moveReg indicates that the value needs to be moved to a different register,
    //     and that this will be its new assigned register.
    // A RefPosition may have any flag individually or the following combinations:
    //   - reload and spillAfter (i.e. it remains in memory), but not in combination with copyReg or moveReg
    //     (reload cannot exist with copyReg or moveReg; it should be reloaded into the appropriate reg)
    //   - spillAfter and copyReg (i.e. it must be copied to a new reg for use, but is then spilled)
    //   - spillAfter and moveReg (i.e. it must be both spilled and moved)
    //     NOTE: a moveReg involves an explicit move, and would usually not be needed for a fixed Reg if it is going
    //     to be spilled, because the code generator will do the move to the fixed register, and doesn't need to
    //     record the new register location as the new "home" location of the lclVar. However, if there is a
    //     conflicting use at the same location (e.g. lclVar V1 is in rdx and needs to be in rcx, but V2 needs to be
    //     in rdx), then we need an explicit move.
    //   - copyReg and moveReg must not exist with each other.

    bool reload     : 1;
    bool spillAfter : 1;
    bool copyReg    : 1;
    bool moveReg    : 1; // true if this var is moved to a new register

    bool isPhysRegRef  : 1; // true if 'referent' points to a RegRecord, false if it points to an Interval
    bool isFixedRegRef : 1;
    bool isLocalDefUse : 1;

    // delayRegFree indicates that the register should not be freed right away, but instead wait
    // until the next Location after it would normally be freed.  This is used for the case of
    // non-commutative binary operators, where op2 must not be assigned the same register as
    // the target.  We do this by not freeing it until after the target has been defined.
    // Another option would be to actually change the Location of the op2 use until the same
    // Location as the def, but then it could potentially reuse a register that has been freed
    // from the other source(s), e.g. if it's a lastUse or spilled.
    bool delayRegFree : 1;

    // outOfOrder is marked on a (non-def) RefPosition that doesn't follow a definition of the
    // register currently assigned to the Interval.  This happens when we use the assigned
    // register from a predecessor that is not the most recently allocated BasicBlock.
    bool outOfOrder : 1;

    LsraLocation getRefEndLocation()
    {
        return delayRegFree ? nodeLocation + 1 : nodeLocation;
    }
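    // For illustration (hypothetical locations): for "t = x - y" the use of 'y' may be
    // marked delayRegFree; if that use is at location 10, getRefEndLocation() returns 11,
    // so y's register stays busy through the def of 't' and cannot be reused for it.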
#ifdef DEBUG
    // The unique RefPosition number, equal to its index in the refPositions list.
    // Only used for debugging dumps.
    unsigned rpNum;
#endif // DEBUG

    bool isIntervalRef()
    {
        return (!isPhysRegRef && (referent != nullptr));
    }

    // isTrueDef indicates that the RefPosition is a non-update def of a non-internal
    // interval
    bool isTrueDef()
    {
        return (refType == RefTypeDef && isIntervalRef() && !getInterval()->isInternal);
    }

    // isFixedRefOfRegMask indicates that the RefPosition has a fixed assignment to the register
    // specified by the given mask
    bool isFixedRefOfRegMask(regMaskTP regMask)
    {
        assert(genMaxOneBit(regMask));
        return (registerAssignment == regMask);
    }

    // isFixedRefOfReg indicates that the RefPosition has a fixed assignment to the given register
    bool isFixedRefOfReg(regNumber regNum)
    {
        return (isFixedRefOfRegMask(genRegMask(regNum)));
    }

#ifdef DEBUG
    // operator= copies everything except 'rpNum', which must remain unique
    RefPosition& operator=(const RefPosition& rp)
    {
        unsigned rpNumSave = rpNum;
        memcpy(this, &rp, sizeof(rp));
        rpNum = rpNumSave;
        return *this;
    }

    void dump();
#endif // DEBUG
};

#ifdef DEBUG
void dumpRegMask(regMaskTP regs);
#endif // DEBUG

/*****************************************************************************/
#endif //_LSRA_H_
/*****************************************************************************/