// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XX                                                                           XX
XX                                  RegAlloc                                 XX
XX                                                                           XX
XX  Does the register allocation and puts the remaining lclVars on the stack XX
XX                                                                           XX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
*/

#include "jitpch.h"
#ifdef _MSC_VER
#pragma hdrstop
#endif
#include "regalloc.h"

#if FEATURE_FP_REGALLOC
Compiler::enumConfigRegisterFP Compiler::raConfigRegisterFP()
{
    DWORD val = JitConfig.JitRegisterFP();

    return (enumConfigRegisterFP)(val & 0x3);
}
#endif // FEATURE_FP_REGALLOC

regMaskTP Compiler::raConfigRestrictMaskFP()
{
    regMaskTP result = RBM_NONE;

#if FEATURE_FP_REGALLOC
    switch (raConfigRegisterFP())
    {
        case CONFIG_REGISTER_FP_NONE:
            result = RBM_NONE;
            break;
        case CONFIG_REGISTER_FP_CALLEE_TRASH:
            result = RBM_FLT_CALLEE_TRASH;
            break;
        case CONFIG_REGISTER_FP_CALLEE_SAVED:
            result = RBM_FLT_CALLEE_SAVED;
            break;
        case CONFIG_REGISTER_FP_FULL:
            result = RBM_ALLFLOAT;
            break;
    }
#endif

    return result;
}

#if DOUBLE_ALIGN
DWORD Compiler::getCanDoubleAlign()
{
#ifdef DEBUG
    if (compStressCompile(STRESS_DBL_ALN, 20))
        return MUST_DOUBLE_ALIGN;

    return JitConfig.JitDoubleAlign();
#else
    return DEFAULT_DOUBLE_ALIGN;
#endif
}

//------------------------------------------------------------------------
// shouldDoubleAlign: Determine whether to double-align the frame
//
// Arguments:
//    refCntStk       - sum of     ref counts for all stack based variables
//    refCntEBP       - sum of     ref counts for EBP enregistered variables
//    refCntWtdEBP    - sum of wtd ref counts for EBP enregistered variables
//    refCntStkParam  - sum of     ref counts for all stack based parameters
//    refCntWtdStkDbl - sum of wtd ref counts for stack based doubles (including structs
//                      with double fields).
//
// Return Value:
//    Returns true if this method estimates that a double-aligned frame would be beneficial
//
// Notes:
//    The impact of a double-aligned frame is computed as follows:
//    - We save a byte of code for each parameter reference (they are frame-pointer relative)
//    - We pay a byte of code for each non-parameter stack reference.
//    - We save the misalignment penalty and possible cache-line crossing penalty.
//      This is estimated as 0 for SMALL_CODE, 16 for FAST_CODE and 4 otherwise.
//    - We pay 7 extra bytes for:
//        MOV EBP,ESP,
//        LEA ESP,[EBP-offset]
//        AND ESP,-8 to double align ESP
//    - We pay one extra memory reference for each variable that could have been enregistered in EBP (refCntWtdEBP).
//
//    If the misalignment penalty is estimated to be less than the bytes used, we don't double align.
//    Otherwise, we compare the weighted ref count of ebp-enregistered variables against double the
//    ref count for double-aligned values.
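//
//    Illustrative example (hypothetical counts, not taken from any particular method): with
//    refCntStk = 20, refCntEBP = 10, refCntStkParam = 5 and the 7-byte setup cost, bytesUsed is
//    20 + 10 - 5 + 7 = 32.  Under FAST_CODE the misaligned_weight is 16, so double alignment is
//    only considered when refCntWtdStkDbl exceeds 32 * BB_UNITY_WEIGHT / 16 = 2 unity-weighted
//    references, and it is then chosen only if refCntWtdEBP is at most twice refCntWtdStkDbl.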
// bool Compiler::shouldDoubleAlign( unsigned refCntStk, unsigned refCntEBP, unsigned refCntWtdEBP, unsigned refCntStkParam, unsigned refCntWtdStkDbl) { bool doDoubleAlign = false; const unsigned DBL_ALIGN_SETUP_SIZE = 7; unsigned bytesUsed = refCntStk + refCntEBP - refCntStkParam + DBL_ALIGN_SETUP_SIZE; unsigned misaligned_weight = 4; if (compCodeOpt() == Compiler::SMALL_CODE) misaligned_weight = 0; if (compCodeOpt() == Compiler::FAST_CODE) misaligned_weight *= 4; JITDUMP("\nDouble alignment:\n"); JITDUMP(" Bytes that could be saved by not using EBP frame: %i\n", bytesUsed); JITDUMP(" Sum of weighted ref counts for EBP enregistered variables: %i\n", refCntWtdEBP); JITDUMP(" Sum of weighted ref counts for weighted stack based doubles: %i\n", refCntWtdStkDbl); if (bytesUsed > ((refCntWtdStkDbl * misaligned_weight) / BB_UNITY_WEIGHT)) { JITDUMP(" Predicting not to double-align ESP to save %d bytes of code.\n", bytesUsed); } else if (refCntWtdEBP > refCntWtdStkDbl * 2) { // TODO-CQ: On P4 2 Proc XEON's, SciMark.FFT degrades if SciMark.FFT.transform_internal is // not double aligned. // Here are the numbers that make this not double-aligned. // refCntWtdStkDbl = 0x164 // refCntWtdEBP = 0x1a4 // We think we do need to change the heuristic to be in favor of double-align. JITDUMP(" Predicting not to double-align ESP to allow EBP to be used to enregister variables.\n"); } else { // OK we passed all of the benefit tests, so we'll predict a double aligned frame. JITDUMP(" Predicting to create a double-aligned frame\n"); doDoubleAlign = true; } return doDoubleAlign; } #endif // DOUBLE_ALIGN #ifdef LEGACY_BACKEND // We don't use any of the old register allocator functions when LSRA is used instead. void Compiler::raInit() { #if FEATURE_STACK_FP_X87 /* We have not assigned any FP variables to registers yet */ VarSetOps::AssignNoCopy(this, optAllFPregVars, VarSetOps::UninitVal()); #endif codeGen->intRegState.rsIsFloat = false; codeGen->floatRegState.rsIsFloat = true; rpReverseEBPenreg = false; rpAsgVarNum = -1; rpPassesMax = 6; rpPassesPessimize = rpPassesMax - 3; if (opts.compDbgCode) { rpPassesMax++; } rpStkPredict = (unsigned)-1; rpFrameType = FT_NOT_SET; rpLostEnreg = false; rpMustCreateEBPCalled = false; rpRegAllocDone = false; rpMaskPInvokeEpilogIntf = RBM_NONE; rpPredictMap[PREDICT_NONE] = RBM_NONE; rpPredictMap[PREDICT_ADDR] = RBM_NONE; #if FEATURE_FP_REGALLOC rpPredictMap[PREDICT_REG] = RBM_ALLINT | RBM_ALLFLOAT; rpPredictMap[PREDICT_SCRATCH_REG] = RBM_ALLINT | RBM_ALLFLOAT; #else rpPredictMap[PREDICT_REG] = RBM_ALLINT; rpPredictMap[PREDICT_SCRATCH_REG] = RBM_ALLINT; #endif #define REGDEF(name, rnum, mask, sname) rpPredictMap[PREDICT_REG_##name] = RBM_##name; #include "register.h" #if defined(_TARGET_ARM_) rpPredictMap[PREDICT_PAIR_R0R1] = RBM_R0 | RBM_R1; rpPredictMap[PREDICT_PAIR_R2R3] = RBM_R2 | RBM_R3; rpPredictMap[PREDICT_REG_SP] = RBM_ILLEGAL; #elif defined(_TARGET_AMD64_) rpPredictMap[PREDICT_NOT_REG_EAX] = RBM_ALLINT & ~RBM_EAX; rpPredictMap[PREDICT_NOT_REG_ECX] = RBM_ALLINT & ~RBM_ECX; rpPredictMap[PREDICT_REG_ESP] = RBM_ILLEGAL; #elif defined(_TARGET_X86_) rpPredictMap[PREDICT_NOT_REG_EAX] = RBM_ALLINT & ~RBM_EAX; rpPredictMap[PREDICT_NOT_REG_ECX] = RBM_ALLINT & ~RBM_ECX; rpPredictMap[PREDICT_REG_ESP] = RBM_ILLEGAL; rpPredictMap[PREDICT_PAIR_EAXEDX] = RBM_EAX | RBM_EDX; rpPredictMap[PREDICT_PAIR_ECXEBX] = RBM_ECX | RBM_EBX; #endif rpBestRecordedPrediction = NULL; } /***************************************************************************** * * The following table(s) determines the 
order in which registers are considered * for variables to live in */ const regNumber* Compiler::raGetRegVarOrder(var_types regType, unsigned* wbVarOrderSize) { #if FEATURE_FP_REGALLOC if (varTypeIsFloating(regType)) { static const regNumber raRegVarOrderFlt[] = {REG_VAR_ORDER_FLT}; const unsigned raRegVarOrderFltSize = sizeof(raRegVarOrderFlt) / sizeof(raRegVarOrderFlt[0]); if (wbVarOrderSize != NULL) *wbVarOrderSize = raRegVarOrderFltSize; return &raRegVarOrderFlt[0]; } else #endif { static const regNumber raRegVarOrder[] = {REG_VAR_ORDER}; const unsigned raRegVarOrderSize = sizeof(raRegVarOrder) / sizeof(raRegVarOrder[0]); if (wbVarOrderSize != NULL) *wbVarOrderSize = raRegVarOrderSize; return &raRegVarOrder[0]; } } #ifdef DEBUG /***************************************************************************** * * Dump out the variable interference graph * */ void Compiler::raDumpVarIntf() { unsigned lclNum; LclVarDsc* varDsc; printf("Var. interference graph for %s\n", info.compFullName); for (lclNum = 0, varDsc = lvaTable; lclNum < lvaCount; lclNum++, varDsc++) { /* Ignore the variable if it's not tracked */ if (!varDsc->lvTracked) continue; /* Get hold of the index and the interference mask for the variable */ unsigned varIndex = varDsc->lvVarIndex; printf(" V%02u,T%02u and ", lclNum, varIndex); unsigned refIndex; for (refIndex = 0; refIndex < lvaTrackedCount; refIndex++) { if (VarSetOps::IsMember(this, lvaVarIntf[varIndex], refIndex)) printf("T%02u ", refIndex); else printf(" "); } printf("\n"); } printf("\n"); } /***************************************************************************** * * Dump out the register interference graph * */ void Compiler::raDumpRegIntf() { printf("Reg. interference graph for %s\n", info.compFullName); unsigned lclNum; LclVarDsc* varDsc; for (lclNum = 0, varDsc = lvaTable; lclNum < lvaCount; lclNum++, varDsc++) { unsigned varNum; /* Ignore the variable if it's not tracked */ if (!varDsc->lvTracked) continue; /* Get hold of the index and the interference mask for the variable */ varNum = varDsc->lvVarIndex; printf(" V%02u,T%02u and ", lclNum, varNum); if (varDsc->IsFloatRegType()) { #if !FEATURE_STACK_FP_X87 for (regNumber regNum = REG_FP_FIRST; regNum <= REG_FP_LAST; regNum = REG_NEXT(regNum)) { if (VarSetOps::IsMember(this, raLclRegIntf[regNum], varNum)) printf("%3s ", getRegName(regNum, true)); else printf(" "); } #endif } else { for (regNumber regNum = REG_INT_FIRST; regNum <= REG_INT_LAST; regNum = REG_NEXT(regNum)) { if (VarSetOps::IsMember(this, raLclRegIntf[regNum], varNum)) printf("%3s ", getRegName(regNum)); else printf(" "); } } printf("\n"); } printf("\n"); } #endif // DEBUG /***************************************************************************** * * We'll adjust the ref counts based on interference * */ void Compiler::raAdjustVarIntf() { // This method was not correct and has been disabled. return; } /*****************************************************************************/ /*****************************************************************************/ /* Determine register mask for a call/return from type. 
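 *
 *  For example (illustrative): a TYP_LONG call result comes back in the RBM_LNGRET pair (EDX:EAX on
 *  x86), a TYP_DOUBLE in RBM_DOUBLERET, and an HFA return occupies GetHfaCount() consecutive
 *  floating point registers starting at REG_FLOATRET, which is why the HFA path below builds its
 *  mask as ((1 << retSlots) - 1) << REG_FLOATRET.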
*/ inline regMaskTP Compiler::genReturnRegForTree(GenTreePtr tree) { var_types type = tree->TypeGet(); if (type == TYP_STRUCT && IsHfa(tree)) { int retSlots = GetHfaCount(tree); return ((1 << retSlots) - 1) << REG_FLOATRET; } const static regMaskTP returnMap[TYP_COUNT] = { RBM_ILLEGAL, // TYP_UNDEF, RBM_NONE, // TYP_VOID, RBM_INTRET, // TYP_BOOL, RBM_INTRET, // TYP_CHAR, RBM_INTRET, // TYP_BYTE, RBM_INTRET, // TYP_UBYTE, RBM_INTRET, // TYP_SHORT, RBM_INTRET, // TYP_USHORT, RBM_INTRET, // TYP_INT, RBM_INTRET, // TYP_UINT, RBM_LNGRET, // TYP_LONG, RBM_LNGRET, // TYP_ULONG, RBM_FLOATRET, // TYP_FLOAT, RBM_DOUBLERET, // TYP_DOUBLE, RBM_INTRET, // TYP_REF, RBM_INTRET, // TYP_BYREF, RBM_INTRET, // TYP_ARRAY, RBM_ILLEGAL, // TYP_STRUCT, RBM_ILLEGAL, // TYP_BLK, RBM_ILLEGAL, // TYP_LCLBLK, RBM_ILLEGAL, // TYP_PTR, RBM_ILLEGAL, // TYP_FNC, RBM_ILLEGAL, // TYP_UNKNOWN, }; assert((unsigned)type < sizeof(returnMap) / sizeof(returnMap[0])); assert(returnMap[TYP_LONG] == RBM_LNGRET); assert(returnMap[TYP_DOUBLE] == RBM_DOUBLERET); assert(returnMap[TYP_REF] == RBM_INTRET); assert(returnMap[TYP_STRUCT] == RBM_ILLEGAL); regMaskTP result = returnMap[type]; assert(result != RBM_ILLEGAL); return result; } /*****************************************************************************/ /****************************************************************************/ #ifdef DEBUG static void dispLifeSet(Compiler* comp, VARSET_VALARG_TP mask, VARSET_VALARG_TP life) { unsigned lclNum; LclVarDsc* varDsc; for (lclNum = 0, varDsc = comp->lvaTable; lclNum < comp->lvaCount; lclNum++, varDsc++) { if (!varDsc->lvTracked) continue; if (!VarSetOps::IsMember(comp, mask, varDsc->lvVarIndex)) continue; if (VarSetOps::IsMember(comp, life, varDsc->lvVarIndex)) printf("V%02u ", lclNum); } } #endif /*****************************************************************************/ #ifdef DEBUG /***************************************************************************** * * Debugging helpers - display variables liveness info. 
*/ void dispFPvarsInBBlist(BasicBlock* beg, BasicBlock* end, VARSET_TP mask, Compiler* comp) { do { printf("BB%02u: ", beg->bbNum); printf(" in = [ "); dispLifeSet(comp, mask, beg->bbLiveIn); printf("] ,"); printf(" out = [ "); dispLifeSet(comp, mask, beg->bbLiveOut); printf("]"); if (beg->bbFlags & BBF_VISITED) printf(" inner=%u", beg->bbFPinVars); printf("\n"); beg = beg->bbNext; if (!beg) return; } while (beg != end); } #if FEATURE_STACK_FP_X87 void Compiler::raDispFPlifeInfo() { BasicBlock* block; for (block = fgFirstBB; block; block = block->bbNext) { GenTreePtr stmt; printf("BB%02u: in = [ ", block->bbNum); dispLifeSet(this, optAllFloatVars, block->bbLiveIn); printf("]\n\n"); VARSET_TP VARSET_INIT(this, life, block->bbLiveIn); for (stmt = block->bbTreeList; stmt; stmt = stmt->gtNext) { GenTreePtr tree; noway_assert(stmt->gtOper == GT_STMT); for (tree = stmt->gtStmt.gtStmtList; tree; tree = tree->gtNext) { VarSetOps::AssignNoCopy(this, life, fgUpdateLiveSet(life, tree)); dispLifeSet(this, optAllFloatVars, life); printf(" "); gtDispTree(tree, 0, NULL, true); } printf("\n"); } printf("BB%02u: out = [ ", block->bbNum); dispLifeSet(this, optAllFloatVars, block->bbLiveOut); printf("]\n\n"); } } #endif // FEATURE_STACK_FP_X87 /*****************************************************************************/ #endif // DEBUG /*****************************************************************************/ /*****************************************************************************/ void Compiler::raSetRegVarOrder( var_types regType, regNumber* customVarOrder, unsigned* customVarOrderSize, regMaskTP prefReg, regMaskTP avoidReg) { unsigned normalVarOrderSize; const regNumber* normalVarOrder = raGetRegVarOrder(regType, &normalVarOrderSize); unsigned index; unsigned listIndex = 0; regMaskTP usedReg = avoidReg; noway_assert(*customVarOrderSize >= normalVarOrderSize); if (prefReg) { /* First place the preferred registers at the start of customVarOrder */ regMaskTP regBit; regNumber regNum; for (index = 0; index < normalVarOrderSize; index++) { regNum = normalVarOrder[index]; regBit = genRegMask(regNum); if (usedReg & regBit) continue; if (prefReg & regBit) { usedReg |= regBit; noway_assert(listIndex < normalVarOrderSize); customVarOrder[listIndex++] = regNum; prefReg -= regBit; if (prefReg == 0) break; } } #if CPU_HAS_BYTE_REGS /* Then if byteable registers are preferred place them */ if (prefReg & RBM_BYTE_REG_FLAG) { for (index = 0; index < normalVarOrderSize; index++) { regNum = normalVarOrder[index]; regBit = genRegMask(regNum); if (usedReg & regBit) continue; if (RBM_BYTE_REGS & regBit) { usedReg |= regBit; noway_assert(listIndex < normalVarOrderSize); customVarOrder[listIndex++] = regNum; } } } #endif // CPU_HAS_BYTE_REGS } /* Now place all the non-preferred registers */ for (index = 0; index < normalVarOrderSize; index++) { regNumber regNum = normalVarOrder[index]; regMaskTP regBit = genRegMask(regNum); if (usedReg & regBit) continue; usedReg |= regBit; noway_assert(listIndex < normalVarOrderSize); customVarOrder[listIndex++] = regNum; } if (avoidReg) { /* Now place the "avoid" registers */ for (index = 0; index < normalVarOrderSize; index++) { regNumber regNum = normalVarOrder[index]; regMaskTP regBit = genRegMask(regNum); if (avoidReg & regBit) { noway_assert(listIndex < normalVarOrderSize); customVarOrder[listIndex++] = regNum; avoidReg -= regBit; if (avoidReg == 0) break; } } } *customVarOrderSize = listIndex; noway_assert(listIndex == normalVarOrderSize); } 
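// Illustrative example for raSetRegVarOrder above (hypothetical register order): if the normal
// integer order were EAX, EDX, ECX, EBX, ESI, EDI, with prefReg = {ECX} and avoidReg = {EDI},
// the customVarOrder built would be ECX, EAX, EDX, EBX, ESI, EDI - the preferred registers
// first, then the remaining registers in their normal order, and the "avoid" registers
// appended last.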
/*****************************************************************************
 *
 *  Setup the raAvoidArgRegMask and rsCalleeRegArgMaskLiveIn
 */

void Compiler::raSetupArgMasks(RegState* regState)
{
    /* Determine the registers holding incoming register arguments */
    /*  and setup raAvoidArgRegMask to the set of registers that we */
    /*  may want to avoid when enregistering the locals.            */

    regState->rsCalleeRegArgMaskLiveIn = RBM_NONE;
    raAvoidArgRegMask                  = RBM_NONE;

    LclVarDsc* argsEnd = lvaTable + info.compArgsCount;

    for (LclVarDsc* argDsc = lvaTable; argDsc < argsEnd; argDsc++)
    {
        noway_assert(argDsc->lvIsParam);

        // Is it a register argument ?
        if (!argDsc->lvIsRegArg)
            continue;

        // only process args that apply to the current register file
        if ((argDsc->IsFloatRegType() && !info.compIsVarArgs && !opts.compUseSoftFP) != regState->rsIsFloat)
        {
            continue;
        }

        // Is it dead on entry?
        // In certain cases such as when compJmpOpUsed is true,
        // or when we have a generic type context arg that we must report
        // then the arguments have to be kept alive throughout the prolog.
        // So we have to consider it as live on entry.
        //
        bool keepArgAlive = compJmpOpUsed;
        if ((unsigned(info.compTypeCtxtArg) != BAD_VAR_NUM) && lvaReportParamTypeArg() &&
            ((lvaTable + info.compTypeCtxtArg) == argDsc))
        {
            keepArgAlive = true;
        }

        if (!keepArgAlive && argDsc->lvTracked &&
            !VarSetOps::IsMember(this, fgFirstBB->bbLiveIn, argDsc->lvVarIndex))
        {
            continue;
        }

        // The code to set the regState for each arg is outlined for shared use
        // by linear scan
        regNumber inArgReg = raUpdateRegStateForArg(regState, argDsc);

        // Do we need to try to avoid this incoming arg register?

        // If it's not tracked, don't do the stuff below.
        if (!argDsc->lvTracked)
            continue;

        // If the incoming arg is used after a call it is live across
        //  a call and will have to be allocated to a caller saved
        //  register anyway (a very common case).
        //
        // In this case it is pointless to ask the higher ref count
        //  locals to avoid using the incoming arg register

        unsigned argVarIndex = argDsc->lvVarIndex;

        /* Do the incoming register and the arg variable interfere? */

        if (!VarSetOps::IsMember(this, raLclRegIntf[inArgReg], argVarIndex))
        {
            // No they do not interfere,
            //  so we add inArgReg to raAvoidArgRegMask

            raAvoidArgRegMask |= genRegMask(inArgReg);
        }
#ifdef _TARGET_ARM_
        if (argDsc->lvType == TYP_DOUBLE)
        {
            // Avoid the double register argument pair for register allocation.
            if (!VarSetOps::IsMember(this, raLclRegIntf[inArgReg + 1], argVarIndex))
            {
                raAvoidArgRegMask |= genRegMask(static_cast<regNumber>(inArgReg + 1));
            }
        }
#endif
    }
}

#endif // LEGACY_BACKEND

// The code to set the regState for each arg is outlined for shared use
// by linear scan. (It is not shared for System V AMD64 platform.)
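// Conceptually (illustrative, not an exhaustive description): for an ARM TYP_LONG argument passed
// in R0 this records both R0 and R1 in rsCalleeRegArgMaskLiveIn, an HFA argument records each of
// its consecutive floating point argument registers, and a multi-reg struct records every integer
// argument register the struct occupies; the first register of the argument is what gets returned.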
regNumber Compiler::raUpdateRegStateForArg(RegState* regState, LclVarDsc* argDsc) { regNumber inArgReg = argDsc->lvArgReg; regMaskTP inArgMask = genRegMask(inArgReg); if (regState->rsIsFloat) { noway_assert(inArgMask & RBM_FLTARG_REGS); } else // regState is for the integer registers { // This might be the fixed return buffer register argument (on ARM64) // We check and allow inArgReg to be theFixedRetBuffReg if (hasFixedRetBuffReg() && (inArgReg == theFixedRetBuffReg())) { // We should have a TYP_BYREF or TYP_I_IMPL arg and not a TYP_STRUCT arg noway_assert(argDsc->lvType == TYP_BYREF || argDsc->lvType == TYP_I_IMPL); // We should have recorded the variable number for the return buffer arg noway_assert(info.compRetBuffArg != BAD_VAR_NUM); } else // we have a regular arg { noway_assert(inArgMask & RBM_ARG_REGS); } } regState->rsCalleeRegArgMaskLiveIn |= inArgMask; #ifdef _TARGET_ARM_ if (argDsc->lvType == TYP_DOUBLE) { if (info.compIsVarArgs || opts.compUseSoftFP) { assert((inArgReg == REG_R0) || (inArgReg == REG_R2)); assert(!regState->rsIsFloat); } else { assert(regState->rsIsFloat); assert(emitter::isDoubleReg(inArgReg)); } regState->rsCalleeRegArgMaskLiveIn |= genRegMask((regNumber)(inArgReg + 1)); } else if (argDsc->lvType == TYP_LONG) { assert((inArgReg == REG_R0) || (inArgReg == REG_R2)); assert(!regState->rsIsFloat); regState->rsCalleeRegArgMaskLiveIn |= genRegMask((regNumber)(inArgReg + 1)); } #endif // _TARGET_ARM_ #if FEATURE_MULTIREG_ARGS if (argDsc->lvType == TYP_STRUCT) { if (argDsc->lvIsHfaRegArg()) { assert(regState->rsIsFloat); unsigned cSlots = GetHfaCount(argDsc->lvVerTypeInfo.GetClassHandleForValueClass()); for (unsigned i = 1; i < cSlots; i++) { assert(inArgReg + i <= LAST_FP_ARGREG); regState->rsCalleeRegArgMaskLiveIn |= genRegMask(static_cast(inArgReg + i)); } } else { unsigned cSlots = argDsc->lvSize() / TARGET_POINTER_SIZE; for (unsigned i = 1; i < cSlots; i++) { regNumber nextArgReg = (regNumber)(inArgReg + i); if (nextArgReg > REG_ARG_LAST) { break; } assert(regState->rsIsFloat == false); regState->rsCalleeRegArgMaskLiveIn |= genRegMask(nextArgReg); } } } #endif // FEATURE_MULTIREG_ARGS return inArgReg; } #ifdef LEGACY_BACKEND // We don't use any of the old register allocator functions when LSRA is used instead. /***************************************************************************** * * Assign variables to live in registers, etc. */ void Compiler::raAssignVars() { #ifdef DEBUG if (verbose) printf("*************** In raAssignVars()\n"); #endif /* We need to keep track of which registers we ever touch */ codeGen->regSet.rsClearRegsModified(); #if FEATURE_STACK_FP_X87 // FP register allocation raEnregisterVarsStackFP(); raGenerateFPRefCounts(); #endif /* Predict registers used by code generation */ rpPredictRegUse(); // New reg predictor/allocator // Change all unused promoted non-argument struct locals to a non-GC type (in this case TYP_INT) // so that the gc tracking logic and lvMustInit logic will ignore them. unsigned lclNum; LclVarDsc* varDsc; for (lclNum = 0, varDsc = lvaTable; lclNum < lvaCount; lclNum++, varDsc++) { if (varDsc->lvType != TYP_STRUCT) continue; if (!varDsc->lvPromoted) continue; if (varDsc->lvIsParam) continue; if (varDsc->lvRefCnt > 0) continue; #ifdef DEBUG if (verbose) { printf("Mark unused struct local V%02u\n", lclNum); } lvaPromotionType promotionType = lvaGetPromotionType(varDsc); if (promotionType == PROMOTION_TYPE_DEPENDENT) { // This should only happen when all its field locals are unused as well. 
for (unsigned varNum = varDsc->lvFieldLclStart; varNum < varDsc->lvFieldLclStart + varDsc->lvFieldCnt; varNum++) { noway_assert(lvaTable[varNum].lvRefCnt == 0); } } else { noway_assert(promotionType == PROMOTION_TYPE_INDEPENDENT); } varDsc->lvUnusedStruct = 1; #endif // Change such struct locals to ints varDsc->lvType = TYP_INT; // Bash to a non-gc type. noway_assert(!varDsc->lvTracked); noway_assert(!varDsc->lvRegister); varDsc->lvOnFrame = false; // Force it not to be onstack. varDsc->lvMustInit = false; // Force not to init it. varDsc->lvStkOffs = 0; // Set it to anything other than BAD_STK_OFFS to make genSetScopeInfo() happy } } /*****************************************************************************/ /*****************************************************************************/ /***************************************************************************** * * Given a regNumber return the correct predictReg enum value */ inline static rpPredictReg rpGetPredictForReg(regNumber reg) { return (rpPredictReg)(((int)reg) + ((int)PREDICT_REG_FIRST)); } /***************************************************************************** * * Given a varIndex return the correct predictReg enum value */ inline static rpPredictReg rpGetPredictForVarIndex(unsigned varIndex) { return (rpPredictReg)(varIndex + ((int)PREDICT_REG_VAR_T00)); } /***************************************************************************** * * Given a rpPredictReg return the correct varNumber value */ inline static unsigned rpGetVarIndexForPredict(rpPredictReg predict) { return (unsigned)predict - (unsigned)PREDICT_REG_VAR_T00; } /***************************************************************************** * * Given a rpPredictReg return true if it specifies a Txx register */ inline static bool rpHasVarIndexForPredict(rpPredictReg predict) { if ((predict >= PREDICT_REG_VAR_T00) && (predict <= PREDICT_REG_VAR_MAX)) return true; else return false; } /***************************************************************************** * * Given a regmask return the correct predictReg enum value */ static rpPredictReg rpGetPredictForMask(regMaskTP regmask) { rpPredictReg result = PREDICT_NONE; if (regmask != 0) /* Check if regmask has zero bits set */ { if (((regmask - 1) & regmask) == 0) /* Check if regmask has one bit set */ { DWORD reg = 0; assert(FitsIn(regmask)); BitScanForward(®, (DWORD)regmask); return rpGetPredictForReg((regNumber)reg); } #if defined(_TARGET_ARM_) /* It has multiple bits set */ else if (regmask == (RBM_R0 | RBM_R1)) { result = PREDICT_PAIR_R0R1; } else if (regmask == (RBM_R2 | RBM_R3)) { result = PREDICT_PAIR_R2R3; } #elif defined(_TARGET_X86_) /* It has multiple bits set */ else if (regmask == (RBM_EAX | RBM_EDX)) { result = PREDICT_PAIR_EAXEDX; } else if (regmask == (RBM_ECX | RBM_EBX)) { result = PREDICT_PAIR_ECXEBX; } #endif else /* It doesn't match anything */ { result = PREDICT_NONE; assert(!"unreachable"); NO_WAY("bad regpair"); } } return result; } /***************************************************************************** * * Record a variable to register(s) interference */ bool Compiler::rpRecordRegIntf(regMaskTP regMask, VARSET_VALARG_TP life DEBUGARG(const char* msg)) { bool addedIntf = false; if (regMask != 0) { for (regNumber regNum = REG_FIRST; regNum < REG_COUNT; regNum = REG_NEXT(regNum)) { regMaskTP regBit = genRegMask(regNum); if (regMask & regBit) { VARSET_TP VARSET_INIT_NOCOPY(newIntf, VarSetOps::Diff(this, life, raLclRegIntf[regNum])); if (!VarSetOps::IsEmpty(this, newIntf)) { 
#ifdef DEBUG if (verbose) { VARSET_ITER_INIT(this, newIntfIter, newIntf, varNum); while (newIntfIter.NextElem(this, &varNum)) { unsigned lclNum = lvaTrackedToVarNum[varNum]; LclVarDsc* varDsc = &lvaTable[varNum]; #if FEATURE_FP_REGALLOC // Only print the useful interferences // i.e. floating point LclVar interference with floating point registers // or integer LclVar interference with general purpose registers if (varTypeIsFloating(varDsc->TypeGet()) == genIsValidFloatReg(regNum)) #endif { printf("Record interference between V%02u,T%02u and %s -- %s\n", lclNum, varNum, getRegName(regNum), msg); } } } #endif addedIntf = true; VarSetOps::UnionD(this, raLclRegIntf[regNum], newIntf); } regMask -= regBit; if (regMask == 0) break; } } } return addedIntf; } /***************************************************************************** * * Record a new variable to variable(s) interference */ bool Compiler::rpRecordVarIntf(unsigned varNum, VARSET_VALARG_TP intfVar DEBUGARG(const char* msg)) { noway_assert((varNum >= 0) && (varNum < lvaTrackedCount)); noway_assert(!VarSetOps::IsEmpty(this, intfVar)); VARSET_TP VARSET_INIT_NOCOPY(oneVar, VarSetOps::MakeEmpty(this)); VarSetOps::AddElemD(this, oneVar, varNum); bool newIntf = fgMarkIntf(intfVar, oneVar); if (newIntf) rpAddedVarIntf = true; #ifdef DEBUG if (verbose && newIntf) { for (unsigned oneNum = 0; oneNum < lvaTrackedCount; oneNum++) { if (VarSetOps::IsMember(this, intfVar, oneNum)) { unsigned lclNum = lvaTrackedToVarNum[varNum]; unsigned lclOne = lvaTrackedToVarNum[oneNum]; printf("Record interference between V%02u,T%02u and V%02u,T%02u -- %s\n", lclNum, varNum, lclOne, oneNum, msg); } } } #endif return newIntf; } /***************************************************************************** * * Determine preferred register mask for a given predictReg value */ inline regMaskTP Compiler::rpPredictRegMask(rpPredictReg predictReg, var_types type) { if (rpHasVarIndexForPredict(predictReg)) predictReg = PREDICT_REG; noway_assert((unsigned)predictReg < sizeof(rpPredictMap) / sizeof(rpPredictMap[0])); noway_assert(rpPredictMap[predictReg] != RBM_ILLEGAL); regMaskTP regAvailForType = rpPredictMap[predictReg]; if (varTypeIsFloating(type)) { regAvailForType &= RBM_ALLFLOAT; } else { regAvailForType &= RBM_ALLINT; } #ifdef _TARGET_ARM_ if (type == TYP_DOUBLE) { if ((predictReg >= PREDICT_REG_F0) && (predictReg <= PREDICT_REG_F31)) { // Fix 388433 ARM JitStress WP7 if ((regAvailForType & RBM_DBL_REGS) != 0) { regAvailForType |= (regAvailForType << 1); } else { regAvailForType = RBM_NONE; } } } #endif return regAvailForType; } /***************************************************************************** * * Predict register choice for a type. * * Adds the predicted registers to rsModifiedRegsMask. 
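 *
 *  When every register in the preferred set is locked or reserved, the prediction falls back to a
 *  predefined spill choice register and increments rpPredictSpillCnt, so (illustratively) asking
 *  for an integer register while all of RBM_ALLINT is locked still yields a mask; codegen will
 *  later introduce a spill temp for it.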
*/ regMaskTP Compiler::rpPredictRegPick(var_types type, rpPredictReg predictReg, regMaskTP lockedRegs) { regMaskTP preferReg = rpPredictRegMask(predictReg, type); regNumber regNum; regMaskTP regBits; // Add any reserved register to the lockedRegs lockedRegs |= codeGen->regSet.rsMaskResvd; /* Clear out the lockedRegs from preferReg */ preferReg &= ~lockedRegs; if (rpAsgVarNum != -1) { noway_assert((rpAsgVarNum >= 0) && (rpAsgVarNum < (int)lclMAX_TRACKED)); /* Don't pick the register used by rpAsgVarNum either */ LclVarDsc* tgtVar = lvaTable + lvaTrackedToVarNum[rpAsgVarNum]; noway_assert(tgtVar->lvRegNum != REG_STK); preferReg &= ~genRegMask(tgtVar->lvRegNum); } switch (type) { case TYP_BOOL: case TYP_BYTE: case TYP_UBYTE: case TYP_SHORT: case TYP_CHAR: case TYP_INT: case TYP_UINT: case TYP_REF: case TYP_BYREF: #ifdef _TARGET_AMD64_ case TYP_LONG: #endif // _TARGET_AMD64_ // expand preferReg to all non-locked registers if no bits set preferReg = codeGen->regSet.rsUseIfZero(preferReg & RBM_ALLINT, RBM_ALLINT & ~lockedRegs); if (preferReg == 0) // no bits set? { // Add one predefined spill choice register if no bits set. // (The jit will introduce one spill temp) preferReg |= RBM_SPILL_CHOICE; rpPredictSpillCnt++; #ifdef DEBUG if (verbose) printf("Predict one spill temp\n"); #endif } if (preferReg != 0) { /* Iterate the registers in the order specified by rpRegTmpOrder */ for (unsigned index = 0; index < REG_TMP_ORDER_COUNT; index++) { regNum = rpRegTmpOrder[index]; regBits = genRegMask(regNum); if ((preferReg & regBits) == regBits) { goto RET; } } } /* Otherwise we have allocated all registers, so do nothing */ break; #ifndef _TARGET_AMD64_ case TYP_LONG: if ((preferReg == 0) || // no bits set? ((preferReg & (preferReg - 1)) == 0)) // or only one bit set? { // expand preferReg to all non-locked registers preferReg = RBM_ALLINT & ~lockedRegs; } if (preferReg == 0) // no bits set? { // Add EAX:EDX to the registers // (The jit will introduce two spill temps) preferReg = RBM_PAIR_TMP; rpPredictSpillCnt += 2; #ifdef DEBUG if (verbose) printf("Predict two spill temps\n"); #endif } else if ((preferReg & (preferReg - 1)) == 0) // only one bit set? { if ((preferReg & RBM_PAIR_TMP_LO) == 0) { // Add EAX to the registers // (The jit will introduce one spill temp) preferReg |= RBM_PAIR_TMP_LO; } else { // Add EDX to the registers // (The jit will introduce one spill temp) preferReg |= RBM_PAIR_TMP_HI; } rpPredictSpillCnt++; #ifdef DEBUG if (verbose) printf("Predict one spill temp\n"); #endif } regPairNo regPair; regPair = codeGen->regSet.rsFindRegPairNo(preferReg); if (regPair != REG_PAIR_NONE) { regBits = genRegPairMask(regPair); goto RET; } /* Otherwise we have allocated all registers, so do nothing */ break; #endif // _TARGET_AMD64_ #ifdef _TARGET_ARM_ case TYP_STRUCT: #endif case TYP_FLOAT: case TYP_DOUBLE: #if FEATURE_FP_REGALLOC regMaskTP restrictMask; restrictMask = (raConfigRestrictMaskFP() | RBM_FLT_CALLEE_TRASH); assert((restrictMask & RBM_SPILL_CHOICE_FLT) == RBM_SPILL_CHOICE_FLT); // expand preferReg to all available non-locked registers if no bits set preferReg = codeGen->regSet.rsUseIfZero(preferReg & restrictMask, restrictMask & ~lockedRegs); regMaskTP preferDouble; preferDouble = preferReg & (preferReg >> 1); if ((preferReg == 0) // no bits set? #ifdef _TARGET_ARM_ || ((type == TYP_DOUBLE) && ((preferReg & (preferReg >> 1)) == 0)) // or two consecutive bits set for TYP_DOUBLE #endif ) { // Add one predefined spill choice register if no bits set. 
// (The jit will introduce one spill temp) preferReg |= RBM_SPILL_CHOICE_FLT; rpPredictSpillCnt++; #ifdef DEBUG if (verbose) printf("Predict one spill temp (float)\n"); #endif } assert(preferReg != 0); /* Iterate the registers in the order specified by raRegFltTmpOrder */ for (unsigned index = 0; index < REG_FLT_TMP_ORDER_COUNT; index++) { regNum = raRegFltTmpOrder[index]; regBits = genRegMask(regNum); if (varTypeIsFloating(type)) { #ifdef _TARGET_ARM_ if (type == TYP_DOUBLE) { if ((regBits & RBM_DBL_REGS) == 0) { continue; // We must restrict the set to the double registers } else { // TYP_DOUBLE use two consecutive registers regBits |= genRegMask(REG_NEXT(regNum)); } } #endif // See if COMPlus_JitRegisterFP is restricting this FP register // if ((restrictMask & regBits) != regBits) continue; } if ((preferReg & regBits) == regBits) { goto RET; } } /* Otherwise we have allocated all registers, so do nothing */ break; #else // !FEATURE_FP_REGALLOC return RBM_NONE; #endif default: noway_assert(!"unexpected type in reg use prediction"); } /* Abnormal return */ noway_assert(!"Ran out of registers in rpPredictRegPick"); return RBM_NONE; RET: /* * If during the first prediction we need to allocate * one of the registers that we used for coloring locals * then flag this by setting rpPredictAssignAgain. * We will have to go back and repredict the registers */ if ((rpPasses == 0) && ((rpPredictAssignMask & regBits) == regBits)) rpPredictAssignAgain = true; // Add a register interference to each of the last use variables if (!VarSetOps::IsEmpty(this, rpLastUseVars) || !VarSetOps::IsEmpty(this, rpUseInPlace)) { VARSET_TP VARSET_INIT_NOCOPY(lastUse, VarSetOps::MakeEmpty(this)); VarSetOps::Assign(this, lastUse, rpLastUseVars); VARSET_TP VARSET_INIT_NOCOPY(inPlaceUse, VarSetOps::MakeEmpty(this)); VarSetOps::Assign(this, inPlaceUse, rpUseInPlace); // While we still have any lastUse or inPlaceUse bits VARSET_TP VARSET_INIT_NOCOPY(useUnion, VarSetOps::Union(this, lastUse, inPlaceUse)); VARSET_TP VARSET_INIT_NOCOPY(varAsSet, VarSetOps::MakeEmpty(this)); VARSET_ITER_INIT(this, iter, useUnion, varNum); while (iter.NextElem(this, &varNum)) { // We'll need this for one of the calls... VarSetOps::ClearD(this, varAsSet); VarSetOps::AddElemD(this, varAsSet, varNum); // If this varBit and lastUse? if (VarSetOps::IsMember(this, lastUse, varNum)) { // Record a register to variable interference rpRecordRegIntf(regBits, varAsSet DEBUGARG("last use RegPick")); } // If this varBit and inPlaceUse? if (VarSetOps::IsMember(this, inPlaceUse, varNum)) { // Record a register to variable interference rpRecordRegIntf(regBits, varAsSet DEBUGARG("used in place RegPick")); } } } codeGen->regSet.rsSetRegsModified(regBits); return regBits; } /***************************************************************************** * * Predict integer register use for generating an address mode for a tree, * by setting tree->gtUsedRegs to all registers used by this tree and its * children. * tree - is the child of a GT_IND node * type - the type of the GT_IND node (floating point/integer) * lockedRegs - are the registers which are currently held by * a previously evaluated node. 
* rsvdRegs - registers which should not be allocated because they will * be needed to evaluate a node in the future * - Also if rsvdRegs has the RBM_LASTUSE bit set then * the rpLastUseVars set should be saved and restored * so that we don't add any new variables to rpLastUseVars * lenCSE - is non-NULL only when we have a lenCSE expression * * Return the scratch registers to be held by this tree. (one or two registers * to form an address expression) */ regMaskTP Compiler::rpPredictAddressMode( GenTreePtr tree, var_types type, regMaskTP lockedRegs, regMaskTP rsvdRegs, GenTreePtr lenCSE) { GenTreePtr op1; GenTreePtr op2; GenTreePtr opTemp; genTreeOps oper = tree->OperGet(); regMaskTP op1Mask; regMaskTP op2Mask; regMaskTP regMask; ssize_t sh; ssize_t cns = 0; bool rev; bool hasTwoAddConst = false; bool restoreLastUseVars = false; VARSET_TP VARSET_INIT_NOCOPY(oldLastUseVars, VarSetOps::MakeEmpty(this)); /* do we need to save and restore the rpLastUseVars set ? */ if ((rsvdRegs & RBM_LASTUSE) && (lenCSE == NULL)) { restoreLastUseVars = true; VarSetOps::Assign(this, oldLastUseVars, rpLastUseVars); } rsvdRegs &= ~RBM_LASTUSE; /* if not an add, then just force it to a register */ if (oper != GT_ADD) { if (oper == GT_ARR_ELEM) { regMask = rpPredictTreeRegUse(tree, PREDICT_NONE, lockedRegs, rsvdRegs); goto DONE; } else { goto NO_ADDR_EXPR; } } op1 = tree->gtOp.gtOp1; op2 = tree->gtOp.gtOp2; rev = ((tree->gtFlags & GTF_REVERSE_OPS) != 0); /* look for (x + y) + icon address mode */ if (op2->OperGet() == GT_CNS_INT) { cns = op2->gtIntCon.gtIconVal; /* if not an add, then just force op1 into a register */ if (op1->OperGet() != GT_ADD) goto ONE_ADDR_EXPR; hasTwoAddConst = true; /* Record the 'rev' flag, reverse evaluation order */ rev = ((op1->gtFlags & GTF_REVERSE_OPS) != 0); op2 = op1->gtOp.gtOp2; op1 = op1->gtOp.gtOp1; // Overwrite op1 last!! } /* Check for CNS_INT or LSH of CNS_INT in op2 slot */ sh = 0; if (op2->OperGet() == GT_LSH) { if (op2->gtOp.gtOp2->OperGet() == GT_CNS_INT) { sh = op2->gtOp.gtOp2->gtIntCon.gtIconVal; opTemp = op2->gtOp.gtOp1; } else { opTemp = NULL; } } else { opTemp = op2; } if (opTemp != NULL) { if (opTemp->OperGet() == GT_NOP) { opTemp = opTemp->gtOp.gtOp1; } // Is this a const operand? 
if (opTemp->OperGet() == GT_CNS_INT) { // Compute the new cns value that Codegen will end up using cns += (opTemp->gtIntCon.gtIconVal << sh); goto ONE_ADDR_EXPR; } } /* Check for LSH in op1 slot */ if (op1->OperGet() != GT_LSH) goto TWO_ADDR_EXPR; opTemp = op1->gtOp.gtOp2; if (opTemp->OperGet() != GT_CNS_INT) goto TWO_ADDR_EXPR; sh = opTemp->gtIntCon.gtIconVal; /* Check for LSH of 0, special case */ if (sh == 0) goto TWO_ADDR_EXPR; #if defined(_TARGET_XARCH_) /* Check for LSH of 1 2 or 3 */ if (sh > 3) goto TWO_ADDR_EXPR; #elif defined(_TARGET_ARM_) /* Check for LSH of 1 to 30 */ if (sh > 30) goto TWO_ADDR_EXPR; #else goto TWO_ADDR_EXPR; #endif /* Matched a leftShift by 'sh' subtree, move op1 down */ op1 = op1->gtOp.gtOp1; TWO_ADDR_EXPR: /* Now we have to evaluate op1 and op2 into registers */ /* Evaluate op1 and op2 in the correct order */ if (rev) { op2Mask = rpPredictTreeRegUse(op2, PREDICT_REG, lockedRegs, rsvdRegs | op1->gtRsvdRegs); op1Mask = rpPredictTreeRegUse(op1, PREDICT_REG, lockedRegs | op2Mask, rsvdRegs); } else { op1Mask = rpPredictTreeRegUse(op1, PREDICT_REG, lockedRegs, rsvdRegs | op2->gtRsvdRegs); op2Mask = rpPredictTreeRegUse(op2, PREDICT_REG, lockedRegs | op1Mask, rsvdRegs); } /* If op1 and op2 must be spilled and reloaded then * op1 and op2 might be reloaded into the same register * This can only happen when all the registers are lockedRegs */ if ((op1Mask == op2Mask) && (op1Mask != 0)) { /* We'll need to grab a different register for op2 */ op2Mask = rpPredictRegPick(TYP_INT, PREDICT_REG, op1Mask); } #ifdef _TARGET_ARM_ // On the ARM we need a scratch register to evaluate the shifted operand for trees that have this form // [op2 + op1<validDispForLdSt(cns, type)) { op2Mask |= rpPredictRegPick(TYP_INT, PREDICT_REG, (lockedRegs | op1Mask | op2Mask)); } // // If we create a CSE that immediately dies then we may need to add an additional register interference // so we don't color the CSE into R3 // if (!rev && (op1Mask != RBM_NONE) && (op2->OperGet() == GT_COMMA)) { opTemp = op2->gtOp.gtOp2; if (opTemp->OperGet() == GT_LCL_VAR) { unsigned varNum = opTemp->gtLclVar.gtLclNum; LclVarDsc* varDsc = &lvaTable[varNum]; if (varDsc->lvTracked && !VarSetOps::IsMember(this, compCurLife, varDsc->lvVarIndex)) { rpRecordRegIntf(RBM_TMP_0, VarSetOps::MakeSingleton(this, varDsc->lvVarIndex) DEBUGARG("dead CSE (gt_ind)")); } } } #endif regMask = (op1Mask | op2Mask); tree->gtUsedRegs = (regMaskSmall)regMask; goto DONE; ONE_ADDR_EXPR: /* now we have to evaluate op1 into a register */ op1Mask = rpPredictTreeRegUse(op1, PREDICT_REG, lockedRegs, rsvdRegs); op2Mask = RBM_NONE; #ifdef _TARGET_ARM_ // // On the ARM we will need another scratch register when we have an 'cns' that is too large for the ld/st // instruction // if (!codeGen->validDispForLdSt(cns, type)) { op2Mask |= rpPredictRegPick(TYP_INT, PREDICT_REG, (lockedRegs | op1Mask | op2Mask)); } #endif regMask = (op1Mask | op2Mask); tree->gtUsedRegs = (regMaskSmall)regMask; goto DONE; NO_ADDR_EXPR: #if !CPU_LOAD_STORE_ARCH if (oper == GT_CNS_INT) { /* Indirect of a constant does not require a register */ regMask = RBM_NONE; } else #endif { /* now we have to evaluate tree into a register */ regMask = rpPredictTreeRegUse(tree, PREDICT_REG, lockedRegs, rsvdRegs); } DONE: regMaskTP regUse = tree->gtUsedRegs; if (!VarSetOps::IsEmpty(this, compCurLife)) { // Add interference between the current set of life variables and // the set of temporary registers need to evaluate the sub tree if (regUse) { rpRecordRegIntf(regUse, compCurLife DEBUGARG("tmp 
use (gt_ind)")); } } /* Do we need to resore the oldLastUseVars value */ if (restoreLastUseVars) { /* * If we used a GT_ASG targeted register then we need to add * a variable interference between any new last use variables * and the GT_ASG targeted register */ if (!VarSetOps::Equal(this, rpLastUseVars, oldLastUseVars) && rpAsgVarNum != -1) { rpRecordVarIntf(rpAsgVarNum, VarSetOps::Diff(this, rpLastUseVars, oldLastUseVars) DEBUGARG("asgn conflict (gt_ind)")); } VarSetOps::Assign(this, rpLastUseVars, oldLastUseVars); } return regMask; } /***************************************************************************** * * */ void Compiler::rpPredictRefAssign(unsigned lclNum) { LclVarDsc* varDsc = lvaTable + lclNum; varDsc->lvRefAssign = 1; #if NOGC_WRITE_BARRIERS #ifdef DEBUG if (verbose) { if (!VarSetOps::IsMember(this, raLclRegIntf[REG_EDX], varDsc->lvVarIndex)) printf("Record interference between V%02u,T%02u and REG WRITE BARRIER -- ref assign\n", lclNum, varDsc->lvVarIndex); } #endif /* Make sure that write barrier pointer variables never land in EDX */ VarSetOps::AddElemD(this, raLclRegIntf[REG_EDX], varDsc->lvVarIndex); #endif // NOGC_WRITE_BARRIERS } /***************************************************************************** * * Predict the internal temp physical register usage for a block assignment tree, * by setting tree->gtUsedRegs. * Records the internal temp physical register usage for this tree. * Returns a mask of interfering registers for this tree. * * Each of the switch labels in this function updates regMask and assigns tree->gtUsedRegs * to the set of scratch registers needed when evaluating the tree. * Generally tree->gtUsedRegs and the return value retMask are the same, except when the * parameter "lockedRegs" conflicts with the computed tree->gtUsedRegs, in which case we * predict additional internal temp physical registers to spill into. * * tree - is the child of a GT_IND node * predictReg - what type of register does the tree need * lockedRegs - are the registers which are currently held by a previously evaluated node. * Don't modify lockedRegs as it is used at the end to compute a spill mask. * rsvdRegs - registers which should not be allocated because they will * be needed to evaluate a node in the future * - Also, if rsvdRegs has the RBM_LASTUSE bit set then * the rpLastUseVars set should be saved and restored * so that we don't add any new variables to rpLastUseVars. */ regMaskTP Compiler::rpPredictBlkAsgRegUse(GenTreePtr tree, rpPredictReg predictReg, regMaskTP lockedRegs, regMaskTP rsvdRegs) { regMaskTP regMask = RBM_NONE; regMaskTP interferingRegs = RBM_NONE; bool hasGCpointer = false; bool dstIsOnStack = false; bool useMemHelper = false; bool useBarriers = false; GenTreeBlk* dst = tree->gtGetOp1()->AsBlk(); GenTreePtr dstAddr = dst->Addr(); GenTreePtr srcAddrOrFill = tree->gtGetOp2IfPresent(); size_t blkSize = dst->gtBlkSize; hasGCpointer = (dst->HasGCPtr()); bool isCopyBlk = tree->OperIsCopyBlkOp(); bool isCopyObj = isCopyBlk && hasGCpointer; bool isInitBlk = tree->OperIsInitBlkOp(); if (isCopyBlk) { assert(srcAddrOrFill->OperIsIndir()); srcAddrOrFill = srcAddrOrFill->AsIndir()->Addr(); } else { // For initBlk, we don't need to worry about the GC pointers. 
hasGCpointer = false; } if (blkSize != 0) { if (isCopyObj) { dstIsOnStack = (dstAddr->gtOper == GT_ADDR && (dstAddr->gtFlags & GTF_ADDR_ONSTACK)); } if (isInitBlk) { if (srcAddrOrFill->OperGet() != GT_CNS_INT) { useMemHelper = true; } } } else { useMemHelper = true; } if (hasGCpointer && !dstIsOnStack) { useBarriers = true; } #ifdef _TARGET_ARM_ // // On ARM For COPYBLK & INITBLK we have special treatment for constant lengths. // if (!useMemHelper && !useBarriers) { bool useLoop = false; unsigned fullStoreCount = blkSize / TARGET_POINTER_SIZE; // A mask to use to force the predictor to choose low registers (to reduce code size) regMaskTP avoidReg = (RBM_R12 | RBM_LR); // Allow the src and dst to be used in place, unless we use a loop, in which // case we will need scratch registers as we will be writing to them. rpPredictReg srcAndDstPredict = PREDICT_REG; // Will we be using a loop to implement this INITBLK/COPYBLK? if ((isCopyBlk && (fullStoreCount >= 8)) || (isInitBlk && (fullStoreCount >= 16))) { useLoop = true; avoidReg = RBM_NONE; srcAndDstPredict = PREDICT_SCRATCH_REG; } if (tree->gtFlags & GTF_REVERSE_OPS) { regMask |= rpPredictTreeRegUse(srcAddrOrFill, srcAndDstPredict, lockedRegs, dstAddr->gtRsvdRegs | avoidReg | RBM_LASTUSE); regMask |= rpPredictTreeRegUse(dstAddr, srcAndDstPredict, lockedRegs | regMask, avoidReg); } else { regMask |= rpPredictTreeRegUse(dstAddr, srcAndDstPredict, lockedRegs, srcAddrOrFill->gtRsvdRegs | avoidReg | RBM_LASTUSE); regMask |= rpPredictTreeRegUse(srcAddrOrFill, srcAndDstPredict, lockedRegs | regMask, avoidReg); } // We need at least one scratch register for a copyBlk if (isCopyBlk) { // Pick a low register to reduce the code size regMask |= rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, lockedRegs | regMask | avoidReg); } if (useLoop) { if (isCopyBlk) { // We need a second temp register for a copyBlk (our code gen is load two/store two) // Pick another low register to reduce the code size regMask |= rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, lockedRegs | regMask | avoidReg); } // We need a loop index register regMask |= rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, lockedRegs | regMask); } tree->gtUsedRegs = dstAddr->gtUsedRegs | srcAddrOrFill->gtUsedRegs | (regMaskSmall)regMask; return interferingRegs; } #endif // What order should the Dest, Val/Src, and Size be calculated GenTreePtr opsPtr[3]; regMaskTP regsPtr[3]; #if defined(_TARGET_XARCH_) fgOrderBlockOps(tree, RBM_EDI, (isInitBlk) ? 
RBM_EAX : RBM_ESI, RBM_ECX, opsPtr, regsPtr); // We're going to use these, might as well make them available now codeGen->regSet.rsSetRegsModified(RBM_EDI | RBM_ECX); if (isCopyBlk) codeGen->regSet.rsSetRegsModified(RBM_ESI); #elif defined(_TARGET_ARM_) if (useMemHelper) { // For all other cases that involve non-constants, we just call memcpy/memset // JIT helpers fgOrderBlockOps(tree, RBM_ARG_0, RBM_ARG_1, RBM_ARG_2, opsPtr, regsPtr); interferingRegs |= RBM_CALLEE_TRASH; #ifdef DEBUG if (verbose) printf("Adding interference with RBM_CALLEE_TRASH for memcpy/memset\n"); #endif } else // useBarriers { assert(useBarriers); assert(isCopyBlk); fgOrderBlockOps(tree, RBM_ARG_0, RBM_ARG_1, REG_TMP_1, opsPtr, regsPtr); // For this case Codegen will call the CORINFO_HELP_ASSIGN_BYREF helper interferingRegs |= RBM_CALLEE_TRASH_NOGC; #ifdef DEBUG if (verbose) printf("Adding interference with RBM_CALLEE_TRASH_NOGC for Byref WriteBarrier\n"); #endif } #else // !_TARGET_X86_ && !_TARGET_ARM_ #error "Non-ARM or x86 _TARGET_ in RegPredict for INITBLK/COPYBLK" #endif // !_TARGET_X86_ && !_TARGET_ARM_ regMaskTP opsPtr2RsvdRegs = opsPtr[2] == nullptr ? RBM_NONE : opsPtr[2]->gtRsvdRegs; regMask |= rpPredictTreeRegUse(opsPtr[0], rpGetPredictForMask(regsPtr[0]), lockedRegs, opsPtr[1]->gtRsvdRegs | opsPtr2RsvdRegs | RBM_LASTUSE); regMask |= regsPtr[0]; opsPtr[0]->gtUsedRegs |= regsPtr[0]; rpRecordRegIntf(regsPtr[0], compCurLife DEBUGARG("movsd dest")); regMask |= rpPredictTreeRegUse(opsPtr[1], rpGetPredictForMask(regsPtr[1]), lockedRegs | regMask, opsPtr2RsvdRegs | RBM_LASTUSE); regMask |= regsPtr[1]; opsPtr[1]->gtUsedRegs |= regsPtr[1]; rpRecordRegIntf(regsPtr[1], compCurLife DEBUGARG("movsd src")); regMaskSmall opsPtr2UsedRegs = (regMaskSmall)regsPtr[2]; if (opsPtr[2] == nullptr) { // If we have no "size" node, we will predict that regsPtr[2] will be used for the size. // Note that it is quite possible that no register is required, but this preserves // former behavior. regMask |= rpPredictRegPick(TYP_INT, rpGetPredictForMask(regsPtr[2]), lockedRegs | regMask); rpRecordRegIntf(regsPtr[2], compCurLife DEBUGARG("tmp use")); } else { regMask |= rpPredictTreeRegUse(opsPtr[2], rpGetPredictForMask(regsPtr[2]), lockedRegs | regMask, RBM_NONE); opsPtr[2]->gtUsedRegs |= opsPtr2UsedRegs; } regMask |= opsPtr2UsedRegs; tree->gtUsedRegs = opsPtr[0]->gtUsedRegs | opsPtr[1]->gtUsedRegs | opsPtr2UsedRegs | (regMaskSmall)regMask; return interferingRegs; } /***************************************************************************** * * Predict the internal temp physical register usage for a tree by setting tree->gtUsedRegs. * Returns a regMask with the internal temp physical register usage for this tree. * * Each of the switch labels in this function updates regMask and assigns tree->gtUsedRegs * to the set of scratch registers needed when evaluating the tree. * Generally tree->gtUsedRegs and the return value retMask are the same, except when the * parameter "lockedRegs" conflicts with the computed tree->gtUsedRegs, in which case we * predict additional internal temp physical registers to spill into. * * tree - is the child of a GT_IND node * predictReg - what type of register does the tree need * lockedRegs - are the registers which are currently held by a previously evaluated node. * Don't modify lockedRegs as it is used at the end to compute a spill mask. 
* rsvdRegs - registers which should not be allocated because they will * be needed to evaluate a node in the future * - Also, if rsvdRegs has the RBM_LASTUSE bit set then * the rpLastUseVars set should be saved and restored * so that we don't add any new variables to rpLastUseVars. */ #pragma warning(disable : 4701) #ifdef _PREFAST_ #pragma warning(push) #pragma warning(disable : 21000) // Suppress PREFast warning about overly large function #endif regMaskTP Compiler::rpPredictTreeRegUse(GenTreePtr tree, rpPredictReg predictReg, regMaskTP lockedRegs, regMaskTP rsvdRegs) { regMaskTP regMask = DUMMY_INIT(RBM_ILLEGAL); regMaskTP op2Mask; regMaskTP tmpMask; rpPredictReg op1PredictReg; rpPredictReg op2PredictReg; LclVarDsc* varDsc = NULL; VARSET_TP VARSET_INIT_NOCOPY(oldLastUseVars, VarSetOps::UninitVal()); VARSET_TP VARSET_INIT_NOCOPY(varBits, VarSetOps::UninitVal()); VARSET_TP VARSET_INIT_NOCOPY(lastUseVarBits, VarSetOps::MakeEmpty(this)); bool restoreLastUseVars = false; regMaskTP interferingRegs = RBM_NONE; #ifdef DEBUG // if (verbose) printf("rpPredictTreeRegUse() [%08x]\n", tree); noway_assert(tree); noway_assert(((RBM_ILLEGAL & RBM_ALLINT) == 0)); noway_assert(RBM_ILLEGAL); noway_assert((lockedRegs & RBM_ILLEGAL) == 0); /* impossible values, to make sure that we set them */ tree->gtUsedRegs = RBM_ILLEGAL; #endif /* Figure out what kind of a node we have */ genTreeOps oper = tree->OperGet(); var_types type = tree->TypeGet(); unsigned kind = tree->OperKind(); // In the comma case, we care about whether this is "effectively" ADDR(IND(...)) genTreeOps effectiveOper = tree->gtEffectiveVal()->OperGet(); if ((predictReg == PREDICT_ADDR) && (effectiveOper != GT_IND)) predictReg = PREDICT_NONE; else if (rpHasVarIndexForPredict(predictReg)) { // The only place where predictReg is set to a var is in the PURE // assignment case where varIndex is the var being assigned to. // We need to check whether the variable is used between here and // its redefinition. unsigned varIndex = rpGetVarIndexForPredict(predictReg); unsigned lclNum = lvaTrackedToVarNum[varIndex]; bool found = false; for (GenTreePtr nextTree = tree->gtNext; nextTree != NULL && !found; nextTree = nextTree->gtNext) { if (nextTree->gtOper == GT_LCL_VAR && nextTree->gtLclVarCommon.gtLclNum == lclNum) { // Is this the pure assignment? if ((nextTree->gtFlags & GTF_VAR_DEF) == 0) { predictReg = PREDICT_SCRATCH_REG; } found = true; break; } } assert(found); } if (rsvdRegs & RBM_LASTUSE) { restoreLastUseVars = true; VarSetOps::Assign(this, oldLastUseVars, rpLastUseVars); rsvdRegs &= ~RBM_LASTUSE; } /* Is this a constant or leaf node? 
*/ if (kind & (GTK_CONST | GTK_LEAF)) { bool lastUse = false; regMaskTP enregMask = RBM_NONE; switch (oper) { #ifdef _TARGET_ARM_ case GT_CNS_DBL: // Codegen for floating point constants on the ARM is currently // movw/movt rT1, // movw/movt rT2, // vmov.i2d dT0, rT1,rT2 // // For TYP_FLOAT one integer register is required // // These integer register(s) immediately die tmpMask = rpPredictRegPick(TYP_INT, PREDICT_REG, lockedRegs | rsvdRegs); if (type == TYP_DOUBLE) { // For TYP_DOUBLE a second integer register is required // tmpMask |= rpPredictRegPick(TYP_INT, PREDICT_REG, lockedRegs | rsvdRegs | tmpMask); } // We also need a floating point register that we keep // if (predictReg == PREDICT_NONE) predictReg = PREDICT_SCRATCH_REG; regMask = rpPredictRegPick(type, predictReg, lockedRegs | rsvdRegs); tree->gtUsedRegs = regMask | tmpMask; goto RETURN_CHECK; #endif case GT_CNS_INT: case GT_CNS_LNG: if (rpHasVarIndexForPredict(predictReg)) { unsigned tgtIndex = rpGetVarIndexForPredict(predictReg); rpAsgVarNum = tgtIndex; // We don't need any register as we plan on writing to the rpAsgVarNum register predictReg = PREDICT_NONE; LclVarDsc* tgtVar = lvaTable + lvaTrackedToVarNum[tgtIndex]; tgtVar->lvDependReg = true; if (type == TYP_LONG) { assert(oper == GT_CNS_LNG); if (tgtVar->lvOtherReg == REG_STK) { // Well we do need one register for a partially enregistered type = TYP_INT; predictReg = PREDICT_SCRATCH_REG; } } } else { #if !CPU_LOAD_STORE_ARCH /* If the constant is a handle then it will need to have a relocation applied to it. It will need to be loaded into a register. But never throw away an existing hint. */ if (opts.compReloc && tree->IsCnsIntOrI() && tree->IsIconHandle()) #endif { if (predictReg == PREDICT_NONE) predictReg = PREDICT_SCRATCH_REG; } } break; case GT_NO_OP: break; case GT_CLS_VAR: if ((predictReg == PREDICT_NONE) && (genActualType(type) == TYP_INT) && (genTypeSize(type) < sizeof(int))) { predictReg = PREDICT_SCRATCH_REG; } #ifdef _TARGET_ARM_ // Unaligned loads/stores for floating point values must first be loaded into integer register(s) // if ((tree->gtFlags & GTF_IND_UNALIGNED) && varTypeIsFloating(type)) { // These integer register(s) immediately die tmpMask = rpPredictRegPick(TYP_INT, PREDICT_REG, lockedRegs | rsvdRegs); // Two integer registers are required for a TYP_DOUBLE if (type == TYP_DOUBLE) tmpMask |= rpPredictRegPick(TYP_INT, PREDICT_REG, lockedRegs | rsvdRegs | tmpMask); } // We need a temp register in some cases of loads/stores to a class var if (predictReg == PREDICT_NONE) { predictReg = PREDICT_SCRATCH_REG; } #endif if (rpHasVarIndexForPredict(predictReg)) { unsigned tgtIndex = rpGetVarIndexForPredict(predictReg); rpAsgVarNum = tgtIndex; // We don't need any register as we plan on writing to the rpAsgVarNum register predictReg = PREDICT_NONE; LclVarDsc* tgtVar = lvaTable + lvaTrackedToVarNum[tgtIndex]; tgtVar->lvDependReg = true; if (type == TYP_LONG) { if (tgtVar->lvOtherReg == REG_STK) { // Well we do need one register for a partially enregistered type = TYP_INT; predictReg = PREDICT_SCRATCH_REG; } } } break; case GT_LCL_FLD: #ifdef _TARGET_ARM_ // Check for a misalignment on a Floating Point field // if (varTypeIsFloating(type)) { if ((tree->gtLclFld.gtLclOffs % emitTypeSize(tree->TypeGet())) != 0) { // These integer register(s) immediately die tmpMask = rpPredictRegPick(TYP_INT, PREDICT_REG, lockedRegs | rsvdRegs); // Two integer registers are required for a TYP_DOUBLE if (type == TYP_DOUBLE) tmpMask |= rpPredictRegPick(TYP_INT, PREDICT_REG, lockedRegs 
| rsvdRegs | tmpMask); } } #endif __fallthrough; case GT_LCL_VAR: case GT_REG_VAR: varDsc = lvaTable + tree->gtLclVarCommon.gtLclNum; VarSetOps::Assign(this, varBits, fgGetVarBits(tree)); compUpdateLifeVar(tree, &lastUseVarBits); lastUse = !VarSetOps::IsEmpty(this, lastUseVarBits); #if FEATURE_STACK_FP_X87 // If it's a floating point var, there's nothing to do if (varTypeIsFloating(type)) { tree->gtUsedRegs = RBM_NONE; regMask = RBM_NONE; goto RETURN_CHECK; } #endif // If the variable is already a register variable, no need to go further. if (oper == GT_REG_VAR) break; /* Apply the type of predictReg to the LCL_VAR */ if (predictReg == PREDICT_REG) { PREDICT_REG_COMMON: if (varDsc->lvRegNum == REG_STK) break; goto GRAB_COUNT; } else if (predictReg == PREDICT_SCRATCH_REG) { noway_assert(predictReg == PREDICT_SCRATCH_REG); /* Is this the last use of a local var? */ if (lastUse) { if (VarSetOps::IsEmptyIntersection(this, rpUseInPlace, lastUseVarBits)) goto PREDICT_REG_COMMON; } } else if (rpHasVarIndexForPredict(predictReg)) { /* Get the tracked local variable that has an lvVarIndex of tgtIndex1 */ { unsigned tgtIndex1 = rpGetVarIndexForPredict(predictReg); LclVarDsc* tgtVar = lvaTable + lvaTrackedToVarNum[tgtIndex1]; VarSetOps::MakeSingleton(this, tgtIndex1); noway_assert(tgtVar->lvVarIndex == tgtIndex1); noway_assert(tgtVar->lvRegNum != REG_STK); /* Must have been enregistered */ #ifndef _TARGET_AMD64_ // On amd64 we have the occasional spec-allowed implicit conversion from TYP_I_IMPL to TYP_INT // so this assert is meaningless noway_assert((type != TYP_LONG) || (tgtVar->TypeGet() == TYP_LONG)); #endif // !_TARGET_AMD64_ if (varDsc->lvTracked) { unsigned srcIndex; srcIndex = varDsc->lvVarIndex; // If this register has it's last use here then we will prefer // to color to the same register as tgtVar. if (lastUse) { /* * Add an entry in the lvaVarPref graph to indicate * that it would be worthwhile to color these two variables * into the same physical register. 
* This will help us avoid having an extra copy instruction */ VarSetOps::AddElemD(this, lvaVarPref[srcIndex], tgtIndex1); VarSetOps::AddElemD(this, lvaVarPref[tgtIndex1], srcIndex); } // Add a variable interference from srcIndex to each of the last use variables if (!VarSetOps::IsEmpty(this, rpLastUseVars)) { rpRecordVarIntf(srcIndex, rpLastUseVars DEBUGARG("src reg conflict")); } } rpAsgVarNum = tgtIndex1; /* We will rely on the target enregistered variable from the GT_ASG */ varDsc = tgtVar; } GRAB_COUNT: unsigned grabCount; grabCount = 0; if (genIsValidFloatReg(varDsc->lvRegNum)) { enregMask = genRegMaskFloat(varDsc->lvRegNum, varDsc->TypeGet()); } else { enregMask = genRegMask(varDsc->lvRegNum); } #ifdef _TARGET_ARM_ if ((type == TYP_DOUBLE) && (varDsc->TypeGet() == TYP_FLOAT)) { // We need to compute the intermediate value using a TYP_DOUBLE // but we storing the result in a TYP_SINGLE enregistered variable // grabCount++; } else #endif { /* We can't trust a prediction of rsvdRegs or lockedRegs sets */ if (enregMask & (rsvdRegs | lockedRegs)) { grabCount++; } #ifndef _TARGET_64BIT_ if (type == TYP_LONG) { if (varDsc->lvOtherReg != REG_STK) { tmpMask = genRegMask(varDsc->lvOtherReg); enregMask |= tmpMask; /* We can't trust a prediction of rsvdRegs or lockedRegs sets */ if (tmpMask & (rsvdRegs | lockedRegs)) grabCount++; } else // lvOtherReg == REG_STK { grabCount++; } } #endif // _TARGET_64BIT_ } varDsc->lvDependReg = true; if (grabCount == 0) { /* Does not need a register */ predictReg = PREDICT_NONE; // noway_assert(!VarSetOps::IsEmpty(this, varBits)); VarSetOps::UnionD(this, rpUseInPlace, varBits); } else // (grabCount > 0) { #ifndef _TARGET_64BIT_ /* For TYP_LONG and we only need one register then change the type to TYP_INT */ if ((type == TYP_LONG) && (grabCount == 1)) { /* We will need to pick one register */ type = TYP_INT; // noway_assert(!VarSetOps::IsEmpty(this, varBits)); VarSetOps::UnionD(this, rpUseInPlace, varBits); } noway_assert((type == TYP_DOUBLE) || (grabCount == (genTypeSize(genActualType(type)) / REGSIZE_BYTES))); #else // !_TARGET_64BIT_ noway_assert(grabCount == 1); #endif // !_TARGET_64BIT_ } } else if (type == TYP_STRUCT) { #ifdef _TARGET_ARM_ // TODO-ARM-Bug?: Passing structs in registers on ARM hits an assert here when // predictReg is PREDICT_REG_R0 to PREDICT_REG_R3 // As a workaround we just bash it to PREDICT_NONE here // if (predictReg != PREDICT_NONE) predictReg = PREDICT_NONE; #endif // Currently predictReg is saying that we will not need any scratch registers noway_assert(predictReg == PREDICT_NONE); /* We may need to sign or zero extend a small type when pushing a struct */ if (varDsc->lvPromoted && !varDsc->lvAddrExposed) { for (unsigned varNum = varDsc->lvFieldLclStart; varNum < varDsc->lvFieldLclStart + varDsc->lvFieldCnt; varNum++) { LclVarDsc* fldVar = lvaTable + varNum; if (fldVar->lvStackAligned()) { // When we are stack aligned Codegen will just use // a push instruction and thus doesn't need any register // since we can push both a register or a stack frame location continue; } if (varTypeIsByte(fldVar->TypeGet())) { // We will need to reserve one byteable register, // type = TYP_BYTE; predictReg = PREDICT_SCRATCH_REG; #if CPU_HAS_BYTE_REGS // It is best to enregister this fldVar in a byteable register // fldVar->addPrefReg(RBM_BYTE_REG_FLAG, this); #endif } else if (varTypeIsShort(fldVar->TypeGet())) { bool isEnregistered = fldVar->lvTracked && (fldVar->lvRegNum != REG_STK); // If fldVar is not enregistered then we will need a scratch 
register // if (!isEnregistered) { // We will need either an int register or a byte register // If we are not requesting a byte register we will request an int register // if (type != TYP_BYTE) type = TYP_INT; predictReg = PREDICT_SCRATCH_REG; } } } } } else { regMaskTP preferReg = rpPredictRegMask(predictReg, type); if (preferReg != 0) { if ((genTypeStSz(type) == 1) || (genCountBits(preferReg) <= genTypeStSz(type))) { varDsc->addPrefReg(preferReg, this); } } } break; /* end of case GT_LCL_VAR */ case GT_JMP: tree->gtUsedRegs = RBM_NONE; regMask = RBM_NONE; #if defined(_TARGET_ARM_) && defined(PROFILING_SUPPORTED) // Mark the registers required to emit a tailcall profiler callback if (compIsProfilerHookNeeded()) { tree->gtUsedRegs |= RBM_PROFILER_JMP_USED; } #endif goto RETURN_CHECK; default: break; } /* end of switch (oper) */ /* If we don't need to evaluate to register, regmask is the empty set */ /* Otherwise we grab a temp for the local variable */ if (predictReg == PREDICT_NONE) regMask = RBM_NONE; else { regMask = rpPredictRegPick(type, predictReg, lockedRegs | rsvdRegs | enregMask); if ((oper == GT_LCL_VAR) && (tree->TypeGet() == TYP_STRUCT)) { /* We need to sign or zero extend a small type when pushing a struct */ noway_assert((type == TYP_INT) || (type == TYP_BYTE)); varDsc = lvaTable + tree->gtLclVarCommon.gtLclNum; noway_assert(varDsc->lvPromoted && !varDsc->lvAddrExposed); for (unsigned varNum = varDsc->lvFieldLclStart; varNum < varDsc->lvFieldLclStart + varDsc->lvFieldCnt; varNum++) { LclVarDsc* fldVar = lvaTable + varNum; if (fldVar->lvTracked) { VARSET_TP VARSET_INIT_NOCOPY(fldBit, VarSetOps::MakeSingleton(this, fldVar->lvVarIndex)); rpRecordRegIntf(regMask, fldBit DEBUGARG( "need scratch register when pushing a small field of a struct")); } } } } /* Update the set of lastUse variables that we encountered so far */ if (lastUse) { VarSetOps::UnionD(this, rpLastUseVars, lastUseVarBits); VARSET_TP VARSET_INIT(this, varAsSet, lastUseVarBits); /* * Add interference from any previously locked temps into this last use variable. */ if (lockedRegs) { rpRecordRegIntf(lockedRegs, varAsSet DEBUGARG("last use Predict lockedRegs")); } /* * Add interference from any reserved temps into this last use variable. */ if (rsvdRegs) { rpRecordRegIntf(rsvdRegs, varAsSet DEBUGARG("last use Predict rsvdRegs")); } /* * For partially enregistered longs add an interference with the * register return by rpPredictRegPick */ if ((type == TYP_INT) && (tree->TypeGet() == TYP_LONG)) { rpRecordRegIntf(regMask, varAsSet DEBUGARG("last use with partial enreg")); } } tree->gtUsedRegs = (regMaskSmall)regMask; goto RETURN_CHECK; } /* Is it a 'simple' unary/binary operator? */ if (kind & GTK_SMPOP) { GenTreePtr op1 = tree->gtOp.gtOp1; GenTreePtr op2 = tree->gtGetOp2IfPresent(); GenTreePtr opsPtr[3]; regMaskTP regsPtr[3]; VARSET_TP VARSET_INIT_NOCOPY(startAsgUseInPlaceVars, VarSetOps::UninitVal()); switch (oper) { case GT_ASG: /* Is the value being assigned into a LCL_VAR? */ if (op1->gtOper == GT_LCL_VAR) { varDsc = lvaTable + op1->gtLclVarCommon.gtLclNum; /* Are we assigning a LCL_VAR the result of a call? 
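   If so, we give the LCL_VAR a preference for the return register(s) below, since the calling convention fixes where the call result arrives.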
*/ if (op2->gtOper == GT_CALL) { /* Set a preferred register for the LCL_VAR */ if (isRegPairType(varDsc->TypeGet())) varDsc->addPrefReg(RBM_LNGRET, this); else if (!varTypeIsFloating(varDsc->TypeGet())) varDsc->addPrefReg(RBM_INTRET, this); #ifdef _TARGET_AMD64_ else varDsc->addPrefReg(RBM_FLOATRET, this); #endif /* * When assigning the result of a call we don't * bother trying to target the right side of the * assignment, since we have a fixed calling convention. */ } else if (varDsc->lvTracked) { // We interfere with uses in place if (!VarSetOps::IsEmpty(this, rpUseInPlace)) { rpRecordVarIntf(varDsc->lvVarIndex, rpUseInPlace DEBUGARG("Assign UseInPlace conflict")); } // Did we predict that this local will be fully enregistered? // and the assignment type is the same as the expression type? // and it is dead on the right side of the assignment? // and we current have no other rpAsgVarNum active? // if ((varDsc->lvRegNum != REG_STK) && ((type != TYP_LONG) || (varDsc->lvOtherReg != REG_STK)) && (type == op2->TypeGet()) && (op1->gtFlags & GTF_VAR_DEF) && (rpAsgVarNum == -1)) { // // Yes, we should try to target the right side (op2) of this // assignment into the (enregistered) tracked variable. // op1PredictReg = PREDICT_NONE; /* really PREDICT_REG, but we've already done the check */ op2PredictReg = rpGetPredictForVarIndex(varDsc->lvVarIndex); // Remember that this is a new use in place // We've added "new UseInPlace"; remove from the global set. VarSetOps::RemoveElemD(this, rpUseInPlace, varDsc->lvVarIndex); // Note that later when we walk down to the leaf node for op2 // if we decide to actually use the register for the 'varDsc' // to enregister the operand, the we will set rpAsgVarNum to // varDsc->lvVarIndex, by extracting this value using // rpGetVarIndexForPredict() // // Also we reset rpAsgVarNum back to -1 after we have finished // predicting the current GT_ASG node // goto ASG_COMMON; } } } else if (tree->OperIsBlkOp()) { interferingRegs |= rpPredictBlkAsgRegUse(tree, predictReg, lockedRegs, rsvdRegs); regMask = 0; goto RETURN_CHECK; } __fallthrough; case GT_CHS: case GT_ASG_OR: case GT_ASG_XOR: case GT_ASG_AND: case GT_ASG_SUB: case GT_ASG_ADD: case GT_ASG_MUL: case GT_ASG_DIV: case GT_ASG_UDIV: /* We can't use "reg = addr" for TYP_LONG or if op2 is a short type */ if ((type != TYP_LONG) && !varTypeIsSmall(op2->gtType)) { /* Is the value being assigned into an enregistered LCL_VAR? */ /* For debug code we only allow a simple op2 to be assigned */ if ((op1->gtOper == GT_LCL_VAR) && (!opts.compDbgCode || rpCanAsgOperWithoutReg(op2, false))) { varDsc = lvaTable + op1->gtLclVarCommon.gtLclNum; /* Did we predict that this local will be enregistered? */ if (varDsc->lvRegNum != REG_STK) { /* Yes, we can use "reg = addr" */ op1PredictReg = PREDICT_NONE; /* really PREDICT_REG, but we've already done the check */ op2PredictReg = PREDICT_NONE; goto ASG_COMMON; } } } #if CPU_LOAD_STORE_ARCH if (oper != GT_ASG) { op1PredictReg = PREDICT_REG; op2PredictReg = PREDICT_REG; } else #endif { /* * Otherwise, initialize the normal forcing of operands: * "addr = reg" */ op1PredictReg = PREDICT_ADDR; op2PredictReg = PREDICT_REG; } ASG_COMMON: #if !CPU_LOAD_STORE_ARCH if (op2PredictReg != PREDICT_NONE) { /* Is the value being assigned a simple one? 
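   (i.e. one that rpCanAsgOperWithoutReg, checked just below, says can be stored without first loading it into a register)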
*/ if (rpCanAsgOperWithoutReg(op2, false)) op2PredictReg = PREDICT_NONE; } #endif bool simpleAssignment; simpleAssignment = false; if ((oper == GT_ASG) && (op1->gtOper == GT_LCL_VAR)) { // Add a variable interference from the assign target // to each of the last use variables if (!VarSetOps::IsEmpty(this, rpLastUseVars)) { varDsc = lvaTable + op1->gtLclVarCommon.gtLclNum; if (varDsc->lvTracked) { unsigned varIndex = varDsc->lvVarIndex; rpRecordVarIntf(varIndex, rpLastUseVars DEBUGARG("Assign conflict")); } } /* Record whether this tree is a simple assignment to a local */ simpleAssignment = ((type != TYP_LONG) || !opts.compDbgCode); } bool requireByteReg; requireByteReg = false; #if CPU_HAS_BYTE_REGS /* Byte-assignments need the byte registers, unless op1 is an enregistered local */ if (varTypeIsByte(type) && ((op1->gtOper != GT_LCL_VAR) || (lvaTable[op1->gtLclVarCommon.gtLclNum].lvRegNum == REG_STK))) { // Byte-assignments typically need a byte register requireByteReg = true; if (op1->gtOper == GT_LCL_VAR) { varDsc = lvaTable + op1->gtLclVar.gtLclNum; // Did we predict that this local will be enregistered? if (varDsc->lvTracked && (varDsc->lvRegNum != REG_STK) && (oper != GT_CHS)) { // We don't require a byte register when op1 is an enregistered local */ requireByteReg = false; } // Is op1 part of an Assign-Op or is the RHS a simple memory indirection? if ((oper != GT_ASG) || (op2->gtOper == GT_IND) || (op2->gtOper == GT_CLS_VAR)) { // We should try to put op1 in an byte register varDsc->addPrefReg(RBM_BYTE_REG_FLAG, this); } } } #endif VarSetOps::Assign(this, startAsgUseInPlaceVars, rpUseInPlace); bool isWriteBarrierAsgNode; isWriteBarrierAsgNode = codeGen->gcInfo.gcIsWriteBarrierAsgNode(tree); #ifdef DEBUG GCInfo::WriteBarrierForm wbf; if (isWriteBarrierAsgNode) wbf = codeGen->gcInfo.gcIsWriteBarrierCandidate(tree->gtOp.gtOp1, tree->gtOp.gtOp2); else wbf = GCInfo::WBF_NoBarrier; #endif // DEBUG regMaskTP wbaLockedRegs; wbaLockedRegs = lockedRegs; if (isWriteBarrierAsgNode) { #if defined(_TARGET_X86_) && NOGC_WRITE_BARRIERS #ifdef DEBUG if (wbf != GCInfo::WBF_NoBarrier_CheckNotHeapInDebug) { #endif // DEBUG wbaLockedRegs |= RBM_WRITE_BARRIER; op1->gtRsvdRegs |= RBM_WRITE_BARRIER; // This will steer op2 away from REG_WRITE_BARRIER assert(REG_WRITE_BARRIER == REG_EDX); op1PredictReg = PREDICT_REG_EDX; #ifdef DEBUG } else #endif // DEBUG #endif // defined(_TARGET_X86_) && NOGC_WRITE_BARRIERS #if defined(DEBUG) || !(defined(_TARGET_X86_) && NOGC_WRITE_BARRIERS) { #ifdef _TARGET_X86_ op1PredictReg = PREDICT_REG_ECX; op2PredictReg = PREDICT_REG_EDX; #elif defined(_TARGET_ARM_) op1PredictReg = PREDICT_REG_R0; op2PredictReg = PREDICT_REG_R1; // This is my best guess as to what the previous code meant by checking "gtRngChkLen() == NULL". if ((op1->OperGet() == GT_IND) && (op1->gtOp.gtOp1->OperGet() != GT_ARR_BOUNDS_CHECK)) { op1 = op1->gtOp.gtOp1; } #else // !_TARGET_X86_ && !_TARGET_ARM_ #error "Non-ARM or x86 _TARGET_ in RegPredict for WriteBarrierAsg" #endif } #endif } /* Are we supposed to evaluate RHS first? */ if (tree->gtFlags & GTF_REVERSE_OPS) { op2Mask = rpPredictTreeRegUse(op2, op2PredictReg, lockedRegs, rsvdRegs | op1->gtRsvdRegs); #if CPU_HAS_BYTE_REGS // Should we insure that op2 gets evaluated into a byte register? if (requireByteReg && ((op2Mask & RBM_BYTE_REGS) == 0)) { // We need to grab a byte-able register, (i.e. EAX, EDX, ECX, EBX) // and we can't select one that is already reserved (i.e. 
lockedRegs) // op2Mask |= rpPredictRegPick(type, PREDICT_SCRATCH_REG, (lockedRegs | RBM_NON_BYTE_REGS)); op2->gtUsedRegs |= op2Mask; // No longer a simple assignment because we're using extra registers and might // have interference between op1 and op2. See DevDiv #136681 simpleAssignment = false; } #endif /* * For a simple assignment we don't want the op2Mask to be * marked as interfering with the LCL_VAR, since it is likely * that we will want to enregister the LCL_VAR in exactly * the register that is used to compute op2 */ tmpMask = lockedRegs; if (!simpleAssignment) tmpMask |= op2Mask; regMask = rpPredictTreeRegUse(op1, op1PredictReg, tmpMask, RBM_NONE); // Did we relax the register prediction for op1 and op2 above? // - because we are depending upon op1 being enregistered // if ((op1PredictReg == PREDICT_NONE) && ((op2PredictReg == PREDICT_NONE) || rpHasVarIndexForPredict(op2PredictReg))) { /* We must be assigning into an enregistered LCL_VAR */ noway_assert(op1->gtOper == GT_LCL_VAR); varDsc = lvaTable + op1->gtLclVar.gtLclNum; noway_assert(varDsc->lvRegNum != REG_STK); /* We need to set lvDependReg, in case we lose the enregistration of op1 */ varDsc->lvDependReg = true; } } else { // For the case of simpleAssignments op2 should always be evaluated first noway_assert(!simpleAssignment); regMask = rpPredictTreeRegUse(op1, op1PredictReg, lockedRegs, rsvdRegs | op2->gtRsvdRegs); if (isWriteBarrierAsgNode) { wbaLockedRegs |= op1->gtUsedRegs; } op2Mask = rpPredictTreeRegUse(op2, op2PredictReg, wbaLockedRegs | regMask, RBM_NONE); #if CPU_HAS_BYTE_REGS // Should we ensure that op2 gets evaluated into a byte register? if (requireByteReg && ((op2Mask & RBM_BYTE_REGS) == 0)) { // We need to grab a byte-able register, (i.e. EAX, EDX, ECX, EBX) // and we can't select one that is already reserved (i.e.
lockedRegs or regMask) // op2Mask |= rpPredictRegPick(type, PREDICT_SCRATCH_REG, (lockedRegs | regMask | RBM_NON_BYTE_REGS)); op2->gtUsedRegs |= op2Mask; } #endif } if (rpHasVarIndexForPredict(op2PredictReg)) { rpAsgVarNum = -1; } if (isWriteBarrierAsgNode) { #if NOGC_WRITE_BARRIERS #ifdef DEBUG if (wbf != GCInfo::WBF_NoBarrier_CheckNotHeapInDebug) { #endif // DEBUG /* Steer computation away from REG_WRITE_BARRIER as the pointer is passed to the write-barrier call in REG_WRITE_BARRIER */ regMask = op2Mask; if (op1->gtOper == GT_IND) { GenTreePtr rv1, rv2; unsigned mul, cns; bool rev; /* Special handling of indirect assigns for write barrier */ bool yes = codeGen->genCreateAddrMode(op1->gtOp.gtOp1, -1, true, RBM_NONE, &rev, &rv1, &rv2, &mul, &cns); /* Check address mode for enregisterable locals */ if (yes) { if (rv1 != NULL && rv1->gtOper == GT_LCL_VAR) { rpPredictRefAssign(rv1->gtLclVarCommon.gtLclNum); } if (rv2 != NULL && rv2->gtOper == GT_LCL_VAR) { rpPredictRefAssign(rv2->gtLclVarCommon.gtLclNum); } } } if (op2->gtOper == GT_LCL_VAR) { rpPredictRefAssign(op2->gtLclVarCommon.gtLclNum); } // Add a register interference for REG_WRITE_BARRIER to each of the last use variables if (!VarSetOps::IsEmpty(this, rpLastUseVars)) { rpRecordRegIntf(RBM_WRITE_BARRIER, rpLastUseVars DEBUGARG("WriteBarrier and rpLastUseVars conflict")); } tree->gtUsedRegs |= RBM_WRITE_BARRIER; #ifdef DEBUG } else #endif // DEBUG #endif // NOGC_WRITE_BARRIERS #if defined(DEBUG) || !NOGC_WRITE_BARRIERS { #ifdef _TARGET_ARM_ #ifdef DEBUG if (verbose) printf("Adding interference with RBM_CALLEE_TRASH_NOGC for NoGC WriteBarrierAsg\n"); #endif // // For the ARM target we have an optimized JIT Helper // that only trashes a subset of the callee saved registers // // NOTE: Adding it to the gtUsedRegs will cause the interference to // be added appropriately // the RBM_CALLEE_TRASH_NOGC set is killed. We will record this in interferingRegs // instead of gtUsedRegs, because the latter will be modified later, but we need // to remember to add the interference. interferingRegs |= RBM_CALLEE_TRASH_NOGC; op1->gtUsedRegs |= RBM_R0; op2->gtUsedRegs |= RBM_R1; #else // _TARGET_ARM_ #ifdef DEBUG if (verbose) printf("Adding interference with RBM_CALLEE_TRASH for NoGC WriteBarrierAsg\n"); #endif // We have to call a normal JIT helper to perform the Write Barrier Assignment // It will trash the callee saved registers tree->gtUsedRegs |= RBM_CALLEE_TRASH; #endif // _TARGET_ARM_ } #endif // defined(DEBUG) || !NOGC_WRITE_BARRIERS } if (simpleAssignment) { /* * Consider a simple assignment to a local: * * lcl = expr; * * Since the "=" node is visited after the variable * is marked live (assuming it's live after the * assignment), we don't want to use the register * use mask of the "=" node but rather that of the * variable itself. 
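* (op1 is that local variable here, so we take op1->gtUsedRegs and do not merge in op2's mask.)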
*/ tree->gtUsedRegs = op1->gtUsedRegs; } else { tree->gtUsedRegs = op1->gtUsedRegs | op2->gtUsedRegs; } VarSetOps::Assign(this, rpUseInPlace, startAsgUseInPlaceVars); goto RETURN_CHECK; case GT_ASG_LSH: case GT_ASG_RSH: case GT_ASG_RSZ: /* assigning shift operators */ noway_assert(type != TYP_LONG); #if CPU_LOAD_STORE_ARCH predictReg = PREDICT_ADDR; #else predictReg = PREDICT_NONE; #endif /* shift count is handled same as ordinary shift */ goto HANDLE_SHIFT_COUNT; case GT_ADDR: regMask = rpPredictTreeRegUse(op1, PREDICT_ADDR, lockedRegs, RBM_LASTUSE); if ((regMask == RBM_NONE) && (predictReg >= PREDICT_REG)) { // We need a scratch register for the LEA instruction regMask = rpPredictRegPick(TYP_INT, predictReg, lockedRegs | rsvdRegs); } tree->gtUsedRegs = op1->gtUsedRegs | (regMaskSmall)regMask; goto RETURN_CHECK; case GT_CAST: /* Cannot cast to VOID */ noway_assert(type != TYP_VOID); /* cast to long is special */ if (type == TYP_LONG && op1->gtType <= TYP_INT) { noway_assert(tree->gtCast.gtCastType == TYP_LONG || tree->gtCast.gtCastType == TYP_ULONG); #if CPU_LONG_USES_REGPAIR rpPredictReg predictRegHi = PREDICT_SCRATCH_REG; if (rpHasVarIndexForPredict(predictReg)) { unsigned tgtIndex = rpGetVarIndexForPredict(predictReg); rpAsgVarNum = tgtIndex; // We don't need any register as we plan on writing to the rpAsgVarNum register predictReg = PREDICT_NONE; LclVarDsc* tgtVar = lvaTable + lvaTrackedToVarNum[tgtIndex]; tgtVar->lvDependReg = true; if (tgtVar->lvOtherReg != REG_STK) { predictRegHi = PREDICT_NONE; } } else #endif if (predictReg == PREDICT_NONE) { predictReg = PREDICT_SCRATCH_REG; } #ifdef _TARGET_ARM_ // If we are widening an int into a long using a targeted register pair we // should retarget so that the low part get loaded into the appropriate register else if (predictReg == PREDICT_PAIR_R0R1) { predictReg = PREDICT_REG_R0; predictRegHi = PREDICT_REG_R1; } else if (predictReg == PREDICT_PAIR_R2R3) { predictReg = PREDICT_REG_R2; predictRegHi = PREDICT_REG_R3; } #endif #ifdef _TARGET_X86_ // If we are widening an int into a long using a targeted register pair we // should retarget so that the low part get loaded into the appropriate register else if (predictReg == PREDICT_PAIR_EAXEDX) { predictReg = PREDICT_REG_EAX; predictRegHi = PREDICT_REG_EDX; } else if (predictReg == PREDICT_PAIR_ECXEBX) { predictReg = PREDICT_REG_ECX; predictRegHi = PREDICT_REG_EBX; } #endif regMask = rpPredictTreeRegUse(op1, predictReg, lockedRegs, rsvdRegs); #if CPU_LONG_USES_REGPAIR if (predictRegHi != PREDICT_NONE) { // Now get one more reg for the upper part regMask |= rpPredictRegPick(TYP_INT, predictRegHi, lockedRegs | rsvdRegs | regMask); } #endif tree->gtUsedRegs = op1->gtUsedRegs | (regMaskSmall)regMask; goto RETURN_CHECK; } /* cast from long is special - it frees a register */ if (type <= TYP_INT // nice. 
this presumably is intended to mean "signed int and shorter types" && op1->gtType == TYP_LONG) { if ((predictReg == PREDICT_NONE) || rpHasVarIndexForPredict(predictReg)) predictReg = PREDICT_REG; regMask = rpPredictTreeRegUse(op1, predictReg, lockedRegs, rsvdRegs); // If we have 2 or more regs, free one of them if (!genMaxOneBit(regMask)) { /* Clear the 2nd lowest bit in regMask */ /* First set tmpMask to the lowest bit in regMask */ tmpMask = genFindLowestBit(regMask); /* Next find the second lowest bit in regMask */ tmpMask = genFindLowestBit(regMask & ~tmpMask); /* Clear this bit from regmask */ regMask &= ~tmpMask; } tree->gtUsedRegs = op1->gtUsedRegs; goto RETURN_CHECK; } #if CPU_HAS_BYTE_REGS /* cast from signed-byte is special - it uses byteable registers */ if (type == TYP_INT) { var_types smallType; if (genTypeSize(tree->gtCast.CastOp()->TypeGet()) < genTypeSize(tree->gtCast.gtCastType)) smallType = tree->gtCast.CastOp()->TypeGet(); else smallType = tree->gtCast.gtCastType; if (smallType == TYP_BYTE) { regMask = rpPredictTreeRegUse(op1, predictReg, lockedRegs, rsvdRegs); if ((regMask & RBM_BYTE_REGS) == 0) regMask = rpPredictRegPick(type, PREDICT_SCRATCH_REG, RBM_NON_BYTE_REGS); tree->gtUsedRegs = (regMaskSmall)regMask; goto RETURN_CHECK; } } #endif #if FEATURE_STACK_FP_X87 /* cast to float/double is special */ if (varTypeIsFloating(type)) { switch (op1->TypeGet()) { /* uses fild, so don't need to be loaded to reg */ case TYP_INT: case TYP_LONG: rpPredictTreeRegUse(op1, PREDICT_NONE, lockedRegs, rsvdRegs); tree->gtUsedRegs = op1->gtUsedRegs; regMask = 0; goto RETURN_CHECK; default: break; } } /* Casting from integral type to floating type is special */ if (!varTypeIsFloating(type) && varTypeIsFloating(op1->TypeGet())) { if (opts.compCanUseSSE2) { // predict for SSE2 based casting if (predictReg <= PREDICT_REG) predictReg = PREDICT_SCRATCH_REG; regMask = rpPredictTreeRegUse(op1, predictReg, lockedRegs, rsvdRegs); // Get one more int reg to hold cast result regMask |= rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, lockedRegs | rsvdRegs | regMask); tree->gtUsedRegs = op1->gtUsedRegs | (regMaskSmall)regMask; goto RETURN_CHECK; } } #endif #if FEATURE_FP_REGALLOC // Are we casting between int to float or float to int // Fix 388428 ARM JitStress WP7 if (varTypeIsFloating(type) != varTypeIsFloating(op1->TypeGet())) { // op1 needs to go into a register regMask = rpPredictTreeRegUse(op1, PREDICT_REG, lockedRegs, rsvdRegs); #ifdef _TARGET_ARM_ if (varTypeIsFloating(op1->TypeGet())) { // We also need a fp scratch register for the convert operation regMask |= rpPredictRegPick((genTypeStSz(type) == 1) ? 
TYP_FLOAT : TYP_DOUBLE, PREDICT_SCRATCH_REG, regMask | lockedRegs | rsvdRegs); } #endif // We also need a register to hold the result regMask |= rpPredictRegPick(type, PREDICT_SCRATCH_REG, regMask | lockedRegs | rsvdRegs); tree->gtUsedRegs = op1->gtUsedRegs | (regMaskSmall)regMask; goto RETURN_CHECK; } #endif /* otherwise must load op1 into a register */ goto GENERIC_UNARY; case GT_INTRINSIC: #ifdef _TARGET_XARCH_ if (tree->gtIntrinsic.gtIntrinsicId == CORINFO_INTRINSIC_Round && tree->TypeGet() == TYP_INT) { // This is a special case to handle the following // optimization: conv.i4(round.d(d)) -> round.i(d) // if flowgraph 3186 if (predictReg <= PREDICT_REG) predictReg = PREDICT_SCRATCH_REG; rpPredictTreeRegUse(op1, predictReg, lockedRegs, rsvdRegs); regMask = rpPredictRegPick(TYP_INT, predictReg, lockedRegs | rsvdRegs); tree->gtUsedRegs = op1->gtUsedRegs | (regMaskSmall)regMask; goto RETURN_CHECK; } #endif __fallthrough; case GT_NEG: #ifdef _TARGET_ARM_ if (tree->TypeGet() == TYP_LONG) { // On ARM this consumes an extra register for the '0' value if (predictReg <= PREDICT_REG) predictReg = PREDICT_SCRATCH_REG; regMaskTP op1Mask = rpPredictTreeRegUse(op1, predictReg, lockedRegs, rsvdRegs); regMask = rpPredictRegPick(TYP_INT, predictReg, lockedRegs | op1Mask | rsvdRegs); tree->gtUsedRegs = op1->gtUsedRegs | (regMaskSmall)regMask; goto RETURN_CHECK; } #endif // _TARGET_ARM_ __fallthrough; case GT_NOT: // these unary operators will write new values // and thus will need a scratch register GENERIC_UNARY: /* generic unary operators */ if (predictReg <= PREDICT_REG) predictReg = PREDICT_SCRATCH_REG; __fallthrough; case GT_NOP: // these unary operators do not write new values // and thus won't need a scratch register CLANG_FORMAT_COMMENT_ANCHOR; #if OPT_BOOL_OPS if (!op1) { tree->gtUsedRegs = 0; regMask = 0; goto RETURN_CHECK; } #endif regMask = rpPredictTreeRegUse(op1, predictReg, lockedRegs, rsvdRegs); tree->gtUsedRegs = op1->gtUsedRegs; goto RETURN_CHECK; case GT_IND: case GT_NULLCHECK: // At this point, nullcheck is just like an IND... { bool intoReg = true; VARSET_TP VARSET_INIT(this, startIndUseInPlaceVars, rpUseInPlace); if (fgIsIndirOfAddrOfLocal(tree) != NULL) { compUpdateLifeVar(tree); } if (predictReg == PREDICT_ADDR) { intoReg = false; } else if (predictReg == PREDICT_NONE) { if (type != TYP_LONG) { intoReg = false; } else { predictReg = PREDICT_REG; } } /* forcing to register? */ if (intoReg && (type != TYP_LONG)) { rsvdRegs |= RBM_LASTUSE; } GenTreePtr lenCSE; lenCSE = NULL; /* check for address mode */ regMask = rpPredictAddressMode(op1, type, lockedRegs, rsvdRegs, lenCSE); tmpMask = RBM_NONE; #if CPU_LOAD_STORE_ARCH // We may need a scratch register for loading a long if (type == TYP_LONG) { /* This scratch register immediately dies */ tmpMask = rpPredictRegPick(TYP_BYREF, PREDICT_REG, op1->gtUsedRegs | lockedRegs | rsvdRegs); } #endif // CPU_LOAD_STORE_ARCH #ifdef _TARGET_ARM_ // Unaligned loads/stores for floating point values must first be loaded into integer register(s) // if ((tree->gtFlags & GTF_IND_UNALIGNED) && varTypeIsFloating(type)) { /* These integer register(s) immediately die */ tmpMask = rpPredictRegPick(TYP_INT, PREDICT_REG, op1->gtUsedRegs | lockedRegs | rsvdRegs); // Two integer registers are required for a TYP_DOUBLE if (type == TYP_DOUBLE) tmpMask |= rpPredictRegPick(TYP_INT, PREDICT_REG, op1->gtUsedRegs | lockedRegs | rsvdRegs | tmpMask); } #endif /* forcing to register? 
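   (intoReg was set above: it is false when the caller asked for PREDICT_ADDR, and false for PREDICT_NONE unless the type is TYP_LONG)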
*/ if (intoReg) { regMaskTP lockedMask = lockedRegs | rsvdRegs; tmpMask |= regMask; // We will compute a new regMask that holds the register(s) // that we will load the indirection into. // CLANG_FORMAT_COMMENT_ANCHOR; #ifndef _TARGET_64BIT_ if (type == TYP_LONG) { // We need to use multiple load instructions here: // For the first register we can not choose // any registers that are being used in place or // any register in the current regMask // regMask = rpPredictRegPick(TYP_INT, predictReg, regMask | lockedMask); // For the second register we can choose a register that was // used in place or any register in the old now overwritten regMask // but not the same register that we picked above in 'regMask' // VarSetOps::Assign(this, rpUseInPlace, startIndUseInPlaceVars); regMask |= rpPredictRegPick(TYP_INT, predictReg, regMask | lockedMask); } else #endif { // We will use one load instruction here: // The load target register can be a register that was used in place // or one of the register from the orginal regMask. // VarSetOps::Assign(this, rpUseInPlace, startIndUseInPlaceVars); regMask = rpPredictRegPick(type, predictReg, lockedMask); } } else if (predictReg != PREDICT_ADDR) { /* Unless the caller specified PREDICT_ADDR */ /* we don't return the temp registers used */ /* to form the address */ regMask = RBM_NONE; } } tree->gtUsedRegs = (regMaskSmall)(regMask | tmpMask); goto RETURN_CHECK; case GT_EQ: case GT_NE: case GT_LT: case GT_LE: case GT_GE: case GT_GT: #ifdef _TARGET_X86_ /* Floating point comparison uses EAX for flags */ if (varTypeIsFloating(op1->TypeGet())) { regMask = RBM_EAX; } else #endif if (!(tree->gtFlags & GTF_RELOP_JMP_USED)) { // Some comparisons are converted to ?: noway_assert(!fgMorphRelopToQmark(op1)); if (predictReg <= PREDICT_REG) predictReg = PREDICT_SCRATCH_REG; // The set instructions need a byte register regMask = rpPredictRegPick(TYP_BYTE, predictReg, lockedRegs | rsvdRegs); } else { regMask = RBM_NONE; #ifdef _TARGET_XARCH_ tmpMask = RBM_NONE; // Optimize the compare with a constant cases for xarch if (op1->gtOper == GT_CNS_INT) { if (op2->gtOper == GT_CNS_INT) tmpMask = rpPredictTreeRegUse(op1, PREDICT_SCRATCH_REG, lockedRegs, rsvdRegs | op2->gtRsvdRegs); rpPredictTreeRegUse(op2, PREDICT_NONE, lockedRegs | tmpMask, RBM_LASTUSE); tree->gtUsedRegs = op2->gtUsedRegs; goto RETURN_CHECK; } else if (op2->gtOper == GT_CNS_INT) { rpPredictTreeRegUse(op1, PREDICT_NONE, lockedRegs, rsvdRegs); tree->gtUsedRegs = op1->gtUsedRegs; goto RETURN_CHECK; } else if (op2->gtOper == GT_CNS_LNG) { regMaskTP op1Mask = rpPredictTreeRegUse(op1, PREDICT_ADDR, lockedRegs, rsvdRegs); #ifdef _TARGET_X86_ // We also need one extra register to read values from tmpMask = rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, lockedRegs | op1Mask | rsvdRegs); #endif // _TARGET_X86_ tree->gtUsedRegs = (regMaskSmall)tmpMask | op1->gtUsedRegs; goto RETURN_CHECK; } #endif // _TARGET_XARCH_ } unsigned op1TypeSize; unsigned op2TypeSize; op1TypeSize = genTypeSize(op1->TypeGet()); op2TypeSize = genTypeSize(op2->TypeGet()); op1PredictReg = PREDICT_REG; op2PredictReg = PREDICT_REG; if (tree->gtFlags & GTF_REVERSE_OPS) { #ifdef _TARGET_XARCH_ if (op1TypeSize == sizeof(int)) op1PredictReg = PREDICT_NONE; #endif tmpMask = rpPredictTreeRegUse(op2, op2PredictReg, lockedRegs, rsvdRegs | op1->gtRsvdRegs); rpPredictTreeRegUse(op1, op1PredictReg, lockedRegs | tmpMask, RBM_LASTUSE); } else { #ifdef _TARGET_XARCH_ // For full DWORD compares we can have // // op1 is an address mode and op2 is a register // or // op1 
is a register and op2 is an address mode // if ((op2TypeSize == sizeof(int)) && (op1TypeSize == op2TypeSize)) { if (op2->gtOper == GT_LCL_VAR) { unsigned lclNum = op2->gtLclVar.gtLclNum; varDsc = lvaTable + lclNum; /* Did we predict that this local will be enregistered? */ if (varDsc->lvTracked && (varDsc->lvRegNum != REG_STK)) { op1PredictReg = PREDICT_ADDR; } } } // Codegen will generate cmp reg,[mem] for 4 or 8-byte types, but not for 1 or 2 byte types if ((op1PredictReg != PREDICT_ADDR) && (op2TypeSize >= sizeof(int))) op2PredictReg = PREDICT_ADDR; #endif // _TARGET_XARCH_ tmpMask = rpPredictTreeRegUse(op1, op1PredictReg, lockedRegs, rsvdRegs | op2->gtRsvdRegs); #ifdef _TARGET_ARM_ if ((op2->gtOper != GT_CNS_INT) || !codeGen->validImmForAlu(op2->gtIntCon.gtIconVal)) #endif { rpPredictTreeRegUse(op2, op2PredictReg, lockedRegs | tmpMask, RBM_LASTUSE); } } #ifdef _TARGET_XARCH_ // In some cases in genCondSetFlags(), we need to use a temporary register (via rsPickReg()) // to generate a sign/zero extension before doing a compare. Save a register for this purpose // if one of the registers is small and the types aren't equal. if (regMask == RBM_NONE) { rpPredictReg op1xPredictReg, op2xPredictReg; GenTreePtr op1x, op2x; if (tree->gtFlags & GTF_REVERSE_OPS) // TODO: do we really need to handle this case? { op1xPredictReg = op2PredictReg; op2xPredictReg = op1PredictReg; op1x = op2; op2x = op1; } else { op1xPredictReg = op1PredictReg; op2xPredictReg = op2PredictReg; op1x = op1; op2x = op2; } if ((op1xPredictReg < PREDICT_REG) && // op1 doesn't get a register (probably an indir) (op2xPredictReg >= PREDICT_REG) && // op2 gets a register varTypeIsSmall(op1x->TypeGet())) // op1 is smaller than an int { bool needTmp = false; // If op1x is a byte, and op2x is not a byteable register, we'll need a temp. // We could predict a byteable register for op2x, but what if we don't get it? // So, be conservative and always ask for a temp. There are a couple small CQ losses as a // result. if (varTypeIsByte(op1x->TypeGet())) { needTmp = true; } else { if (op2x->gtOper == GT_LCL_VAR) // this will be a GT_REG_VAR during code generation { if (genActualType(op1x->TypeGet()) != lvaGetActualType(op2x->gtLclVar.gtLclNum)) needTmp = true; } else { if (op1x->TypeGet() != op2x->TypeGet()) needTmp = true; } } if (needTmp) { regMask = rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, lockedRegs | rsvdRegs); } } } #endif // _TARGET_XARCH_ tree->gtUsedRegs = (regMaskSmall)regMask | op1->gtUsedRegs | op2->gtUsedRegs; goto RETURN_CHECK; case GT_MUL: #ifndef _TARGET_AMD64_ if (type == TYP_LONG) { assert(tree->gtIsValid64RsltMul()); /* Strip out the cast nodes */ noway_assert(op1->gtOper == GT_CAST && op2->gtOper == GT_CAST); op1 = op1->gtCast.CastOp(); op2 = op2->gtCast.CastOp(); #else if (false) { #endif // !_TARGET_AMD64_ USE_MULT_EAX: #if defined(_TARGET_X86_) // This will done by a 64-bit imul "imul eax, reg" // (i.e. EDX:EAX = EAX * reg) /* Are we supposed to evaluate op2 first? 
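   (GTF_REVERSE_OPS means the second operand is evaluated before the first)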
*/ if (tree->gtFlags & GTF_REVERSE_OPS) { rpPredictTreeRegUse(op2, PREDICT_PAIR_TMP_LO, lockedRegs, rsvdRegs | op1->gtRsvdRegs); rpPredictTreeRegUse(op1, PREDICT_REG, lockedRegs | RBM_PAIR_TMP_LO, RBM_LASTUSE); } else { rpPredictTreeRegUse(op1, PREDICT_PAIR_TMP_LO, lockedRegs, rsvdRegs | op2->gtRsvdRegs); rpPredictTreeRegUse(op2, PREDICT_REG, lockedRegs | RBM_PAIR_TMP_LO, RBM_LASTUSE); } /* set gtUsedRegs to EAX, EDX and the registers needed by op1 and op2 */ tree->gtUsedRegs = RBM_PAIR_TMP | op1->gtUsedRegs | op2->gtUsedRegs; /* set regMask to the set of held registers */ regMask = RBM_PAIR_TMP_LO; if (type == TYP_LONG) regMask |= RBM_PAIR_TMP_HI; #elif defined(_TARGET_ARM_) // This will done by a 4 operand multiply // Are we supposed to evaluate op2 first? if (tree->gtFlags & GTF_REVERSE_OPS) { rpPredictTreeRegUse(op2, PREDICT_REG, lockedRegs, rsvdRegs | op1->gtRsvdRegs); rpPredictTreeRegUse(op1, PREDICT_REG, lockedRegs, RBM_LASTUSE); } else { rpPredictTreeRegUse(op1, PREDICT_REG, lockedRegs, rsvdRegs | op2->gtRsvdRegs); rpPredictTreeRegUse(op2, PREDICT_REG, lockedRegs, RBM_LASTUSE); } // set regMask to the set of held registers, // the two scratch register we need to compute the mul result regMask = rpPredictRegPick(TYP_LONG, PREDICT_SCRATCH_REG, lockedRegs | rsvdRegs); // set gtUsedRegs toregMask and the registers needed by op1 and op2 tree->gtUsedRegs = regMask | op1->gtUsedRegs | op2->gtUsedRegs; #else // !_TARGET_X86_ && !_TARGET_ARM_ #error "Non-ARM or x86 _TARGET_ in RegPredict for 64-bit imul" #endif goto RETURN_CHECK; } else { /* We use imulEAX for most unsigned multiply operations */ if (tree->gtOverflow()) { if ((tree->gtFlags & GTF_UNSIGNED) || varTypeIsSmall(tree->TypeGet())) { goto USE_MULT_EAX; } } } __fallthrough; case GT_OR: case GT_XOR: case GT_AND: case GT_SUB: case GT_ADD: tree->gtUsedRegs = 0; if (predictReg <= PREDICT_REG) predictReg = PREDICT_SCRATCH_REG; GENERIC_BINARY: noway_assert(op2); if (tree->gtFlags & GTF_REVERSE_OPS) { op1PredictReg = PREDICT_REG; #if !CPU_LOAD_STORE_ARCH if (genTypeSize(op1->gtType) >= sizeof(int)) op1PredictReg = PREDICT_NONE; #endif regMask = rpPredictTreeRegUse(op2, predictReg, lockedRegs, rsvdRegs | op1->gtRsvdRegs); rpPredictTreeRegUse(op1, op1PredictReg, lockedRegs | regMask, RBM_LASTUSE); } else { op2PredictReg = PREDICT_REG; #if !CPU_LOAD_STORE_ARCH if (genTypeSize(op2->gtType) >= sizeof(int)) op2PredictReg = PREDICT_NONE; #endif regMask = rpPredictTreeRegUse(op1, predictReg, lockedRegs, rsvdRegs | op2->gtRsvdRegs); #ifdef _TARGET_ARM_ // For most ALU operations we can generate a single instruction that encodes // a small immediate integer constant value. (except for multiply) // if ((op2->gtOper == GT_CNS_INT) && (oper != GT_MUL)) { ssize_t ival = op2->gtIntCon.gtIconVal; if (codeGen->validImmForAlu(ival)) { op2PredictReg = PREDICT_NONE; } else if (codeGen->validImmForAdd(ival, INS_FLAGS_DONT_CARE) && ((oper == GT_ADD) || (oper == GT_SUB))) { op2PredictReg = PREDICT_NONE; } } if (op2PredictReg == PREDICT_NONE) { op2->gtUsedRegs = RBM_NONE; } else #endif { rpPredictTreeRegUse(op2, op2PredictReg, lockedRegs | regMask, RBM_LASTUSE); } } tree->gtUsedRegs = (regMaskSmall)regMask | op1->gtUsedRegs | op2->gtUsedRegs; #if CPU_HAS_BYTE_REGS /* We have special register requirements for byte operations */ if (varTypeIsByte(tree->TypeGet())) { /* For 8 bit arithmetic, one operands has to be in a byte-addressable register, and the other has to be in a byte-addrble reg or in memory. 
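   (On x86 the byte-addressable registers are EAX, EBX, ECX and EDX.)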
Assume its in a reg */ regMaskTP regByteMask = 0; regMaskTP op1ByteMask = op1->gtUsedRegs; if (!(op1->gtUsedRegs & RBM_BYTE_REGS)) { // Pick a Byte register to use for op1 regByteMask = rpPredictRegPick(TYP_BYTE, PREDICT_REG, lockedRegs | rsvdRegs); op1ByteMask = regByteMask; } if (!(op2->gtUsedRegs & RBM_BYTE_REGS)) { // Pick a Byte register to use for op2, avoiding the one used by op1 regByteMask |= rpPredictRegPick(TYP_BYTE, PREDICT_REG, lockedRegs | rsvdRegs | op1ByteMask); } if (regByteMask) { tree->gtUsedRegs |= regByteMask; regMask = regByteMask; } } #endif goto RETURN_CHECK; case GT_DIV: case GT_MOD: case GT_UDIV: case GT_UMOD: /* non-integer division handled in generic way */ if (!varTypeIsIntegral(type)) { tree->gtUsedRegs = 0; if (predictReg <= PREDICT_REG) predictReg = PREDICT_SCRATCH_REG; goto GENERIC_BINARY; } #ifndef _TARGET_64BIT_ if (type == TYP_LONG && (oper == GT_MOD || oper == GT_UMOD)) { /* Special case: a mod with an int op2 is done inline using idiv or div to avoid a costly call to the helper */ noway_assert((op2->gtOper == GT_CNS_LNG) && (op2->gtLngCon.gtLconVal == int(op2->gtLngCon.gtLconVal))); #if defined(_TARGET_X86_) || defined(_TARGET_ARM_) if (tree->gtFlags & GTF_REVERSE_OPS) { tmpMask = rpPredictTreeRegUse(op2, PREDICT_REG, lockedRegs | RBM_PAIR_TMP, rsvdRegs | op1->gtRsvdRegs); tmpMask |= rpPredictTreeRegUse(op1, PREDICT_PAIR_TMP, lockedRegs | tmpMask, RBM_LASTUSE); } else { tmpMask = rpPredictTreeRegUse(op1, PREDICT_PAIR_TMP, lockedRegs, rsvdRegs | op2->gtRsvdRegs); tmpMask |= rpPredictTreeRegUse(op2, PREDICT_REG, lockedRegs | tmpMask | RBM_PAIR_TMP, RBM_LASTUSE); } regMask = RBM_PAIR_TMP; #else // !_TARGET_X86_ && !_TARGET_ARM_ #error "Non-ARM or x86 _TARGET_ in RegPredict for 64-bit MOD" #endif // !_TARGET_X86_ && !_TARGET_ARM_ tree->gtUsedRegs = (regMaskSmall)(regMask | op1->gtUsedRegs | op2->gtUsedRegs | rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, regMask | tmpMask)); goto RETURN_CHECK; } #endif // _TARGET_64BIT_ /* no divide immediate, so force integer constant which is not * a power of two to register */ if (op2->OperKind() & GTK_CONST) { ssize_t ival = op2->gtIntConCommon.IconValue(); /* Is the divisor a power of 2 ? */ if (ival > 0 && genMaxOneBit(size_t(ival))) { goto GENERIC_UNARY; } else op2PredictReg = PREDICT_SCRATCH_REG; } else { /* Non integer constant also must be enregistered */ op2PredictReg = PREDICT_REG; } regMaskTP trashedMask; trashedMask = DUMMY_INIT(RBM_ILLEGAL); regMaskTP op1ExcludeMask; op1ExcludeMask = DUMMY_INIT(RBM_ILLEGAL); regMaskTP op2ExcludeMask; op2ExcludeMask = DUMMY_INIT(RBM_ILLEGAL); #ifdef _TARGET_XARCH_ /* Consider the case "a / b" - we'll need to trash EDX (via "CDQ") before * we can safely allow the "b" value to die. Unfortunately, if we simply * mark the node "b" as using EDX, this will not work if "b" is a register * variable that dies with this particular reference. Thus, if we want to * avoid this situation (where we would have to spill the variable from * EDX to someplace else), we need to explicitly mark the interference * of the variable at this point. 
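* (The check below records an interference with both EAX and EDX for a tracked op2 local, since idiv/div implicitly use the EDX:EAX pair.)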
*/ if (op2->gtOper == GT_LCL_VAR) { unsigned lclNum = op2->gtLclVarCommon.gtLclNum; varDsc = lvaTable + lclNum; if (varDsc->lvTracked) { #ifdef DEBUG if (verbose) { if (!VarSetOps::IsMember(this, raLclRegIntf[REG_EAX], varDsc->lvVarIndex)) printf("Record interference between V%02u,T%02u and EAX -- int divide\n", lclNum, varDsc->lvVarIndex); if (!VarSetOps::IsMember(this, raLclRegIntf[REG_EDX], varDsc->lvVarIndex)) printf("Record interference between V%02u,T%02u and EDX -- int divide\n", lclNum, varDsc->lvVarIndex); } #endif VarSetOps::AddElemD(this, raLclRegIntf[REG_EAX], varDsc->lvVarIndex); VarSetOps::AddElemD(this, raLclRegIntf[REG_EDX], varDsc->lvVarIndex); } } /* set the held register based on opcode */ if (oper == GT_DIV || oper == GT_UDIV) regMask = RBM_EAX; else regMask = RBM_EDX; trashedMask = (RBM_EAX | RBM_EDX); op1ExcludeMask = 0; op2ExcludeMask = (RBM_EAX | RBM_EDX); #endif // _TARGET_XARCH_ #ifdef _TARGET_ARM_ trashedMask = RBM_NONE; op1ExcludeMask = RBM_NONE; op2ExcludeMask = RBM_NONE; #endif /* set the lvPref reg if possible */ GenTreePtr dest; /* * Walking the gtNext link twice from here should get us back * to our parent node, if this is an simple assignment tree. */ dest = tree->gtNext; if (dest && (dest->gtOper == GT_LCL_VAR) && dest->gtNext && (dest->gtNext->OperKind() & GTK_ASGOP) && dest->gtNext->gtOp.gtOp2 == tree) { varDsc = lvaTable + dest->gtLclVarCommon.gtLclNum; varDsc->addPrefReg(regMask, this); } #ifdef _TARGET_XARCH_ op1PredictReg = PREDICT_REG_EDX; /* Normally target op1 into EDX */ #else op1PredictReg = PREDICT_SCRATCH_REG; #endif /* are we supposed to evaluate op2 first? */ if (tree->gtFlags & GTF_REVERSE_OPS) { tmpMask = rpPredictTreeRegUse(op2, op2PredictReg, lockedRegs | op2ExcludeMask, rsvdRegs | op1->gtRsvdRegs); rpPredictTreeRegUse(op1, op1PredictReg, lockedRegs | tmpMask | op1ExcludeMask, RBM_LASTUSE); } else { tmpMask = rpPredictTreeRegUse(op1, op1PredictReg, lockedRegs | op1ExcludeMask, rsvdRegs | op2->gtRsvdRegs); rpPredictTreeRegUse(op2, op2PredictReg, tmpMask | lockedRegs | op2ExcludeMask, RBM_LASTUSE); } #ifdef _TARGET_ARM_ regMask = tmpMask; #endif /* grab EAX, EDX for this tree node */ tree->gtUsedRegs = (regMaskSmall)trashedMask | op1->gtUsedRegs | op2->gtUsedRegs; goto RETURN_CHECK; case GT_LSH: case GT_RSH: case GT_RSZ: if (predictReg <= PREDICT_REG) predictReg = PREDICT_SCRATCH_REG; #ifndef _TARGET_64BIT_ if (type == TYP_LONG) { if (op2->IsCnsIntOrI()) { regMask = rpPredictTreeRegUse(op1, predictReg, lockedRegs, rsvdRegs); // no register used by op2 op2->gtUsedRegs = 0; tree->gtUsedRegs = op1->gtUsedRegs; } else { // since RBM_LNGARG_0 and RBM_SHIFT_LNG are hardwired we can't have them in the locked registers tmpMask = lockedRegs; tmpMask &= ~RBM_LNGARG_0; tmpMask &= ~RBM_SHIFT_LNG; // op2 goes to RBM_SHIFT, op1 to the RBM_LNGARG_0 pair if (tree->gtFlags & GTF_REVERSE_OPS) { rpPredictTreeRegUse(op2, PREDICT_REG_SHIFT_LNG, tmpMask, RBM_NONE); tmpMask |= RBM_SHIFT_LNG; // Ensure that the RBM_SHIFT_LNG register interfere with op2's compCurLife // Fix 383843 X86/ARM ILGEN rpRecordRegIntf(RBM_SHIFT_LNG, compCurLife DEBUGARG("SHIFT_LNG arg setup")); rpPredictTreeRegUse(op1, PREDICT_PAIR_LNGARG_0, tmpMask, RBM_LASTUSE); } else { rpPredictTreeRegUse(op1, PREDICT_PAIR_LNGARG_0, tmpMask, RBM_NONE); tmpMask |= RBM_LNGARG_0; // Ensure that the RBM_LNGARG_0 registers interfere with op1's compCurLife // Fix 383839 ARM ILGEN rpRecordRegIntf(RBM_LNGARG_0, compCurLife DEBUGARG("LNGARG_0 arg setup")); rpPredictTreeRegUse(op2, PREDICT_REG_SHIFT_LNG, 
tmpMask, RBM_LASTUSE); } regMask = RBM_LNGRET; // function return registers op1->gtUsedRegs |= RBM_LNGARG_0; op2->gtUsedRegs |= RBM_SHIFT_LNG; tree->gtUsedRegs = op1->gtUsedRegs | op2->gtUsedRegs; // We are using a helper function to do shift: // tree->gtUsedRegs |= RBM_CALLEE_TRASH; } } else #endif // _TARGET_64BIT_ { #ifdef _TARGET_XARCH_ if (!op2->IsCnsIntOrI()) predictReg = PREDICT_NOT_REG_ECX; #endif HANDLE_SHIFT_COUNT: // Note that this code is also used by assigning shift operators (i.e. GT_ASG_LSH) regMaskTP tmpRsvdRegs; if ((tree->gtFlags & GTF_REVERSE_OPS) == 0) { regMask = rpPredictTreeRegUse(op1, predictReg, lockedRegs, rsvdRegs | op2->gtRsvdRegs); rsvdRegs = RBM_LASTUSE; tmpRsvdRegs = RBM_NONE; } else { regMask = RBM_NONE; // Special case op1 of a constant if (op1->IsCnsIntOrI()) tmpRsvdRegs = RBM_LASTUSE; // Allow a last use to occur in op2; See // System.Xml.Schema.BitSet:Get(int):bool else tmpRsvdRegs = op1->gtRsvdRegs; } op2Mask = RBM_NONE; if (!op2->IsCnsIntOrI()) { if ((REG_SHIFT != REG_NA) && ((RBM_SHIFT & tmpRsvdRegs) == 0)) { op2PredictReg = PREDICT_REG_SHIFT; } else { op2PredictReg = PREDICT_REG; } /* evaluate shift count into a register, likely the PREDICT_REG_SHIFT register */ op2Mask = rpPredictTreeRegUse(op2, op2PredictReg, lockedRegs | regMask, tmpRsvdRegs); // If our target arch has a REG_SHIFT register then // we set the PrefReg when we have a LclVar for op2 // we add an interference with REG_SHIFT for any other LclVars alive at op2 if (REG_SHIFT != REG_NA) { VARSET_TP VARSET_INIT(this, liveSet, compCurLife); while (op2->gtOper == GT_COMMA) { op2 = op2->gtOp.gtOp2; } if (op2->gtOper == GT_LCL_VAR) { varDsc = lvaTable + op2->gtLclVarCommon.gtLclNum; varDsc->setPrefReg(REG_SHIFT, this); if (varDsc->lvTracked) { VarSetOps::RemoveElemD(this, liveSet, varDsc->lvVarIndex); } } // Ensure that we have a register interference with the LclVar in tree's LiveSet, // excluding the LclVar that was used for the shift amount as it is read-only // and can be kept alive through the shift operation // rpRecordRegIntf(RBM_SHIFT, liveSet DEBUGARG("Variable Shift Register")); // In case op2Mask doesn't contain the required shift register, // we will or it in now. op2Mask |= RBM_SHIFT; } } if (tree->gtFlags & GTF_REVERSE_OPS) { assert(regMask == RBM_NONE); regMask = rpPredictTreeRegUse(op1, predictReg, lockedRegs | op2Mask, rsvdRegs | RBM_LASTUSE); } #if CPU_HAS_BYTE_REGS if (varTypeIsByte(type)) { // Fix 383789 X86 ILGEN // Fix 383813 X86 ILGEN // Fix 383828 X86 ILGEN if (op1->gtOper == GT_LCL_VAR) { varDsc = lvaTable + op1->gtLclVar.gtLclNum; if (varDsc->lvTracked) { VARSET_TP VARSET_INIT_NOCOPY(op1VarBit, VarSetOps::MakeSingleton(this, varDsc->lvVarIndex)); // Ensure that we don't assign a Non-Byteable register for op1's LCL_VAR rpRecordRegIntf(RBM_NON_BYTE_REGS, op1VarBit DEBUGARG("Non Byte Register")); } } if ((regMask & RBM_BYTE_REGS) == 0) { // We need to grab a byte-able register, (i.e. EAX, EDX, ECX, EBX) // and we can't select one that is already reserved (i.e. 
lockedRegs or regMask) // regMask |= rpPredictRegPick(type, PREDICT_SCRATCH_REG, (lockedRegs | regMask | RBM_NON_BYTE_REGS)); } } #endif tree->gtUsedRegs = (regMaskSmall)(regMask | op2Mask); } goto RETURN_CHECK; case GT_COMMA: if (tree->gtFlags & GTF_REVERSE_OPS) { if (predictReg == PREDICT_NONE) { predictReg = PREDICT_REG; } else if (rpHasVarIndexForPredict(predictReg)) { /* Don't propagate the use of tgt reg use in a GT_COMMA */ predictReg = PREDICT_SCRATCH_REG; } regMask = rpPredictTreeRegUse(op2, predictReg, lockedRegs, rsvdRegs); rpPredictTreeRegUse(op1, PREDICT_NONE, lockedRegs | regMask, RBM_LASTUSE); } else { rpPredictTreeRegUse(op1, PREDICT_NONE, lockedRegs, RBM_LASTUSE); /* CodeGen will enregister the op2 side of a GT_COMMA */ if (predictReg == PREDICT_NONE) { predictReg = PREDICT_REG; } else if (rpHasVarIndexForPredict(predictReg)) { /* Don't propagate the use of tgt reg use in a GT_COMMA */ predictReg = PREDICT_SCRATCH_REG; } regMask = rpPredictTreeRegUse(op2, predictReg, lockedRegs, rsvdRegs); } // tree should only accumulate the used registers from the op2 side of the GT_COMMA // tree->gtUsedRegs = op2->gtUsedRegs; if ((op2->gtOper == GT_LCL_VAR) && (rsvdRegs != 0)) { LclVarDsc* op2VarDsc = lvaTable + op2->gtLclVarCommon.gtLclNum; if (op2VarDsc->lvTracked) { VARSET_TP VARSET_INIT_NOCOPY(op2VarBit, VarSetOps::MakeSingleton(this, op2VarDsc->lvVarIndex)); rpRecordRegIntf(rsvdRegs, op2VarBit DEBUGARG("comma use")); } } goto RETURN_CHECK; case GT_QMARK: { noway_assert(op1 != NULL && op2 != NULL); /* * If the gtUsedRegs conflicts with lockedRegs * then we going to have to spill some registers * into the non-trashed register set to keep it alive */ unsigned spillCnt; spillCnt = 0; regMaskTP spillRegs; spillRegs = lockedRegs & tree->gtUsedRegs; while (spillRegs) { /* Find the next register that needs to be spilled */ tmpMask = genFindLowestBit(spillRegs); #ifdef DEBUG if (verbose) { printf("Predict spill of %s before: ", getRegName(genRegNumFromMask(tmpMask))); gtDispTree(tree, 0, NULL, true); } #endif /* In Codegen it will typically introduce a spill temp here */ /* rather than relocating the register to a non trashed reg */ rpPredictSpillCnt++; spillCnt++; /* Remove it from the spillRegs and lockedRegs*/ spillRegs &= ~tmpMask; lockedRegs &= ~tmpMask; } { VARSET_TP VARSET_INIT(this, startQmarkCondUseInPlaceVars, rpUseInPlace); /* Evaluate the subtree */ rpPredictTreeRegUse(op1, PREDICT_NONE, lockedRegs, RBM_LASTUSE); VarSetOps::Assign(this, rpUseInPlace, startQmarkCondUseInPlaceVars); tree->gtUsedRegs = op1->gtUsedRegs; noway_assert(op2->gtOper == GT_COLON); if (rpHasVarIndexForPredict(predictReg) && ((op2->gtFlags & (GTF_ASG | GTF_CALL)) != 0)) { // Don't try to target the register specified in predictReg when we have complex subtrees // predictReg = PREDICT_SCRATCH_REG; } GenTreePtr elseTree = op2->AsColon()->ElseNode(); GenTreePtr thenTree = op2->AsColon()->ThenNode(); noway_assert(thenTree != NULL && elseTree != NULL); // Update compCurLife to only those vars live on the subtree VarSetOps::Assign(this, compCurLife, tree->gtQmark.gtThenLiveSet); if (type == TYP_VOID) { /* Evaluate the subtree */ rpPredictTreeRegUse(thenTree, PREDICT_NONE, lockedRegs, RBM_LASTUSE); regMask = RBM_NONE; predictReg = PREDICT_NONE; } else { // A mask to use to force the predictor to choose low registers (to reduce code size) regMaskTP avoidRegs = RBM_NONE; #ifdef _TARGET_ARM_ avoidRegs = (RBM_R12 | RBM_LR); #endif if (predictReg <= PREDICT_REG) predictReg = PREDICT_SCRATCH_REG; /* Evaluate the subtree 
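   (this is the 'then' arm of the GT_COLON; the 'else' arm is predicted below, retargeted toward whatever register this arm lands in so that both arms agree)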
*/ regMask = rpPredictTreeRegUse(thenTree, predictReg, lockedRegs, rsvdRegs | avoidRegs | RBM_LASTUSE); if (regMask) { rpPredictReg op1PredictReg = rpGetPredictForMask(regMask); if (op1PredictReg != PREDICT_NONE) predictReg = op1PredictReg; } } VarSetOps::Assign(this, rpUseInPlace, startQmarkCondUseInPlaceVars); /* Evaluate the subtree */ // First record the post-then liveness, and reset the current liveness to the else // branch liveness. CLANG_FORMAT_COMMENT_ANCHOR; #ifdef DEBUG VARSET_TP VARSET_INIT(this, postThenLive, compCurLife); #endif VarSetOps::Assign(this, compCurLife, tree->gtQmark.gtElseLiveSet); rpPredictTreeRegUse(elseTree, predictReg, lockedRegs, rsvdRegs | RBM_LASTUSE); tree->gtUsedRegs |= thenTree->gtUsedRegs | elseTree->gtUsedRegs; // The then and the else are "virtual basic blocks" that form a control-flow diamond. // They each have only one successor, which they share. Their live-out sets must equal the // live-in set of this virtual successor block, and thus must be the same. We can assert // that equality here. assert(VarSetOps::Equal(this, compCurLife, postThenLive)); if (spillCnt > 0) { regMaskTP reloadMask = RBM_NONE; while (spillCnt) { regMaskTP reloadReg; /* Get an extra register to hold it */ reloadReg = rpPredictRegPick(TYP_INT, PREDICT_REG, lockedRegs | regMask | reloadMask); #ifdef DEBUG if (verbose) { printf("Predict reload into %s after : ", getRegName(genRegNumFromMask(reloadReg))); gtDispTree(tree, 0, NULL, true); } #endif reloadMask |= reloadReg; spillCnt--; } /* update the gtUsedRegs mask */ tree->gtUsedRegs |= reloadMask; } } goto RETURN_CHECK; } case GT_RETURN: tree->gtUsedRegs = RBM_NONE; regMask = RBM_NONE; /* Is there a return value? */ if (op1 != NULL) { #if FEATURE_FP_REGALLOC if (varTypeIsFloating(type)) { predictReg = PREDICT_FLTRET; if (type == TYP_FLOAT) regMask = RBM_FLOATRET; else regMask = RBM_DOUBLERET; } else #endif if (isRegPairType(type)) { predictReg = PREDICT_LNGRET; regMask = RBM_LNGRET; } else { predictReg = PREDICT_INTRET; regMask = RBM_INTRET; } if (info.compCallUnmanaged) { lockedRegs |= (RBM_PINVOKE_TCB | RBM_PINVOKE_FRAME); } rpPredictTreeRegUse(op1, predictReg, lockedRegs, RBM_LASTUSE); tree->gtUsedRegs = op1->gtUsedRegs | (regMaskSmall)regMask; } #if defined(_TARGET_ARM_) && defined(PROFILING_SUPPORTED) // When on Arm under profiler, to emit Leave callback we would need RBM_PROFILER_RETURN_USED. // We could optimize on registers based on int/long or no return value. But to // keep it simple we will mark entire RBM_PROFILER_RETURN_USED as used regs here. 
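// (The RBM_PROFILER_RET_USED mask ored in below appears to be that combined set of return registers.)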
if (compIsProfilerHookNeeded()) { tree->gtUsedRegs |= RBM_PROFILER_RET_USED; } #endif goto RETURN_CHECK; case GT_RETFILT: if (op1 != NULL) { rpPredictTreeRegUse(op1, PREDICT_NONE, lockedRegs, RBM_LASTUSE); regMask = genReturnRegForTree(tree); tree->gtUsedRegs = op1->gtUsedRegs | (regMaskSmall)regMask; goto RETURN_CHECK; } tree->gtUsedRegs = 0; regMask = 0; goto RETURN_CHECK; case GT_JTRUE: /* This must be a test of a relational operator */ noway_assert(op1->OperIsCompare()); /* Only condition code set by this operation */ rpPredictTreeRegUse(op1, PREDICT_NONE, lockedRegs, RBM_NONE); tree->gtUsedRegs = op1->gtUsedRegs; regMask = 0; goto RETURN_CHECK; case GT_SWITCH: noway_assert(type <= TYP_INT); noway_assert(compCurBB->bbJumpKind == BBJ_SWITCH); #ifdef _TARGET_ARM_ { regMask = rpPredictTreeRegUse(op1, PREDICT_REG, lockedRegs, RBM_NONE); unsigned jumpCnt = compCurBB->bbJumpSwt->bbsCount; if (jumpCnt > 2) { // Table based switch requires an extra register for the table base regMask |= rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, lockedRegs | regMask); } tree->gtUsedRegs = op1->gtUsedRegs | regMask; } #else // !_TARGET_ARM_ rpPredictTreeRegUse(op1, PREDICT_REG, lockedRegs, RBM_NONE); tree->gtUsedRegs = op1->gtUsedRegs; #endif // _TARGET_ARM_ regMask = 0; goto RETURN_CHECK; case GT_CKFINITE: if (predictReg <= PREDICT_REG) predictReg = PREDICT_SCRATCH_REG; rpPredictTreeRegUse(op1, predictReg, lockedRegs, rsvdRegs); // Need a reg to load exponent into regMask = rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, lockedRegs | rsvdRegs); tree->gtUsedRegs = (regMaskSmall)regMask | op1->gtUsedRegs; goto RETURN_CHECK; case GT_LCLHEAP: regMask = rpPredictTreeRegUse(op1, PREDICT_SCRATCH_REG, lockedRegs, rsvdRegs); op2Mask = 0; #ifdef _TARGET_ARM_ if (info.compInitMem) { // We zero out two registers in the ARM codegen path op2Mask |= rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, lockedRegs | rsvdRegs | regMask | op2Mask); } #endif op1->gtUsedRegs |= (regMaskSmall)regMask; tree->gtUsedRegs = op1->gtUsedRegs | (regMaskSmall)op2Mask; // The result will be put in the reg we picked for the size // regMask = goto RETURN_CHECK; case GT_OBJ: { #ifdef _TARGET_ARM_ if (predictReg <= PREDICT_REG) predictReg = PREDICT_SCRATCH_REG; regMaskTP avoidRegs = (RBM_R12 | RBM_LR); // A mask to use to force the predictor to choose low // registers (to reduce code size) regMask = RBM_NONE; tmpMask = rpPredictTreeRegUse(op1, predictReg, lockedRegs, rsvdRegs | avoidRegs); #endif if (fgIsIndirOfAddrOfLocal(tree) != NULL) { compUpdateLifeVar(tree); } #ifdef _TARGET_ARM_ unsigned objSize = info.compCompHnd->getClassSize(tree->gtObj.gtClass); regMaskTP preferReg = rpPredictRegMask(predictReg, TYP_I_IMPL); // If it has one bit set, and that's an arg reg... if (preferReg != RBM_NONE && genMaxOneBit(preferReg) && ((preferReg & RBM_ARG_REGS) != 0)) { // We are passing the 'obj' in the argument registers // regNumber rn = genRegNumFromMask(preferReg); // Add the registers used to pass the 'obj' to regMask. for (unsigned i = 0; i < objSize / 4; i++) { if (rn == MAX_REG_ARG) break; // Otherwise... regMask |= genRegMask(rn); rn = genRegArgNext(rn); } } else { // We are passing the 'obj' in the outgoing arg space // We will need one register to load into unless the 'obj' size is 4 or less. 
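// (objSize is the class size obtained from getClassSize above)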
// if (objSize > 4) { regMask = rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, lockedRegs | tmpMask | avoidRegs); } } tree->gtUsedRegs = (regMaskSmall)(regMask | tmpMask); goto RETURN_CHECK; #else // !_TARGET_ARM goto GENERIC_UNARY; #endif // _TARGET_ARM_ } case GT_MKREFANY: { #ifdef _TARGET_ARM_ regMaskTP preferReg = rpPredictRegMask(predictReg, TYP_I_IMPL); regMask = RBM_NONE; if ((((preferReg - 1) & preferReg) == 0) && ((preferReg & RBM_ARG_REGS) != 0)) { // A MKREFANY takes up two registers. regNumber rn = genRegNumFromMask(preferReg); regMask = RBM_NONE; if (rn < MAX_REG_ARG) { regMask |= genRegMask(rn); rn = genRegArgNext(rn); if (rn < MAX_REG_ARG) regMask |= genRegMask(rn); } } if (regMask != RBM_NONE) { // Condensation of GENERIC_BINARY path. assert((tree->gtFlags & GTF_REVERSE_OPS) == 0); op2PredictReg = PREDICT_REG; regMaskTP regMaskOp1 = rpPredictTreeRegUse(op1, predictReg, lockedRegs, rsvdRegs | op2->gtRsvdRegs); rpPredictTreeRegUse(op2, op2PredictReg, lockedRegs | regMaskOp1, RBM_LASTUSE); regMask |= op1->gtUsedRegs | op2->gtUsedRegs; tree->gtUsedRegs = (regMaskSmall)regMask; goto RETURN_CHECK; } tree->gtUsedRegs = op1->gtUsedRegs; #endif // _TARGET_ARM_ goto GENERIC_BINARY; } case GT_BOX: goto GENERIC_UNARY; case GT_LOCKADD: goto GENERIC_BINARY; case GT_XADD: case GT_XCHG: // Ensure we can write to op2. op2 will hold the output. if (predictReg < PREDICT_SCRATCH_REG) predictReg = PREDICT_SCRATCH_REG; if (tree->gtFlags & GTF_REVERSE_OPS) { op2Mask = rpPredictTreeRegUse(op2, predictReg, lockedRegs, rsvdRegs); regMask = rpPredictTreeRegUse(op1, PREDICT_REG, lockedRegs, rsvdRegs | op2Mask); } else { regMask = rpPredictTreeRegUse(op1, PREDICT_REG, lockedRegs, rsvdRegs); op2Mask = rpPredictTreeRegUse(op2, PREDICT_SCRATCH_REG, lockedRegs, rsvdRegs | regMask); } tree->gtUsedRegs = (regMaskSmall)(regMask | op2Mask); goto RETURN_CHECK; case GT_ARR_LENGTH: goto GENERIC_UNARY; case GT_INIT_VAL: // This unary operator simply passes through the value from its child (much like GT_NOP) // and thus won't need a scratch register. regMask = rpPredictTreeRegUse(op1, predictReg, lockedRegs, rsvdRegs); tree->gtUsedRegs = op1->gtUsedRegs; goto RETURN_CHECK; default: #ifdef DEBUG gtDispTree(tree); #endif noway_assert(!"unexpected simple operator in reg use prediction"); break; } } /* See what kind of a special operator we have here */ switch (oper) { GenTreePtr args; GenTreeArgList* list; regMaskTP keepMask; unsigned regArgsNum; int regIndex; regMaskTP regArgMask; regMaskTP curArgMask; case GT_CALL: { /* initialize so we can just or in various bits */ tree->gtUsedRegs = RBM_NONE; #if GTF_CALL_REG_SAVE /* * Unless the GTF_CALL_REG_SAVE flag is set, * we can't preserve the RBM_CALLEE_TRASH registers. * (likewise we can't preserve the return registers) * So we remove them from the lockedRegs set and * record any of them in the keepMask */ if (tree->gtFlags & GTF_CALL_REG_SAVE) { regMaskTP trashMask = genReturnRegForTree(tree); keepMask = lockedRegs & trashMask; lockedRegs &= ~trashMask; } else #endif { keepMask = lockedRegs & RBM_CALLEE_TRASH; lockedRegs &= ~RBM_CALLEE_TRASH; } regArgsNum = 0; regIndex = 0; /* Is there an object pointer? 
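   (gtCallObjp holds the 'this' pointer for instance calls)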
*/ if (tree->gtCall.gtCallObjp) { /* Evaluate the instance pointer first */ args = tree->gtCall.gtCallObjp; /* the objPtr always goes to an integer register (through temp or directly) */ noway_assert(regArgsNum == 0); regArgsNum++; /* Must be passed in a register */ noway_assert(args->gtFlags & GTF_LATE_ARG); /* Must be either a deferred reg arg node or a GT_ASG node */ noway_assert(args->IsArgPlaceHolderNode() || args->IsNothingNode() || (args->gtOper == GT_ASG) || args->OperIsCopyBlkOp() || (args->gtOper == GT_COMMA)); if (!args->IsArgPlaceHolderNode()) { rpPredictTreeRegUse(args, PREDICT_NONE, lockedRegs, RBM_LASTUSE); } } VARSET_TP VARSET_INIT_NOCOPY(startArgUseInPlaceVars, VarSetOps::UninitVal()); VarSetOps::Assign(this, startArgUseInPlaceVars, rpUseInPlace); /* process argument list */ for (list = tree->gtCall.gtCallArgs; list; list = list->Rest()) { args = list->Current(); if (args->gtFlags & GTF_LATE_ARG) { /* Must be either a Placeholder/NOP node or a GT_ASG node */ noway_assert(args->IsArgPlaceHolderNode() || args->IsNothingNode() || (args->gtOper == GT_ASG) || args->OperIsCopyBlkOp() || (args->gtOper == GT_COMMA)); if (!args->IsArgPlaceHolderNode()) { rpPredictTreeRegUse(args, PREDICT_NONE, lockedRegs, RBM_LASTUSE); } regArgsNum++; } else { #ifdef FEATURE_FIXED_OUT_ARGS // We'll store this argument into the outgoing argument area // It needs to be in a register to be stored. // predictReg = PREDICT_REG; #else // !FEATURE_FIXED_OUT_ARGS // We'll generate a push for this argument // predictReg = PREDICT_NONE; if (varTypeIsSmall(args->TypeGet())) { /* We may need to sign or zero extend a small type using a register */ predictReg = PREDICT_SCRATCH_REG; } #endif rpPredictTreeRegUse(args, predictReg, lockedRegs, RBM_LASTUSE); } VarSetOps::Assign(this, rpUseInPlace, startArgUseInPlaceVars); tree->gtUsedRegs |= args->gtUsedRegs; } /* Is there a late argument list */ regIndex = 0; regArgMask = RBM_NONE; // Set of argument registers that have already been setup. args = NULL; /* process the late argument list */ for (list = tree->gtCall.gtCallLateArgs; list; regIndex++) { // If the current argument being copied is a promoted struct local, set this pointer to its description. 
LclVarDsc* promotedStructLocal = NULL; curArgMask = RBM_NONE; // Set of argument registers that are going to be setup by this arg tmpMask = RBM_NONE; // Set of additional temp registers that are need only to setup the current arg assert(list->OperIsList()); args = list->Current(); list = list->Rest(); assert(!args->IsArgPlaceHolderNode()); // No place holders nodes are in gtCallLateArgs; fgArgTabEntryPtr curArgTabEntry = gtArgEntryByNode(tree, args); assert(curArgTabEntry); regNumber regNum = curArgTabEntry->regNum; // first register use to pass this argument unsigned numSlots = curArgTabEntry->numSlots; // number of outgoing arg stack slots used by this argument rpPredictReg argPredictReg; regMaskTP avoidReg = RBM_NONE; if (regNum != REG_STK) { argPredictReg = rpGetPredictForReg(regNum); curArgMask |= genRegMask(regNum); } else { assert(numSlots > 0); argPredictReg = PREDICT_NONE; #ifdef _TARGET_ARM_ // Force the predictor to choose a low register when regNum is REG_STK to reduce code bloat avoidReg = (RBM_R12 | RBM_LR); #endif } #ifdef _TARGET_ARM_ // For TYP_LONG or TYP_DOUBLE register arguments we need to add the second argument register // if ((regNum != REG_STK) && ((args->TypeGet() == TYP_LONG) || (args->TypeGet() == TYP_DOUBLE))) { // 64-bit longs and doubles require 2 consecutive argument registers curArgMask |= genRegMask(REG_NEXT(regNum)); } else if (args->TypeGet() == TYP_STRUCT) { GenTreePtr argx = args; GenTreePtr lclVarTree = NULL; /* The GT_OBJ may be be a child of a GT_COMMA */ while (argx->gtOper == GT_COMMA) { argx = argx->gtOp.gtOp2; } unsigned originalSize = 0; if (argx->gtOper == GT_OBJ) { originalSize = info.compCompHnd->getClassSize(argx->gtObj.gtClass); // Is it the address of a promoted struct local? if (argx->gtObj.gtOp1->gtOper == GT_ADDR && argx->gtObj.gtOp1->gtOp.gtOp1->gtOper == GT_LCL_VAR) { lclVarTree = argx->gtObj.gtOp1->gtOp.gtOp1; LclVarDsc* varDsc = &lvaTable[lclVarTree->gtLclVarCommon.gtLclNum]; if (varDsc->lvPromoted) promotedStructLocal = varDsc; } } else if (argx->gtOper == GT_LCL_VAR) { varDsc = lvaTable + argx->gtLclVarCommon.gtLclNum; originalSize = varDsc->lvSize(); // Is it a promoted struct local? if (varDsc->lvPromoted) promotedStructLocal = varDsc; } else if (argx->gtOper == GT_MKREFANY) { originalSize = 2 * TARGET_POINTER_SIZE; } else { noway_assert(!"Can't predict unsupported TYP_STRUCT arg kind"); } // We only pass arguments differently if it a struct local "independently" promoted, which // allows the field locals can be independently enregistered. if (promotedStructLocal != NULL) { if (lvaGetPromotionType(promotedStructLocal) != PROMOTION_TYPE_INDEPENDENT) promotedStructLocal = NULL; } unsigned slots = ((unsigned)(roundUp(originalSize, TARGET_POINTER_SIZE))) / REGSIZE_BYTES; // Are we passing a TYP_STRUCT in multiple integer registers? // if so set up curArgMask to reflect this // Also slots is updated to reflect the number of outgoing arg slots that we will write if (regNum != REG_STK) { regNumber regLast = (curArgTabEntry->isHfaRegArg) ? LAST_FP_ARGREG : REG_ARG_LAST; assert(genIsValidReg(regNum)); regNumber nextReg = REG_NEXT(regNum); slots--; while (slots > 0 && nextReg <= regLast) { curArgMask |= genRegMask(nextReg); nextReg = REG_NEXT(nextReg); slots--; } } if ((promotedStructLocal != NULL) && (curArgMask != RBM_NONE)) { // All or a portion of this struct will be placed in the argument registers indicated by // "curArgMask". 
We build in knowledge of the order in which the code is generated here, so // that the second arg to be evaluated interferes with the reg for the first, the third with // the regs for the first and second, etc. But since we always place the stack slots before // placing the register slots we do not add interferences for any part of the struct that gets // passed on the stack. argPredictReg = PREDICT_NONE; // We will target the individual fields into registers but not the whole struct regMaskTP prevArgMask = RBM_NONE; for (unsigned i = 0; i < promotedStructLocal->lvFieldCnt; i++) { LclVarDsc* fieldVarDsc = &lvaTable[promotedStructLocal->lvFieldLclStart + i]; if (fieldVarDsc->lvTracked) { assert(lclVarTree != NULL); if (prevArgMask != RBM_NONE) { rpRecordRegIntf(prevArgMask, VarSetOps::MakeSingleton(this, fieldVarDsc->lvVarIndex) DEBUGARG("fieldVar/argReg")); } } // Now see how many registers this uses up. unsigned firstRegOffset = fieldVarDsc->lvFldOffset / TARGET_POINTER_SIZE; unsigned nextAfterLastRegOffset = (fieldVarDsc->lvFldOffset + fieldVarDsc->lvExactSize + TARGET_POINTER_SIZE - 1) / TARGET_POINTER_SIZE; unsigned nextAfterLastArgRegOffset = min(nextAfterLastRegOffset, genIsValidIntReg(regNum) ? REG_NEXT(REG_ARG_LAST) : REG_NEXT(LAST_FP_ARGREG)); for (unsigned regOffset = firstRegOffset; regOffset < nextAfterLastArgRegOffset; regOffset++) { prevArgMask |= genRegMask(regNumber(regNum + regOffset)); } if (nextAfterLastRegOffset > nextAfterLastArgRegOffset) { break; } if ((fieldVarDsc->lvFldOffset % TARGET_POINTER_SIZE) == 0) { // Add the argument register used here as a preferred register for this fieldVarDsc // regNumber firstRegUsed = regNumber(regNum + firstRegOffset); fieldVarDsc->setPrefReg(firstRegUsed, this); } } compUpdateLifeVar(argx); } // If slots is greater than zero then part or all of this TYP_STRUCT // argument is passed in the outgoing argument area. (except HFA arg) // if ((slots > 0) && !curArgTabEntry->isHfaRegArg) { // We will need a register to address the TYP_STRUCT // Note that we can use an argument register in curArgMask as in // codegen we pass the stack portion of the argument before we // setup the register part. // // Force the predictor to choose a LOW_REG here to reduce code bloat avoidReg = (RBM_R12 | RBM_LR); assert(tmpMask == RBM_NONE); tmpMask = rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, lockedRegs | regArgMask | avoidReg); // If slots > 1 then we will need a second register to perform the load/store into the outgoing // arg area if (slots > 1) { tmpMask |= rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, lockedRegs | regArgMask | tmpMask | avoidReg); } } } // (args->TypeGet() == TYP_STRUCT) #endif // _TARGET_ARM_ // If we have a promotedStructLocal we don't need to call rpPredictTreeRegUse(args, ... // as we have already calculated the correct tmpMask and curArgMask values and // by calling rpPredictTreeRegUse we would just add unnecessary register interferences. // if (promotedStructLocal == NULL) { /* Target the appropriate argument register */ tmpMask |= rpPredictTreeRegUse(args, argPredictReg, lockedRegs | regArgMask, RBM_LASTUSE); } // We mark OBJ(ADDR(LOCAL)) with GTF_VAR_DEATH since the local is required to live // for the duration of the OBJ. if (args->OperGet() == GT_OBJ && (args->gtFlags & GTF_VAR_DEATH)) { GenTreePtr lclVarTree = fgIsIndirOfAddrOfLocal(args); assert(lclVarTree != NULL); // Or else would not be marked with GTF_VAR_DEATH.
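// Update compCurLife for this last use of the local, so the argument interferences recorded
// below no longer treat the now-dead local as live.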
compUpdateLifeVar(lclVarTree); } regArgMask |= curArgMask; args->gtUsedRegs |= (tmpMask | regArgMask); tree->gtUsedRegs |= args->gtUsedRegs; tree->gtCall.gtCallLateArgs->gtUsedRegs |= args->gtUsedRegs; if (args->gtUsedRegs != RBM_NONE) { // Add register interference with the set of registers used or in use when we evaluated // the current arg, with whatever is alive after the current arg // rpRecordRegIntf(args->gtUsedRegs, compCurLife DEBUGARG("register arg setup")); } VarSetOps::Assign(this, rpUseInPlace, startArgUseInPlaceVars); } assert(list == NULL); regMaskTP callAddrMask; callAddrMask = RBM_NONE; #if CPU_LOAD_STORE_ARCH predictReg = PREDICT_SCRATCH_REG; #else predictReg = PREDICT_NONE; #endif switch (tree->gtFlags & GTF_CALL_VIRT_KIND_MASK) { case GTF_CALL_VIRT_STUB: // We only want to record an interference between the virtual stub // param reg and anything that's live AFTER the call, but we've not // yet processed the indirect target. So add RBM_VIRTUAL_STUB_PARAM // to interferingRegs. interferingRegs |= RBM_VIRTUAL_STUB_PARAM; #ifdef DEBUG if (verbose) printf("Adding interference with Virtual Stub Param\n"); #endif codeGen->regSet.rsSetRegsModified(RBM_VIRTUAL_STUB_PARAM); if (tree->gtCall.gtCallType == CT_INDIRECT) { predictReg = PREDICT_REG_VIRTUAL_STUB_PARAM; } break; case GTF_CALL_VIRT_VTABLE: predictReg = PREDICT_SCRATCH_REG; break; case GTF_CALL_NONVIRT: predictReg = PREDICT_SCRATCH_REG; break; } if (tree->gtCall.gtCallType == CT_INDIRECT) { #if defined(_TARGET_ARM_) || defined(_TARGET_AMD64_) if (tree->gtCall.gtCallCookie) { codeGen->regSet.rsSetRegsModified(RBM_PINVOKE_COOKIE_PARAM | RBM_PINVOKE_TARGET_PARAM); callAddrMask |= rpPredictTreeRegUse(tree->gtCall.gtCallCookie, PREDICT_REG_PINVOKE_COOKIE_PARAM, lockedRegs | regArgMask, RBM_LASTUSE); // Just in case we predict some other registers, force interference with our two special // parameters: PINVOKE_COOKIE_PARAM & PINVOKE_TARGET_PARAM callAddrMask |= (RBM_PINVOKE_COOKIE_PARAM | RBM_PINVOKE_TARGET_PARAM); predictReg = PREDICT_REG_PINVOKE_TARGET_PARAM; } #endif callAddrMask |= rpPredictTreeRegUse(tree->gtCall.gtCallAddr, predictReg, lockedRegs | regArgMask, RBM_LASTUSE); } else if (predictReg != PREDICT_NONE) { callAddrMask |= rpPredictRegPick(TYP_I_IMPL, predictReg, lockedRegs | regArgMask); } if (tree->gtFlags & GTF_CALL_UNMANAGED) { // Need a register for tcbReg callAddrMask |= rpPredictRegPick(TYP_I_IMPL, PREDICT_SCRATCH_REG, lockedRegs | regArgMask | callAddrMask); #if CPU_LOAD_STORE_ARCH // Need an extra register for tmpReg callAddrMask |= rpPredictRegPick(TYP_I_IMPL, PREDICT_SCRATCH_REG, lockedRegs | regArgMask | callAddrMask); #endif } tree->gtUsedRegs |= callAddrMask; /* After the call restore the orginal value of lockedRegs */ lockedRegs |= keepMask; /* set the return register */ regMask = genReturnRegForTree(tree); if (regMask & rsvdRegs) { // We will need to relocate the return register value regMaskTP intRegMask = (regMask & RBM_ALLINT); #if FEATURE_FP_REGALLOC regMaskTP floatRegMask = (regMask & RBM_ALLFLOAT); #endif regMask = RBM_NONE; if (intRegMask) { if (intRegMask == RBM_INTRET) { regMask |= rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, rsvdRegs | regMask); } else if (intRegMask == RBM_LNGRET) { regMask |= rpPredictRegPick(TYP_LONG, PREDICT_SCRATCH_REG, rsvdRegs | regMask); } else { noway_assert(!"unexpected return regMask"); } } #if FEATURE_FP_REGALLOC if (floatRegMask) { if (floatRegMask == RBM_FLOATRET) { regMask |= rpPredictRegPick(TYP_FLOAT, PREDICT_SCRATCH_REG, rsvdRegs | regMask); } else if 
(floatRegMask == RBM_DOUBLERET) { regMask |= rpPredictRegPick(TYP_DOUBLE, PREDICT_SCRATCH_REG, rsvdRegs | regMask); } else // HFA return case { for (unsigned f = 0; f < genCountBits(floatRegMask); f++) { regMask |= rpPredictRegPick(TYP_FLOAT, PREDICT_SCRATCH_REG, rsvdRegs | regMask); } } } #endif } /* the return registers (if any) are killed */ tree->gtUsedRegs |= regMask; #if GTF_CALL_REG_SAVE if (!(tree->gtFlags & GTF_CALL_REG_SAVE)) #endif { /* the RBM_CALLEE_TRASH set are killed (i.e. EAX,ECX,EDX) */ tree->gtUsedRegs |= RBM_CALLEE_TRASH; } } #if defined(_TARGET_ARM_) && defined(PROFILING_SUPPORTED) // Mark required registers for emitting tailcall profiler callback as used if (compIsProfilerHookNeeded() && tree->gtCall.IsTailCall() && (tree->gtCall.gtCallType == CT_USER_FUNC)) { tree->gtUsedRegs |= RBM_PROFILER_TAIL_USED; } #endif break; case GT_ARR_ELEM: // Figure out which registers can't be touched unsigned dim; for (dim = 0; dim < tree->gtArrElem.gtArrRank; dim++) rsvdRegs |= tree->gtArrElem.gtArrInds[dim]->gtRsvdRegs; regMask = rpPredictTreeRegUse(tree->gtArrElem.gtArrObj, PREDICT_REG, lockedRegs, rsvdRegs); regMaskTP dimsMask; dimsMask = 0; #if CPU_LOAD_STORE_ARCH // We need a register to load the bounds of the MD array regMask |= rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, lockedRegs | regMask); #endif for (dim = 0; dim < tree->gtArrElem.gtArrRank; dim++) { /* We need scratch registers to compute index-lower_bound. Also, gtArrInds[0]'s register will be used as the second addressability register (besides gtArrObj's) */ regMaskTP dimMask = rpPredictTreeRegUse(tree->gtArrElem.gtArrInds[dim], PREDICT_SCRATCH_REG, lockedRegs | regMask | dimsMask, rsvdRegs); if (dim == 0) regMask |= dimMask; dimsMask |= dimMask; } #ifdef _TARGET_XARCH_ // INS_imul doesnt have an immediate constant. if (!jitIsScaleIndexMul(tree->gtArrElem.gtArrElemSize)) regMask |= rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, lockedRegs | regMask | dimsMask); #endif tree->gtUsedRegs = (regMaskSmall)(regMask | dimsMask); break; case GT_CMPXCHG: { #ifdef _TARGET_XARCH_ rsvdRegs |= RBM_EAX; #endif if (tree->gtCmpXchg.gtOpLocation->OperGet() == GT_LCL_VAR) { regMask = rpPredictTreeRegUse(tree->gtCmpXchg.gtOpLocation, PREDICT_REG, lockedRegs, rsvdRegs); } else { regMask = rpPredictTreeRegUse(tree->gtCmpXchg.gtOpLocation, PREDICT_ADDR, lockedRegs, rsvdRegs); } op2Mask = rpPredictTreeRegUse(tree->gtCmpXchg.gtOpValue, PREDICT_REG, lockedRegs, rsvdRegs | regMask); #ifdef _TARGET_XARCH_ rsvdRegs &= ~RBM_EAX; tmpMask = rpPredictTreeRegUse(tree->gtCmpXchg.gtOpComparand, PREDICT_REG_EAX, lockedRegs, rsvdRegs | regMask | op2Mask); tree->gtUsedRegs = (regMaskSmall)(RBM_EAX | regMask | op2Mask | tmpMask); predictReg = PREDICT_REG_EAX; // When this is done the result is always in EAX. 
#else tmpMask = 0; tree->gtUsedRegs = (regMaskSmall)(regMask | op2Mask | tmpMask); #endif } break; case GT_ARR_BOUNDS_CHECK: { regMaskTP opArrLenRsvd = rsvdRegs | tree->gtBoundsChk.gtIndex->gtRsvdRegs; regMask = rpPredictTreeRegUse(tree->gtBoundsChk.gtArrLen, PREDICT_REG, lockedRegs, opArrLenRsvd); rpPredictTreeRegUse(tree->gtBoundsChk.gtIndex, PREDICT_REG, lockedRegs | regMask, RBM_LASTUSE); tree->gtUsedRegs = (regMaskSmall)regMask | tree->gtBoundsChk.gtArrLen->gtUsedRegs | tree->gtBoundsChk.gtIndex->gtUsedRegs; } break; default: NO_WAY("unexpected special operator in reg use prediction"); break; } RETURN_CHECK: #ifdef DEBUG /* make sure we set them to something reasonable */ if (tree->gtUsedRegs & RBM_ILLEGAL) noway_assert(!"used regs not set properly in reg use prediction"); if (regMask & RBM_ILLEGAL) noway_assert(!"return value not set propery in reg use prediction"); #endif /* * If the gtUsedRegs conflicts with lockedRegs * then we going to have to spill some registers * into the non-trashed register set to keep it alive */ regMaskTP spillMask; spillMask = tree->gtUsedRegs & lockedRegs; if (spillMask) { while (spillMask) { /* Find the next register that needs to be spilled */ tmpMask = genFindLowestBit(spillMask); #ifdef DEBUG if (verbose) { printf("Predict spill of %s before: ", getRegName(genRegNumFromMask(tmpMask))); gtDispTree(tree, 0, NULL, true); if ((tmpMask & regMask) == 0) { printf("Predict reload of %s after : ", getRegName(genRegNumFromMask(tmpMask))); gtDispTree(tree, 0, NULL, true); } } #endif /* In Codegen it will typically introduce a spill temp here */ /* rather than relocating the register to a non trashed reg */ rpPredictSpillCnt++; /* Remove it from the spillMask */ spillMask &= ~tmpMask; } } /* * If the return registers in regMask conflicts with the lockedRegs * then we allocate extra registers for the reload of the conflicting * registers. * * Set spillMask to the set of locked registers that have to be reloaded here. * reloadMask is set to the extra registers that are used to reload * the spilled lockedRegs. */ noway_assert(regMask != DUMMY_INIT(RBM_ILLEGAL)); spillMask = lockedRegs & regMask; if (spillMask) { /* Remove the spillMask from regMask */ regMask &= ~spillMask; regMaskTP reloadMask = RBM_NONE; while (spillMask) { /* Get an extra register to hold it */ regMaskTP reloadReg = rpPredictRegPick(TYP_INT, PREDICT_REG, lockedRegs | regMask | reloadMask); #ifdef DEBUG if (verbose) { printf("Predict reload into %s after : ", getRegName(genRegNumFromMask(reloadReg))); gtDispTree(tree, 0, NULL, true); } #endif reloadMask |= reloadReg; /* Remove it from the spillMask */ spillMask &= ~genFindLowestBit(spillMask); } /* Update regMask to use the reloadMask */ regMask |= reloadMask; /* update the gtUsedRegs mask */ tree->gtUsedRegs |= (regMaskSmall)regMask; } regMaskTP regUse = tree->gtUsedRegs; regUse |= interferingRegs; if (!VarSetOps::IsEmpty(this, compCurLife)) { // Add interference between the current set of live variables and // the set of temporary registers need to evaluate the sub tree if (regUse) { rpRecordRegIntf(regUse, compCurLife DEBUGARG("tmp use")); } } if (rpAsgVarNum != -1) { // Add interference between the registers used (if any) // and the assignment target variable if (regUse) { rpRecordRegIntf(regUse, VarSetOps::MakeSingleton(this, rpAsgVarNum) DEBUGARG("tgt var tmp use")); } // Add a variable interference from rpAsgVarNum (i.e. 
the enregistered left hand // side of the assignment passed here using PREDICT_REG_VAR_Txx) // to the set of currently live variables. This new interference will prevent us // from using the register value used here for enregistering a different live variable // if (!VarSetOps::IsEmpty(this, compCurLife)) { rpRecordVarIntf(rpAsgVarNum, compCurLife DEBUGARG("asg tgt live conflict")); } } /* Do we need to restore the oldLastUseVars value? */ if (restoreLastUseVars) { /* If we used a GT_ASG targeted register then we need to add * a variable interference between any new last use variables * and the GT_ASG targeted register */ if (!VarSetOps::Equal(this, rpLastUseVars, oldLastUseVars) && rpAsgVarNum != -1) { rpRecordVarIntf(rpAsgVarNum, VarSetOps::Diff(this, rpLastUseVars, oldLastUseVars) DEBUGARG("asgn tgt last use conflict")); } VarSetOps::Assign(this, rpLastUseVars, oldLastUseVars); } return regMask; } #ifdef _PREFAST_ #pragma warning(pop) #endif #endif // LEGACY_BACKEND /****************************************************************************/ /* Returns true when we must create an EBP frame. This is used to force most managed methods to have EBP based frames, which allows the ETW kernel stackwalker to walk the stacks of managed code; this allows the kernel to perform lightweight profiling */ bool Compiler::rpMustCreateEBPFrame(INDEBUG(const char** wbReason)) { bool result = false; #ifdef DEBUG const char* reason = nullptr; #endif #if ETW_EBP_FRAMED if (!result && (opts.MinOpts() || opts.compDbgCode)) { INDEBUG(reason = "Debug Code"); result = true; } if (!result && (info.compMethodInfo->ILCodeSize > DEFAULT_MAX_INLINE_SIZE)) { INDEBUG(reason = "IL Code Size"); result = true; } if (!result && (fgBBcount > 3)) { INDEBUG(reason = "BasicBlock Count"); result = true; } if (!result && fgHasLoops) { INDEBUG(reason = "Method has Loops"); result = true; } if (!result && (optCallCount >= 2)) { INDEBUG(reason = "Call Count"); result = true; } if (!result && (optIndirectCallCount >= 1)) { INDEBUG(reason = "Indirect Call"); result = true; } #endif // ETW_EBP_FRAMED // The VM wants to identify the containing frame of an InlinedCallFrame always // via the frame register, never the stack register, so we need a frame. if (!result && (optNativeCallCount != 0)) { INDEBUG(reason = "Uses PInvoke"); result = true; } #ifdef _TARGET_ARM64_ // TODO-ARM64-NYI: This is temporary: force a frame pointer-based frame until genFnProlog can handle non-frame // pointer frames. if (!result) { INDEBUG(reason = "Temporary ARM64 force frame pointer"); result = true; } #endif // _TARGET_ARM64_ #ifdef DEBUG if ((result == true) && (wbReason != nullptr)) { *wbReason = reason; } #endif return result; } #ifdef LEGACY_BACKEND // We don't use any of the old register allocator functions when LSRA is used instead. /***************************************************************************** * * Predict which variables will be assigned to registers * This is x86 specific and only predicts the integer registers, and * it must be conservative; any register that is predicted to be enregistered * must end up being enregistered. * * The rpPredictTreeRegUse takes advantage of the LCL_VARS that are * predicted to be enregistered to minimize calls to rpPredictRegPick.
* */ #ifdef _PREFAST_ #pragma warning(push) #pragma warning(disable : 21000) // Suppress PREFast warning about overly large function #endif regMaskTP Compiler::rpPredictAssignRegVars(regMaskTP regAvail) { unsigned regInx; if (rpPasses <= rpPassesPessimize) { // Assume that we won't have to reverse EBP enregistration rpReverseEBPenreg = false; // Set the default rpFrameType based upon codeGen->isFramePointerRequired() if (codeGen->isFramePointerRequired() || codeGen->isFrameRequired()) rpFrameType = FT_EBP_FRAME; else rpFrameType = FT_ESP_FRAME; } #if !ETW_EBP_FRAMED // If we are using FPBASE as the frame register, we cannot also use it for // a local var if (rpFrameType == FT_EBP_FRAME) { regAvail &= ~RBM_FPBASE; } #endif // !ETW_EBP_FRAMED rpStkPredict = 0; rpPredictAssignMask = regAvail; raSetupArgMasks(&codeGen->intRegState); #if !FEATURE_STACK_FP_X87 raSetupArgMasks(&codeGen->floatRegState); #endif // If there is a secret stub param, it is also live in if (info.compPublishStubParam) { codeGen->intRegState.rsCalleeRegArgMaskLiveIn |= RBM_SECRET_STUB_PARAM; } if (regAvail == RBM_NONE) { unsigned lclNum; LclVarDsc* varDsc; for (lclNum = 0, varDsc = lvaTable; lclNum < lvaCount; lclNum++, varDsc++) { #if FEATURE_STACK_FP_X87 if (!varDsc->IsFloatRegType()) #endif { varDsc->lvRegNum = REG_STK; if (isRegPairType(varDsc->lvType)) varDsc->lvOtherReg = REG_STK; } } } #ifdef DEBUG if (verbose) { printf("\nCompiler::rpPredictAssignRegVars pass #%d", rpPasses); printf("\n Available registers = "); dspRegMask(regAvail); printf("\n"); } #endif if (regAvail == RBM_NONE) { return RBM_NONE; } /* We cannot change the lvVarIndexes at this point, so we */ /* can only re-order the existing set of tracked variables */ /* Which will change the order in which we select the */ /* locals for enregistering. */ assert(lvaTrackedFixed); // We should have already set this to prevent us from adding any new tracked variables. 
// Should not be set unless optimizing noway_assert((lvaSortAgain == false) || (opts.MinOpts() == false)); if (lvaSortAgain) lvaSortOnly(); #ifdef DEBUG fgDebugCheckBBlist(); #endif /* Initialize the weighted count of variables that could have */ /* been enregistered but weren't */ unsigned refCntStk = 0; // sum of ref counts for all stack based variables unsigned refCntEBP = 0; // sum of ref counts for EBP enregistered variables unsigned refCntWtdEBP = 0; // sum of wtd ref counts for EBP enregistered variables #if DOUBLE_ALIGN unsigned refCntStkParam; // sum of ref counts for all stack based parameters unsigned refCntWtdStkDbl; // sum of wtd ref counts for stack based doubles #if FEATURE_STACK_FP_X87 refCntStkParam = raCntStkParamDblStackFP; refCntWtdStkDbl = raCntWtdStkDblStackFP; refCntStk = raCntStkStackFP; #else refCntStkParam = 0; refCntWtdStkDbl = 0; refCntStk = 0; #endif // FEATURE_STACK_FP_X87 #endif // DOUBLE_ALIGN /* Set of registers used to enregister variables in the predition */ regMaskTP regUsed = RBM_NONE; /*------------------------------------------------------------------------- * * Predict/Assign the enregistered locals in ref-count order * */ VARSET_TP VARSET_INIT_NOCOPY(unprocessedVars, VarSetOps::MakeFull(this)); unsigned FPRegVarLiveInCnt; FPRegVarLiveInCnt = 0; // How many enregistered doubles are live on entry to the method LclVarDsc* varDsc; for (unsigned sortNum = 0; sortNum < lvaCount; sortNum++) { bool notWorthy = false; unsigned varIndex; bool isDouble; regMaskTP regAvailForType; var_types regType; regMaskTP avoidReg; unsigned customVarOrderSize; regNumber customVarOrder[MAX_VAR_ORDER_SIZE]; bool firstHalf; regNumber saveOtherReg; varDsc = lvaRefSorted[sortNum]; #if FEATURE_STACK_FP_X87 if (varTypeIsFloating(varDsc->TypeGet())) { #ifdef DEBUG if (lvaIsFieldOfDependentlyPromotedStruct(varDsc)) { // Field local of a PROMOTION_TYPE_DEPENDENT struct should not // be en-registered. noway_assert(!varDsc->lvRegister); } #endif continue; } #endif /* Check the set of invariant things that would prevent enregistration */ /* Ignore the variable if it's not tracked */ if (!varDsc->lvTracked) goto CANT_REG; /* Get hold of the index and the interference mask for the variable */ varIndex = varDsc->lvVarIndex; // Remove 'varIndex' from unprocessedVars VarSetOps::RemoveElemD(this, unprocessedVars, varIndex); // Skip the variable if it's marked as DoNotEnregister. if (varDsc->lvDoNotEnregister) goto CANT_REG; /* TODO: For now if we have JMP all register args go to stack * TODO: Later consider extending the life of the argument or make a copy of it */ if (compJmpOpUsed && varDsc->lvIsRegArg) goto CANT_REG; /* Skip the variable if the ref count is zero */ if (varDsc->lvRefCnt == 0) goto CANT_REG; /* Ignore field of PROMOTION_TYPE_DEPENDENT type of promoted struct */ if (lvaIsFieldOfDependentlyPromotedStruct(varDsc)) { goto CANT_REG; } /* Is the unweighted ref count too low to be interesting? */ if (!varDsc->lvIsStructField && // We do encourage enregistering field locals. 
(varDsc->lvRefCnt <= 1)) { /* Sometimes it's useful to enregister a variable with only one use */ /* arguments referenced in loops are one example */ if (varDsc->lvIsParam && varDsc->lvRefCntWtd > BB_UNITY_WEIGHT) goto OK_TO_ENREGISTER; /* If the variable has a preferred register set it may be useful to put it there */ if (varDsc->lvPrefReg && varDsc->lvIsRegArg) goto OK_TO_ENREGISTER; /* Keep going; the table is sorted by "weighted" ref count */ goto CANT_REG; } OK_TO_ENREGISTER: if (varTypeIsFloating(varDsc->TypeGet())) { regType = varDsc->TypeGet(); regAvailForType = regAvail & RBM_ALLFLOAT; } else { regType = TYP_INT; regAvailForType = regAvail & RBM_ALLINT; } #ifdef _TARGET_ARM_ isDouble = (varDsc->TypeGet() == TYP_DOUBLE); if (isDouble) { regAvailForType &= RBM_DBL_REGS; // We must restrict the set to the double registers } #endif /* If we don't have any registers available then skip the enregistration attempt */ if (regAvailForType == RBM_NONE) goto NO_REG; // On the pessimize passes don't even try to enregister LONGS if (isRegPairType(varDsc->lvType)) { if (rpPasses > rpPassesPessimize) goto NO_REG; else if (rpLostEnreg && (rpPasses == rpPassesPessimize)) goto NO_REG; } // Set of registers to avoid when performing register allocation avoidReg = RBM_NONE; if (!varDsc->lvIsRegArg) { /* For local variables, * avoid the incoming arguments, * but only if you conflict with them */ if (raAvoidArgRegMask != 0) { LclVarDsc* argDsc; LclVarDsc* argsEnd = lvaTable + info.compArgsCount; for (argDsc = lvaTable; argDsc < argsEnd; argDsc++) { if (!argDsc->lvIsRegArg) continue; bool isFloat = argDsc->IsFloatRegType(); regNumber inArgReg = argDsc->lvArgReg; regMaskTP inArgBit = genRegMask(inArgReg); // Is this inArgReg in the raAvoidArgRegMask set? if (!(raAvoidArgRegMask & inArgBit)) continue; noway_assert(argDsc->lvIsParam); noway_assert(inArgBit & (isFloat ? RBM_FLTARG_REGS : RBM_ARG_REGS)); unsigned locVarIndex = varDsc->lvVarIndex; unsigned argVarIndex = argDsc->lvVarIndex; /* Does this variable interfere with the arg variable ? 
*/ if (VarSetOps::IsMember(this, lvaVarIntf[locVarIndex], argVarIndex)) { noway_assert(VarSetOps::IsMember(this, lvaVarIntf[argVarIndex], locVarIndex)); /* Yes, so try to avoid the incoming arg reg */ avoidReg |= inArgBit; } else { noway_assert(!VarSetOps::IsMember(this, lvaVarIntf[argVarIndex], locVarIndex)); } } } } // Now we will try to predict which register the variable // could be enregistered in customVarOrderSize = MAX_VAR_ORDER_SIZE; raSetRegVarOrder(regType, customVarOrder, &customVarOrderSize, varDsc->lvPrefReg, avoidReg); firstHalf = false; saveOtherReg = DUMMY_INIT(REG_NA); for (regInx = 0; regInx < customVarOrderSize; regInx++) { regNumber regNum = customVarOrder[regInx]; regMaskTP regBits = genRegMask(regNum); /* Skip this register if it isn't available */ if ((regAvailForType & regBits) == 0) continue; /* Skip this register if it interferes with the variable */ if (VarSetOps::IsMember(this, raLclRegIntf[regNum], varIndex)) continue; if (varTypeIsFloating(regType)) { #ifdef _TARGET_ARM_ if (isDouble) { regNumber regNext = REG_NEXT(regNum); regBits |= genRegMask(regNext); /* Skip if regNext interferes with the variable */ if (VarSetOps::IsMember(this, raLclRegIntf[regNext], varIndex)) continue; } #endif } bool firstUseOfReg = ((regBits & (regUsed | codeGen->regSet.rsGetModifiedRegsMask())) == 0); bool lessThanTwoRefWtd = (varDsc->lvRefCntWtd < (2 * BB_UNITY_WEIGHT)); bool calleeSavedReg = ((regBits & RBM_CALLEE_SAVED) != 0); /* Skip this register if the weighted ref count is less than two and we are considering an unused callee saved register */ if (lessThanTwoRefWtd && // less than two references (weighted) firstUseOfReg && // first use of this register calleeSavedReg) // callee saved register { unsigned int totalRefCntWtd = varDsc->lvRefCntWtd; // psc is an abbreviation for possibleSameColor VARSET_TP VARSET_INIT_NOCOPY(pscVarSet, VarSetOps::Diff(this, unprocessedVars, lvaVarIntf[varIndex])); VARSET_ITER_INIT(this, pscIndexIter, pscVarSet, pscIndex); while (pscIndexIter.NextElem(this, &pscIndex)) { LclVarDsc* pscVar = lvaTable + lvaTrackedToVarNum[pscIndex]; totalRefCntWtd += pscVar->lvRefCntWtd; if (totalRefCntWtd > (2 * BB_UNITY_WEIGHT)) break; } if (totalRefCntWtd <= (2 * BB_UNITY_WEIGHT)) { notWorthy = true; continue; // not worth spilling a callee saved register } // otherwise we will spill this callee saved register, // because its uses when combined with the uses of // other yet to be processed candidates exceed our threshold.
// totalRefCntWtd = totalRefCntWtd; } /* Looks good - mark the variable as living in the register */ if (isRegPairType(varDsc->lvType)) { if (firstHalf == false) { /* Enregister the first half of the long */ varDsc->lvRegNum = regNum; saveOtherReg = varDsc->lvOtherReg; varDsc->lvOtherReg = REG_STK; firstHalf = true; } else { /* Ensure 'well-formed' register pairs */ /* (those returned by gen[Pick|Grab]RegPair) */ if (regNum < varDsc->lvRegNum) { varDsc->lvOtherReg = varDsc->lvRegNum; varDsc->lvRegNum = regNum; } else { varDsc->lvOtherReg = regNum; } firstHalf = false; } } else { varDsc->lvRegNum = regNum; #ifdef _TARGET_ARM_ if (isDouble) { varDsc->lvOtherReg = REG_NEXT(regNum); } #endif } if (regNum == REG_FPBASE) { refCntEBP += varDsc->lvRefCnt; refCntWtdEBP += varDsc->lvRefCntWtd; #if DOUBLE_ALIGN if (varDsc->lvIsParam) { refCntStkParam += varDsc->lvRefCnt; } #endif } /* Record this register in the regUsed set */ regUsed |= regBits; /* The register is now ineligible for all interfering variables */ VarSetOps::UnionD(this, raLclRegIntf[regNum], lvaVarIntf[varIndex]); #ifdef _TARGET_ARM_ if (isDouble) { regNumber secondHalf = REG_NEXT(regNum); VARSET_ITER_INIT(this, iter, lvaVarIntf[varIndex], intfIndex); while (iter.NextElem(this, &intfIndex)) { VarSetOps::AddElemD(this, raLclRegIntf[secondHalf], intfIndex); } } #endif /* If a register argument, remove its incoming register * from the "avoid" list */ if (varDsc->lvIsRegArg) { raAvoidArgRegMask &= ~genRegMask(varDsc->lvArgReg); #ifdef _TARGET_ARM_ if (isDouble) { raAvoidArgRegMask &= ~genRegMask(REG_NEXT(varDsc->lvArgReg)); } #endif } /* A variable of TYP_LONG can take two registers */ if (firstHalf) continue; // Since we have successfully enregistered this variable it is // now time to move on and consider the next variable goto ENREG_VAR; } if (firstHalf) { noway_assert(isRegPairType(varDsc->lvType)); /* This TYP_LONG is partially enregistered */ noway_assert(saveOtherReg != DUMMY_INIT(REG_NA)); if (varDsc->lvDependReg && (saveOtherReg != REG_STK)) { rpLostEnreg = true; } raAddToStkPredict(varDsc->lvRefCntWtd); goto ENREG_VAR; } NO_REG:; if (varDsc->lvDependReg) { rpLostEnreg = true; } if (!notWorthy) { /* Weighted count of variables that could have been enregistered but weren't */ raAddToStkPredict(varDsc->lvRefCntWtd); if (isRegPairType(varDsc->lvType) && (varDsc->lvOtherReg == REG_STK)) raAddToStkPredict(varDsc->lvRefCntWtd); } CANT_REG:; varDsc->lvRegister = false; varDsc->lvRegNum = REG_STK; if (isRegPairType(varDsc->lvType)) varDsc->lvOtherReg = REG_STK; /* unweighted count of variables that were not enregistered */ refCntStk += varDsc->lvRefCnt; #if DOUBLE_ALIGN if (varDsc->lvIsParam) { refCntStkParam += varDsc->lvRefCnt; } else { /* Is it a stack based double? */ /* Note that double params are excluded since they can not be double aligned */ if (varDsc->lvType == TYP_DOUBLE) { refCntWtdStkDbl += varDsc->lvRefCntWtd; } } #endif #ifdef DEBUG if (verbose) { printf("; "); gtDispLclVar((unsigned)(varDsc - lvaTable)); if (varDsc->lvTracked) printf("T%02u", varDsc->lvVarIndex); else printf(" "); printf(" (refcnt=%2u,refwtd=%s) not enregistered", varDsc->lvRefCnt, refCntWtd2str(varDsc->lvRefCntWtd)); if (varDsc->lvDoNotEnregister) printf(", do-not-enregister"); printf("\n"); } #endif continue; ENREG_VAR:; varDsc->lvRegister = true; // Record the fact that we enregistered a stack arg when tail call is used. 
if (compJmpOpUsed && !varDsc->lvIsRegArg) { rpMaskPInvokeEpilogIntf |= genRegMask(varDsc->lvRegNum); if (isRegPairType(varDsc->lvType)) { rpMaskPInvokeEpilogIntf |= genRegMask(varDsc->lvOtherReg); } } #ifdef DEBUG if (verbose) { printf("; "); gtDispLclVar((unsigned)(varDsc - lvaTable)); printf("T%02u (refcnt=%2u,refwtd=%s) predicted to be assigned to ", varIndex, varDsc->lvRefCnt, refCntWtd2str(varDsc->lvRefCntWtd)); varDsc->PrintVarReg(); #ifdef _TARGET_ARM_ if (isDouble) { printf(":%s", getRegName(varDsc->lvOtherReg)); } #endif printf("\n"); } #endif } #if ETW_EBP_FRAMED noway_assert(refCntEBP == 0); #endif #ifdef DEBUG if (verbose) { if (refCntStk > 0) printf("; refCntStk = %u\n", refCntStk); if (refCntEBP > 0) printf("; refCntEBP = %u\n", refCntEBP); if (refCntWtdEBP > 0) printf("; refCntWtdEBP = %u\n", refCntWtdEBP); #if DOUBLE_ALIGN if (refCntStkParam > 0) printf("; refCntStkParam = %u\n", refCntStkParam); if (refCntWtdStkDbl > 0) printf("; refCntWtdStkDbl = %u\n", refCntWtdStkDbl); #endif } #endif /* Determine how the EBP register should be used */ CLANG_FORMAT_COMMENT_ANCHOR; #if DOUBLE_ALIGN if (!codeGen->isFramePointerRequired()) { noway_assert(getCanDoubleAlign() < COUNT_DOUBLE_ALIGN); /* First let us decide if we should use EBP to create a double-aligned frame, instead of enregistering variables */ if (getCanDoubleAlign() == MUST_DOUBLE_ALIGN) { rpFrameType = FT_DOUBLE_ALIGN_FRAME; goto REVERSE_EBP_ENREG; } if (getCanDoubleAlign() == CAN_DOUBLE_ALIGN && (refCntWtdStkDbl > 0)) { if (shouldDoubleAlign(refCntStk, refCntEBP, refCntWtdEBP, refCntStkParam, refCntWtdStkDbl)) { rpFrameType = FT_DOUBLE_ALIGN_FRAME; goto REVERSE_EBP_ENREG; } } } #endif // DOUBLE_ALIGN if (!codeGen->isFramePointerRequired() && !codeGen->isFrameRequired()) { #ifdef _TARGET_XARCH_ // clang-format off /* If we are using EBP to enregister variables then will we actually save bytes by setting up an EBP frame? Each stack reference is an extra byte of code if we use an ESP frame. Here we measure the savings that we get by using EBP to enregister variables vs. the cost in code size that we pay when using an ESP based frame. We pay one byte of code for each refCntStk but we save one byte (or more) for each refCntEBP. Our savings are the elimination of a stack memory read/write. We use the loop weighted value of refCntWtdEBP * mem_access_weight (0, 3, 6) to represent this savings. 
*/ // We also pay 5 extra bytes for the MOV EBP,ESP and LEA ESP,[EBP-0x10] // to set up an EBP frame in the prolog and epilog #define EBP_FRAME_SETUP_SIZE 5 // clang-format on if (refCntStk > (refCntEBP + EBP_FRAME_SETUP_SIZE)) { unsigned bytesSaved = refCntStk - (refCntEBP + EBP_FRAME_SETUP_SIZE); unsigned mem_access_weight = 3; if (compCodeOpt() == SMALL_CODE) mem_access_weight = 0; else if (compCodeOpt() == FAST_CODE) mem_access_weight *= 2; if (bytesSaved > ((refCntWtdEBP * mem_access_weight) / BB_UNITY_WEIGHT)) { /* It's not be a good idea to use EBP in our predictions */ CLANG_FORMAT_COMMENT_ANCHOR; #ifdef DEBUG if (verbose && (refCntEBP > 0)) printf("; Predicting that it's not worth using EBP to enregister variables\n"); #endif rpFrameType = FT_EBP_FRAME; goto REVERSE_EBP_ENREG; } } #endif // _TARGET_XARCH_ if ((rpFrameType == FT_NOT_SET) || (rpFrameType == FT_ESP_FRAME)) { #ifdef DEBUG const char* reason; #endif if (rpMustCreateEBPCalled == false) { rpMustCreateEBPCalled = true; if (rpMustCreateEBPFrame(INDEBUG(&reason))) { #ifdef DEBUG if (verbose) printf("; Decided to create an EBP based frame for ETW stackwalking (%s)\n", reason); #endif codeGen->setFrameRequired(true); rpFrameType = FT_EBP_FRAME; goto REVERSE_EBP_ENREG; } } } } goto EXIT; REVERSE_EBP_ENREG: noway_assert(rpFrameType != FT_ESP_FRAME); rpReverseEBPenreg = true; #if !ETW_EBP_FRAMED if (refCntEBP > 0) { noway_assert(regUsed & RBM_FPBASE); regUsed &= ~RBM_FPBASE; /* variables that were enregistered in EBP become stack based variables */ raAddToStkPredict(refCntWtdEBP); unsigned lclNum; /* We're going to have to undo some predicted enregistered variables */ for (lclNum = 0, varDsc = lvaTable; lclNum < lvaCount; lclNum++, varDsc++) { /* Is this a register variable? */ if (varDsc->lvRegNum != REG_STK) { if (isRegPairType(varDsc->lvType)) { /* Only one can be EBP */ if (varDsc->lvRegNum == REG_FPBASE || varDsc->lvOtherReg == REG_FPBASE) { if (varDsc->lvRegNum == REG_FPBASE) varDsc->lvRegNum = varDsc->lvOtherReg; varDsc->lvOtherReg = REG_STK; if (varDsc->lvRegNum == REG_STK) varDsc->lvRegister = false; if (varDsc->lvDependReg) rpLostEnreg = true; #ifdef DEBUG if (verbose) goto DUMP_MSG; #endif } } else { if ((varDsc->lvRegNum == REG_FPBASE) && (!varDsc->IsFloatRegType())) { varDsc->lvRegNum = REG_STK; varDsc->lvRegister = false; if (varDsc->lvDependReg) rpLostEnreg = true; #ifdef DEBUG if (verbose) { DUMP_MSG: printf("; reversing enregisteration of V%02u,T%02u (refcnt=%2u,refwtd=%4u%s)\n", lclNum, varDsc->lvVarIndex, varDsc->lvRefCnt, varDsc->lvRefCntWtd / 2, (varDsc->lvRefCntWtd & 1) ? ".5" : ""); } #endif } } } } } #endif // ETW_EBP_FRAMED EXIT:; unsigned lclNum; for (lclNum = 0, varDsc = lvaTable; lclNum < lvaCount; lclNum++, varDsc++) { /* Clear the lvDependReg flag for next iteration of the predictor */ varDsc->lvDependReg = false; // If we set rpLostEnreg and this is the first pessimize pass // then reverse the enreg of all TYP_LONG if (rpLostEnreg && isRegPairType(varDsc->lvType) && (rpPasses == rpPassesPessimize)) { varDsc->lvRegNum = REG_STK; varDsc->lvOtherReg = REG_STK; } } #ifdef DEBUG if (verbose && raNewBlocks) { printf("\nAdded FP register killing blocks:\n"); fgDispBasicBlocks(); printf("\n"); } #endif noway_assert(rpFrameType != FT_NOT_SET); /* return the set of registers used to enregister variables */ return regUsed; } #ifdef _PREFAST_ #pragma warning(pop) #endif /***************************************************************************** * * Predict register use for every tree in the function. 
Note that we do this * at different times (not to mention in a totally different way) for x86 vs * RISC targets. */ void Compiler::rpPredictRegUse() { #ifdef DEBUG if (verbose) raDumpVarIntf(); #endif // We might want to adjust the ref counts based on interference raAdjustVarIntf(); regMaskTP allAcceptableRegs = RBM_ALLINT; #if FEATURE_FP_REGALLOC allAcceptableRegs |= raConfigRestrictMaskFP(); #endif allAcceptableRegs &= ~codeGen->regSet.rsMaskResvd; // Remove any register reserved for special purposes /* For debuggable code, genJumpToThrowHlpBlk() generates an inline call to acdHelper(). This is done implicitly, without creating a GT_CALL node. Hence, this interference is handled implicitly by restricting the registers used for enregistering variables */ if (opts.compDbgCode) { allAcceptableRegs &= RBM_CALLEE_SAVED; } /* Compute the initial regmask to use for the first pass */ regMaskTP regAvail = RBM_CALLEE_SAVED & allAcceptableRegs; regMaskTP regUsed; #if CPU_USES_BLOCK_MOVE /* If we might need to generate a rep mov instruction */ /* remove ESI and EDI */ if (compBlkOpUsed) regAvail &= ~(RBM_ESI | RBM_EDI); #endif #ifdef _TARGET_X86_ /* If we are using longs then we remove ESI to allow */ /* ESI:EBX to be saved across a call */ if (compLongUsed) regAvail &= ~(RBM_ESI); #endif #ifdef _TARGET_ARM_ // For the first register allocation pass we don't want to color using r4 // as we want to allow it to be used to color the internal temps instead // when r0,r1,r2,r3 are all in use. // regAvail &= ~(RBM_R4); #endif #if ETW_EBP_FRAMED // We never have EBP available when ETW_EBP_FRAME is defined regAvail &= ~RBM_FPBASE; #else /* If a frame pointer is required then we remove EBP */ if (codeGen->isFramePointerRequired() || codeGen->isFrameRequired()) regAvail &= ~RBM_FPBASE; #endif #ifdef DEBUG BOOL fJitNoRegLoc = JitConfig.JitNoRegLoc(); if (fJitNoRegLoc) regAvail = RBM_NONE; #endif if ((opts.compFlags & CLFLG_REGVAR) == 0) regAvail = RBM_NONE; #if FEATURE_STACK_FP_X87 VarSetOps::AssignNoCopy(this, optAllNonFPvars, VarSetOps::MakeEmpty(this)); VarSetOps::AssignNoCopy(this, optAllFloatVars, VarSetOps::MakeEmpty(this)); // Calculate the set of all tracked FP/non-FP variables // into optAllFloatVars and optAllNonFPvars unsigned lclNum; LclVarDsc* varDsc; for (lclNum = 0, varDsc = lvaTable; lclNum < lvaCount; lclNum++, varDsc++) { /* Ignore the variable if it's not tracked */ if (!varDsc->lvTracked) continue; /* Get hold of the index and the interference mask for the variable */ unsigned varNum = varDsc->lvVarIndex; /* add to the set of all tracked FP/non-FP variables */ if (varDsc->IsFloatRegType()) VarSetOps::AddElemD(this, optAllFloatVars, varNum); else VarSetOps::AddElemD(this, optAllNonFPvars, varNum); } #endif for (unsigned i = 0; i < REG_COUNT; i++) { VarSetOps::AssignNoCopy(this, raLclRegIntf[i], VarSetOps::MakeEmpty(this)); } for (unsigned i = 0; i < lvaTrackedCount; i++) { VarSetOps::AssignNoCopy(this, lvaVarPref[i], VarSetOps::MakeEmpty(this)); } raNewBlocks = false; rpPredictAssignAgain = false; rpPasses = 0; bool mustPredict = true; unsigned stmtNum = 0; unsigned oldStkPredict = DUMMY_INIT(~0); VARSET_TP oldLclRegIntf[REG_COUNT]; for (unsigned i = 0; i < REG_COUNT; i++) { VarSetOps::AssignNoCopy(this, oldLclRegIntf[i], VarSetOps::MakeEmpty(this)); } while (true) { /* Assign registers to variables using the variable/register interference graph (raLclRegIntf[]) calculated in the previous pass */ regUsed = rpPredictAssignRegVars(regAvail); mustPredict |= rpLostEnreg; #ifdef _TARGET_ARM_ //
See if we previously reserved REG_R10 and try to make it available if we have a small frame now // if ((rpPasses == 0) && (codeGen->regSet.rsMaskResvd & RBM_OPT_RSVD)) { if (compRsvdRegCheck(REGALLOC_FRAME_LAYOUT)) { // We must keep reserving R10 in this case codeGen->regSet.rsMaskResvd |= RBM_OPT_RSVD; } else { // We can release our reservation on R10 and use it to color registers // codeGen->regSet.rsMaskResvd &= ~RBM_OPT_RSVD; allAcceptableRegs |= RBM_OPT_RSVD; } } #endif /* Is our new prediction good enough?? */ if (!mustPredict) { /* For small methods (less than 12 stmts), we add a */ /* extra pass if we are predicting the use of some */ /* of the caller saved registers. */ /* This fixes RAID perf bug 43440 VB Ackerman function */ if ((rpPasses == 1) && (stmtNum <= 12) && (regUsed & RBM_CALLEE_SAVED)) { goto EXTRA_PASS; } /* If every variable was fully enregistered then we're done */ if (rpStkPredict == 0) goto ALL_DONE; // This was a successful prediction. Record it, in case it turns out to be the best one. rpRecordPrediction(); if (rpPasses > 1) { noway_assert(oldStkPredict != (unsigned)DUMMY_INIT(~0)); // Be careful about overflow unsigned highStkPredict = (rpStkPredict * 2 < rpStkPredict) ? ULONG_MAX : rpStkPredict * 2; if (oldStkPredict < highStkPredict) goto ALL_DONE; if (rpStkPredict < rpPasses * 8) goto ALL_DONE; if (rpPasses >= (rpPassesMax - 1)) goto ALL_DONE; } EXTRA_PASS: /* We will do another pass */; } #ifdef DEBUG if (JitConfig.JitAssertOnMaxRAPasses()) { noway_assert(rpPasses < rpPassesMax && "This may not a bug, but dev team should look and see what is happening"); } #endif // The "64" here had been "VARSET_SZ". It is unclear why this number is connected with // the (max) size of a VARSET. We've eliminated this constant, so I left this as a constant. We hope // that we're phasing out this code, anyway, and this leaves the behavior the way that it was. if (rpPasses > (rpPassesMax - rpPassesPessimize) + 64) { NO_WAY("we seem to be stuck in an infinite loop. breaking out"); } #ifdef DEBUG if (verbose) { if (rpPasses > 0) { if (rpLostEnreg) printf("\n; Another pass due to rpLostEnreg"); if (rpAddedVarIntf) printf("\n; Another pass due to rpAddedVarIntf"); if ((rpPasses == 1) && rpPredictAssignAgain) printf("\n; Another pass due to rpPredictAssignAgain"); } printf("\n; Register predicting pass# %d\n", rpPasses + 1); } #endif /* Zero the variable/register interference graph */ for (unsigned i = 0; i < REG_COUNT; i++) { VarSetOps::ClearD(this, raLclRegIntf[i]); } // if there are PInvoke calls and compLvFrameListRoot is enregistered, // it must not be in a register trashed by the callee if (info.compCallUnmanaged != 0) { assert(!opts.ShouldUsePInvokeHelpers()); noway_assert(info.compLvFrameListRoot < lvaCount); LclVarDsc* pinvokeVarDsc = &lvaTable[info.compLvFrameListRoot]; if (pinvokeVarDsc->lvTracked) { rpRecordRegIntf(RBM_CALLEE_TRASH, VarSetOps::MakeSingleton(this, pinvokeVarDsc->lvVarIndex) DEBUGARG("compLvFrameListRoot")); // We would prefer to have this be enregister in the PINVOKE_TCB register pinvokeVarDsc->addPrefReg(RBM_PINVOKE_TCB, this); } // If we're using a single return block, the p/invoke epilog code trashes ESI and EDI (in the // worst case). Make sure that the return value compiler temp that we create for the single // return block knows about this interference. 
if (genReturnLocal != BAD_VAR_NUM) { noway_assert(genReturnBB); LclVarDsc* localTmp = &lvaTable[genReturnLocal]; if (localTmp->lvTracked) { rpRecordRegIntf(RBM_PINVOKE_TCB | RBM_PINVOKE_FRAME, VarSetOps::MakeSingleton(this, localTmp->lvVarIndex) DEBUGARG("genReturnLocal")); } } } #ifdef _TARGET_ARM_ if (compFloatingPointUsed) { bool hasMustInitFloat = false; // if we have any must-init floating point LclVars then we will add register interferences // for the arguments with RBM_SCRATCH // this is so that if we need to reset the initReg to REG_SCRATCH in Compiler::genFnProlog() // we won't home the arguments into REG_SCRATCH unsigned lclNum; LclVarDsc* varDsc; for (lclNum = 0, varDsc = lvaTable; lclNum < lvaCount; lclNum++, varDsc++) { if (varDsc->lvMustInit && varTypeIsFloating(varDsc->TypeGet())) { hasMustInitFloat = true; break; } } if (hasMustInitFloat) { for (lclNum = 0, varDsc = lvaTable; lclNum < lvaCount; lclNum++, varDsc++) { // If is an incoming argument, that is tracked and not floating-point if (varDsc->lvIsParam && varDsc->lvTracked && !varTypeIsFloating(varDsc->TypeGet())) { rpRecordRegIntf(RBM_SCRATCH, VarSetOps::MakeSingleton(this, varDsc->lvVarIndex) DEBUGARG("arg home with must-init fp")); } } } } #endif stmtNum = 0; rpAddedVarIntf = false; rpLostEnreg = false; /* Walk the basic blocks and predict reg use for each tree */ for (BasicBlock* block = fgFirstBB; block != NULL; block = block->bbNext) { GenTreePtr stmt; compCurBB = block; compCurLifeTree = NULL; VarSetOps::Assign(this, compCurLife, block->bbLiveIn); compCurBB = block; for (stmt = block->FirstNonPhiDef(); stmt != NULL; stmt = stmt->gtNext) { noway_assert(stmt->gtOper == GT_STMT); rpPredictSpillCnt = 0; VarSetOps::AssignNoCopy(this, rpLastUseVars, VarSetOps::MakeEmpty(this)); VarSetOps::AssignNoCopy(this, rpUseInPlace, VarSetOps::MakeEmpty(this)); GenTreePtr tree = stmt->gtStmt.gtStmtExpr; stmtNum++; #ifdef DEBUG if (verbose && 1) { printf("\nRegister predicting BB%02u, stmt %d\n", block->bbNum, stmtNum); gtDispTree(tree); printf("\n"); } #endif rpPredictTreeRegUse(tree, PREDICT_NONE, RBM_NONE, RBM_NONE); noway_assert(rpAsgVarNum == -1); if (rpPredictSpillCnt > tmpIntSpillMax) tmpIntSpillMax = rpPredictSpillCnt; } } rpPasses++; /* Decide whether we need to set mustPredict */ mustPredict = false; if (rpAddedVarIntf) { mustPredict = true; #ifdef DEBUG if (verbose) raDumpVarIntf(); #endif } if (rpPasses == 1) { if ((opts.compFlags & CLFLG_REGVAR) == 0) goto ALL_DONE; if (rpPredictAssignAgain) mustPredict = true; #ifdef DEBUG if (fJitNoRegLoc) goto ALL_DONE; #endif } /* Calculate the new value to use for regAvail */ regAvail = allAcceptableRegs; /* If a frame pointer is required then we remove EBP */ if (codeGen->isFramePointerRequired() || codeGen->isFrameRequired()) regAvail &= ~RBM_FPBASE; #if ETW_EBP_FRAMED // We never have EBP available when ETW_EBP_FRAME is defined regAvail &= ~RBM_FPBASE; #endif // If we have done n-passes then we must continue to pessimize the // interference graph by or-ing the interferences from the previous pass if (rpPasses > rpPassesPessimize) { for (unsigned regInx = 0; regInx < REG_COUNT; regInx++) VarSetOps::UnionD(this, raLclRegIntf[regInx], oldLclRegIntf[regInx]); /* If we reverse an EBP enregistration then keep it that way */ if (rpReverseEBPenreg) regAvail &= ~RBM_FPBASE; } #ifdef DEBUG if (verbose) raDumpRegIntf(); #endif /* Save the old variable/register interference graph */ for (unsigned i = 0; i < REG_COUNT; i++) { VarSetOps::Assign(this, oldLclRegIntf[i], raLclRegIntf[i]); } 
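/* Also remember this pass's weighted stack-use estimate; the next pass compares against it
   to decide whether the prediction is still improving enough to be worth another iteration */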
oldStkPredict = rpStkPredict; } // end of while (true) ALL_DONE:; // If we recorded a better feasible allocation than we ended up with, go back to using it. rpUseRecordedPredictionIfBetter(); #if DOUBLE_ALIGN codeGen->setDoubleAlign(false); #endif switch (rpFrameType) { default: noway_assert(!"rpFrameType not set correctly!"); break; case FT_ESP_FRAME: noway_assert(!codeGen->isFramePointerRequired()); noway_assert(!codeGen->isFrameRequired()); codeGen->setFramePointerUsed(false); break; case FT_EBP_FRAME: noway_assert((regUsed & RBM_FPBASE) == 0); codeGen->setFramePointerUsed(true); break; #if DOUBLE_ALIGN case FT_DOUBLE_ALIGN_FRAME: noway_assert((regUsed & RBM_FPBASE) == 0); noway_assert(!codeGen->isFramePointerRequired()); codeGen->setFramePointerUsed(false); codeGen->setDoubleAlign(true); break; #endif } /* Record the set of registers that we need */ codeGen->regSet.rsClearRegsModified(); if (regUsed != RBM_NONE) { codeGen->regSet.rsSetRegsModified(regUsed); } /* We need genFullPtrRegMap if : * The method is fully interruptible, or * We are generating an EBP-less frame (for stack-pointer deltas) */ genFullPtrRegMap = (genInterruptible || !codeGen->isFramePointerUsed()); raMarkStkVars(); #ifdef DEBUG if (verbose) { printf("# rpPasses was %u for %s\n", rpPasses, info.compFullName); printf(" rpStkPredict was %u\n", rpStkPredict); } #endif rpRegAllocDone = true; } #endif // LEGACY_BACKEND /***************************************************************************** * * Mark all variables as to whether they live on the stack frame * (part or whole), and if so what the base is (FP or SP). */ void Compiler::raMarkStkVars() { unsigned lclNum; LclVarDsc* varDsc; for (lclNum = 0, varDsc = lvaTable; lclNum < lvaCount; lclNum++, varDsc++) { // For RyuJIT, lvOnFrame is set by LSRA, except in the case of zero-ref, which is set below. CLANG_FORMAT_COMMENT_ANCHOR; #ifdef LEGACY_BACKEND varDsc->lvOnFrame = false; #endif // LEGACY_BACKEND if (lvaIsFieldOfDependentlyPromotedStruct(varDsc)) { noway_assert(!varDsc->lvRegister); goto ON_STK; } /* Fully enregistered variables don't need any frame space */ if (varDsc->lvRegister) { if (!isRegPairType(varDsc->TypeGet())) { goto NOT_STK; } /* For "large" variables make sure both halves are enregistered */ if (varDsc->lvRegNum != REG_STK && varDsc->lvOtherReg != REG_STK) { goto NOT_STK; } } /* Unused variables typically don't get any frame space */ else if (varDsc->lvRefCnt == 0) { bool needSlot = false; bool stkFixedArgInVarArgs = info.compIsVarArgs && varDsc->lvIsParam && !varDsc->lvIsRegArg && lclNum != lvaVarargsHandleArg; // If its address has been exposed, ignore lvRefCnt. However, exclude // fixed arguments in varargs method as lvOnFrame shouldn't be set // for them as we don't want to explicitly report them to GC. if (!stkFixedArgInVarArgs) { needSlot |= varDsc->lvAddrExposed; } #if FEATURE_FIXED_OUT_ARGS /* Is this the dummy variable representing GT_LCLBLK ? */ needSlot |= (lclNum == lvaOutgoingArgSpaceVar); #endif // FEATURE_FIXED_OUT_ARGS #ifdef DEBUG /* For debugging, note that we have to reserve space even for unused variables if they are ever in scope. However, this is not an issue as fgExtendDbgLifetimes() adds an initialization and variables in scope will not have a zero ref-cnt. 
*/ if (opts.compDbgCode && !varDsc->lvIsParam && varDsc->lvTracked) { for (unsigned scopeNum = 0; scopeNum < info.compVarScopesCount; scopeNum++) { noway_assert(info.compVarScopes[scopeNum].vsdVarNum != lclNum); } } #endif /* For Debug Code, we have to reserve space even if the variable is never in scope. We will also need to initialize it if it is a GC var. So we set lvMustInit and artificially bump up the ref-cnt. */ if (opts.compDbgCode && !stkFixedArgInVarArgs && lclNum < info.compLocalsCount) { needSlot |= true; if (lvaTypeIsGC(lclNum)) { varDsc->lvRefCnt = 1; } if (!varDsc->lvIsParam) { varDsc->lvMustInit = true; } } #ifndef LEGACY_BACKEND varDsc->lvOnFrame = needSlot; #endif // !LEGACY_BACKEND if (!needSlot) { /* Clear the lvMustInit flag in case it is set */ varDsc->lvMustInit = false; goto NOT_STK; } } #ifndef LEGACY_BACKEND if (!varDsc->lvOnFrame) { goto NOT_STK; } #endif // !LEGACY_BACKEND ON_STK: /* The variable (or part of it) lives on the stack frame */ noway_assert((varDsc->lvType != TYP_UNDEF) && (varDsc->lvType != TYP_VOID) && (varDsc->lvType != TYP_UNKNOWN)); #if FEATURE_FIXED_OUT_ARGS noway_assert((lclNum == lvaOutgoingArgSpaceVar) || lvaLclSize(lclNum) != 0); #else // FEATURE_FIXED_OUT_ARGS noway_assert(lvaLclSize(lclNum) != 0); #endif // FEATURE_FIXED_OUT_ARGS varDsc->lvOnFrame = true; // Our prediction is that the final home for this local variable will be in the // stack frame NOT_STK:; varDsc->lvFramePointerBased = codeGen->isFramePointerUsed(); #if DOUBLE_ALIGN if (codeGen->doDoubleAlign()) { noway_assert(codeGen->isFramePointerUsed() == false); /* All arguments are off of EBP with double-aligned frames */ if (varDsc->lvIsParam && !varDsc->lvIsRegArg) { varDsc->lvFramePointerBased = true; } } #endif /* Some basic checks */ // It must be in a register, on frame, or have zero references. noway_assert(varDsc->lvIsInReg() || varDsc->lvOnFrame || varDsc->lvRefCnt == 0); #ifndef LEGACY_BACKEND // We can't have both lvRegister and lvOnFrame for RyuJIT noway_assert(!varDsc->lvRegister || !varDsc->lvOnFrame); #else // LEGACY_BACKEND /* If both lvRegister and lvOnFrame are set, it must be partially enregistered */ noway_assert(!varDsc->lvRegister || !varDsc->lvOnFrame || (varDsc->lvType == TYP_LONG && varDsc->lvOtherReg == REG_STK)); #endif // LEGACY_BACKEND #ifdef DEBUG // For varargs functions, there should be no direct references to // parameter variables except for 'this' (because these were morphed // in the importer) and the 'arglist' parameter (which is not a GC // pointer), and the return buffer argument (if we are returning a // struct). // This is important because we don't want to try to report them // to the GC, as the frame offsets in these local variables would // not be correct.
if (varDsc->lvIsParam && raIsVarargsStackArg(lclNum)) { if (!varDsc->lvPromoted && !varDsc->lvIsStructField) { noway_assert(varDsc->lvRefCnt == 0 && !varDsc->lvRegister && !varDsc->lvOnFrame); } } #endif } } #ifdef LEGACY_BACKEND void Compiler::rpRecordPrediction() { if (rpBestRecordedPrediction == NULL || rpStkPredict < rpBestRecordedStkPredict) { if (rpBestRecordedPrediction == NULL) { rpBestRecordedPrediction = reinterpret_cast<VarRegPrediction*>(compGetMemArrayA(lvaCount, sizeof(VarRegPrediction))); } for (unsigned k = 0; k < lvaCount; k++) { rpBestRecordedPrediction[k].m_isEnregistered = lvaTable[k].lvRegister; rpBestRecordedPrediction[k].m_regNum = (regNumberSmall)lvaTable[k].GetRegNum(); rpBestRecordedPrediction[k].m_otherReg = (regNumberSmall)lvaTable[k].GetOtherReg(); } rpBestRecordedStkPredict = rpStkPredict; JITDUMP("Recorded a feasible reg prediction with weighted stack use count %d.\n", rpBestRecordedStkPredict); } } void Compiler::rpUseRecordedPredictionIfBetter() { JITDUMP("rpStkPredict is %d; previous feasible reg prediction is %d.\n", rpStkPredict, rpBestRecordedPrediction != NULL ? rpBestRecordedStkPredict : 0); if (rpBestRecordedPrediction != NULL && rpStkPredict > rpBestRecordedStkPredict) { JITDUMP("Reverting to a previously-recorded feasible reg prediction with weighted stack use count %d.\n", rpBestRecordedStkPredict); for (unsigned k = 0; k < lvaCount; k++) { lvaTable[k].lvRegister = rpBestRecordedPrediction[k].m_isEnregistered; lvaTable[k].SetRegNum(static_cast<regNumber>(rpBestRecordedPrediction[k].m_regNum)); lvaTable[k].SetOtherReg(static_cast<regNumber>(rpBestRecordedPrediction[k].m_otherReg)); } } } #endif // LEGACY_BACKEND