 src/jit/regalloc.cpp | 6841 +++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 6841 insertions(+)
diff --git a/src/jit/regalloc.cpp b/src/jit/regalloc.cpp
new file mode 100644
index 0000000000..9dd7299906
--- /dev/null
+++ b/src/jit/regalloc.cpp
@@ -0,0 +1,6841 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX RegAlloc XX
+XX XX
+XX Does the register allocation and puts the remaining lclVars on the stack XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+#include "jitpch.h"
+#ifdef _MSC_VER
+#pragma hdrstop
+#endif
+#include "regalloc.h"
+
+#if FEATURE_FP_REGALLOC
+Compiler::enumConfigRegisterFP Compiler::raConfigRegisterFP()
+{
+ DWORD val = JitConfig.JitRegisterFP();
+
+ return (enumConfigRegisterFP)(val & 0x3);
+}
+#endif // FEATURE_FP_REGALLOC
+
+regMaskTP Compiler::raConfigRestrictMaskFP()
+{
+ regMaskTP result = RBM_NONE;
+
+#if FEATURE_FP_REGALLOC
+ switch (raConfigRegisterFP())
+ {
+ case CONFIG_REGISTER_FP_NONE:
+ result = RBM_NONE;
+ break;
+ case CONFIG_REGISTER_FP_CALLEE_TRASH:
+ result = RBM_FLT_CALLEE_TRASH;
+ break;
+ case CONFIG_REGISTER_FP_CALLEE_SAVED:
+ result = RBM_FLT_CALLEE_SAVED;
+ break;
+ case CONFIG_REGISTER_FP_FULL:
+ result = RBM_ALLFLOAT;
+ break;
+ }
+#endif
+
+ return result;
+}
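+
+// Note (illustrative, not part of the allocator): rpPredictRegPick consumes
+// this mask as (raConfigRestrictMaskFP() | RBM_FLT_CALLEE_TRASH), so even
+// CONFIG_REGISTER_FP_NONE leaves the callee-trash floating point registers
+// available, while the other settings widen the set up to RBM_ALLFLOAT.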
+
+#ifdef LEGACY_BACKEND // We don't use any of the old register allocator functions when LSRA is used instead.
+
+#if DOUBLE_ALIGN
+DWORD Compiler::getCanDoubleAlign()
+{
+#ifdef DEBUG
+ if (compStressCompile(STRESS_DBL_ALN, 20))
+ return MUST_DOUBLE_ALIGN;
+
+ return JitConfig.JitDoubleAlign();
+#else
+ return DEFAULT_DOUBLE_ALIGN;
+#endif
+}
+#endif // DOUBLE_ALIGN
+
+void Compiler::raInit()
+{
+#if FEATURE_STACK_FP_X87
+ /* We have not assigned any FP variables to registers yet */
+
+ VarSetOps::AssignNoCopy(this, optAllFPregVars, VarSetOps::UninitVal());
+#endif
+ codeGen->intRegState.rsIsFloat = false;
+ codeGen->floatRegState.rsIsFloat = true;
+
+ rpReverseEBPenreg = false;
+ rpAsgVarNum = -1;
+ rpPassesMax = 6;
+ rpPassesPessimize = rpPassesMax - 3;
+ if (opts.compDbgCode)
+ {
+ rpPassesMax++;
+ }
+ rpStkPredict = (unsigned)-1;
+ rpFrameType = FT_NOT_SET;
+ rpLostEnreg = false;
+ rpMustCreateEBPCalled = false;
+ rpRegAllocDone = false;
+ rpMaskPInvokeEpilogIntf = RBM_NONE;
+
+ rpPredictMap[PREDICT_NONE] = RBM_NONE;
+ rpPredictMap[PREDICT_ADDR] = RBM_NONE;
+
+#if FEATURE_FP_REGALLOC
+ rpPredictMap[PREDICT_REG] = RBM_ALLINT | RBM_ALLFLOAT;
+ rpPredictMap[PREDICT_SCRATCH_REG] = RBM_ALLINT | RBM_ALLFLOAT;
+#else
+ rpPredictMap[PREDICT_REG] = RBM_ALLINT;
+ rpPredictMap[PREDICT_SCRATCH_REG] = RBM_ALLINT;
+#endif
+
+#define REGDEF(name, rnum, mask, sname) rpPredictMap[PREDICT_REG_##name] = RBM_##name;
+#include "register.h"
+
+#if defined(_TARGET_ARM_)
+
+ rpPredictMap[PREDICT_PAIR_R0R1] = RBM_R0 | RBM_R1;
+ rpPredictMap[PREDICT_PAIR_R2R3] = RBM_R2 | RBM_R3;
+ rpPredictMap[PREDICT_REG_SP] = RBM_ILLEGAL;
+
+#elif defined(_TARGET_AMD64_)
+
+ rpPredictMap[PREDICT_NOT_REG_EAX] = RBM_ALLINT & ~RBM_EAX;
+ rpPredictMap[PREDICT_NOT_REG_ECX] = RBM_ALLINT & ~RBM_ECX;
+ rpPredictMap[PREDICT_REG_ESP] = RBM_ILLEGAL;
+
+#elif defined(_TARGET_X86_)
+
+ rpPredictMap[PREDICT_NOT_REG_EAX] = RBM_ALLINT & ~RBM_EAX;
+ rpPredictMap[PREDICT_NOT_REG_ECX] = RBM_ALLINT & ~RBM_ECX;
+ rpPredictMap[PREDICT_REG_ESP] = RBM_ILLEGAL;
+ rpPredictMap[PREDICT_PAIR_EAXEDX] = RBM_EAX | RBM_EDX;
+ rpPredictMap[PREDICT_PAIR_ECXEBX] = RBM_ECX | RBM_EBX;
+
+#endif
+
+ rpBestRecordedPrediction = NULL;
+}
+
+/*****************************************************************************
+ *
+ * The following table(s) determine the order in which registers are considered
+ * for variables to live in
+ */
+
+const regNumber* Compiler::raGetRegVarOrder(var_types regType, unsigned* wbVarOrderSize)
+{
+#if FEATURE_FP_REGALLOC
+ if (varTypeIsFloating(regType))
+ {
+ static const regNumber raRegVarOrderFlt[] = {REG_VAR_ORDER_FLT};
+ const unsigned raRegVarOrderFltSize = sizeof(raRegVarOrderFlt) / sizeof(raRegVarOrderFlt[0]);
+
+ if (wbVarOrderSize != NULL)
+ *wbVarOrderSize = raRegVarOrderFltSize;
+
+ return &raRegVarOrderFlt[0];
+ }
+ else
+#endif
+ {
+ static const regNumber raRegVarOrder[] = {REG_VAR_ORDER};
+ const unsigned raRegVarOrderSize = sizeof(raRegVarOrder) / sizeof(raRegVarOrder[0]);
+
+ if (wbVarOrderSize != NULL)
+ *wbVarOrderSize = raRegVarOrderSize;
+
+ return &raRegVarOrder[0];
+ }
+}
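+
+// Illustrative usage sketch (the identifiers are real, the snippet itself is
+// not part of the allocator): callers walk the returned table in order when
+// searching for a register to assign:
+//
+//     unsigned         orderSize;
+//     const regNumber* order = raGetRegVarOrder(TYP_INT, &orderSize);
+//     for (unsigned i = 0; i < orderSize; i++)
+//     {
+//         // consider order[i] as the next-best candidate register
+//     }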
+
+#ifdef DEBUG
+
+/*****************************************************************************
+ *
+ * Dump out the variable interference graph
+ *
+ */
+
+void Compiler::raDumpVarIntf()
+{
+ unsigned lclNum;
+ LclVarDsc* varDsc;
+
+ printf("Var. interference graph for %s\n", info.compFullName);
+
+ for (lclNum = 0, varDsc = lvaTable; lclNum < lvaCount; lclNum++, varDsc++)
+ {
+ /* Ignore the variable if it's not tracked */
+
+ if (!varDsc->lvTracked)
+ continue;
+
+ /* Get hold of the index and the interference mask for the variable */
+ unsigned varIndex = varDsc->lvVarIndex;
+
+ printf(" V%02u,T%02u and ", lclNum, varIndex);
+
+ unsigned refIndex;
+
+ for (refIndex = 0; refIndex < lvaTrackedCount; refIndex++)
+ {
+ if (VarSetOps::IsMember(this, lvaVarIntf[varIndex], refIndex))
+ printf("T%02u ", refIndex);
+ else
+ printf(" ");
+ }
+
+ printf("\n");
+ }
+
+ printf("\n");
+}
+
+/*****************************************************************************
+ *
+ * Dump out the register interference graph
+ *
+ */
+void Compiler::raDumpRegIntf()
+{
+ printf("Reg. interference graph for %s\n", info.compFullName);
+
+ unsigned lclNum;
+ LclVarDsc* varDsc;
+
+ for (lclNum = 0, varDsc = lvaTable; lclNum < lvaCount; lclNum++, varDsc++)
+ {
+ unsigned varNum;
+
+ /* Ignore the variable if it's not tracked */
+
+ if (!varDsc->lvTracked)
+ continue;
+
+ /* Get hold of the index and the interference mask for the variable */
+
+ varNum = varDsc->lvVarIndex;
+
+ printf(" V%02u,T%02u and ", lclNum, varNum);
+
+ if (varDsc->IsFloatRegType())
+ {
+#if !FEATURE_STACK_FP_X87
+ for (regNumber regNum = REG_FP_FIRST; regNum <= REG_FP_LAST; regNum = REG_NEXT(regNum))
+ {
+ if (VarSetOps::IsMember(this, raLclRegIntf[regNum], varNum))
+ printf("%3s ", getRegName(regNum, true));
+ else
+ printf(" ");
+ }
+#endif
+ }
+ else
+ {
+ for (regNumber regNum = REG_INT_FIRST; regNum <= REG_INT_LAST; regNum = REG_NEXT(regNum))
+ {
+ if (VarSetOps::IsMember(this, raLclRegIntf[regNum], varNum))
+ printf("%3s ", getRegName(regNum));
+ else
+ printf(" ");
+ }
+ }
+
+ printf("\n");
+ }
+
+ printf("\n");
+}
+#endif // DEBUG
+
+/*****************************************************************************
+ *
+ * We'll adjust the ref counts based on interference
+ *
+ */
+
+void Compiler::raAdjustVarIntf()
+{
+ // This method was not correct and has been disabled.
+ return;
+}
+
+/*****************************************************************************/
+/*****************************************************************************/
+/* Determine the register mask for a call/return value from its type.
+ */
+
+inline regMaskTP Compiler::genReturnRegForTree(GenTreePtr tree)
+{
+ var_types type = tree->TypeGet();
+
+ if (type == TYP_STRUCT && IsHfa(tree))
+ {
+ int retSlots = GetHfaCount(tree);
+ return ((1 << retSlots) - 1) << REG_FLOATRET;
+ }
+
+ const static regMaskTP returnMap[TYP_COUNT] = {
+ RBM_ILLEGAL, // TYP_UNDEF,
+ RBM_NONE, // TYP_VOID,
+ RBM_INTRET, // TYP_BOOL,
+ RBM_INTRET, // TYP_CHAR,
+ RBM_INTRET, // TYP_BYTE,
+ RBM_INTRET, // TYP_UBYTE,
+ RBM_INTRET, // TYP_SHORT,
+ RBM_INTRET, // TYP_USHORT,
+ RBM_INTRET, // TYP_INT,
+ RBM_INTRET, // TYP_UINT,
+ RBM_LNGRET, // TYP_LONG,
+ RBM_LNGRET, // TYP_ULONG,
+ RBM_FLOATRET, // TYP_FLOAT,
+ RBM_DOUBLERET, // TYP_DOUBLE,
+ RBM_INTRET, // TYP_REF,
+ RBM_INTRET, // TYP_BYREF,
+ RBM_INTRET, // TYP_ARRAY,
+ RBM_ILLEGAL, // TYP_STRUCT,
+ RBM_ILLEGAL, // TYP_BLK,
+ RBM_ILLEGAL, // TYP_LCLBLK,
+ RBM_ILLEGAL, // TYP_PTR,
+ RBM_ILLEGAL, // TYP_FNC,
+ RBM_ILLEGAL, // TYP_UNKNOWN,
+ };
+
+ assert((unsigned)type < sizeof(returnMap) / sizeof(returnMap[0]));
+ assert(returnMap[TYP_LONG] == RBM_LNGRET);
+ assert(returnMap[TYP_DOUBLE] == RBM_DOUBLERET);
+ assert(returnMap[TYP_REF] == RBM_INTRET);
+ assert(returnMap[TYP_STRUCT] == RBM_ILLEGAL);
+
+ regMaskTP result = returnMap[type];
+ assert(result != RBM_ILLEGAL);
+ return result;
+}
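+
+// Worked example for the HFA path above (illustrative): a three-element HFA
+// yields retSlots == 3, so the mask is ((1 << 3) - 1) << REG_FLOATRET, i.e.
+// the three consecutive floating point return registers starting at
+// REG_FLOATRET.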
+
+/*****************************************************************************/
+
+/****************************************************************************/
+
+#ifdef DEBUG
+
+static void dispLifeSet(Compiler* comp, VARSET_VALARG_TP mask, VARSET_VALARG_TP life)
+{
+ unsigned lclNum;
+ LclVarDsc* varDsc;
+
+ for (lclNum = 0, varDsc = comp->lvaTable; lclNum < comp->lvaCount; lclNum++, varDsc++)
+ {
+ if (!varDsc->lvTracked)
+ continue;
+
+ if (!VarSetOps::IsMember(comp, mask, varDsc->lvVarIndex))
+ continue;
+
+ if (VarSetOps::IsMember(comp, life, varDsc->lvVarIndex))
+ printf("V%02u ", lclNum);
+ }
+}
+
+#endif
+
+/*****************************************************************************/
+#ifdef DEBUG
+/*****************************************************************************
+ *
+ * Debugging helpers - display variables liveness info.
+ */
+
+void dispFPvarsInBBlist(BasicBlock* beg, BasicBlock* end, VARSET_TP mask, Compiler* comp)
+{
+ do
+ {
+ printf("BB%02u: ", beg->bbNum);
+
+ printf(" in = [ ");
+ dispLifeSet(comp, mask, beg->bbLiveIn);
+ printf("] ,");
+
+ printf(" out = [ ");
+ dispLifeSet(comp, mask, beg->bbLiveOut);
+ printf("]");
+
+ if (beg->bbFlags & BBF_VISITED)
+ printf(" inner=%u", beg->bbFPinVars);
+
+ printf("\n");
+
+ beg = beg->bbNext;
+ if (!beg)
+ return;
+ } while (beg != end);
+}
+
+#if FEATURE_STACK_FP_X87
+void Compiler::raDispFPlifeInfo()
+{
+ BasicBlock* block;
+
+ for (block = fgFirstBB; block; block = block->bbNext)
+ {
+ GenTreePtr stmt;
+
+ printf("BB%02u: in = [ ", block->bbNum);
+ dispLifeSet(this, optAllFloatVars, block->bbLiveIn);
+ printf("]\n\n");
+
+ VARSET_TP VARSET_INIT(this, life, block->bbLiveIn);
+ for (stmt = block->bbTreeList; stmt; stmt = stmt->gtNext)
+ {
+ GenTreePtr tree;
+
+ noway_assert(stmt->gtOper == GT_STMT);
+
+ for (tree = stmt->gtStmt.gtStmtList; tree; tree = tree->gtNext)
+ {
+ VarSetOps::AssignNoCopy(this, life, fgUpdateLiveSet(life, tree));
+
+ dispLifeSet(this, optAllFloatVars, life);
+ printf(" ");
+ gtDispTree(tree, 0, NULL, true);
+ }
+
+ printf("\n");
+ }
+
+ printf("BB%02u: out = [ ", block->bbNum);
+ dispLifeSet(this, optAllFloatVars, block->bbLiveOut);
+ printf("]\n\n");
+ }
+}
+#endif // FEATURE_STACK_FP_X87
+/*****************************************************************************/
+#endif // DEBUG
+/*****************************************************************************/
+
+/*****************************************************************************/
+
+void Compiler::raSetRegVarOrder(
+ var_types regType, regNumber* customVarOrder, unsigned* customVarOrderSize, regMaskTP prefReg, regMaskTP avoidReg)
+{
+ unsigned normalVarOrderSize;
+ const regNumber* normalVarOrder = raGetRegVarOrder(regType, &normalVarOrderSize);
+ unsigned index;
+ unsigned listIndex = 0;
+ regMaskTP usedReg = avoidReg;
+
+ noway_assert(*customVarOrderSize >= normalVarOrderSize);
+
+ if (prefReg)
+ {
+ /* First place the preferred registers at the start of customVarOrder */
+
+ regMaskTP regBit;
+ regNumber regNum;
+
+ for (index = 0; index < normalVarOrderSize; index++)
+ {
+ regNum = normalVarOrder[index];
+ regBit = genRegMask(regNum);
+
+ if (usedReg & regBit)
+ continue;
+
+ if (prefReg & regBit)
+ {
+ usedReg |= regBit;
+ noway_assert(listIndex < normalVarOrderSize);
+ customVarOrder[listIndex++] = regNum;
+ prefReg -= regBit;
+ if (prefReg == 0)
+ break;
+ }
+ }
+
+#if CPU_HAS_BYTE_REGS
+ /* Then, if byteable registers are preferred, place them next */
+
+ if (prefReg & RBM_BYTE_REG_FLAG)
+ {
+ for (index = 0; index < normalVarOrderSize; index++)
+ {
+ regNum = normalVarOrder[index];
+ regBit = genRegMask(regNum);
+
+ if (usedReg & regBit)
+ continue;
+
+ if (RBM_BYTE_REGS & regBit)
+ {
+ usedReg |= regBit;
+ noway_assert(listIndex < normalVarOrderSize);
+ customVarOrder[listIndex++] = regNum;
+ }
+ }
+ }
+
+#endif // CPU_HAS_BYTE_REGS
+ }
+
+ /* Now place all the non-preferred registers */
+
+ for (index = 0; index < normalVarOrderSize; index++)
+ {
+ regNumber regNum = normalVarOrder[index];
+ regMaskTP regBit = genRegMask(regNum);
+
+ if (usedReg & regBit)
+ continue;
+
+ usedReg |= regBit;
+ noway_assert(listIndex < normalVarOrderSize);
+ customVarOrder[listIndex++] = regNum;
+ }
+
+ if (avoidReg)
+ {
+ /* Now place the "avoid" registers */
+
+ for (index = 0; index < normalVarOrderSize; index++)
+ {
+ regNumber regNum = normalVarOrder[index];
+ regMaskTP regBit = genRegMask(regNum);
+
+ if (avoidReg & regBit)
+ {
+ noway_assert(listIndex < normalVarOrderSize);
+ customVarOrder[listIndex++] = regNum;
+ avoidReg -= regBit;
+ if (avoidReg == 0)
+ break;
+ }
+ }
+ }
+
+ *customVarOrderSize = listIndex;
+ noway_assert(listIndex == normalVarOrderSize);
+}
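+
+// Summary of the ordering produced above (illustrative): the prefReg
+// registers come first (followed by the byteable registers when
+// RBM_BYTE_REG_FLAG is preferred on CPU_HAS_BYTE_REGS targets), then the
+// remaining registers in the normal order, and finally the avoidReg
+// registers, so those are only picked as a last resort.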
+
+/*****************************************************************************
+ *
+ * Set up raAvoidArgRegMask and rsCalleeRegArgMaskLiveIn
+ */
+
+void Compiler::raSetupArgMasks(RegState* regState)
+{
+ /* Determine the registers holding incoming register arguments */
+ /* and set up raAvoidArgRegMask to the set of registers that we */
+ /* may want to avoid when enregistering the locals. */
+
+ regState->rsCalleeRegArgMaskLiveIn = RBM_NONE;
+ raAvoidArgRegMask = RBM_NONE;
+
+ LclVarDsc* argsEnd = lvaTable + info.compArgsCount;
+
+ for (LclVarDsc* argDsc = lvaTable; argDsc < argsEnd; argDsc++)
+ {
+ noway_assert(argDsc->lvIsParam);
+
+ // Is it a register argument?
+ if (!argDsc->lvIsRegArg)
+ continue;
+
+ // only process args that apply to the current register file
+ if ((argDsc->IsFloatRegType() && !info.compIsVarArgs && !opts.compUseSoftFP) != regState->rsIsFloat)
+ {
+ continue;
+ }
+
+ // Is it dead on entry?
+ // In certain cases, such as when compJmpOpUsed is true
+ // or when we have a generic type context arg that we must report,
+ // the arguments have to be kept alive throughout the prolog,
+ // so we have to consider them live on entry.
+ //
+ bool keepArgAlive = compJmpOpUsed;
+ if ((unsigned(info.compTypeCtxtArg) != BAD_VAR_NUM) && lvaReportParamTypeArg() &&
+ ((lvaTable + info.compTypeCtxtArg) == argDsc))
+ {
+ keepArgAlive = true;
+ }
+
+ if (!keepArgAlive && argDsc->lvTracked && !VarSetOps::IsMember(this, fgFirstBB->bbLiveIn, argDsc->lvVarIndex))
+ {
+ continue;
+ }
+
+ // The code to set the regState for each arg is outlined for shared use
+ // by linear scan
+ regNumber inArgReg = raUpdateRegStateForArg(regState, argDsc);
+
+ // Do we need to try to avoid this incoming arg register?
+
+ // If it's not tracked, don't do the stuff below.
+ if (!argDsc->lvTracked)
+ continue;
+
+ // If the incoming arg is used after a call it is live across
+ // a call and will have to be allocated to a callee-saved
+ // register anyway (a very common case).
+ //
+ // In this case it is pointless to ask the higher ref count
+ // locals to avoid using the incoming arg register
+
+ unsigned argVarIndex = argDsc->lvVarIndex;
+
+ /* Does the incoming register and the arg variable interfere? */
+
+ if (!VarSetOps::IsMember(this, raLclRegIntf[inArgReg], argVarIndex))
+ {
+ // No they do not interfere,
+ // so we add inArgReg to raAvoidArgRegMask
+
+ raAvoidArgRegMask |= genRegMask(inArgReg);
+ }
+#ifdef _TARGET_ARM_
+ if (argDsc->lvType == TYP_DOUBLE)
+ {
+ // Avoid the double register argument pair for register allocation.
+ if (!VarSetOps::IsMember(this, raLclRegIntf[inArgReg + 1], argVarIndex))
+ {
+ raAvoidArgRegMask |= genRegMask(static_cast<regNumber>(inArgReg + 1));
+ }
+ }
+#endif
+ }
+}
+
+#endif // LEGACY_BACKEND
+
+// The code to set the regState for each arg is outlined for shared use
+// by linear scan. (It is not shared for the System V AMD64 platform.)
+regNumber Compiler::raUpdateRegStateForArg(RegState* regState, LclVarDsc* argDsc)
+{
+ regNumber inArgReg = argDsc->lvArgReg;
+ regMaskTP inArgMask = genRegMask(inArgReg);
+
+ if (regState->rsIsFloat)
+ {
+ noway_assert(inArgMask & RBM_FLTARG_REGS);
+ }
+ else // regState is for the integer registers
+ {
+ // This might be the fixed return buffer register argument (on ARM64)
+ // We check and allow inArgReg to be theFixedRetBuffReg()
+ if (hasFixedRetBuffReg() && (inArgReg == theFixedRetBuffReg()))
+ {
+ // We should have a TYP_BYREF or TYP_I_IMPL arg and not a TYP_STRUCT arg
+ noway_assert(argDsc->lvType == TYP_BYREF || argDsc->lvType == TYP_I_IMPL);
+ // We should have recorded the variable number for the return buffer arg
+ noway_assert(info.compRetBuffArg != BAD_VAR_NUM);
+ }
+ else // we have a regular arg
+ {
+ noway_assert(inArgMask & RBM_ARG_REGS);
+ }
+ }
+
+ regState->rsCalleeRegArgMaskLiveIn |= inArgMask;
+
+#ifdef _TARGET_ARM_
+ if (argDsc->lvType == TYP_DOUBLE)
+ {
+ if (info.compIsVarArgs || opts.compUseSoftFP)
+ {
+ assert((inArgReg == REG_R0) || (inArgReg == REG_R2));
+ assert(!regState->rsIsFloat);
+ }
+ else
+ {
+ assert(regState->rsIsFloat);
+ assert(emitter::isDoubleReg(inArgReg));
+ }
+ regState->rsCalleeRegArgMaskLiveIn |= genRegMask((regNumber)(inArgReg + 1));
+ }
+ else if (argDsc->lvType == TYP_LONG)
+ {
+ assert((inArgReg == REG_R0) || (inArgReg == REG_R2));
+ assert(!regState->rsIsFloat);
+ regState->rsCalleeRegArgMaskLiveIn |= genRegMask((regNumber)(inArgReg + 1));
+ }
+#endif // _TARGET_ARM_
+
+#if FEATURE_MULTIREG_ARGS
+ if (argDsc->lvType == TYP_STRUCT)
+ {
+ if (argDsc->lvIsHfaRegArg())
+ {
+ assert(regState->rsIsFloat);
+ unsigned cSlots = GetHfaCount(argDsc->lvVerTypeInfo.GetClassHandleForValueClass());
+ for (unsigned i = 1; i < cSlots; i++)
+ {
+ assert(inArgReg + i <= LAST_FP_ARGREG);
+ regState->rsCalleeRegArgMaskLiveIn |= genRegMask(static_cast<regNumber>(inArgReg + i));
+ }
+ }
+ else
+ {
+ unsigned cSlots = argDsc->lvSize() / TARGET_POINTER_SIZE;
+ for (unsigned i = 1; i < cSlots; i++)
+ {
+ regNumber nextArgReg = (regNumber)(inArgReg + i);
+ if (nextArgReg > REG_ARG_LAST)
+ {
+ break;
+ }
+ assert(regState->rsIsFloat == false);
+ regState->rsCalleeRegArgMaskLiveIn |= genRegMask(nextArgReg);
+ }
+ }
+ }
+#endif // FEATURE_MULTIREG_ARGS
+
+ return inArgReg;
+}
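+
+// Illustrative ARM example: a TYP_DOUBLE or TYP_LONG argument occupies a
+// consecutive register pair, so when inArgReg is the first register of the
+// pair the code above also adds inArgReg + 1 to rsCalleeRegArgMaskLiveIn.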
+
+#ifdef LEGACY_BACKEND // We don't use any of the old register allocator functions when LSRA is used instead.
+
+/*****************************************************************************
+ *
+ * Assign variables to live in registers, etc.
+ */
+
+void Compiler::raAssignVars()
+{
+#ifdef DEBUG
+ if (verbose)
+ printf("*************** In raAssignVars()\n");
+#endif
+ /* We need to keep track of which registers we ever touch */
+
+ codeGen->regSet.rsClearRegsModified();
+
+#if FEATURE_STACK_FP_X87
+ // FP register allocation
+ raEnregisterVarsStackFP();
+ raGenerateFPRefCounts();
+#endif
+
+ /* Predict registers used by code generation */
+ rpPredictRegUse(); // New reg predictor/allocator
+
+ // Change all unused promoted non-argument struct locals to a non-GC type (in this case TYP_INT)
+ // so that the gc tracking logic and lvMustInit logic will ignore them.
+
+ unsigned lclNum;
+ LclVarDsc* varDsc;
+
+ for (lclNum = 0, varDsc = lvaTable; lclNum < lvaCount; lclNum++, varDsc++)
+ {
+ if (varDsc->lvType != TYP_STRUCT)
+ continue;
+
+ if (!varDsc->lvPromoted)
+ continue;
+
+ if (varDsc->lvIsParam)
+ continue;
+
+ if (varDsc->lvRefCnt > 0)
+ continue;
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Mark unused struct local V%02u\n", lclNum);
+ }
+
+ lvaPromotionType promotionType = lvaGetPromotionType(varDsc);
+
+ if (promotionType == PROMOTION_TYPE_DEPENDENT)
+ {
+ // This should only happen when all its field locals are unused as well.
+
+ for (unsigned varNum = varDsc->lvFieldLclStart; varNum < varDsc->lvFieldLclStart + varDsc->lvFieldCnt;
+ varNum++)
+ {
+ noway_assert(lvaTable[varNum].lvRefCnt == 0);
+ }
+ }
+ else
+ {
+ noway_assert(promotionType == PROMOTION_TYPE_INDEPENDENT);
+ }
+
+ varDsc->lvUnusedStruct = 1;
+#endif
+
+ // Change such struct locals to ints
+
+ varDsc->lvType = TYP_INT; // Bash to a non-gc type.
+ noway_assert(!varDsc->lvTracked);
+ noway_assert(!varDsc->lvRegister);
+ varDsc->lvOnFrame = false; // Force it not to be onstack.
+ varDsc->lvMustInit = false; // Force not to init it.
+ varDsc->lvStkOffs = 0; // Set it to anything other than BAD_STK_OFFS to make genSetScopeInfo() happy
+ }
+}
+
+/*****************************************************************************/
+/*****************************************************************************/
+
+/*****************************************************************************
+ *
+ * Given a regNumber return the correct predictReg enum value
+ */
+
+inline static rpPredictReg rpGetPredictForReg(regNumber reg)
+{
+ return (rpPredictReg)(((int)reg) + ((int)PREDICT_REG_FIRST));
+}
+
+/*****************************************************************************
+ *
+ * Given a varIndex return the correct predictReg enum value
+ */
+
+inline static rpPredictReg rpGetPredictForVarIndex(unsigned varIndex)
+{
+ return (rpPredictReg)(varIndex + ((int)PREDICT_REG_VAR_T00));
+}
+
+/*****************************************************************************
+ *
+ * Given a rpPredictReg return the corresponding var index value
+ */
+
+inline static unsigned rpGetVarIndexForPredict(rpPredictReg predict)
+{
+ return (unsigned)predict - (unsigned)PREDICT_REG_VAR_T00;
+}
+
+/*****************************************************************************
+ *
+ * Given a rpPredictReg return true if it specifies a Txx register
+ */
+
+inline static bool rpHasVarIndexForPredict(rpPredictReg predict)
+{
+ if ((predict >= PREDICT_REG_VAR_T00) && (predict <= PREDICT_REG_VAR_MAX))
+ return true;
+ else
+ return false;
+}
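+
+// Illustrative round-trip of the helpers above (this assumes, as the
+// arithmetic does, that the PREDICT_REG_VAR_Txx values are contiguous):
+//
+//     assert(rpGetVarIndexForPredict(rpGetPredictForVarIndex(varIndex)) == varIndex);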
+
+/*****************************************************************************
+ *
+ * Given a regmask return the correct predictReg enum value
+ */
+
+static rpPredictReg rpGetPredictForMask(regMaskTP regmask)
+{
+ rpPredictReg result = PREDICT_NONE;
+ if (regmask != 0) /* Check if regmask has any bits set */
+ {
+ if (((regmask - 1) & regmask) == 0) /* Check if regmask has one bit set */
+ {
+ DWORD reg = 0;
+ assert(FitsIn<DWORD>(regmask));
+ BitScanForward(&reg, (DWORD)regmask);
+ return rpGetPredictForReg((regNumber)reg);
+ }
+
+#if defined(_TARGET_ARM_)
+ /* It has multiple bits set */
+ else if (regmask == (RBM_R0 | RBM_R1))
+ {
+ result = PREDICT_PAIR_R0R1;
+ }
+ else if (regmask == (RBM_R2 | RBM_R3))
+ {
+ result = PREDICT_PAIR_R2R3;
+ }
+#elif defined(_TARGET_X86_)
+ /* It has multiple bits set */
+ else if (regmask == (RBM_EAX | RBM_EDX))
+ {
+ result = PREDICT_PAIR_EAXEDX;
+ }
+ else if (regmask == (RBM_ECX | RBM_EBX))
+ {
+ result = PREDICT_PAIR_ECXEBX;
+ }
+#endif
+ else /* It doesn't match anything */
+ {
+ result = PREDICT_NONE;
+ assert(!"unreachable");
+ NO_WAY("bad regpair");
+ }
+ }
+ return result;
+}
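+
+// Illustrative examples of the mapping above: a single-bit mask such as
+// RBM_EAX maps to PREDICT_REG_EAX via BitScanForward, while on x86 the exact
+// pair RBM_EAX | RBM_EDX maps to PREDICT_PAIR_EAXEDX; any other multi-bit
+// mask asserts.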
+
+/*****************************************************************************
+ *
+ * Record a variable to register(s) interference
+ */
+
+bool Compiler::rpRecordRegIntf(regMaskTP regMask, VARSET_VALARG_TP life DEBUGARG(const char* msg))
+
+{
+ bool addedIntf = false;
+
+ if (regMask != 0)
+ {
+ for (regNumber regNum = REG_FIRST; regNum < REG_COUNT; regNum = REG_NEXT(regNum))
+ {
+ regMaskTP regBit = genRegMask(regNum);
+
+ if (regMask & regBit)
+ {
+ VARSET_TP VARSET_INIT_NOCOPY(newIntf, VarSetOps::Diff(this, life, raLclRegIntf[regNum]));
+ if (!VarSetOps::IsEmpty(this, newIntf))
+ {
+#ifdef DEBUG
+ if (verbose)
+ {
+ VARSET_ITER_INIT(this, newIntfIter, newIntf, varNum);
+ while (newIntfIter.NextElem(this, &varNum))
+ {
+ unsigned lclNum = lvaTrackedToVarNum[varNum];
+ LclVarDsc* varDsc = &lvaTable[varNum];
+#if FEATURE_FP_REGALLOC
+ // Only print the useful interferences
+ // i.e. floating point LclVar interference with floating point registers
+ // or integer LclVar interference with general purpose registers
+ if (varTypeIsFloating(varDsc->TypeGet()) == genIsValidFloatReg(regNum))
+#endif
+ {
+ printf("Record interference between V%02u,T%02u and %s -- %s\n", lclNum, varNum,
+ getRegName(regNum), msg);
+ }
+ }
+ }
+#endif
+ addedIntf = true;
+ VarSetOps::UnionD(this, raLclRegIntf[regNum], newIntf);
+ }
+
+ regMask -= regBit;
+ if (regMask == 0)
+ break;
+ }
+ }
+ }
+ return addedIntf;
+}
+
+/*****************************************************************************
+ *
+ * Record a new variable to variable(s) interference
+ */
+
+bool Compiler::rpRecordVarIntf(unsigned varNum, VARSET_VALARG_TP intfVar DEBUGARG(const char* msg))
+{
+ noway_assert((varNum >= 0) && (varNum < lvaTrackedCount));
+ noway_assert(!VarSetOps::IsEmpty(this, intfVar));
+
+ VARSET_TP VARSET_INIT_NOCOPY(oneVar, VarSetOps::MakeEmpty(this));
+ VarSetOps::AddElemD(this, oneVar, varNum);
+
+ bool newIntf = fgMarkIntf(intfVar, oneVar);
+
+ if (newIntf)
+ rpAddedVarIntf = true;
+
+#ifdef DEBUG
+ if (verbose && newIntf)
+ {
+ for (unsigned oneNum = 0; oneNum < lvaTrackedCount; oneNum++)
+ {
+ if (VarSetOps::IsMember(this, intfVar, oneNum))
+ {
+ unsigned lclNum = lvaTrackedToVarNum[varNum];
+ unsigned lclOne = lvaTrackedToVarNum[oneNum];
+ printf("Record interference between V%02u,T%02u and V%02u,T%02u -- %s\n", lclNum, varNum, lclOne,
+ oneNum, msg);
+ }
+ }
+ }
+#endif
+
+ return newIntf;
+}
+
+/*****************************************************************************
+ *
+ * Determine preferred register mask for a given predictReg value
+ */
+
+inline regMaskTP Compiler::rpPredictRegMask(rpPredictReg predictReg, var_types type)
+{
+ if (rpHasVarIndexForPredict(predictReg))
+ predictReg = PREDICT_REG;
+
+ noway_assert((unsigned)predictReg < sizeof(rpPredictMap) / sizeof(rpPredictMap[0]));
+ noway_assert(rpPredictMap[predictReg] != RBM_ILLEGAL);
+
+ regMaskTP regAvailForType = rpPredictMap[predictReg];
+ if (varTypeIsFloating(type))
+ {
+ regAvailForType &= RBM_ALLFLOAT;
+ }
+ else
+ {
+ regAvailForType &= RBM_ALLINT;
+ }
+#ifdef _TARGET_ARM_
+ if (type == TYP_DOUBLE)
+ {
+ if ((predictReg >= PREDICT_REG_F0) && (predictReg <= PREDICT_REG_F31))
+ {
+ // Fix 388433 ARM JitStress WP7
+ if ((regAvailForType & RBM_DBL_REGS) != 0)
+ {
+ regAvailForType |= (regAvailForType << 1);
+ }
+ else
+ {
+ regAvailForType = RBM_NONE;
+ }
+ }
+ }
+#endif
+ return regAvailForType;
+}
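+
+// Illustrative note on the ARM TYP_DOUBLE widening above: a double needs two
+// consecutive float registers, so when regAvailForType has a bit set for an
+// even (RBM_DBL_REGS) register, OR-ing in (regAvailForType << 1) also sets
+// the bit for its odd partner, making the whole pair available.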
+
+/*****************************************************************************
+ *
+ * Predict register choice for a type.
+ *
+ * Adds the predicted registers to rsModifiedRegsMask.
+ */
+regMaskTP Compiler::rpPredictRegPick(var_types type, rpPredictReg predictReg, regMaskTP lockedRegs)
+{
+ regMaskTP preferReg = rpPredictRegMask(predictReg, type);
+ regNumber regNum;
+ regMaskTP regBits;
+
+ // Add any reserved register to the lockedRegs
+ lockedRegs |= codeGen->regSet.rsMaskResvd;
+
+ /* Clear out the lockedRegs from preferReg */
+ preferReg &= ~lockedRegs;
+
+ if (rpAsgVarNum != -1)
+ {
+ noway_assert((rpAsgVarNum >= 0) && (rpAsgVarNum < (int)lclMAX_TRACKED));
+
+ /* Don't pick the register used by rpAsgVarNum either */
+ LclVarDsc* tgtVar = lvaTable + lvaTrackedToVarNum[rpAsgVarNum];
+ noway_assert(tgtVar->lvRegNum != REG_STK);
+
+ preferReg &= ~genRegMask(tgtVar->lvRegNum);
+ }
+
+ switch (type)
+ {
+ case TYP_BOOL:
+ case TYP_BYTE:
+ case TYP_UBYTE:
+ case TYP_SHORT:
+ case TYP_CHAR:
+ case TYP_INT:
+ case TYP_UINT:
+ case TYP_REF:
+ case TYP_BYREF:
+#ifdef _TARGET_AMD64_
+ case TYP_LONG:
+#endif // _TARGET_AMD64_
+
+ // expand preferReg to all non-locked registers if no bits set
+ preferReg = codeGen->regSet.rsUseIfZero(preferReg & RBM_ALLINT, RBM_ALLINT & ~lockedRegs);
+
+ if (preferReg == 0) // no bits set?
+ {
+ // Add one predefined spill choice register if no bits set.
+ // (The jit will introduce one spill temp)
+ preferReg |= RBM_SPILL_CHOICE;
+ rpPredictSpillCnt++;
+
+#ifdef DEBUG
+ if (verbose)
+ printf("Predict one spill temp\n");
+#endif
+ }
+
+ if (preferReg != 0)
+ {
+ /* Iterate the registers in the order specified by rpRegTmpOrder */
+
+ for (unsigned index = 0; index < REG_TMP_ORDER_COUNT; index++)
+ {
+ regNum = rpRegTmpOrder[index];
+ regBits = genRegMask(regNum);
+
+ if ((preferReg & regBits) == regBits)
+ {
+ goto RET;
+ }
+ }
+ }
+ /* Otherwise we have allocated all registers, so do nothing */
+ break;
+
+#ifndef _TARGET_AMD64_
+ case TYP_LONG:
+
+ if ((preferReg == 0) || // no bits set?
+ ((preferReg & (preferReg - 1)) == 0)) // or only one bit set?
+ {
+ // expand preferReg to all non-locked registers
+ preferReg = RBM_ALLINT & ~lockedRegs;
+ }
+
+ if (preferReg == 0) // no bits set?
+ {
+ // Add EAX:EDX to the registers
+ // (The jit will introduce two spill temps)
+ preferReg = RBM_PAIR_TMP;
+ rpPredictSpillCnt += 2;
+#ifdef DEBUG
+ if (verbose)
+ printf("Predict two spill temps\n");
+#endif
+ }
+ else if ((preferReg & (preferReg - 1)) == 0) // only one bit set?
+ {
+ if ((preferReg & RBM_PAIR_TMP_LO) == 0)
+ {
+ // Add EAX to the registers
+ // (The jit will introduce one spill temp)
+ preferReg |= RBM_PAIR_TMP_LO;
+ }
+ else
+ {
+ // Add EDX to the registers
+ // (The jit will introduce one spill temp)
+ preferReg |= RBM_PAIR_TMP_HI;
+ }
+ rpPredictSpillCnt++;
+#ifdef DEBUG
+ if (verbose)
+ printf("Predict one spill temp\n");
+#endif
+ }
+
+ regPairNo regPair;
+ regPair = codeGen->regSet.rsFindRegPairNo(preferReg);
+ if (regPair != REG_PAIR_NONE)
+ {
+ regBits = genRegPairMask(regPair);
+ goto RET;
+ }
+
+ /* Otherwise we have allocated all registers, so do nothing */
+ break;
+#endif // _TARGET_AMD64_
+
+#ifdef _TARGET_ARM_
+ case TYP_STRUCT:
+#endif
+
+ case TYP_FLOAT:
+ case TYP_DOUBLE:
+
+#if FEATURE_FP_REGALLOC
+ regMaskTP restrictMask;
+ restrictMask = (raConfigRestrictMaskFP() | RBM_FLT_CALLEE_TRASH);
+ assert((restrictMask & RBM_SPILL_CHOICE_FLT) == RBM_SPILL_CHOICE_FLT);
+
+ // expand preferReg to all available non-locked registers if no bits set
+ preferReg = codeGen->regSet.rsUseIfZero(preferReg & restrictMask, restrictMask & ~lockedRegs);
+ regMaskTP preferDouble;
+ preferDouble = preferReg & (preferReg >> 1);
+
+ if ((preferReg == 0) // no bits set?
+#ifdef _TARGET_ARM_
+ || ((type == TYP_DOUBLE) &&
+ ((preferReg & (preferReg >> 1)) == 0)) // or two consecutive bits set for TYP_DOUBLE
+#endif
+ )
+ {
+ // Add one predefined spill choice register if no bits set.
+ // (The jit will introduce one spill temp)
+ preferReg |= RBM_SPILL_CHOICE_FLT;
+ rpPredictSpillCnt++;
+
+#ifdef DEBUG
+ if (verbose)
+ printf("Predict one spill temp (float)\n");
+#endif
+ }
+
+ assert(preferReg != 0);
+
+ /* Iterate the registers in the order specified by raRegFltTmpOrder */
+
+ for (unsigned index = 0; index < REG_FLT_TMP_ORDER_COUNT; index++)
+ {
+ regNum = raRegFltTmpOrder[index];
+ regBits = genRegMask(regNum);
+
+ if (varTypeIsFloating(type))
+ {
+#ifdef _TARGET_ARM_
+ if (type == TYP_DOUBLE)
+ {
+ if ((regBits & RBM_DBL_REGS) == 0)
+ {
+ continue; // We must restrict the set to the double registers
+ }
+ else
+ {
+ // TYP_DOUBLE use two consecutive registers
+ regBits |= genRegMask(REG_NEXT(regNum));
+ }
+ }
+#endif
+ // See if COMPlus_JitRegisterFP is restricting this FP register
+ //
+ if ((restrictMask & regBits) != regBits)
+ continue;
+ }
+
+ if ((preferReg & regBits) == regBits)
+ {
+ goto RET;
+ }
+ }
+ /* Otherwise we have allocated all registers, so do nothing */
+ break;
+
+#else // !FEATURE_FP_REGALLOC
+
+ return RBM_NONE;
+
+#endif
+
+ default:
+ noway_assert(!"unexpected type in reg use prediction");
+ }
+
+ /* Abnormal return */
+ noway_assert(!"Ran out of registers in rpPredictRegPick");
+ return RBM_NONE;
+
+RET:
+ /*
+ * If during the first prediction we need to allocate
+ * one of the registers that we used for coloring locals
+ * then flag this by setting rpPredictAssignAgain.
+ * We will have to go back and repredict the registers
+ */
+ if ((rpPasses == 0) && ((rpPredictAssignMask & regBits) == regBits))
+ rpPredictAssignAgain = true;
+
+ // Add a register interference to each of the last use variables
+ if (!VarSetOps::IsEmpty(this, rpLastUseVars) || !VarSetOps::IsEmpty(this, rpUseInPlace))
+ {
+ VARSET_TP VARSET_INIT_NOCOPY(lastUse, VarSetOps::MakeEmpty(this));
+ VarSetOps::Assign(this, lastUse, rpLastUseVars);
+ VARSET_TP VARSET_INIT_NOCOPY(inPlaceUse, VarSetOps::MakeEmpty(this));
+ VarSetOps::Assign(this, inPlaceUse, rpUseInPlace);
+ // While we still have any lastUse or inPlaceUse bits
+ VARSET_TP VARSET_INIT_NOCOPY(useUnion, VarSetOps::Union(this, lastUse, inPlaceUse));
+
+ VARSET_TP VARSET_INIT_NOCOPY(varAsSet, VarSetOps::MakeEmpty(this));
+ VARSET_ITER_INIT(this, iter, useUnion, varNum);
+ while (iter.NextElem(this, &varNum))
+ {
+ // We'll need this for one of the calls...
+ VarSetOps::ClearD(this, varAsSet);
+ VarSetOps::AddElemD(this, varAsSet, varNum);
+
+ // If this varBit and lastUse?
+ if (VarSetOps::IsMember(this, lastUse, varNum))
+ {
+ // Record a register to variable interference
+ rpRecordRegIntf(regBits, varAsSet DEBUGARG("last use RegPick"));
+ }
+
+ // If this varBit and inPlaceUse?
+ if (VarSetOps::IsMember(this, inPlaceUse, varNum))
+ {
+ // Record a register to variable interference
+ rpRecordRegIntf(regBits, varAsSet DEBUGARG("used in place RegPick"));
+ }
+ }
+ }
+ codeGen->regSet.rsSetRegsModified(regBits);
+
+ return regBits;
+}
+
+/*****************************************************************************
+ *
+ * Predict integer register use for generating an address mode for a tree,
+ * by setting tree->gtUsedRegs to all registers used by this tree and its
+ * children.
+ * tree - is the child of a GT_IND node
+ * type - the type of the GT_IND node (floating point/integer)
+ * lockedRegs - are the registers which are currently held by
+ * a previously evaluated node.
+ * rsvdRegs - registers which should not be allocated because they will
+ * be needed to evaluate a node in the future
+ * - Also if rsvdRegs has the RBM_LASTUSE bit set then
+ * the rpLastUseVars set should be saved and restored
+ * so that we don't add any new variables to rpLastUseVars
+ * lenCSE - is non-NULL only when we have a lenCSE expression
+ *
+ * Return the scratch registers to be held by this tree. (one or two registers
+ * to form an address expression)
+ */
+
+regMaskTP Compiler::rpPredictAddressMode(
+ GenTreePtr tree, var_types type, regMaskTP lockedRegs, regMaskTP rsvdRegs, GenTreePtr lenCSE)
+{
+ GenTreePtr op1;
+ GenTreePtr op2;
+ GenTreePtr opTemp;
+ genTreeOps oper = tree->OperGet();
+ regMaskTP op1Mask;
+ regMaskTP op2Mask;
+ regMaskTP regMask;
+ ssize_t sh;
+ ssize_t cns = 0;
+ bool rev;
+ bool hasTwoAddConst = false;
+ bool restoreLastUseVars = false;
+ VARSET_TP VARSET_INIT_NOCOPY(oldLastUseVars, VarSetOps::MakeEmpty(this));
+
+ /* Do we need to save and restore the rpLastUseVars set? */
+ if ((rsvdRegs & RBM_LASTUSE) && (lenCSE == NULL))
+ {
+ restoreLastUseVars = true;
+ VarSetOps::Assign(this, oldLastUseVars, rpLastUseVars);
+ }
+ rsvdRegs &= ~RBM_LASTUSE;
+
+ /* if not an add, then just force it to a register */
+
+ if (oper != GT_ADD)
+ {
+ if (oper == GT_ARR_ELEM)
+ {
+ regMask = rpPredictTreeRegUse(tree, PREDICT_NONE, lockedRegs, rsvdRegs);
+ goto DONE;
+ }
+ else
+ {
+ goto NO_ADDR_EXPR;
+ }
+ }
+
+ op1 = tree->gtOp.gtOp1;
+ op2 = tree->gtOp.gtOp2;
+ rev = ((tree->gtFlags & GTF_REVERSE_OPS) != 0);
+
+ /* look for (x + y) + icon address mode */
+
+ if (op2->OperGet() == GT_CNS_INT)
+ {
+ cns = op2->gtIntCon.gtIconVal;
+
+ /* if not an add, then just force op1 into a register */
+ if (op1->OperGet() != GT_ADD)
+ goto ONE_ADDR_EXPR;
+
+ hasTwoAddConst = true;
+
+ /* Record the 'rev' flag, reverse evaluation order */
+ rev = ((op1->gtFlags & GTF_REVERSE_OPS) != 0);
+
+ op2 = op1->gtOp.gtOp2;
+ op1 = op1->gtOp.gtOp1; // Overwrite op1 last!!
+ }
+
+ /* Check for CNS_INT or LSH of CNS_INT in op2 slot */
+
+ sh = 0;
+ if (op2->OperGet() == GT_LSH)
+ {
+ if (op2->gtOp.gtOp2->OperGet() == GT_CNS_INT)
+ {
+ sh = op2->gtOp.gtOp2->gtIntCon.gtIconVal;
+ opTemp = op2->gtOp.gtOp1;
+ }
+ else
+ {
+ opTemp = NULL;
+ }
+ }
+ else
+ {
+ opTemp = op2;
+ }
+
+ if (opTemp != NULL)
+ {
+ if (opTemp->OperGet() == GT_NOP)
+ {
+ opTemp = opTemp->gtOp.gtOp1;
+ }
+
+ // Is this a const operand?
+ if (opTemp->OperGet() == GT_CNS_INT)
+ {
+ // Compute the new cns value that Codegen will end up using
+ cns += (opTemp->gtIntCon.gtIconVal << sh);
+
+ goto ONE_ADDR_EXPR;
+ }
+ }
+
+ /* Check for LSH in op1 slot */
+
+ if (op1->OperGet() != GT_LSH)
+ goto TWO_ADDR_EXPR;
+
+ opTemp = op1->gtOp.gtOp2;
+
+ if (opTemp->OperGet() != GT_CNS_INT)
+ goto TWO_ADDR_EXPR;
+
+ sh = opTemp->gtIntCon.gtIconVal;
+
+ /* Check for LSH of 0, special case */
+ if (sh == 0)
+ goto TWO_ADDR_EXPR;
+
+#if defined(_TARGET_XARCH_)
+
+ /* Check for LSH of 1 2 or 3 */
+ if (sh > 3)
+ goto TWO_ADDR_EXPR;
+
+#elif defined(_TARGET_ARM_)
+
+ /* Check for LSH of 1 to 30 */
+ if (sh > 30)
+ goto TWO_ADDR_EXPR;
+
+#else
+
+ goto TWO_ADDR_EXPR;
+
+#endif
+
+ /* Matched a leftShift by 'sh' subtree, move op1 down */
+ op1 = op1->gtOp.gtOp1;
+
+TWO_ADDR_EXPR:
+
+ /* Now we have to evaluate op1 and op2 into registers */
+
+ /* Evaluate op1 and op2 in the correct order */
+ if (rev)
+ {
+ op2Mask = rpPredictTreeRegUse(op2, PREDICT_REG, lockedRegs, rsvdRegs | op1->gtRsvdRegs);
+ op1Mask = rpPredictTreeRegUse(op1, PREDICT_REG, lockedRegs | op2Mask, rsvdRegs);
+ }
+ else
+ {
+ op1Mask = rpPredictTreeRegUse(op1, PREDICT_REG, lockedRegs, rsvdRegs | op2->gtRsvdRegs);
+ op2Mask = rpPredictTreeRegUse(op2, PREDICT_REG, lockedRegs | op1Mask, rsvdRegs);
+ }
+
+ /* If op1 and op2 must be spilled and reloaded then
+ * op1 and op2 might be reloaded into the same register
+ * This can only happen when all the registers are lockedRegs
+ */
+ if ((op1Mask == op2Mask) && (op1Mask != 0))
+ {
+ /* We'll need to grab a different register for op2 */
+ op2Mask = rpPredictRegPick(TYP_INT, PREDICT_REG, op1Mask);
+ }
+
+#ifdef _TARGET_ARM_
+ // On the ARM we need a scratch register to evaluate the shifted operand for trees that have this form
+ // [op2 + op1<<sh + cns]
+ // when op1 is an enregistered variable, thus the op1Mask is RBM_NONE
+ //
+ if (hasTwoAddConst && (sh != 0) && (op1Mask == RBM_NONE))
+ {
+ op1Mask |= rpPredictRegPick(TYP_INT, PREDICT_REG, (lockedRegs | op1Mask | op2Mask));
+ }
+
+ //
+ // On the ARM we will need at least one scratch register for trees that have this form:
+ // [op1 + op2 + cns] or [op1 + op2<<sh + cns]
+ // or for a float/double or long when we have both op1 and op2
+ // or when we have a 'cns' that is too large for the ld/st instruction
+ //
+ if (hasTwoAddConst || varTypeIsFloating(type) || (type == TYP_LONG) || !codeGen->validDispForLdSt(cns, type))
+ {
+ op2Mask |= rpPredictRegPick(TYP_INT, PREDICT_REG, (lockedRegs | op1Mask | op2Mask));
+ }
+
+ //
+ // If we create a CSE that immediately dies then we may need to add an additional register interference
+ // so we don't color the CSE into R3
+ //
+ if (!rev && (op1Mask != RBM_NONE) && (op2->OperGet() == GT_COMMA))
+ {
+ opTemp = op2->gtOp.gtOp2;
+ if (opTemp->OperGet() == GT_LCL_VAR)
+ {
+ unsigned varNum = opTemp->gtLclVar.gtLclNum;
+ LclVarDsc* varDsc = &lvaTable[varNum];
+
+ if (varDsc->lvTracked && !VarSetOps::IsMember(this, compCurLife, varDsc->lvVarIndex))
+ {
+ rpRecordRegIntf(RBM_TMP_0,
+ VarSetOps::MakeSingleton(this, varDsc->lvVarIndex) DEBUGARG("dead CSE (gt_ind)"));
+ }
+ }
+ }
+#endif
+
+ regMask = (op1Mask | op2Mask);
+ tree->gtUsedRegs = (regMaskSmall)regMask;
+ goto DONE;
+
+ONE_ADDR_EXPR:
+
+ /* now we have to evaluate op1 into a register */
+
+ op1Mask = rpPredictTreeRegUse(op1, PREDICT_REG, lockedRegs, rsvdRegs);
+ op2Mask = RBM_NONE;
+
+#ifdef _TARGET_ARM_
+ //
+ // On the ARM we will need another scratch register when we have a 'cns' that is too large for the ld/st
+ // instruction
+ //
+ if (!codeGen->validDispForLdSt(cns, type))
+ {
+ op2Mask |= rpPredictRegPick(TYP_INT, PREDICT_REG, (lockedRegs | op1Mask | op2Mask));
+ }
+#endif
+
+ regMask = (op1Mask | op2Mask);
+ tree->gtUsedRegs = (regMaskSmall)regMask;
+ goto DONE;
+
+NO_ADDR_EXPR:
+
+#if !CPU_LOAD_STORE_ARCH
+ if (oper == GT_CNS_INT)
+ {
+ /* Indirect of a constant does not require a register */
+ regMask = RBM_NONE;
+ }
+ else
+#endif
+ {
+ /* now we have to evaluate tree into a register */
+ regMask = rpPredictTreeRegUse(tree, PREDICT_REG, lockedRegs, rsvdRegs);
+ }
+
+DONE:
+ regMaskTP regUse = tree->gtUsedRegs;
+
+ if (!VarSetOps::IsEmpty(this, compCurLife))
+ {
+ // Add interference between the current set of live variables and
+ // the set of temporary registers needed to evaluate the subtree
+ if (regUse)
+ {
+ rpRecordRegIntf(regUse, compCurLife DEBUGARG("tmp use (gt_ind)"));
+ }
+ }
+
+ /* Do we need to restore the oldLastUseVars value? */
+ if (restoreLastUseVars)
+ {
+ /*
+ * If we used a GT_ASG targeted register then we need to add
+ * a variable interference between any new last use variables
+ * and the GT_ASG targeted register
+ */
+ if (!VarSetOps::Equal(this, rpLastUseVars, oldLastUseVars) && rpAsgVarNum != -1)
+ {
+ rpRecordVarIntf(rpAsgVarNum,
+ VarSetOps::Diff(this, rpLastUseVars, oldLastUseVars) DEBUGARG("asgn conflict (gt_ind)"));
+ }
+ VarSetOps::Assign(this, rpLastUseVars, oldLastUseVars);
+ }
+
+ return regMask;
+}
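+
+// The address forms matched above (illustrative summary): [op1 + cns],
+// [op1 + op2 + cns], and [op1<<sh + op2 + cns], where the shift 'sh' is
+// limited per target (1..3 on XARCH, 1..30 on ARM); anything else falls back
+// to evaluating the whole tree into a register.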
+
+/*****************************************************************************
+ *
+ * Record that a GC ref is assigned to the local variable 'lclNum', and
+ * (under NOGC_WRITE_BARRIERS) make sure such variables never land in the
+ * write barrier register.
+ */
+
+void Compiler::rpPredictRefAssign(unsigned lclNum)
+{
+ LclVarDsc* varDsc = lvaTable + lclNum;
+
+ varDsc->lvRefAssign = 1;
+
+#if NOGC_WRITE_BARRIERS
+#ifdef DEBUG
+ if (verbose)
+ {
+ if (!VarSetOps::IsMember(this, raLclRegIntf[REG_EDX], varDsc->lvVarIndex))
+ printf("Record interference between V%02u,T%02u and REG WRITE BARRIER -- ref assign\n", lclNum,
+ varDsc->lvVarIndex);
+ }
+#endif
+
+ /* Make sure that write barrier pointer variables never land in EDX */
+ VarSetOps::AddElemD(this, raLclRegIntf[REG_EDX], varDsc->lvVarIndex);
+#endif // NOGC_WRITE_BARRIERS
+}
+
+/*****************************************************************************
+ *
+ * Predict the internal temp physical register usage for a block assignment tree,
+ * by setting tree->gtUsedRegs.
+ * Records the internal temp physical register usage for this tree.
+ * Returns a mask of interfering registers for this tree.
+ *
+ * Each of the switch labels in this function updates regMask and assigns tree->gtUsedRegs
+ * to the set of scratch registers needed when evaluating the tree.
+ * Generally tree->gtUsedRegs and the return value retMask are the same, except when the
+ * parameter "lockedRegs" conflicts with the computed tree->gtUsedRegs, in which case we
+ * predict additional internal temp physical registers to spill into.
+ *
+ * tree - is the block assignment tree whose register usage is being predicted
+ * predictReg - what type of register does the tree need
+ * lockedRegs - are the registers which are currently held by a previously evaluated node.
+ * Don't modify lockedRegs as it is used at the end to compute a spill mask.
+ * rsvdRegs - registers which should not be allocated because they will
+ * be needed to evaluate a node in the future
+ * - Also, if rsvdRegs has the RBM_LASTUSE bit set then
+ * the rpLastUseVars set should be saved and restored
+ * so that we don't add any new variables to rpLastUseVars.
+ */
+regMaskTP Compiler::rpPredictBlkAsgRegUse(GenTreePtr tree,
+ rpPredictReg predictReg,
+ regMaskTP lockedRegs,
+ regMaskTP rsvdRegs)
+{
+ regMaskTP regMask = RBM_NONE;
+ regMaskTP interferingRegs = RBM_NONE;
+
+ bool hasGCpointer = false;
+ bool dstIsOnStack = false;
+ bool useMemHelper = false;
+ bool useBarriers = false;
+ GenTreeBlk* dst = tree->gtGetOp1()->AsBlk();
+ GenTreePtr dstAddr = dst->Addr();
+ GenTreePtr srcAddrOrFill = tree->gtGetOp2();
+
+ size_t blkSize = dst->gtBlkSize;
+
+ hasGCpointer = (dst->HasGCPtr());
+
+ bool isCopyBlk = tree->OperIsCopyBlkOp();
+ bool isCopyObj = isCopyBlk && hasGCpointer;
+ bool isInitBlk = tree->OperIsInitBlkOp();
+
+ if (isCopyBlk)
+ {
+ assert(srcAddrOrFill->OperIsIndir());
+ srcAddrOrFill = srcAddrOrFill->AsIndir()->Addr();
+ }
+ else
+ {
+ // For initBlk, we don't need to worry about the GC pointers.
+ hasGCpointer = false;
+ }
+
+ if (blkSize != 0)
+ {
+ if (isCopyObj)
+ {
+ dstIsOnStack = (dstAddr->gtOper == GT_ADDR && (dstAddr->gtFlags & GTF_ADDR_ONSTACK));
+ }
+
+ if (isInitBlk)
+ {
+ if (srcAddrOrFill->OperGet() != GT_CNS_INT)
+ {
+ useMemHelper = true;
+ }
+ }
+ }
+ else
+ {
+ useMemHelper = true;
+ }
+
+ if (hasGCpointer && !dstIsOnStack)
+ {
+ useBarriers = true;
+ }
+
+#ifdef _TARGET_ARM_
+ //
+ // On ARM For COPYBLK & INITBLK we have special treatment for constant lengths.
+ //
+ if (!useMemHelper && !useBarriers)
+ {
+ bool useLoop = false;
+ unsigned fullStoreCount = blkSize / TARGET_POINTER_SIZE;
+
+ // A mask to use to force the predictor to choose low registers (to reduce code size)
+ regMaskTP avoidReg = (RBM_R12 | RBM_LR);
+
+ // Allow the src and dst to be used in place, unless we use a loop, in which
+ // case we will need scratch registers as we will be writing to them.
+ rpPredictReg srcAndDstPredict = PREDICT_REG;
+
+ // Will we be using a loop to implement this INITBLK/COPYBLK?
+ if ((isCopyBlk && (fullStoreCount >= 8)) || (isInitBlk && (fullStoreCount >= 16)))
+ {
+ useLoop = true;
+ avoidReg = RBM_NONE;
+ srcAndDstPredict = PREDICT_SCRATCH_REG;
+ }
+
+ if (tree->gtFlags & GTF_REVERSE_OPS)
+ {
+ regMask |= rpPredictTreeRegUse(srcAddrOrFill, srcAndDstPredict, lockedRegs,
+ dstAddr->gtRsvdRegs | avoidReg | RBM_LASTUSE);
+ regMask |= rpPredictTreeRegUse(dstAddr, srcAndDstPredict, lockedRegs | regMask, avoidReg);
+ }
+ else
+ {
+ regMask |= rpPredictTreeRegUse(dstAddr, srcAndDstPredict, lockedRegs,
+ srcAddrOrFill->gtRsvdRegs | avoidReg | RBM_LASTUSE);
+ regMask |= rpPredictTreeRegUse(srcAddrOrFill, srcAndDstPredict, lockedRegs | regMask, avoidReg);
+ }
+
+ // We need at least one scratch register for a copyBlk
+ if (isCopyBlk)
+ {
+ // Pick a low register to reduce the code size
+ regMask |= rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, lockedRegs | regMask | avoidReg);
+ }
+
+ if (useLoop)
+ {
+ if (isCopyBlk)
+ {
+ // We need a second temp register for a copyBlk (our code gen is load two/store two)
+ // Pick another low register to reduce the code size
+ regMask |= rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, lockedRegs | regMask | avoidReg);
+ }
+
+ // We need a loop index register
+ regMask |= rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, lockedRegs | regMask);
+ }
+
+ tree->gtUsedRegs = dstAddr->gtUsedRegs | srcAddrOrFill->gtUsedRegs | (regMaskSmall)regMask;
+
+ return interferingRegs;
+ }
+#endif
+ // What order should the Dest, Val/Src, and Size be calculated
+ GenTreePtr opsPtr[3];
+ regMaskTP regsPtr[3];
+
+#if defined(_TARGET_XARCH_)
+ fgOrderBlockOps(tree, RBM_EDI, (isInitBlk) ? RBM_EAX : RBM_ESI, RBM_ECX, opsPtr, regsPtr);
+
+ // We're going to use these, might as well make them available now
+
+ codeGen->regSet.rsSetRegsModified(RBM_EDI | RBM_ECX);
+ if (isCopyBlk)
+ codeGen->regSet.rsSetRegsModified(RBM_ESI);
+
+#elif defined(_TARGET_ARM_)
+
+ if (useMemHelper)
+ {
+ // For all other cases that involve non-constants, we just call memcpy/memset
+ // JIT helpers
+ fgOrderBlockOps(tree, RBM_ARG_0, RBM_ARG_1, RBM_ARG_2, opsPtr, regsPtr);
+ interferingRegs |= RBM_CALLEE_TRASH;
+#ifdef DEBUG
+ if (verbose)
+ printf("Adding interference with RBM_CALLEE_TRASH for memcpy/memset\n");
+#endif
+ }
+ else // useBarriers
+ {
+ assert(useBarriers);
+ assert(isCopyBlk);
+
+ fgOrderBlockOps(tree, RBM_ARG_0, RBM_ARG_1, REG_TMP_1, opsPtr, regsPtr);
+
+ // For this case Codegen will call the CORINFO_HELP_ASSIGN_BYREF helper
+ interferingRegs |= RBM_CALLEE_TRASH_NOGC;
+#ifdef DEBUG
+ if (verbose)
+ printf("Adding interference with RBM_CALLEE_TRASH_NOGC for Byref WriteBarrier\n");
+#endif
+ }
+#else // !_TARGET_X86_ && !_TARGET_ARM_
+#error "Non-ARM or x86 _TARGET_ in RegPredict for INITBLK/COPYBLK"
+#endif // !_TARGET_X86_ && !_TARGET_ARM_
+ regMaskTP opsPtr2RsvdRegs = opsPtr[2] == nullptr ? RBM_NONE : opsPtr[2]->gtRsvdRegs;
+ regMask |= rpPredictTreeRegUse(opsPtr[0], rpGetPredictForMask(regsPtr[0]), lockedRegs,
+ opsPtr[1]->gtRsvdRegs | opsPtr2RsvdRegs | RBM_LASTUSE);
+ regMask |= regsPtr[0];
+ opsPtr[0]->gtUsedRegs |= regsPtr[0];
+ rpRecordRegIntf(regsPtr[0], compCurLife DEBUGARG("movsd dest"));
+
+ regMask |= rpPredictTreeRegUse(opsPtr[1], rpGetPredictForMask(regsPtr[1]), lockedRegs | regMask,
+ opsPtr2RsvdRegs | RBM_LASTUSE);
+ regMask |= regsPtr[1];
+ opsPtr[1]->gtUsedRegs |= regsPtr[1];
+ rpRecordRegIntf(regsPtr[1], compCurLife DEBUGARG("movsd src"));
+
+ regMaskSmall opsPtr2UsedRegs = (regMaskSmall)regsPtr[2];
+ if (opsPtr[2] == nullptr)
+ {
+ // If we have no "size" node, we will predict that regsPtr[2] will be used for the size.
+ // Note that it is quite possible that no register is required, but this preserves
+ // former behavior.
+ regMask |= rpPredictRegPick(TYP_INT, rpGetPredictForMask(regsPtr[2]), lockedRegs | regMask);
+ rpRecordRegIntf(regsPtr[2], compCurLife DEBUGARG("tmp use"));
+ }
+ else
+ {
+ regMask |= rpPredictTreeRegUse(opsPtr[2], rpGetPredictForMask(regsPtr[2]), lockedRegs | regMask, RBM_NONE);
+ opsPtr[2]->gtUsedRegs |= opsPtr2UsedRegs;
+ }
+ regMask |= opsPtr2UsedRegs;
+
+ tree->gtUsedRegs = opsPtr[0]->gtUsedRegs | opsPtr[1]->gtUsedRegs | opsPtr2UsedRegs | (regMaskSmall)regMask;
+ return interferingRegs;
+}
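+
+// Illustrative x86/x64 note: the fixed registers requested above (EDI for the
+// destination, ESI or EAX for the source/fill value, ECX for the size) line
+// up with the operand conventions of the rep movs / rep stos instruction
+// forms, which is why they are pre-marked as modified.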
+
+/*****************************************************************************
+ *
+ * Predict the internal temp physical register usage for a tree by setting tree->gtUsedRegs.
+ * Returns a regMask with the internal temp physical register usage for this tree.
+ *
+ * Each of the switch labels in this function updates regMask and assigns tree->gtUsedRegs
+ * to the set of scratch registers needed when evaluating the tree.
+ * Generally tree->gtUsedRegs and the return value retMask are the same, except when the
+ * parameter "lockedRegs" conflicts with the computed tree->gtUsedRegs, in which case we
+ * predict additional internal temp physical registers to spill into.
+ *
+ * tree - is the tree whose register usage is being predicted
+ * predictReg - what type of register does the tree need
+ * lockedRegs - are the registers which are currently held by a previously evaluated node.
+ * Don't modify lockedRegs as it is used at the end to compute a spill mask.
+ * rsvdRegs - registers which should not be allocated because they will
+ * be needed to evaluate a node in the future
+ * - Also, if rsvdRegs has the RBM_LASTUSE bit set then
+ * the rpLastUseVars set should be saved and restored
+ * so that we don't add any new variables to rpLastUseVars.
+ */
+
+#pragma warning(disable : 4701)
+
+#ifdef _PREFAST_
+#pragma warning(push)
+#pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
+#endif
+regMaskTP Compiler::rpPredictTreeRegUse(GenTreePtr tree,
+ rpPredictReg predictReg,
+ regMaskTP lockedRegs,
+ regMaskTP rsvdRegs)
+{
+ regMaskTP regMask = DUMMY_INIT(RBM_ILLEGAL);
+ regMaskTP op2Mask;
+ regMaskTP tmpMask;
+ rpPredictReg op1PredictReg;
+ rpPredictReg op2PredictReg;
+ LclVarDsc* varDsc = NULL;
+ VARSET_TP VARSET_INIT_NOCOPY(oldLastUseVars, VarSetOps::UninitVal());
+
+ VARSET_TP VARSET_INIT_NOCOPY(varBits, VarSetOps::UninitVal());
+ VARSET_TP VARSET_INIT_NOCOPY(lastUseVarBits, VarSetOps::MakeEmpty(this));
+
+ bool restoreLastUseVars = false;
+ regMaskTP interferingRegs = RBM_NONE;
+
+#ifdef DEBUG
+ // if (verbose) printf("rpPredictTreeRegUse() [%08x]\n", tree);
+ noway_assert(tree);
+ noway_assert(((RBM_ILLEGAL & RBM_ALLINT) == 0));
+ noway_assert(RBM_ILLEGAL);
+ noway_assert((lockedRegs & RBM_ILLEGAL) == 0);
+ /* impossible values, to make sure that we set them */
+ tree->gtUsedRegs = RBM_ILLEGAL;
+#endif
+
+ /* Figure out what kind of a node we have */
+
+ genTreeOps oper = tree->OperGet();
+ var_types type = tree->TypeGet();
+ unsigned kind = tree->OperKind();
+
+ // In the comma case, we care about whether this is "effectively" ADDR(IND(...))
+ genTreeOps effectiveOper = tree->gtEffectiveVal()->OperGet();
+ if ((predictReg == PREDICT_ADDR) && (effectiveOper != GT_IND))
+ predictReg = PREDICT_NONE;
+ else if (rpHasVarIndexForPredict(predictReg))
+ {
+ // The only place where predictReg is set to a var is in the PURE
+ // assignment case where varIndex is the var being assigned to.
+ // We need to check whether the variable is used between here and
+ // its redefinition.
+ unsigned varIndex = rpGetVarIndexForPredict(predictReg);
+ unsigned lclNum = lvaTrackedToVarNum[varIndex];
+ bool found = false;
+ for (GenTreePtr nextTree = tree->gtNext; nextTree != NULL && !found; nextTree = nextTree->gtNext)
+ {
+ if (nextTree->gtOper == GT_LCL_VAR && nextTree->gtLclVarCommon.gtLclNum == lclNum)
+ {
+ // Is this the pure assignment?
+ if ((nextTree->gtFlags & GTF_VAR_DEF) == 0)
+ {
+ predictReg = PREDICT_SCRATCH_REG;
+ }
+ found = true;
+ break;
+ }
+ }
+ assert(found);
+ }
+
+ if (rsvdRegs & RBM_LASTUSE)
+ {
+ restoreLastUseVars = true;
+ VarSetOps::Assign(this, oldLastUseVars, rpLastUseVars);
+ rsvdRegs &= ~RBM_LASTUSE;
+ }
+
+ /* Is this a constant or leaf node? */
+
+ if (kind & (GTK_CONST | GTK_LEAF))
+ {
+ bool lastUse = false;
+ regMaskTP enregMask = RBM_NONE;
+
+ switch (oper)
+ {
+#ifdef _TARGET_ARM_
+ case GT_CNS_DBL:
+ // Codegen for floating point constants on the ARM is currently
+ // movw/movt rT1, <lo32 bits>
+ // movw/movt rT2, <hi32 bits>
+ // vmov.i2d dT0, rT1,rT2
+ //
+ // For TYP_FLOAT one integer register is required
+ //
+ // These integer register(s) immediately die
+ tmpMask = rpPredictRegPick(TYP_INT, PREDICT_REG, lockedRegs | rsvdRegs);
+ if (type == TYP_DOUBLE)
+ {
+ // For TYP_DOUBLE a second integer register is required
+ //
+ tmpMask |= rpPredictRegPick(TYP_INT, PREDICT_REG, lockedRegs | rsvdRegs | tmpMask);
+ }
+
+ // We also need a floating point register that we keep
+ //
+ if (predictReg == PREDICT_NONE)
+ predictReg = PREDICT_SCRATCH_REG;
+
+ regMask = rpPredictRegPick(type, predictReg, lockedRegs | rsvdRegs);
+ tree->gtUsedRegs = regMask | tmpMask;
+ goto RETURN_CHECK;
+#endif
+
+ case GT_CNS_INT:
+ case GT_CNS_LNG:
+
+ if (rpHasVarIndexForPredict(predictReg))
+ {
+ unsigned tgtIndex = rpGetVarIndexForPredict(predictReg);
+ rpAsgVarNum = tgtIndex;
+
+ // We don't need any register as we plan on writing to the rpAsgVarNum register
+ predictReg = PREDICT_NONE;
+
+ LclVarDsc* tgtVar = lvaTable + lvaTrackedToVarNum[tgtIndex];
+ tgtVar->lvDependReg = true;
+
+ if (type == TYP_LONG)
+ {
+ assert(oper == GT_CNS_LNG);
+
+ if (tgtVar->lvOtherReg == REG_STK)
+ {
+ // We do need one register for a partially enregistered long
+ type = TYP_INT;
+ predictReg = PREDICT_SCRATCH_REG;
+ }
+ }
+ }
+ else
+ {
+#if !CPU_LOAD_STORE_ARCH
+ /* If the constant is a handle then it will need to have a relocation
+ applied to it. It will need to be loaded into a register.
+ But never throw away an existing hint.
+ */
+ if (opts.compReloc && tree->IsCnsIntOrI() && tree->IsIconHandle())
+#endif
+ {
+ if (predictReg == PREDICT_NONE)
+ predictReg = PREDICT_SCRATCH_REG;
+ }
+ }
+ break;
+
+ case GT_NO_OP:
+ break;
+
+ case GT_CLS_VAR:
+ if ((predictReg == PREDICT_NONE) && (genActualType(type) == TYP_INT) &&
+ (genTypeSize(type) < sizeof(int)))
+ {
+ predictReg = PREDICT_SCRATCH_REG;
+ }
+#ifdef _TARGET_ARM_
+ // Unaligned loads/stores for floating point values must first be loaded into integer register(s)
+ //
+ if ((tree->gtFlags & GTF_IND_UNALIGNED) && varTypeIsFloating(type))
+ {
+ // These integer register(s) immediately die
+ tmpMask = rpPredictRegPick(TYP_INT, PREDICT_REG, lockedRegs | rsvdRegs);
+ // Two integer registers are required for a TYP_DOUBLE
+ if (type == TYP_DOUBLE)
+ tmpMask |= rpPredictRegPick(TYP_INT, PREDICT_REG, lockedRegs | rsvdRegs | tmpMask);
+ }
+ // We need a temp register in some cases of loads/stores to a class var
+ if (predictReg == PREDICT_NONE)
+ {
+ predictReg = PREDICT_SCRATCH_REG;
+ }
+#endif
+ if (rpHasVarIndexForPredict(predictReg))
+ {
+ unsigned tgtIndex = rpGetVarIndexForPredict(predictReg);
+ rpAsgVarNum = tgtIndex;
+
+ // We don't need any register as we plan on writing to the rpAsgVarNum register
+ predictReg = PREDICT_NONE;
+
+ LclVarDsc* tgtVar = lvaTable + lvaTrackedToVarNum[tgtIndex];
+ tgtVar->lvDependReg = true;
+
+ if (type == TYP_LONG)
+ {
+ if (tgtVar->lvOtherReg == REG_STK)
+ {
+ // We do need one register for a partially enregistered long
+ type = TYP_INT;
+ predictReg = PREDICT_SCRATCH_REG;
+ }
+ }
+ }
+ break;
+
+ case GT_LCL_FLD:
+#ifdef _TARGET_ARM_
+ // Check for a misalignment on a Floating Point field
+ //
+ if (varTypeIsFloating(type))
+ {
+ if ((tree->gtLclFld.gtLclOffs % emitTypeSize(tree->TypeGet())) != 0)
+ {
+ // These integer register(s) immediately die
+ tmpMask = rpPredictRegPick(TYP_INT, PREDICT_REG, lockedRegs | rsvdRegs);
+ // Two integer registers are required for a TYP_DOUBLE
+ if (type == TYP_DOUBLE)
+ tmpMask |= rpPredictRegPick(TYP_INT, PREDICT_REG, lockedRegs | rsvdRegs | tmpMask);
+ }
+ }
+#endif
+ __fallthrough;
+
+ case GT_LCL_VAR:
+ case GT_REG_VAR:
+
+ varDsc = lvaTable + tree->gtLclVarCommon.gtLclNum;
+
+ VarSetOps::Assign(this, varBits, fgGetVarBits(tree));
+ compUpdateLifeVar</*ForCodeGen*/ false>(tree, &lastUseVarBits);
+ lastUse = !VarSetOps::IsEmpty(this, lastUseVarBits);
+
+#if FEATURE_STACK_FP_X87
+ // If it's a floating point var, there's nothing to do
+ if (varTypeIsFloating(type))
+ {
+ tree->gtUsedRegs = RBM_NONE;
+ regMask = RBM_NONE;
+ goto RETURN_CHECK;
+ }
+#endif
+
+ // If the variable is already a register variable, no need to go further.
+ if (oper == GT_REG_VAR)
+ break;
+
+ /* Apply the type of predictReg to the LCL_VAR */
+
+ if (predictReg == PREDICT_REG)
+ {
+ PREDICT_REG_COMMON:
+ if (varDsc->lvRegNum == REG_STK)
+ break;
+
+ goto GRAB_COUNT;
+ }
+ else if (predictReg == PREDICT_SCRATCH_REG)
+ {
+ noway_assert(predictReg == PREDICT_SCRATCH_REG);
+
+ /* Is this the last use of a local var? */
+ if (lastUse)
+ {
+ if (VarSetOps::IsEmptyIntersection(this, rpUseInPlace, lastUseVarBits))
+ goto PREDICT_REG_COMMON;
+ }
+ }
+ else if (rpHasVarIndexForPredict(predictReg))
+ {
+ /* Get the tracked local variable that has an lvVarIndex of tgtIndex1 */
+ {
+ unsigned tgtIndex1 = rpGetVarIndexForPredict(predictReg);
+ LclVarDsc* tgtVar = lvaTable + lvaTrackedToVarNum[tgtIndex1];
+ VarSetOps::MakeSingleton(this, tgtIndex1);
+
+ noway_assert(tgtVar->lvVarIndex == tgtIndex1);
+ noway_assert(tgtVar->lvRegNum != REG_STK); /* Must have been enregistered */
+#ifndef _TARGET_AMD64_
+ // On amd64 we have the occasional spec-allowed implicit conversion from TYP_I_IMPL to TYP_INT
+ // so this assert is meaningless
+ noway_assert((type != TYP_LONG) || (tgtVar->TypeGet() == TYP_LONG));
+#endif // !_TARGET_AMD64_
+
+ if (varDsc->lvTracked)
+ {
+ unsigned srcIndex;
+ srcIndex = varDsc->lvVarIndex;
+
+                            // If this variable has its last use here then we will prefer
+                            // to color it to the same register as tgtVar.
+ if (lastUse)
+ {
+ /*
+ * Add an entry in the lvaVarPref graph to indicate
+ * that it would be worthwhile to color these two variables
+ * into the same physical register.
+ * This will help us avoid having an extra copy instruction
+ */
+ VarSetOps::AddElemD(this, lvaVarPref[srcIndex], tgtIndex1);
+ VarSetOps::AddElemD(this, lvaVarPref[tgtIndex1], srcIndex);
+ }
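+                            // For example (registers are illustrative): for "tgt = src" where
+                            // this is src's last use, the preference edges added above let the
+                            // allocator color src and tgt to the same register, making the
+                            // copy a no-op.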
+
+ // Add a variable interference from srcIndex to each of the last use variables
+ if (!VarSetOps::IsEmpty(this, rpLastUseVars))
+ {
+ rpRecordVarIntf(srcIndex, rpLastUseVars DEBUGARG("src reg conflict"));
+ }
+ }
+ rpAsgVarNum = tgtIndex1;
+
+ /* We will rely on the target enregistered variable from the GT_ASG */
+ varDsc = tgtVar;
+ }
+ GRAB_COUNT:
+ unsigned grabCount;
+ grabCount = 0;
+
+ if (genIsValidFloatReg(varDsc->lvRegNum))
+ {
+ enregMask = genRegMaskFloat(varDsc->lvRegNum, varDsc->TypeGet());
+ }
+ else
+ {
+ enregMask = genRegMask(varDsc->lvRegNum);
+ }
+
+#ifdef _TARGET_ARM_
+ if ((type == TYP_DOUBLE) && (varDsc->TypeGet() == TYP_FLOAT))
+ {
+ // We need to compute the intermediate value using a TYP_DOUBLE
+                        // but we are storing the result in a TYP_FLOAT enregistered variable
+ //
+ grabCount++;
+ }
+ else
+#endif
+ {
+ /* We can't trust a prediction of rsvdRegs or lockedRegs sets */
+ if (enregMask & (rsvdRegs | lockedRegs))
+ {
+ grabCount++;
+ }
+#ifndef _TARGET_64BIT_
+ if (type == TYP_LONG)
+ {
+ if (varDsc->lvOtherReg != REG_STK)
+ {
+ tmpMask = genRegMask(varDsc->lvOtherReg);
+ enregMask |= tmpMask;
+
+ /* We can't trust a prediction of rsvdRegs or lockedRegs sets */
+ if (tmpMask & (rsvdRegs | lockedRegs))
+ grabCount++;
+ }
+ else // lvOtherReg == REG_STK
+ {
+ grabCount++;
+ }
+ }
+#endif // _TARGET_64BIT_
+ }
+
+ varDsc->lvDependReg = true;
+
+ if (grabCount == 0)
+ {
+ /* Does not need a register */
+ predictReg = PREDICT_NONE;
+ // noway_assert(!VarSetOps::IsEmpty(this, varBits));
+ VarSetOps::UnionD(this, rpUseInPlace, varBits);
+ }
+ else // (grabCount > 0)
+ {
+#ifndef _TARGET_64BIT_
+                        /* For TYP_LONG, if we only need one register, change the type to TYP_INT */
+ if ((type == TYP_LONG) && (grabCount == 1))
+ {
+ /* We will need to pick one register */
+ type = TYP_INT;
+ // noway_assert(!VarSetOps::IsEmpty(this, varBits));
+ VarSetOps::UnionD(this, rpUseInPlace, varBits);
+ }
+ noway_assert((type == TYP_DOUBLE) ||
+ (grabCount == (genTypeSize(genActualType(type)) / REGSIZE_BYTES)));
+#else // !_TARGET_64BIT_
+ noway_assert(grabCount == 1);
+#endif // !_TARGET_64BIT_
+ }
+ }
+ else if (type == TYP_STRUCT)
+ {
+#ifdef _TARGET_ARM_
+ // TODO-ARM-Bug?: Passing structs in registers on ARM hits an assert here when
+ // predictReg is PREDICT_REG_R0 to PREDICT_REG_R3
+ // As a workaround we just bash it to PREDICT_NONE here
+ //
+ if (predictReg != PREDICT_NONE)
+ predictReg = PREDICT_NONE;
+#endif
+ // Currently predictReg is saying that we will not need any scratch registers
+ noway_assert(predictReg == PREDICT_NONE);
+
+ /* We may need to sign or zero extend a small type when pushing a struct */
+ if (varDsc->lvPromoted && !varDsc->lvAddrExposed)
+ {
+ for (unsigned varNum = varDsc->lvFieldLclStart;
+ varNum < varDsc->lvFieldLclStart + varDsc->lvFieldCnt; varNum++)
+ {
+ LclVarDsc* fldVar = lvaTable + varNum;
+
+ if (fldVar->lvStackAligned())
+ {
+                                // When we are stack aligned, Codegen will just use
+                                // a push instruction and thus doesn't need any register,
+                                // since we can push either a register or a stack frame location
+ continue;
+ }
+
+ if (varTypeIsByte(fldVar->TypeGet()))
+ {
+ // We will need to reserve one byteable register,
+ //
+ type = TYP_BYTE;
+ predictReg = PREDICT_SCRATCH_REG;
+#if CPU_HAS_BYTE_REGS
+ // It is best to enregister this fldVar in a byteable register
+ //
+ fldVar->addPrefReg(RBM_BYTE_REG_FLAG, this);
+#endif
+ }
+ else if (varTypeIsShort(fldVar->TypeGet()))
+ {
+ bool isEnregistered = fldVar->lvTracked && (fldVar->lvRegNum != REG_STK);
+ // If fldVar is not enregistered then we will need a scratch register
+ //
+ if (!isEnregistered)
+ {
+ // We will need either an int register or a byte register
+ // If we are not requesting a byte register we will request an int register
+ //
+ if (type != TYP_BYTE)
+ type = TYP_INT;
+ predictReg = PREDICT_SCRATCH_REG;
+ }
+ }
+ }
+ }
+ }
+ else
+ {
+ regMaskTP preferReg = rpPredictRegMask(predictReg, type);
+ if (preferReg != 0)
+ {
+ if ((genTypeStSz(type) == 1) || (genCountBits(preferReg) <= genTypeStSz(type)))
+ {
+ varDsc->addPrefReg(preferReg, this);
+ }
+ }
+ }
+ break; /* end of case GT_LCL_VAR */
+
+ case GT_JMP:
+ tree->gtUsedRegs = RBM_NONE;
+ regMask = RBM_NONE;
+
+#if defined(_TARGET_ARM_) && defined(PROFILING_SUPPORTED)
+ // Mark the registers required to emit a tailcall profiler callback
+ if (compIsProfilerHookNeeded())
+ {
+ tree->gtUsedRegs |= RBM_PROFILER_JMP_USED;
+ }
+#endif
+ goto RETURN_CHECK;
+
+ default:
+ break;
+ } /* end of switch (oper) */
+
+            /* If we don't need to evaluate into a register, regMask is the empty set */
+ /* Otherwise we grab a temp for the local variable */
+
+ if (predictReg == PREDICT_NONE)
+ regMask = RBM_NONE;
+ else
+ {
+ regMask = rpPredictRegPick(type, predictReg, lockedRegs | rsvdRegs | enregMask);
+
+ if ((oper == GT_LCL_VAR) && (tree->TypeGet() == TYP_STRUCT))
+ {
+ /* We need to sign or zero extend a small type when pushing a struct */
+ noway_assert((type == TYP_INT) || (type == TYP_BYTE));
+
+ varDsc = lvaTable + tree->gtLclVarCommon.gtLclNum;
+ noway_assert(varDsc->lvPromoted && !varDsc->lvAddrExposed);
+
+ for (unsigned varNum = varDsc->lvFieldLclStart; varNum < varDsc->lvFieldLclStart + varDsc->lvFieldCnt;
+ varNum++)
+ {
+ LclVarDsc* fldVar = lvaTable + varNum;
+ if (fldVar->lvTracked)
+ {
+ VARSET_TP VARSET_INIT_NOCOPY(fldBit, VarSetOps::MakeSingleton(this, fldVar->lvVarIndex));
+ rpRecordRegIntf(regMask, fldBit DEBUGARG(
+ "need scratch register when pushing a small field of a struct"));
+ }
+ }
+ }
+ }
+
+ /* Update the set of lastUse variables that we encountered so far */
+ if (lastUse)
+ {
+ VarSetOps::UnionD(this, rpLastUseVars, lastUseVarBits);
+ VARSET_TP VARSET_INIT(this, varAsSet, lastUseVarBits);
+
+ /*
+ * Add interference from any previously locked temps into this last use variable.
+ */
+ if (lockedRegs)
+ {
+ rpRecordRegIntf(lockedRegs, varAsSet DEBUGARG("last use Predict lockedRegs"));
+ }
+ /*
+ * Add interference from any reserved temps into this last use variable.
+ */
+ if (rsvdRegs)
+ {
+ rpRecordRegIntf(rsvdRegs, varAsSet DEBUGARG("last use Predict rsvdRegs"));
+ }
+ /*
+ * For partially enregistered longs add an interference with the
+                     * register returned by rpPredictRegPick
+ */
+ if ((type == TYP_INT) && (tree->TypeGet() == TYP_LONG))
+ {
+ rpRecordRegIntf(regMask, varAsSet DEBUGARG("last use with partial enreg"));
+ }
+ }
+
+ tree->gtUsedRegs = (regMaskSmall)regMask;
+ goto RETURN_CHECK;
+ }
+
+ /* Is it a 'simple' unary/binary operator? */
+
+ if (kind & GTK_SMPOP)
+ {
+ GenTreePtr op1 = tree->gtOp.gtOp1;
+ GenTreePtr op2 = tree->gtGetOp2();
+
+ GenTreePtr opsPtr[3];
+ regMaskTP regsPtr[3];
+
+ VARSET_TP VARSET_INIT_NOCOPY(startAsgUseInPlaceVars, VarSetOps::UninitVal());
+
+ switch (oper)
+ {
+ case GT_ASG:
+
+ if (tree->OperIsBlkOp())
+ {
+ interferingRegs |= rpPredictBlkAsgRegUse(tree, predictReg, lockedRegs, rsvdRegs);
+ regMask = 0;
+ goto RETURN_CHECK;
+ }
+ /* Is the value being assigned into a LCL_VAR? */
+ if (op1->gtOper == GT_LCL_VAR)
+ {
+ varDsc = lvaTable + op1->gtLclVarCommon.gtLclNum;
+
+ /* Are we assigning a LCL_VAR the result of a call? */
+ if (op2->gtOper == GT_CALL)
+ {
+ /* Set a preferred register for the LCL_VAR */
+ if (isRegPairType(varDsc->TypeGet()))
+ varDsc->addPrefReg(RBM_LNGRET, this);
+ else if (!varTypeIsFloating(varDsc->TypeGet()))
+ varDsc->addPrefReg(RBM_INTRET, this);
+#ifdef _TARGET_AMD64_
+ else
+ varDsc->addPrefReg(RBM_FLOATRET, this);
+#endif
+ /*
+ * When assigning the result of a call we don't
+ * bother trying to target the right side of the
+ * assignment, since we have a fixed calling convention.
+ */
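+                            // E.g. for "lngVar = Foo()": the result arrives in the fixed
+                            // RBM_LNGRET pair, so the preference set above can make the
+                            // move after the call a no-op.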
+ }
+ else if (varDsc->lvTracked)
+ {
+ // We interfere with uses in place
+ if (!VarSetOps::IsEmpty(this, rpUseInPlace))
+ {
+ rpRecordVarIntf(varDsc->lvVarIndex, rpUseInPlace DEBUGARG("Assign UseInPlace conflict"));
+ }
+
+ // Did we predict that this local will be fully enregistered?
+ // and the assignment type is the same as the expression type?
+ // and it is dead on the right side of the assignment?
+                        // and we currently have no other rpAsgVarNum active?
+ //
+ if ((varDsc->lvRegNum != REG_STK) && ((type != TYP_LONG) || (varDsc->lvOtherReg != REG_STK)) &&
+ (type == op2->TypeGet()) && (op1->gtFlags & GTF_VAR_DEF) && (rpAsgVarNum == -1))
+ {
+ //
+ // Yes, we should try to target the right side (op2) of this
+ // assignment into the (enregistered) tracked variable.
+ //
+
+ op1PredictReg = PREDICT_NONE; /* really PREDICT_REG, but we've already done the check */
+ op2PredictReg = rpGetPredictForVarIndex(varDsc->lvVarIndex);
+
+ // Remember that this is a new use in place
+
+ // We've added "new UseInPlace"; remove from the global set.
+ VarSetOps::RemoveElemD(this, rpUseInPlace, varDsc->lvVarIndex);
+
+ // Note that later when we walk down to the leaf node for op2
+ // if we decide to actually use the register for the 'varDsc'
+                            // to enregister the operand, then we will set rpAsgVarNum to
+ // varDsc->lvVarIndex, by extracting this value using
+ // rpGetVarIndexForPredict()
+ //
+ // Also we reset rpAsgVarNum back to -1 after we have finished
+ // predicting the current GT_ASG node
+ //
+ goto ASG_COMMON;
+ }
+ }
+ }
+ __fallthrough;
+
+ case GT_CHS:
+
+ case GT_ASG_OR:
+ case GT_ASG_XOR:
+ case GT_ASG_AND:
+ case GT_ASG_SUB:
+ case GT_ASG_ADD:
+ case GT_ASG_MUL:
+ case GT_ASG_DIV:
+ case GT_ASG_UDIV:
+
+ /* We can't use "reg <op>= addr" for TYP_LONG or if op2 is a short type */
+ if ((type != TYP_LONG) && !varTypeIsSmall(op2->gtType))
+ {
+ /* Is the value being assigned into an enregistered LCL_VAR? */
+ /* For debug code we only allow a simple op2 to be assigned */
+ if ((op1->gtOper == GT_LCL_VAR) && (!opts.compDbgCode || rpCanAsgOperWithoutReg(op2, false)))
+ {
+ varDsc = lvaTable + op1->gtLclVarCommon.gtLclNum;
+ /* Did we predict that this local will be enregistered? */
+ if (varDsc->lvRegNum != REG_STK)
+ {
+ /* Yes, we can use "reg <op>= addr" */
+
+ op1PredictReg = PREDICT_NONE; /* really PREDICT_REG, but we've already done the check */
+ op2PredictReg = PREDICT_NONE;
+
+ goto ASG_COMMON;
+ }
+ }
+ }
+
+#if CPU_LOAD_STORE_ARCH
+ if (oper != GT_ASG)
+ {
+ op1PredictReg = PREDICT_REG;
+ op2PredictReg = PREDICT_REG;
+ }
+ else
+#endif
+ {
+ /*
+ * Otherwise, initialize the normal forcing of operands:
+ * "addr <op>= reg"
+ */
+ op1PredictReg = PREDICT_ADDR;
+ op2PredictReg = PREDICT_REG;
+ }
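+                    // Illustrative x86 shape for "addr <op>= reg":
+                    //     or   dword ptr [ebp-8], eax
+                    // op1 is formed as an address (PREDICT_ADDR) and only op2 gets a register.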
+
+ ASG_COMMON:
+
+#if !CPU_LOAD_STORE_ARCH
+ if (op2PredictReg != PREDICT_NONE)
+ {
+ /* Is the value being assigned a simple one? */
+ if (rpCanAsgOperWithoutReg(op2, false))
+ op2PredictReg = PREDICT_NONE;
+ }
+#endif
+
+ bool simpleAssignment;
+ simpleAssignment = false;
+
+ if ((oper == GT_ASG) && (op1->gtOper == GT_LCL_VAR))
+ {
+ // Add a variable interference from the assign target
+ // to each of the last use variables
+ if (!VarSetOps::IsEmpty(this, rpLastUseVars))
+ {
+ varDsc = lvaTable + op1->gtLclVarCommon.gtLclNum;
+
+ if (varDsc->lvTracked)
+ {
+ unsigned varIndex = varDsc->lvVarIndex;
+
+ rpRecordVarIntf(varIndex, rpLastUseVars DEBUGARG("Assign conflict"));
+ }
+ }
+
+ /* Record whether this tree is a simple assignment to a local */
+
+ simpleAssignment = ((type != TYP_LONG) || !opts.compDbgCode);
+ }
+
+ bool requireByteReg;
+ requireByteReg = false;
+
+#if CPU_HAS_BYTE_REGS
+ /* Byte-assignments need the byte registers, unless op1 is an enregistered local */
+
+ if (varTypeIsByte(type) &&
+ ((op1->gtOper != GT_LCL_VAR) || (lvaTable[op1->gtLclVarCommon.gtLclNum].lvRegNum == REG_STK)))
+ {
+ // Byte-assignments typically need a byte register
+ requireByteReg = true;
+
+ if (op1->gtOper == GT_LCL_VAR)
+ {
+ varDsc = lvaTable + op1->gtLclVar.gtLclNum;
+
+ // Did we predict that this local will be enregistered?
+ if (varDsc->lvTracked && (varDsc->lvRegNum != REG_STK) && (oper != GT_CHS))
+ {
+                            // We don't require a byte register when op1 is an enregistered local
+ requireByteReg = false;
+ }
+
+ // Is op1 part of an Assign-Op or is the RHS a simple memory indirection?
+ if ((oper != GT_ASG) || (op2->gtOper == GT_IND) || (op2->gtOper == GT_CLS_VAR))
+ {
+                            // We should try to put op1 in a byte register
+ varDsc->addPrefReg(RBM_BYTE_REG_FLAG, this);
+ }
+ }
+ }
+#endif
+
+ VarSetOps::Assign(this, startAsgUseInPlaceVars, rpUseInPlace);
+
+ bool isWriteBarrierAsgNode;
+ isWriteBarrierAsgNode = codeGen->gcInfo.gcIsWriteBarrierAsgNode(tree);
+#ifdef DEBUG
+ GCInfo::WriteBarrierForm wbf;
+ if (isWriteBarrierAsgNode)
+ wbf = codeGen->gcInfo.gcIsWriteBarrierCandidate(tree->gtOp.gtOp1, tree->gtOp.gtOp2);
+ else
+ wbf = GCInfo::WBF_NoBarrier;
+#endif // DEBUG
+
+ regMaskTP wbaLockedRegs;
+ wbaLockedRegs = lockedRegs;
+ if (isWriteBarrierAsgNode)
+ {
+#if defined(_TARGET_X86_) && NOGC_WRITE_BARRIERS
+#ifdef DEBUG
+ if (wbf != GCInfo::WBF_NoBarrier_CheckNotHeapInDebug)
+ {
+#endif // DEBUG
+ wbaLockedRegs |= RBM_WRITE_BARRIER;
+ op1->gtRsvdRegs |= RBM_WRITE_BARRIER; // This will steer op2 away from REG_WRITE_BARRIER
+ assert(REG_WRITE_BARRIER == REG_EDX);
+ op1PredictReg = PREDICT_REG_EDX;
+#ifdef DEBUG
+ }
+ else
+#endif // DEBUG
+#endif // defined(_TARGET_X86_) && NOGC_WRITE_BARRIERS
+
+#if defined(DEBUG) || !(defined(_TARGET_X86_) && NOGC_WRITE_BARRIERS)
+ {
+#ifdef _TARGET_X86_
+ op1PredictReg = PREDICT_REG_ECX;
+ op2PredictReg = PREDICT_REG_EDX;
+#elif defined(_TARGET_ARM_)
+ op1PredictReg = PREDICT_REG_R0;
+ op2PredictReg = PREDICT_REG_R1;
+
+ // This is my best guess as to what the previous code meant by checking "gtRngChkLen() == NULL".
+ if ((op1->OperGet() == GT_IND) && (op1->gtOp.gtOp1->OperGet() != GT_ARR_BOUNDS_CHECK))
+ {
+ op1 = op1->gtOp.gtOp1;
+ }
+#else // !_TARGET_X86_ && !_TARGET_ARM_
+#error "Non-ARM or x86 _TARGET_ in RegPredict for WriteBarrierAsg"
+#endif
+ }
+#endif
+ }
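+                    // Sketch of the targeting above: the x86 NOGC barrier keeps the target
+                    // address in EDX (REG_WRITE_BARRIER); the checked/portable path puts the
+                    // address in ECX and the value in EDX on x86, or in R0 and R1 on ARM.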
+
+ /* Are we supposed to evaluate RHS first? */
+
+ if (tree->gtFlags & GTF_REVERSE_OPS)
+ {
+ op2Mask = rpPredictTreeRegUse(op2, op2PredictReg, lockedRegs, rsvdRegs | op1->gtRsvdRegs);
+
+#if CPU_HAS_BYTE_REGS
+                    // Should we ensure that op2 gets evaluated into a byte register?
+ if (requireByteReg && ((op2Mask & RBM_BYTE_REGS) == 0))
+ {
+ // We need to grab a byte-able register, (i.e. EAX, EDX, ECX, EBX)
+ // and we can't select one that is already reserved (i.e. lockedRegs)
+ //
+ op2Mask |= rpPredictRegPick(type, PREDICT_SCRATCH_REG, (lockedRegs | RBM_NON_BYTE_REGS));
+ op2->gtUsedRegs |= op2Mask;
+
+ // No longer a simple assignment because we're using extra registers and might
+ // have interference between op1 and op2. See DevDiv #136681
+ simpleAssignment = false;
+ }
+#endif
+ /*
+ * For a simple assignment we don't want the op2Mask to be
+                     * marked as interfering with the LCL_VAR, since it is likely
+ * that we will want to enregister the LCL_VAR in exactly
+ * the register that is used to compute op2
+ */
+ tmpMask = lockedRegs;
+
+ if (!simpleAssignment)
+ tmpMask |= op2Mask;
+
+ regMask = rpPredictTreeRegUse(op1, op1PredictReg, tmpMask, RBM_NONE);
+
+ // Did we relax the register prediction for op1 and op2 above ?
+ // - because we are depending upon op1 being enregistered
+ //
+ if ((op1PredictReg == PREDICT_NONE) &&
+ ((op2PredictReg == PREDICT_NONE) || rpHasVarIndexForPredict(op2PredictReg)))
+ {
+ /* We must be assigning into an enregistered LCL_VAR */
+ noway_assert(op1->gtOper == GT_LCL_VAR);
+ varDsc = lvaTable + op1->gtLclVar.gtLclNum;
+ noway_assert(varDsc->lvRegNum != REG_STK);
+
+ /* We need to set lvDependReg, in case we lose the enregistration of op1 */
+ varDsc->lvDependReg = true;
+ }
+ }
+ else
+ {
+                    // For the case of a simpleAssignment, op2 should always be evaluated first
+ noway_assert(!simpleAssignment);
+
+ regMask = rpPredictTreeRegUse(op1, op1PredictReg, lockedRegs, rsvdRegs | op2->gtRsvdRegs);
+ if (isWriteBarrierAsgNode)
+ {
+ wbaLockedRegs |= op1->gtUsedRegs;
+ }
+ op2Mask = rpPredictTreeRegUse(op2, op2PredictReg, wbaLockedRegs | regMask, RBM_NONE);
+
+#if CPU_HAS_BYTE_REGS
+                    // Should we ensure that op2 gets evaluated into a byte register?
+ if (requireByteReg && ((op2Mask & RBM_BYTE_REGS) == 0))
+ {
+ // We need to grab a byte-able register, (i.e. EAX, EDX, ECX, EBX)
+ // and we can't select one that is already reserved (i.e. lockedRegs or regMask)
+ //
+ op2Mask |=
+ rpPredictRegPick(type, PREDICT_SCRATCH_REG, (lockedRegs | regMask | RBM_NON_BYTE_REGS));
+ op2->gtUsedRegs |= op2Mask;
+ }
+#endif
+ }
+
+ if (rpHasVarIndexForPredict(op2PredictReg))
+ {
+ rpAsgVarNum = -1;
+ }
+
+ if (isWriteBarrierAsgNode)
+ {
+#if NOGC_WRITE_BARRIERS
+#ifdef DEBUG
+ if (wbf != GCInfo::WBF_NoBarrier_CheckNotHeapInDebug)
+ {
+#endif // DEBUG
+
+ /* Steer computation away from REG_WRITE_BARRIER as the pointer is
+ passed to the write-barrier call in REG_WRITE_BARRIER */
+
+ regMask = op2Mask;
+
+ if (op1->gtOper == GT_IND)
+ {
+ GenTreePtr rv1, rv2;
+ unsigned mul, cns;
+ bool rev;
+
+ /* Special handling of indirect assigns for write barrier */
+
+ bool yes = codeGen->genCreateAddrMode(op1->gtOp.gtOp1, -1, true, RBM_NONE, &rev, &rv1, &rv2,
+ &mul, &cns);
+
+ /* Check address mode for enregisterable locals */
+
+ if (yes)
+ {
+ if (rv1 != NULL && rv1->gtOper == GT_LCL_VAR)
+ {
+ rpPredictRefAssign(rv1->gtLclVarCommon.gtLclNum);
+ }
+ if (rv2 != NULL && rv2->gtOper == GT_LCL_VAR)
+ {
+ rpPredictRefAssign(rv2->gtLclVarCommon.gtLclNum);
+ }
+ }
+ }
+
+ if (op2->gtOper == GT_LCL_VAR)
+ {
+ rpPredictRefAssign(op2->gtLclVarCommon.gtLclNum);
+ }
+
+ // Add a register interference for REG_WRITE_BARRIER to each of the last use variables
+ if (!VarSetOps::IsEmpty(this, rpLastUseVars))
+ {
+ rpRecordRegIntf(RBM_WRITE_BARRIER,
+ rpLastUseVars DEBUGARG("WriteBarrier and rpLastUseVars conflict"));
+ }
+ tree->gtUsedRegs |= RBM_WRITE_BARRIER;
+#ifdef DEBUG
+ }
+ else
+#endif // DEBUG
+#endif // NOGC_WRITE_BARRIERS
+
+#if defined(DEBUG) || !NOGC_WRITE_BARRIERS
+ {
+#ifdef _TARGET_ARM_
+#ifdef DEBUG
+ if (verbose)
+ printf("Adding interference with RBM_CALLEE_TRASH_NOGC for NoGC WriteBarrierAsg\n");
+#endif
+ //
+ // For the ARM target we have an optimized JIT Helper
+ // that only trashes a subset of the callee saved registers
+ //
+
+ // NOTE: Adding it to the gtUsedRegs will cause the interference to
+ // be added appropriately
+
+ // the RBM_CALLEE_TRASH_NOGC set is killed. We will record this in interferingRegs
+ // instead of gtUsedRegs, because the latter will be modified later, but we need
+ // to remember to add the interference.
+
+ interferingRegs |= RBM_CALLEE_TRASH_NOGC;
+
+ op1->gtUsedRegs |= RBM_R0;
+ op2->gtUsedRegs |= RBM_R1;
+#else // _TARGET_ARM_
+
+#ifdef DEBUG
+ if (verbose)
+ printf("Adding interference with RBM_CALLEE_TRASH for NoGC WriteBarrierAsg\n");
+#endif
+ // We have to call a normal JIT helper to perform the Write Barrier Assignment
+                    // It will trash the callee-trash (caller-saved) registers
+
+ tree->gtUsedRegs |= RBM_CALLEE_TRASH;
+#endif // _TARGET_ARM_
+ }
+#endif // defined(DEBUG) || !NOGC_WRITE_BARRIERS
+ }
+
+ if (simpleAssignment)
+ {
+ /*
+ * Consider a simple assignment to a local:
+ *
+ * lcl = expr;
+ *
+ * Since the "=" node is visited after the variable
+ * is marked live (assuming it's live after the
+ * assignment), we don't want to use the register
+ * use mask of the "=" node but rather that of the
+ * variable itself.
+ */
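+                        // E.g. if "lcl" was predicted to EBX (illustrative), op1->gtUsedRegs
+                        // is RBM_EBX and the RHS can be computed directly into EBX.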
+ tree->gtUsedRegs = op1->gtUsedRegs;
+ }
+ else
+ {
+ tree->gtUsedRegs = op1->gtUsedRegs | op2->gtUsedRegs;
+ }
+ VarSetOps::Assign(this, rpUseInPlace, startAsgUseInPlaceVars);
+ goto RETURN_CHECK;
+
+ case GT_ASG_LSH:
+ case GT_ASG_RSH:
+ case GT_ASG_RSZ:
+ /* assigning shift operators */
+
+ noway_assert(type != TYP_LONG);
+
+#if CPU_LOAD_STORE_ARCH
+ predictReg = PREDICT_ADDR;
+#else
+ predictReg = PREDICT_NONE;
+#endif
+
+                    /* the shift count is handled the same as an ordinary shift */
+ goto HANDLE_SHIFT_COUNT;
+
+ case GT_ADDR:
+ regMask = rpPredictTreeRegUse(op1, PREDICT_ADDR, lockedRegs, RBM_LASTUSE);
+
+ if ((regMask == RBM_NONE) && (predictReg >= PREDICT_REG))
+ {
+ // We need a scratch register for the LEA instruction
+ regMask = rpPredictRegPick(TYP_INT, predictReg, lockedRegs | rsvdRegs);
+ }
+
+ tree->gtUsedRegs = op1->gtUsedRegs | (regMaskSmall)regMask;
+ goto RETURN_CHECK;
+
+ case GT_CAST:
+
+ /* Cannot cast to VOID */
+ noway_assert(type != TYP_VOID);
+
+ /* cast to long is special */
+ if (type == TYP_LONG && op1->gtType <= TYP_INT)
+ {
+ noway_assert(tree->gtCast.gtCastType == TYP_LONG || tree->gtCast.gtCastType == TYP_ULONG);
+#if CPU_LONG_USES_REGPAIR
+ rpPredictReg predictRegHi = PREDICT_SCRATCH_REG;
+
+ if (rpHasVarIndexForPredict(predictReg))
+ {
+ unsigned tgtIndex = rpGetVarIndexForPredict(predictReg);
+ rpAsgVarNum = tgtIndex;
+
+ // We don't need any register as we plan on writing to the rpAsgVarNum register
+ predictReg = PREDICT_NONE;
+
+ LclVarDsc* tgtVar = lvaTable + lvaTrackedToVarNum[tgtIndex];
+ tgtVar->lvDependReg = true;
+
+ if (tgtVar->lvOtherReg != REG_STK)
+ {
+ predictRegHi = PREDICT_NONE;
+ }
+ }
+ else
+#endif
+ if (predictReg == PREDICT_NONE)
+ {
+ predictReg = PREDICT_SCRATCH_REG;
+ }
+#ifdef _TARGET_ARM_
+ // If we are widening an int into a long using a targeted register pair we
+                    // should retarget so that the low part gets loaded into the appropriate register
+ else if (predictReg == PREDICT_PAIR_R0R1)
+ {
+ predictReg = PREDICT_REG_R0;
+ predictRegHi = PREDICT_REG_R1;
+ }
+ else if (predictReg == PREDICT_PAIR_R2R3)
+ {
+ predictReg = PREDICT_REG_R2;
+ predictRegHi = PREDICT_REG_R3;
+ }
+#endif
+#ifdef _TARGET_X86_
+ // If we are widening an int into a long using a targeted register pair we
+                    // should retarget so that the low part gets loaded into the appropriate register
+ else if (predictReg == PREDICT_PAIR_EAXEDX)
+ {
+ predictReg = PREDICT_REG_EAX;
+ predictRegHi = PREDICT_REG_EDX;
+ }
+ else if (predictReg == PREDICT_PAIR_ECXEBX)
+ {
+ predictReg = PREDICT_REG_ECX;
+ predictRegHi = PREDICT_REG_EBX;
+ }
+#endif
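+                    // E.g. when widening under PREDICT_PAIR_EAXEDX (x86), the low 32 bits
+                    // are computed into EAX here and EDX is picked for the high half below.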
+
+ regMask = rpPredictTreeRegUse(op1, predictReg, lockedRegs, rsvdRegs);
+
+#if CPU_LONG_USES_REGPAIR
+ if (predictRegHi != PREDICT_NONE)
+ {
+ // Now get one more reg for the upper part
+ regMask |= rpPredictRegPick(TYP_INT, predictRegHi, lockedRegs | rsvdRegs | regMask);
+ }
+#endif
+ tree->gtUsedRegs = op1->gtUsedRegs | (regMaskSmall)regMask;
+ goto RETURN_CHECK;
+ }
+
+ /* cast from long is special - it frees a register */
+ if (type <= TYP_INT // nice. this presumably is intended to mean "signed int and shorter types"
+ && op1->gtType == TYP_LONG)
+ {
+ if ((predictReg == PREDICT_NONE) || rpHasVarIndexForPredict(predictReg))
+ predictReg = PREDICT_REG;
+
+ regMask = rpPredictTreeRegUse(op1, predictReg, lockedRegs, rsvdRegs);
+
+ // If we have 2 or more regs, free one of them
+ if (!genMaxOneBit(regMask))
+ {
+ /* Clear the 2nd lowest bit in regMask */
+ /* First set tmpMask to the lowest bit in regMask */
+ tmpMask = genFindLowestBit(regMask);
+ /* Next find the second lowest bit in regMask */
+ tmpMask = genFindLowestBit(regMask & ~tmpMask);
+ /* Clear this bit from regmask */
+ regMask &= ~tmpMask;
+ }
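+                    // E.g. if op1 came back in the EDX:EAX pair (illustrative), we keep only
+                    // EAX, since the low 32 bits already hold the truncated result.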
+ tree->gtUsedRegs = op1->gtUsedRegs;
+ goto RETURN_CHECK;
+ }
+
+#if CPU_HAS_BYTE_REGS
+ /* cast from signed-byte is special - it uses byteable registers */
+ if (type == TYP_INT)
+ {
+ var_types smallType;
+
+ if (genTypeSize(tree->gtCast.CastOp()->TypeGet()) < genTypeSize(tree->gtCast.gtCastType))
+ smallType = tree->gtCast.CastOp()->TypeGet();
+ else
+ smallType = tree->gtCast.gtCastType;
+
+ if (smallType == TYP_BYTE)
+ {
+ regMask = rpPredictTreeRegUse(op1, predictReg, lockedRegs, rsvdRegs);
+
+ if ((regMask & RBM_BYTE_REGS) == 0)
+ regMask = rpPredictRegPick(type, PREDICT_SCRATCH_REG, RBM_NON_BYTE_REGS);
+
+ tree->gtUsedRegs = (regMaskSmall)regMask;
+ goto RETURN_CHECK;
+ }
+ }
+#endif
+
+#if FEATURE_STACK_FP_X87
+ /* cast to float/double is special */
+ if (varTypeIsFloating(type))
+ {
+ switch (op1->TypeGet())
+ {
+                            /* uses fild, so it doesn't need to be loaded into a reg */
+ case TYP_INT:
+ case TYP_LONG:
+ rpPredictTreeRegUse(op1, PREDICT_NONE, lockedRegs, rsvdRegs);
+ tree->gtUsedRegs = op1->gtUsedRegs;
+ regMask = 0;
+ goto RETURN_CHECK;
+ default:
+ break;
+ }
+ }
+
+                /* Casting from a floating type to an integral type is special */
+ if (!varTypeIsFloating(type) && varTypeIsFloating(op1->TypeGet()))
+ {
+ if (opts.compCanUseSSE2)
+ {
+ // predict for SSE2 based casting
+ if (predictReg <= PREDICT_REG)
+ predictReg = PREDICT_SCRATCH_REG;
+ regMask = rpPredictTreeRegUse(op1, predictReg, lockedRegs, rsvdRegs);
+
+ // Get one more int reg to hold cast result
+ regMask |= rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, lockedRegs | rsvdRegs | regMask);
+ tree->gtUsedRegs = op1->gtUsedRegs | (regMaskSmall)regMask;
+ goto RETURN_CHECK;
+ }
+ }
+#endif
+
+#if FEATURE_FP_REGALLOC
+ // Are we casting between int to float or float to int
+ // Fix 388428 ARM JitStress WP7
+ if (varTypeIsFloating(type) != varTypeIsFloating(op1->TypeGet()))
+ {
+ // op1 needs to go into a register
+ regMask = rpPredictTreeRegUse(op1, PREDICT_REG, lockedRegs, rsvdRegs);
+
+#ifdef _TARGET_ARM_
+ if (varTypeIsFloating(op1->TypeGet()))
+ {
+ // We also need a fp scratch register for the convert operation
+ regMask |= rpPredictRegPick((genTypeStSz(type) == 1) ? TYP_FLOAT : TYP_DOUBLE,
+ PREDICT_SCRATCH_REG, regMask | lockedRegs | rsvdRegs);
+ }
+#endif
+ // We also need a register to hold the result
+ regMask |= rpPredictRegPick(type, PREDICT_SCRATCH_REG, regMask | lockedRegs | rsvdRegs);
+ tree->gtUsedRegs = op1->gtUsedRegs | (regMaskSmall)regMask;
+ goto RETURN_CHECK;
+ }
+#endif
+
+ /* otherwise must load op1 into a register */
+ goto GENERIC_UNARY;
+
+ case GT_INTRINSIC:
+
+#ifdef _TARGET_XARCH_
+ if (tree->gtIntrinsic.gtIntrinsicId == CORINFO_INTRINSIC_Round && tree->TypeGet() == TYP_INT)
+ {
+ // This is a special case to handle the following
+ // optimization: conv.i4(round.d(d)) -> round.i(d)
+ // if flowgraph 3186
+
+ if (predictReg <= PREDICT_REG)
+ predictReg = PREDICT_SCRATCH_REG;
+
+ rpPredictTreeRegUse(op1, predictReg, lockedRegs, rsvdRegs);
+
+ regMask = rpPredictRegPick(TYP_INT, predictReg, lockedRegs | rsvdRegs);
+
+ tree->gtUsedRegs = op1->gtUsedRegs | (regMaskSmall)regMask;
+ goto RETURN_CHECK;
+ }
+#endif
+ __fallthrough;
+
+ case GT_NEG:
+#ifdef _TARGET_ARM_
+ if (tree->TypeGet() == TYP_LONG)
+ {
+ // On ARM this consumes an extra register for the '0' value
+ if (predictReg <= PREDICT_REG)
+ predictReg = PREDICT_SCRATCH_REG;
+
+ regMaskTP op1Mask = rpPredictTreeRegUse(op1, predictReg, lockedRegs, rsvdRegs);
+
+ regMask = rpPredictRegPick(TYP_INT, predictReg, lockedRegs | op1Mask | rsvdRegs);
+
+ tree->gtUsedRegs = op1->gtUsedRegs | (regMaskSmall)regMask;
+ goto RETURN_CHECK;
+ }
+#endif // _TARGET_ARM_
+
+ __fallthrough;
+
+ case GT_NOT:
+ // these unary operators will write new values
+ // and thus will need a scratch register
+ GENERIC_UNARY:
+ /* generic unary operators */
+
+ if (predictReg <= PREDICT_REG)
+ predictReg = PREDICT_SCRATCH_REG;
+
+ __fallthrough;
+
+ case GT_NOP:
+ // these unary operators do not write new values
+ // and thus won't need a scratch register
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#if OPT_BOOL_OPS
+ if (!op1)
+ {
+ tree->gtUsedRegs = 0;
+ regMask = 0;
+ goto RETURN_CHECK;
+ }
+#endif
+ regMask = rpPredictTreeRegUse(op1, predictReg, lockedRegs, rsvdRegs);
+ tree->gtUsedRegs = op1->gtUsedRegs;
+ goto RETURN_CHECK;
+
+ case GT_IND:
+ case GT_NULLCHECK: // At this point, nullcheck is just like an IND...
+ {
+ bool intoReg = true;
+ VARSET_TP VARSET_INIT(this, startIndUseInPlaceVars, rpUseInPlace);
+
+ if (fgIsIndirOfAddrOfLocal(tree) != NULL)
+ {
+ compUpdateLifeVar</*ForCodeGen*/ false>(tree);
+ }
+
+ if (predictReg == PREDICT_ADDR)
+ {
+ intoReg = false;
+ }
+ else if (predictReg == PREDICT_NONE)
+ {
+ if (type != TYP_LONG)
+ {
+ intoReg = false;
+ }
+ else
+ {
+ predictReg = PREDICT_REG;
+ }
+ }
+
+ /* forcing to register? */
+ if (intoReg && (type != TYP_LONG))
+ {
+ rsvdRegs |= RBM_LASTUSE;
+ }
+
+ GenTreePtr lenCSE;
+ lenCSE = NULL;
+
+ /* check for address mode */
+ regMask = rpPredictAddressMode(op1, type, lockedRegs, rsvdRegs, lenCSE);
+ tmpMask = RBM_NONE;
+
+#if CPU_LOAD_STORE_ARCH
+ // We may need a scratch register for loading a long
+ if (type == TYP_LONG)
+ {
+ /* This scratch register immediately dies */
+ tmpMask = rpPredictRegPick(TYP_BYREF, PREDICT_REG, op1->gtUsedRegs | lockedRegs | rsvdRegs);
+ }
+#endif // CPU_LOAD_STORE_ARCH
+
+#ifdef _TARGET_ARM_
+ // Unaligned loads/stores for floating point values must first be loaded into integer register(s)
+ //
+ if ((tree->gtFlags & GTF_IND_UNALIGNED) && varTypeIsFloating(type))
+ {
+ /* These integer register(s) immediately die */
+ tmpMask = rpPredictRegPick(TYP_INT, PREDICT_REG, op1->gtUsedRegs | lockedRegs | rsvdRegs);
+ // Two integer registers are required for a TYP_DOUBLE
+ if (type == TYP_DOUBLE)
+ tmpMask |=
+ rpPredictRegPick(TYP_INT, PREDICT_REG, op1->gtUsedRegs | lockedRegs | rsvdRegs | tmpMask);
+ }
+#endif
+
+ /* forcing to register? */
+ if (intoReg)
+ {
+ regMaskTP lockedMask = lockedRegs | rsvdRegs;
+ tmpMask |= regMask;
+
+ // We will compute a new regMask that holds the register(s)
+ // that we will load the indirection into.
+ //
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifndef _TARGET_64BIT_
+ if (type == TYP_LONG)
+ {
+ // We need to use multiple load instructions here:
+                                    // For the first register we cannot choose
+ // any registers that are being used in place or
+ // any register in the current regMask
+ //
+ regMask = rpPredictRegPick(TYP_INT, predictReg, regMask | lockedMask);
+
+ // For the second register we can choose a register that was
+ // used in place or any register in the old now overwritten regMask
+ // but not the same register that we picked above in 'regMask'
+ //
+ VarSetOps::Assign(this, rpUseInPlace, startIndUseInPlaceVars);
+ regMask |= rpPredictRegPick(TYP_INT, predictReg, regMask | lockedMask);
+ }
+ else
+#endif
+ {
+ // We will use one load instruction here:
+ // The load target register can be a register that was used in place
+                                    // or one of the registers from the original regMask.
+ //
+ VarSetOps::Assign(this, rpUseInPlace, startIndUseInPlaceVars);
+ regMask = rpPredictRegPick(type, predictReg, lockedMask);
+ }
+ }
+ else if (predictReg != PREDICT_ADDR)
+ {
+ /* Unless the caller specified PREDICT_ADDR */
+ /* we don't return the temp registers used */
+ /* to form the address */
+ regMask = RBM_NONE;
+ }
+ }
+
+ tree->gtUsedRegs = (regMaskSmall)(regMask | tmpMask);
+
+ goto RETURN_CHECK;
+
+ case GT_EQ:
+ case GT_NE:
+ case GT_LT:
+ case GT_LE:
+ case GT_GE:
+ case GT_GT:
+
+#ifdef _TARGET_X86_
+ /* Floating point comparison uses EAX for flags */
+ if (varTypeIsFloating(op1->TypeGet()))
+ {
+ regMask = RBM_EAX;
+ }
+ else
+#endif
+ if (!(tree->gtFlags & GTF_RELOP_JMP_USED))
+ {
+ // Some comparisons are converted to ?:
+ noway_assert(!fgMorphRelopToQmark(op1));
+
+ if (predictReg <= PREDICT_REG)
+ predictReg = PREDICT_SCRATCH_REG;
+
+ // The set instructions need a byte register
+ regMask = rpPredictRegPick(TYP_BYTE, predictReg, lockedRegs | rsvdRegs);
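+                        // The TYP_BYTE request above reflects that x86 SETcc can only write
+                        // the 8-bit registers AL/BL/CL/DL; e.g. "setl al" for a signed "<".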
+ }
+ else
+ {
+ regMask = RBM_NONE;
+#ifdef _TARGET_XARCH_
+ tmpMask = RBM_NONE;
+ // Optimize the compare with a constant cases for xarch
+ if (op1->gtOper == GT_CNS_INT)
+ {
+ if (op2->gtOper == GT_CNS_INT)
+ tmpMask =
+ rpPredictTreeRegUse(op1, PREDICT_SCRATCH_REG, lockedRegs, rsvdRegs | op2->gtRsvdRegs);
+ rpPredictTreeRegUse(op2, PREDICT_NONE, lockedRegs | tmpMask, RBM_LASTUSE);
+ tree->gtUsedRegs = op2->gtUsedRegs;
+ goto RETURN_CHECK;
+ }
+ else if (op2->gtOper == GT_CNS_INT)
+ {
+ rpPredictTreeRegUse(op1, PREDICT_NONE, lockedRegs, rsvdRegs);
+ tree->gtUsedRegs = op1->gtUsedRegs;
+ goto RETURN_CHECK;
+ }
+ else if (op2->gtOper == GT_CNS_LNG)
+ {
+ regMaskTP op1Mask = rpPredictTreeRegUse(op1, PREDICT_ADDR, lockedRegs, rsvdRegs);
+#ifdef _TARGET_X86_
+ // We also need one extra register to read values from
+ tmpMask = rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, lockedRegs | op1Mask | rsvdRegs);
+#endif // _TARGET_X86_
+ tree->gtUsedRegs = (regMaskSmall)tmpMask | op1->gtUsedRegs;
+ goto RETURN_CHECK;
+ }
+#endif // _TARGET_XARCH_
+ }
+
+ unsigned op1TypeSize;
+ unsigned op2TypeSize;
+
+ op1TypeSize = genTypeSize(op1->TypeGet());
+ op2TypeSize = genTypeSize(op2->TypeGet());
+
+ op1PredictReg = PREDICT_REG;
+ op2PredictReg = PREDICT_REG;
+
+ if (tree->gtFlags & GTF_REVERSE_OPS)
+ {
+#ifdef _TARGET_XARCH_
+ if (op1TypeSize == sizeof(int))
+ op1PredictReg = PREDICT_NONE;
+#endif
+
+ tmpMask = rpPredictTreeRegUse(op2, op2PredictReg, lockedRegs, rsvdRegs | op1->gtRsvdRegs);
+ rpPredictTreeRegUse(op1, op1PredictReg, lockedRegs | tmpMask, RBM_LASTUSE);
+ }
+ else
+ {
+#ifdef _TARGET_XARCH_
+ // For full DWORD compares we can have
+ //
+ // op1 is an address mode and op2 is a register
+ // or
+ // op1 is a register and op2 is an address mode
+ //
+ if ((op2TypeSize == sizeof(int)) && (op1TypeSize == op2TypeSize))
+ {
+ if (op2->gtOper == GT_LCL_VAR)
+ {
+ unsigned lclNum = op2->gtLclVar.gtLclNum;
+ varDsc = lvaTable + lclNum;
+ /* Did we predict that this local will be enregistered? */
+ if (varDsc->lvTracked && (varDsc->lvRegNum != REG_STK))
+ {
+ op1PredictReg = PREDICT_ADDR;
+ }
+ }
+ }
+ // Codegen will generate cmp reg,[mem] for 4 or 8-byte types, but not for 1 or 2 byte types
+ if ((op1PredictReg != PREDICT_ADDR) && (op2TypeSize >= sizeof(int)))
+ op2PredictReg = PREDICT_ADDR;
+#endif // _TARGET_XARCH_
+
+ tmpMask = rpPredictTreeRegUse(op1, op1PredictReg, lockedRegs, rsvdRegs | op2->gtRsvdRegs);
+#ifdef _TARGET_ARM_
+ if ((op2->gtOper != GT_CNS_INT) || !codeGen->validImmForAlu(op2->gtIntCon.gtIconVal))
+#endif
+ {
+ rpPredictTreeRegUse(op2, op2PredictReg, lockedRegs | tmpMask, RBM_LASTUSE);
+ }
+ }
+
+#ifdef _TARGET_XARCH_
+ // In some cases in genCondSetFlags(), we need to use a temporary register (via rsPickReg())
+ // to generate a sign/zero extension before doing a compare. Save a register for this purpose
+ // if one of the registers is small and the types aren't equal.
+
+ if (regMask == RBM_NONE)
+ {
+ rpPredictReg op1xPredictReg, op2xPredictReg;
+ GenTreePtr op1x, op2x;
+ if (tree->gtFlags & GTF_REVERSE_OPS) // TODO: do we really need to handle this case?
+ {
+ op1xPredictReg = op2PredictReg;
+ op2xPredictReg = op1PredictReg;
+ op1x = op2;
+ op2x = op1;
+ }
+ else
+ {
+ op1xPredictReg = op1PredictReg;
+ op2xPredictReg = op2PredictReg;
+ op1x = op1;
+ op2x = op2;
+ }
+ if ((op1xPredictReg < PREDICT_REG) && // op1 doesn't get a register (probably an indir)
+ (op2xPredictReg >= PREDICT_REG) && // op2 gets a register
+ varTypeIsSmall(op1x->TypeGet())) // op1 is smaller than an int
+ {
+ bool needTmp = false;
+
+ // If op1x is a byte, and op2x is not a byteable register, we'll need a temp.
+ // We could predict a byteable register for op2x, but what if we don't get it?
+ // So, be conservative and always ask for a temp. There are a couple small CQ losses as a
+ // result.
+ if (varTypeIsByte(op1x->TypeGet()))
+ {
+ needTmp = true;
+ }
+ else
+ {
+ if (op2x->gtOper == GT_LCL_VAR) // this will be a GT_REG_VAR during code generation
+ {
+ if (genActualType(op1x->TypeGet()) != lvaGetActualType(op2x->gtLclVar.gtLclNum))
+ needTmp = true;
+ }
+ else
+ {
+ if (op1x->TypeGet() != op2x->TypeGet())
+ needTmp = true;
+ }
+ }
+ if (needTmp)
+ {
+ regMask = rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, lockedRegs | rsvdRegs);
+ }
+ }
+ }
+#endif // _TARGET_XARCH_
+
+ tree->gtUsedRegs = (regMaskSmall)regMask | op1->gtUsedRegs | op2->gtUsedRegs;
+ goto RETURN_CHECK;
+
+ case GT_MUL:
+
+#ifndef _TARGET_AMD64_
+ if (type == TYP_LONG)
+ {
+ assert(tree->gtIsValid64RsltMul());
+
+ /* Strip out the cast nodes */
+
+ noway_assert(op1->gtOper == GT_CAST && op2->gtOper == GT_CAST);
+ op1 = op1->gtCast.CastOp();
+ op2 = op2->gtCast.CastOp();
+#else
+ if (false)
+ {
+#endif // !_TARGET_AMD64_
+ USE_MULT_EAX:
+
+#if defined(_TARGET_X86_)
+                    // This will be done by a 64-bit imul "imul eax, reg"
+ // (i.e. EDX:EAX = EAX * reg)
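+                    // Illustrative sequence for "(long)a * (long)b":
+                    //     mov  eax, a
+                    //     imul dword ptr [b]   ; EDX:EAX = EAX * b
+                    // hence RBM_PAIR_TMP (the EAX/EDX pair) is reserved below.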
+
+ /* Are we supposed to evaluate op2 first? */
+ if (tree->gtFlags & GTF_REVERSE_OPS)
+ {
+ rpPredictTreeRegUse(op2, PREDICT_PAIR_TMP_LO, lockedRegs, rsvdRegs | op1->gtRsvdRegs);
+ rpPredictTreeRegUse(op1, PREDICT_REG, lockedRegs | RBM_PAIR_TMP_LO, RBM_LASTUSE);
+ }
+ else
+ {
+ rpPredictTreeRegUse(op1, PREDICT_PAIR_TMP_LO, lockedRegs, rsvdRegs | op2->gtRsvdRegs);
+ rpPredictTreeRegUse(op2, PREDICT_REG, lockedRegs | RBM_PAIR_TMP_LO, RBM_LASTUSE);
+ }
+
+ /* set gtUsedRegs to EAX, EDX and the registers needed by op1 and op2 */
+
+ tree->gtUsedRegs = RBM_PAIR_TMP | op1->gtUsedRegs | op2->gtUsedRegs;
+
+ /* set regMask to the set of held registers */
+
+ regMask = RBM_PAIR_TMP_LO;
+
+ if (type == TYP_LONG)
+ regMask |= RBM_PAIR_TMP_HI;
+
+#elif defined(_TARGET_ARM_)
+                    // This will be done by a 4-operand multiply
+
+ // Are we supposed to evaluate op2 first?
+ if (tree->gtFlags & GTF_REVERSE_OPS)
+ {
+ rpPredictTreeRegUse(op2, PREDICT_REG, lockedRegs, rsvdRegs | op1->gtRsvdRegs);
+ rpPredictTreeRegUse(op1, PREDICT_REG, lockedRegs, RBM_LASTUSE);
+ }
+ else
+ {
+ rpPredictTreeRegUse(op1, PREDICT_REG, lockedRegs, rsvdRegs | op2->gtRsvdRegs);
+ rpPredictTreeRegUse(op2, PREDICT_REG, lockedRegs, RBM_LASTUSE);
+ }
+
+ // set regMask to the set of held registers,
+                    // the two scratch registers we need to compute the mul result
+
+ regMask = rpPredictRegPick(TYP_LONG, PREDICT_SCRATCH_REG, lockedRegs | rsvdRegs);
+
+                    // set gtUsedRegs to regMask and the registers needed by op1 and op2
+
+ tree->gtUsedRegs = regMask | op1->gtUsedRegs | op2->gtUsedRegs;
+
+#else // !_TARGET_X86_ && !_TARGET_ARM_
+#error "Non-ARM or x86 _TARGET_ in RegPredict for 64-bit imul"
+#endif
+
+ goto RETURN_CHECK;
+ }
+ else
+ {
+ /* We use imulEAX for most unsigned multiply operations */
+ if (tree->gtOverflow())
+ {
+ if ((tree->gtFlags & GTF_UNSIGNED) || varTypeIsSmall(tree->TypeGet()))
+ {
+ goto USE_MULT_EAX;
+ }
+ }
+ }
+
+ __fallthrough;
+
+ case GT_OR:
+ case GT_XOR:
+ case GT_AND:
+
+ case GT_SUB:
+ case GT_ADD:
+ tree->gtUsedRegs = 0;
+
+ if (predictReg <= PREDICT_REG)
+ predictReg = PREDICT_SCRATCH_REG;
+
+ GENERIC_BINARY:
+
+ noway_assert(op2);
+ if (tree->gtFlags & GTF_REVERSE_OPS)
+ {
+ op1PredictReg = PREDICT_REG;
+#if !CPU_LOAD_STORE_ARCH
+ if (genTypeSize(op1->gtType) >= sizeof(int))
+ op1PredictReg = PREDICT_NONE;
+#endif
+ regMask = rpPredictTreeRegUse(op2, predictReg, lockedRegs, rsvdRegs | op1->gtRsvdRegs);
+ rpPredictTreeRegUse(op1, op1PredictReg, lockedRegs | regMask, RBM_LASTUSE);
+ }
+ else
+ {
+ op2PredictReg = PREDICT_REG;
+#if !CPU_LOAD_STORE_ARCH
+ if (genTypeSize(op2->gtType) >= sizeof(int))
+ op2PredictReg = PREDICT_NONE;
+#endif
+ regMask = rpPredictTreeRegUse(op1, predictReg, lockedRegs, rsvdRegs | op2->gtRsvdRegs);
+#ifdef _TARGET_ARM_
+ // For most ALU operations we can generate a single instruction that encodes
+ // a small immediate integer constant value. (except for multiply)
+ //
+ if ((op2->gtOper == GT_CNS_INT) && (oper != GT_MUL))
+ {
+ ssize_t ival = op2->gtIntCon.gtIconVal;
+ if (codeGen->validImmForAlu(ival))
+ {
+ op2PredictReg = PREDICT_NONE;
+ }
+ else if (codeGen->validImmForAdd(ival, INS_FLAGS_DONT_CARE) &&
+ ((oper == GT_ADD) || (oper == GT_SUB)))
+ {
+ op2PredictReg = PREDICT_NONE;
+ }
+ }
+ if (op2PredictReg == PREDICT_NONE)
+ {
+ op2->gtUsedRegs = RBM_NONE;
+ }
+ else
+#endif
+ {
+ rpPredictTreeRegUse(op2, op2PredictReg, lockedRegs | regMask, RBM_LASTUSE);
+ }
+ }
+ tree->gtUsedRegs = (regMaskSmall)regMask | op1->gtUsedRegs | op2->gtUsedRegs;
+
+#if CPU_HAS_BYTE_REGS
+ /* We have special register requirements for byte operations */
+
+ if (varTypeIsByte(tree->TypeGet()))
+ {
+                    /* For 8 bit arithmetic, one operand has to be in a
+                       byte-addressable register, and the other has to be
+                       in a byte-addressable reg or in memory. Assume it's in a reg */
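+                    // E.g. an 8-bit "add al, bl" is encodable, but ESI/EDI have no byte
+                    // forms on x86, so the picks below are restricted to RBM_BYTE_REGS.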
+
+ regMaskTP regByteMask = 0;
+ regMaskTP op1ByteMask = op1->gtUsedRegs;
+
+ if (!(op1->gtUsedRegs & RBM_BYTE_REGS))
+ {
+ // Pick a Byte register to use for op1
+ regByteMask = rpPredictRegPick(TYP_BYTE, PREDICT_REG, lockedRegs | rsvdRegs);
+ op1ByteMask = regByteMask;
+ }
+
+ if (!(op2->gtUsedRegs & RBM_BYTE_REGS))
+ {
+ // Pick a Byte register to use for op2, avoiding the one used by op1
+ regByteMask |= rpPredictRegPick(TYP_BYTE, PREDICT_REG, lockedRegs | rsvdRegs | op1ByteMask);
+ }
+
+ if (regByteMask)
+ {
+ tree->gtUsedRegs |= regByteMask;
+ regMask = regByteMask;
+ }
+ }
+#endif
+ goto RETURN_CHECK;
+
+ case GT_DIV:
+ case GT_MOD:
+
+ case GT_UDIV:
+ case GT_UMOD:
+
+ /* non-integer division handled in generic way */
+ if (!varTypeIsIntegral(type))
+ {
+ tree->gtUsedRegs = 0;
+ if (predictReg <= PREDICT_REG)
+ predictReg = PREDICT_SCRATCH_REG;
+ goto GENERIC_BINARY;
+ }
+
+#ifndef _TARGET_64BIT_
+
+ if (type == TYP_LONG && (oper == GT_MOD || oper == GT_UMOD))
+ {
+ /* Special case: a mod with an int op2 is done inline using idiv or div
+ to avoid a costly call to the helper */
+
+ noway_assert((op2->gtOper == GT_CNS_LNG) &&
+ (op2->gtLngCon.gtLconVal == int(op2->gtLngCon.gtLconVal)));
+
+#if defined(_TARGET_X86_) || defined(_TARGET_ARM_)
+ if (tree->gtFlags & GTF_REVERSE_OPS)
+ {
+ tmpMask = rpPredictTreeRegUse(op2, PREDICT_REG, lockedRegs | RBM_PAIR_TMP,
+ rsvdRegs | op1->gtRsvdRegs);
+ tmpMask |= rpPredictTreeRegUse(op1, PREDICT_PAIR_TMP, lockedRegs | tmpMask, RBM_LASTUSE);
+ }
+ else
+ {
+ tmpMask = rpPredictTreeRegUse(op1, PREDICT_PAIR_TMP, lockedRegs, rsvdRegs | op2->gtRsvdRegs);
+ tmpMask |=
+ rpPredictTreeRegUse(op2, PREDICT_REG, lockedRegs | tmpMask | RBM_PAIR_TMP, RBM_LASTUSE);
+ }
+ regMask = RBM_PAIR_TMP;
+#else // !_TARGET_X86_ && !_TARGET_ARM_
+#error "Non-ARM or x86 _TARGET_ in RegPredict for 64-bit MOD"
+#endif // !_TARGET_X86_ && !_TARGET_ARM_
+
+ tree->gtUsedRegs =
+ (regMaskSmall)(regMask | op1->gtUsedRegs | op2->gtUsedRegs |
+ rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, regMask | tmpMask));
+
+ goto RETURN_CHECK;
+ }
+#endif // _TARGET_64BIT_
+
+                /* There is no divide-by-immediate instruction, so force an integer
+                 * constant that is not a power of two into a register
+                 */
+
+ if (op2->OperKind() & GTK_CONST)
+ {
+ ssize_t ival = op2->gtIntConCommon.IconValue();
+
+ /* Is the divisor a power of 2 ? */
+
+ if (ival > 0 && genMaxOneBit(size_t(ival)))
+ {
+ goto GENERIC_UNARY;
+ }
+ else
+ op2PredictReg = PREDICT_SCRATCH_REG;
+ }
+ else
+ {
+ /* Non integer constant also must be enregistered */
+ op2PredictReg = PREDICT_REG;
+ }
+
+ regMaskTP trashedMask;
+ trashedMask = DUMMY_INIT(RBM_ILLEGAL);
+ regMaskTP op1ExcludeMask;
+ op1ExcludeMask = DUMMY_INIT(RBM_ILLEGAL);
+ regMaskTP op2ExcludeMask;
+ op2ExcludeMask = DUMMY_INIT(RBM_ILLEGAL);
+
+#ifdef _TARGET_XARCH_
+ /* Consider the case "a / b" - we'll need to trash EDX (via "CDQ") before
+ * we can safely allow the "b" value to die. Unfortunately, if we simply
+ * mark the node "b" as using EDX, this will not work if "b" is a register
+ * variable that dies with this particular reference. Thus, if we want to
+ * avoid this situation (where we would have to spill the variable from
+ * EDX to someplace else), we need to explicitly mark the interference
+ * of the variable at this point.
+ */
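+                    // Illustrative x86 sequence for "a / b":
+                    //     mov  eax, a
+                    //     cdq            ; sign-extend EAX into EDX, trashing EDX
+                    //     idiv b         ; quotient in EAX, remainder in EDX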
+
+ if (op2->gtOper == GT_LCL_VAR)
+ {
+ unsigned lclNum = op2->gtLclVarCommon.gtLclNum;
+ varDsc = lvaTable + lclNum;
+ if (varDsc->lvTracked)
+ {
+#ifdef DEBUG
+ if (verbose)
+ {
+ if (!VarSetOps::IsMember(this, raLclRegIntf[REG_EAX], varDsc->lvVarIndex))
+ printf("Record interference between V%02u,T%02u and EAX -- int divide\n", lclNum,
+ varDsc->lvVarIndex);
+ if (!VarSetOps::IsMember(this, raLclRegIntf[REG_EDX], varDsc->lvVarIndex))
+ printf("Record interference between V%02u,T%02u and EDX -- int divide\n", lclNum,
+ varDsc->lvVarIndex);
+ }
+#endif
+ VarSetOps::AddElemD(this, raLclRegIntf[REG_EAX], varDsc->lvVarIndex);
+ VarSetOps::AddElemD(this, raLclRegIntf[REG_EDX], varDsc->lvVarIndex);
+ }
+ }
+
+ /* set the held register based on opcode */
+ if (oper == GT_DIV || oper == GT_UDIV)
+ regMask = RBM_EAX;
+ else
+ regMask = RBM_EDX;
+ trashedMask = (RBM_EAX | RBM_EDX);
+ op1ExcludeMask = 0;
+ op2ExcludeMask = (RBM_EAX | RBM_EDX);
+
+#endif // _TARGET_XARCH_
+
+#ifdef _TARGET_ARM_
+ trashedMask = RBM_NONE;
+ op1ExcludeMask = RBM_NONE;
+ op2ExcludeMask = RBM_NONE;
+#endif
+
+ /* set the lvPref reg if possible */
+ GenTreePtr dest;
+ /*
+ * Walking the gtNext link twice from here should get us back
+ * to our parent node, if this is an simple assignment tree.
+                 * to our parent node, if this is a simple assignment tree.
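+                // E.g. in linear order "lcl = a / b" appears as ... div -> lclVar(lcl) ->
+                // asg, so tree->gtNext is the destination LCL_VAR and its gtNext is the
+                // assignment node checked below.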
+ dest = tree->gtNext;
+ if (dest && (dest->gtOper == GT_LCL_VAR) && dest->gtNext && (dest->gtNext->OperKind() & GTK_ASGOP) &&
+ dest->gtNext->gtOp.gtOp2 == tree)
+ {
+ varDsc = lvaTable + dest->gtLclVarCommon.gtLclNum;
+ varDsc->addPrefReg(regMask, this);
+ }
+#ifdef _TARGET_XARCH_
+ op1PredictReg = PREDICT_REG_EDX; /* Normally target op1 into EDX */
+#else
+ op1PredictReg = PREDICT_SCRATCH_REG;
+#endif
+
+ /* are we supposed to evaluate op2 first? */
+ if (tree->gtFlags & GTF_REVERSE_OPS)
+ {
+ tmpMask = rpPredictTreeRegUse(op2, op2PredictReg, lockedRegs | op2ExcludeMask,
+ rsvdRegs | op1->gtRsvdRegs);
+ rpPredictTreeRegUse(op1, op1PredictReg, lockedRegs | tmpMask | op1ExcludeMask, RBM_LASTUSE);
+ }
+ else
+ {
+ tmpMask = rpPredictTreeRegUse(op1, op1PredictReg, lockedRegs | op1ExcludeMask,
+ rsvdRegs | op2->gtRsvdRegs);
+ rpPredictTreeRegUse(op2, op2PredictReg, tmpMask | lockedRegs | op2ExcludeMask, RBM_LASTUSE);
+ }
+#ifdef _TARGET_ARM_
+ regMask = tmpMask;
+#endif
+ /* grab EAX, EDX for this tree node */
+ tree->gtUsedRegs = (regMaskSmall)trashedMask | op1->gtUsedRegs | op2->gtUsedRegs;
+
+ goto RETURN_CHECK;
+
+ case GT_LSH:
+ case GT_RSH:
+ case GT_RSZ:
+
+ if (predictReg <= PREDICT_REG)
+ predictReg = PREDICT_SCRATCH_REG;
+
+#ifndef _TARGET_64BIT_
+ if (type == TYP_LONG)
+ {
+ if (op2->IsCnsIntOrI())
+ {
+ regMask = rpPredictTreeRegUse(op1, predictReg, lockedRegs, rsvdRegs);
+ // no register used by op2
+ op2->gtUsedRegs = 0;
+ tree->gtUsedRegs = op1->gtUsedRegs;
+ }
+ else
+ {
+ // since RBM_LNGARG_0 and RBM_SHIFT_LNG are hardwired we can't have them in the locked registers
+ tmpMask = lockedRegs;
+ tmpMask &= ~RBM_LNGARG_0;
+ tmpMask &= ~RBM_SHIFT_LNG;
+
+ // op2 goes to RBM_SHIFT, op1 to the RBM_LNGARG_0 pair
+ if (tree->gtFlags & GTF_REVERSE_OPS)
+ {
+ rpPredictTreeRegUse(op2, PREDICT_REG_SHIFT_LNG, tmpMask, RBM_NONE);
+ tmpMask |= RBM_SHIFT_LNG;
+                            // Ensure that the RBM_SHIFT_LNG register interferes with op2's compCurLife
+ // Fix 383843 X86/ARM ILGEN
+ rpRecordRegIntf(RBM_SHIFT_LNG, compCurLife DEBUGARG("SHIFT_LNG arg setup"));
+ rpPredictTreeRegUse(op1, PREDICT_PAIR_LNGARG_0, tmpMask, RBM_LASTUSE);
+ }
+ else
+ {
+ rpPredictTreeRegUse(op1, PREDICT_PAIR_LNGARG_0, tmpMask, RBM_NONE);
+ tmpMask |= RBM_LNGARG_0;
+ // Ensure that the RBM_LNGARG_0 registers interfere with op1's compCurLife
+ // Fix 383839 ARM ILGEN
+ rpRecordRegIntf(RBM_LNGARG_0, compCurLife DEBUGARG("LNGARG_0 arg setup"));
+ rpPredictTreeRegUse(op2, PREDICT_REG_SHIFT_LNG, tmpMask, RBM_LASTUSE);
+ }
+ regMask = RBM_LNGRET; // function return registers
+ op1->gtUsedRegs |= RBM_LNGARG_0;
+ op2->gtUsedRegs |= RBM_SHIFT_LNG;
+
+ tree->gtUsedRegs = op1->gtUsedRegs | op2->gtUsedRegs;
+
+                            // We are using a helper function to do the shift:
+ //
+ tree->gtUsedRegs |= RBM_CALLEE_TRASH;
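+                            // Sketch of the helper convention assumed here: the long value
+                            // lives in the RBM_LNGARG_0 pair, the count in RBM_SHIFT_LNG,
+                            // and the result comes back in RBM_LNGRET (the masks used above).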
+ }
+ }
+ else
+#endif // _TARGET_64BIT_
+ {
+#ifdef _TARGET_XARCH_
+ if (!op2->IsCnsIntOrI())
+ predictReg = PREDICT_NOT_REG_ECX;
+#endif
+
+ HANDLE_SHIFT_COUNT:
+ // Note that this code is also used by assigning shift operators (i.e. GT_ASG_LSH)
+
+ regMaskTP tmpRsvdRegs;
+
+ if ((tree->gtFlags & GTF_REVERSE_OPS) == 0)
+ {
+ regMask = rpPredictTreeRegUse(op1, predictReg, lockedRegs, rsvdRegs | op2->gtRsvdRegs);
+ rsvdRegs = RBM_LASTUSE;
+ tmpRsvdRegs = RBM_NONE;
+ }
+ else
+ {
+ regMask = RBM_NONE;
+                        // Special case: op1 is a constant
+ if (op1->IsCnsIntOrI())
+ tmpRsvdRegs = RBM_LASTUSE; // Allow a last use to occur in op2; See
+ // System.Xml.Schema.BitSet:Get(int):bool
+ else
+ tmpRsvdRegs = op1->gtRsvdRegs;
+ }
+
+ op2Mask = RBM_NONE;
+ if (!op2->IsCnsIntOrI())
+ {
+ if ((REG_SHIFT != REG_NA) && ((RBM_SHIFT & tmpRsvdRegs) == 0))
+ {
+ op2PredictReg = PREDICT_REG_SHIFT;
+ }
+ else
+ {
+ op2PredictReg = PREDICT_REG;
+ }
+
+ /* evaluate shift count into a register, likely the PREDICT_REG_SHIFT register */
+ op2Mask = rpPredictTreeRegUse(op2, op2PredictReg, lockedRegs | regMask, tmpRsvdRegs);
+
+ // If our target arch has a REG_SHIFT register then
+                            // we set the PrefReg when we have a LclVar for op2, and
+ // we add an interference with REG_SHIFT for any other LclVars alive at op2
+ if (REG_SHIFT != REG_NA)
+ {
+ VARSET_TP VARSET_INIT(this, liveSet, compCurLife);
+
+ while (op2->gtOper == GT_COMMA)
+ {
+ op2 = op2->gtOp.gtOp2;
+ }
+
+ if (op2->gtOper == GT_LCL_VAR)
+ {
+ varDsc = lvaTable + op2->gtLclVarCommon.gtLclNum;
+ varDsc->setPrefReg(REG_SHIFT, this);
+ if (varDsc->lvTracked)
+ {
+ VarSetOps::RemoveElemD(this, liveSet, varDsc->lvVarIndex);
+ }
+ }
+
+ // Ensure that we have a register interference with the LclVar in tree's LiveSet,
+ // excluding the LclVar that was used for the shift amount as it is read-only
+ // and can be kept alive through the shift operation
+ //
+ rpRecordRegIntf(RBM_SHIFT, liveSet DEBUGARG("Variable Shift Register"));
+ // In case op2Mask doesn't contain the required shift register,
+                            // we will OR it in now.
+ op2Mask |= RBM_SHIFT;
+ }
+ }
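+                        // E.g. on x86 REG_SHIFT is ECX: "shl eax, cl" needs a variable count
+                        // in CL, so op2 is steered to ECX and other live vars interfere with it.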
+
+ if (tree->gtFlags & GTF_REVERSE_OPS)
+ {
+ assert(regMask == RBM_NONE);
+ regMask = rpPredictTreeRegUse(op1, predictReg, lockedRegs | op2Mask, rsvdRegs | RBM_LASTUSE);
+ }
+
+#if CPU_HAS_BYTE_REGS
+ if (varTypeIsByte(type))
+ {
+ // Fix 383789 X86 ILGEN
+ // Fix 383813 X86 ILGEN
+ // Fix 383828 X86 ILGEN
+ if (op1->gtOper == GT_LCL_VAR)
+ {
+ varDsc = lvaTable + op1->gtLclVar.gtLclNum;
+ if (varDsc->lvTracked)
+ {
+ VARSET_TP VARSET_INIT_NOCOPY(op1VarBit,
+ VarSetOps::MakeSingleton(this, varDsc->lvVarIndex));
+
+ // Ensure that we don't assign a Non-Byteable register for op1's LCL_VAR
+ rpRecordRegIntf(RBM_NON_BYTE_REGS, op1VarBit DEBUGARG("Non Byte Register"));
+ }
+ }
+ if ((regMask & RBM_BYTE_REGS) == 0)
+ {
+ // We need to grab a byte-able register, (i.e. EAX, EDX, ECX, EBX)
+ // and we can't select one that is already reserved (i.e. lockedRegs or regMask)
+ //
+ regMask |=
+ rpPredictRegPick(type, PREDICT_SCRATCH_REG, (lockedRegs | regMask | RBM_NON_BYTE_REGS));
+ }
+ }
+#endif
+ tree->gtUsedRegs = (regMaskSmall)(regMask | op2Mask);
+ }
+
+ goto RETURN_CHECK;
+
+ case GT_COMMA:
+ if (tree->gtFlags & GTF_REVERSE_OPS)
+ {
+ if (predictReg == PREDICT_NONE)
+ {
+ predictReg = PREDICT_REG;
+ }
+ else if (rpHasVarIndexForPredict(predictReg))
+ {
+ /* Don't propagate the use of tgt reg use in a GT_COMMA */
+ predictReg = PREDICT_SCRATCH_REG;
+ }
+
+ regMask = rpPredictTreeRegUse(op2, predictReg, lockedRegs, rsvdRegs);
+ rpPredictTreeRegUse(op1, PREDICT_NONE, lockedRegs | regMask, RBM_LASTUSE);
+ }
+ else
+ {
+ rpPredictTreeRegUse(op1, PREDICT_NONE, lockedRegs, RBM_LASTUSE);
+
+ /* CodeGen will enregister the op2 side of a GT_COMMA */
+ if (predictReg == PREDICT_NONE)
+ {
+ predictReg = PREDICT_REG;
+ }
+ else if (rpHasVarIndexForPredict(predictReg))
+ {
+ /* Don't propagate the use of tgt reg use in a GT_COMMA */
+ predictReg = PREDICT_SCRATCH_REG;
+ }
+
+ regMask = rpPredictTreeRegUse(op2, predictReg, lockedRegs, rsvdRegs);
+ }
+ // tree should only accumulate the used registers from the op2 side of the GT_COMMA
+ //
+ tree->gtUsedRegs = op2->gtUsedRegs;
+ if ((op2->gtOper == GT_LCL_VAR) && (rsvdRegs != 0))
+ {
+ LclVarDsc* op2VarDsc = lvaTable + op2->gtLclVarCommon.gtLclNum;
+
+ if (op2VarDsc->lvTracked)
+ {
+ VARSET_TP VARSET_INIT_NOCOPY(op2VarBit, VarSetOps::MakeSingleton(this, op2VarDsc->lvVarIndex));
+ rpRecordRegIntf(rsvdRegs, op2VarBit DEBUGARG("comma use"));
+ }
+ }
+ goto RETURN_CHECK;
+
+ case GT_QMARK:
+ {
+ noway_assert(op1 != NULL && op2 != NULL);
+
+ /*
+                     * If gtUsedRegs conflicts with lockedRegs
+                     * then we are going to have to spill some registers
+                     * into the non-trashed register set to keep them alive
+ */
+ unsigned spillCnt;
+ spillCnt = 0;
+ regMaskTP spillRegs;
+ spillRegs = lockedRegs & tree->gtUsedRegs;
+
+ while (spillRegs)
+ {
+ /* Find the next register that needs to be spilled */
+ tmpMask = genFindLowestBit(spillRegs);
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Predict spill of %s before: ", getRegName(genRegNumFromMask(tmpMask)));
+ gtDispTree(tree, 0, NULL, true);
+ }
+#endif
+ /* In Codegen it will typically introduce a spill temp here */
+ /* rather than relocating the register to a non trashed reg */
+ rpPredictSpillCnt++;
+ spillCnt++;
+
+ /* Remove it from the spillRegs and lockedRegs*/
+ spillRegs &= ~tmpMask;
+ lockedRegs &= ~tmpMask;
+ }
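+                    // E.g. if lockedRegs and this qmark's gtUsedRegs overlap in one register,
+                    // spillCnt becomes 1 and a matching reload register is picked further down.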
+ {
+ VARSET_TP VARSET_INIT(this, startQmarkCondUseInPlaceVars, rpUseInPlace);
+
+ /* Evaluate the <cond> subtree */
+ rpPredictTreeRegUse(op1, PREDICT_NONE, lockedRegs, RBM_LASTUSE);
+ VarSetOps::Assign(this, rpUseInPlace, startQmarkCondUseInPlaceVars);
+ tree->gtUsedRegs = op1->gtUsedRegs;
+
+ noway_assert(op2->gtOper == GT_COLON);
+ if (rpHasVarIndexForPredict(predictReg) && ((op2->gtFlags & (GTF_ASG | GTF_CALL)) != 0))
+ {
+ // Don't try to target the register specified in predictReg when we have complex subtrees
+ //
+ predictReg = PREDICT_SCRATCH_REG;
+ }
+ GenTreePtr elseTree = op2->AsColon()->ElseNode();
+ GenTreePtr thenTree = op2->AsColon()->ThenNode();
+
+ noway_assert(thenTree != NULL && elseTree != NULL);
+
+ // Update compCurLife to only those vars live on the <then> subtree
+
+ VarSetOps::Assign(this, compCurLife, tree->gtQmark.gtThenLiveSet);
+
+ if (type == TYP_VOID)
+ {
+ /* Evaluate the <then> subtree */
+ rpPredictTreeRegUse(thenTree, PREDICT_NONE, lockedRegs, RBM_LASTUSE);
+ regMask = RBM_NONE;
+ predictReg = PREDICT_NONE;
+ }
+ else
+ {
+ // A mask to use to force the predictor to choose low registers (to reduce code size)
+ regMaskTP avoidRegs = RBM_NONE;
+#ifdef _TARGET_ARM_
+ avoidRegs = (RBM_R12 | RBM_LR);
+#endif
+ if (predictReg <= PREDICT_REG)
+ predictReg = PREDICT_SCRATCH_REG;
+
+ /* Evaluate the <then> subtree */
+ regMask =
+ rpPredictTreeRegUse(thenTree, predictReg, lockedRegs, rsvdRegs | avoidRegs | RBM_LASTUSE);
+
+ if (regMask)
+ {
+ rpPredictReg op1PredictReg = rpGetPredictForMask(regMask);
+ if (op1PredictReg != PREDICT_NONE)
+ predictReg = op1PredictReg;
+ }
+ }
+
+ VarSetOps::Assign(this, rpUseInPlace, startQmarkCondUseInPlaceVars);
+
+ /* Evaluate the <else> subtree */
+ // First record the post-then liveness, and reset the current liveness to the else
+ // branch liveness.
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef DEBUG
+ VARSET_TP VARSET_INIT(this, postThenLive, compCurLife);
+#endif
+
+ VarSetOps::Assign(this, compCurLife, tree->gtQmark.gtElseLiveSet);
+
+ rpPredictTreeRegUse(elseTree, predictReg, lockedRegs, rsvdRegs | RBM_LASTUSE);
+ tree->gtUsedRegs |= thenTree->gtUsedRegs | elseTree->gtUsedRegs;
+
+ // The then and the else are "virtual basic blocks" that form a control-flow diamond.
+ // They each have only one successor, which they share. Their live-out sets must equal the
+ // live-in set of this virtual successor block, and thus must be the same. We can assert
+ // that equality here.
+ assert(VarSetOps::Equal(this, compCurLife, postThenLive));
+
+ if (spillCnt > 0)
+ {
+ regMaskTP reloadMask = RBM_NONE;
+
+ while (spillCnt)
+ {
+ regMaskTP reloadReg;
+
+ /* Get an extra register to hold it */
+ reloadReg = rpPredictRegPick(TYP_INT, PREDICT_REG, lockedRegs | regMask | reloadMask);
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Predict reload into %s after : ", getRegName(genRegNumFromMask(reloadReg)));
+ gtDispTree(tree, 0, NULL, true);
+ }
+#endif
+ reloadMask |= reloadReg;
+
+ spillCnt--;
+ }
+
+ /* update the gtUsedRegs mask */
+ tree->gtUsedRegs |= reloadMask;
+ }
+ }
+
+ goto RETURN_CHECK;
+ }
+ case GT_RETURN:
+ tree->gtUsedRegs = RBM_NONE;
+ regMask = RBM_NONE;
+
+ /* Is there a return value? */
+ if (op1 != NULL)
+ {
+#if FEATURE_FP_REGALLOC
+ if (varTypeIsFloating(type))
+ {
+ predictReg = PREDICT_FLTRET;
+ if (type == TYP_FLOAT)
+ regMask = RBM_FLOATRET;
+ else
+ regMask = RBM_DOUBLERET;
+ }
+ else
+#endif
+ if (isRegPairType(type))
+ {
+ predictReg = PREDICT_LNGRET;
+ regMask = RBM_LNGRET;
+ }
+ else
+ {
+ predictReg = PREDICT_INTRET;
+ regMask = RBM_INTRET;
+ }
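+                // On x86, for example, RBM_INTRET is EAX and RBM_LNGRET is the EDX:EAX pair.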
+ if (info.compCallUnmanaged)
+ {
+ lockedRegs |= (RBM_PINVOKE_TCB | RBM_PINVOKE_FRAME);
+ }
+ rpPredictTreeRegUse(op1, predictReg, lockedRegs, RBM_LASTUSE);
+ tree->gtUsedRegs = op1->gtUsedRegs | (regMaskSmall)regMask;
+ }
+
+#if defined(_TARGET_ARM_) && defined(PROFILING_SUPPORTED)
+        // When on ARM under the profiler, emitting the Leave callback requires RBM_PROFILER_RET_USED.
+        // We could optimize the registers based on int/long or no return value. But to
+        // keep it simple we will mark the entire RBM_PROFILER_RET_USED set as used regs here.
+ if (compIsProfilerHookNeeded())
+ {
+ tree->gtUsedRegs |= RBM_PROFILER_RET_USED;
+ }
+
+#endif
+ goto RETURN_CHECK;
+
+ case GT_RETFILT:
+ if (op1 != NULL)
+ {
+ rpPredictTreeRegUse(op1, PREDICT_NONE, lockedRegs, RBM_LASTUSE);
+ regMask = genReturnRegForTree(tree);
+ tree->gtUsedRegs = op1->gtUsedRegs | (regMaskSmall)regMask;
+ goto RETURN_CHECK;
+ }
+ tree->gtUsedRegs = 0;
+ regMask = 0;
+
+ goto RETURN_CHECK;
+
+ case GT_JTRUE:
+ /* This must be a test of a relational operator */
+
+ noway_assert(op1->OperIsCompare());
+
+ /* Only condition code set by this operation */
+
+ rpPredictTreeRegUse(op1, PREDICT_NONE, lockedRegs, RBM_NONE);
+
+ tree->gtUsedRegs = op1->gtUsedRegs;
+ regMask = 0;
+
+ goto RETURN_CHECK;
+
+ case GT_SWITCH:
+ noway_assert(type <= TYP_INT);
+ noway_assert(compCurBB->bbJumpKind == BBJ_SWITCH);
+#ifdef _TARGET_ARM_
+ {
+ regMask = rpPredictTreeRegUse(op1, PREDICT_REG, lockedRegs, RBM_NONE);
+ unsigned jumpCnt = compCurBB->bbJumpSwt->bbsCount;
+ if (jumpCnt > 2)
+ {
+ // Table based switch requires an extra register for the table base
+ regMask |= rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, lockedRegs | regMask);
+ }
+ tree->gtUsedRegs = op1->gtUsedRegs | regMask;
+ }
+#else // !_TARGET_ARM_
+ rpPredictTreeRegUse(op1, PREDICT_REG, lockedRegs, RBM_NONE);
+ tree->gtUsedRegs = op1->gtUsedRegs;
+#endif // _TARGET_ARM_
+ regMask = 0;
+ goto RETURN_CHECK;
+
+ case GT_CKFINITE:
+ if (predictReg <= PREDICT_REG)
+ predictReg = PREDICT_SCRATCH_REG;
+
+ rpPredictTreeRegUse(op1, predictReg, lockedRegs, rsvdRegs);
+ // Need a reg to load exponent into
+ regMask = rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, lockedRegs | rsvdRegs);
+ tree->gtUsedRegs = (regMaskSmall)regMask | op1->gtUsedRegs;
+ goto RETURN_CHECK;
+
+ case GT_LCLHEAP:
+ regMask = rpPredictTreeRegUse(op1, PREDICT_SCRATCH_REG, lockedRegs, rsvdRegs);
+ op2Mask = 0;
+
+#ifdef _TARGET_ARM_
+ if (info.compInitMem)
+ {
+ // We zero out two registers in the ARM codegen path
+ op2Mask |=
+ rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, lockedRegs | rsvdRegs | regMask | op2Mask);
+ }
+#endif
+
+ op1->gtUsedRegs |= (regMaskSmall)regMask;
+ tree->gtUsedRegs = op1->gtUsedRegs | (regMaskSmall)op2Mask;
+
+ // The result will be put in the reg we picked for the size
+ // regMask = <already set as we want it to be>
+
+ goto RETURN_CHECK;
+
+ case GT_OBJ:
+ {
+#ifdef _TARGET_ARM_
+ if (predictReg <= PREDICT_REG)
+ predictReg = PREDICT_SCRATCH_REG;
+
+ regMaskTP avoidRegs = (RBM_R12 | RBM_LR); // A mask to use to force the predictor to choose low
+ // registers (to reduce code size)
+ regMask = RBM_NONE;
+ tmpMask = rpPredictTreeRegUse(op1, predictReg, lockedRegs, rsvdRegs | avoidRegs);
+#endif
+
+ if (fgIsIndirOfAddrOfLocal(tree) != NULL)
+ {
+ compUpdateLifeVar</*ForCodeGen*/ false>(tree);
+ }
+
+#ifdef _TARGET_ARM_
+ unsigned objSize = info.compCompHnd->getClassSize(tree->gtObj.gtClass);
+ regMaskTP preferReg = rpPredictRegMask(predictReg, TYP_I_IMPL);
+ // If it has one bit set, and that's an arg reg...
+ if (preferReg != RBM_NONE && genMaxOneBit(preferReg) && ((preferReg & RBM_ARG_REGS) != 0))
+ {
+ // We are passing the 'obj' in the argument registers
+ //
+ regNumber rn = genRegNumFromMask(preferReg);
+
+ // Add the registers used to pass the 'obj' to regMask.
+ for (unsigned i = 0; i < objSize / 4; i++)
+ {
+ if (rn == MAX_REG_ARG)
+ break;
+ // Otherwise...
+ regMask |= genRegMask(rn);
+ rn = genRegArgNext(rn);
+ }
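+                // For example, a 12-byte 'obj' whose first slot targets r1 would add
+                // r1, r2 and r3 here (stopping early if we walk past the last arg reg,
+                // since MAX_REG_ARG is 4 on ARM).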
+ }
+ else
+ {
+ // We are passing the 'obj' in the outgoing arg space
+ // We will need one register to load into unless the 'obj' size is 4 or less.
+ //
+ if (objSize > 4)
+ {
+ regMask = rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, lockedRegs | tmpMask | avoidRegs);
+ }
+ }
+ tree->gtUsedRegs = (regMaskSmall)(regMask | tmpMask);
+ goto RETURN_CHECK;
+#else // !_TARGET_ARM
+ goto GENERIC_UNARY;
+#endif // _TARGET_ARM_
+ }
+
+ case GT_MKREFANY:
+ {
+#ifdef _TARGET_ARM_
+ regMaskTP preferReg = rpPredictRegMask(predictReg, TYP_I_IMPL);
+ regMask = RBM_NONE;
+ if ((((preferReg - 1) & preferReg) == 0) && ((preferReg & RBM_ARG_REGS) != 0))
+ {
+ // A MKREFANY takes up two registers.
+ regNumber rn = genRegNumFromMask(preferReg);
+ regMask = RBM_NONE;
+ if (rn < MAX_REG_ARG)
+ {
+ regMask |= genRegMask(rn);
+ rn = genRegArgNext(rn);
+ if (rn < MAX_REG_ARG)
+ regMask |= genRegMask(rn);
+ }
+ }
+ if (regMask != RBM_NONE)
+ {
+ // Condensation of GENERIC_BINARY path.
+ assert((tree->gtFlags & GTF_REVERSE_OPS) == 0);
+ op2PredictReg = PREDICT_REG;
+ regMaskTP regMaskOp1 = rpPredictTreeRegUse(op1, predictReg, lockedRegs, rsvdRegs | op2->gtRsvdRegs);
+ rpPredictTreeRegUse(op2, op2PredictReg, lockedRegs | regMaskOp1, RBM_LASTUSE);
+ regMask |= op1->gtUsedRegs | op2->gtUsedRegs;
+ tree->gtUsedRegs = (regMaskSmall)regMask;
+ goto RETURN_CHECK;
+ }
+ tree->gtUsedRegs = op1->gtUsedRegs;
+#endif // _TARGET_ARM_
+ goto GENERIC_BINARY;
+ }
+
+ case GT_BOX:
+ goto GENERIC_UNARY;
+
+ case GT_LOCKADD:
+ goto GENERIC_BINARY;
+
+ case GT_XADD:
+ case GT_XCHG:
+ // Ensure we can write to op2. op2 will hold the output.
+ if (predictReg < PREDICT_SCRATCH_REG)
+ predictReg = PREDICT_SCRATCH_REG;
+
+ if (tree->gtFlags & GTF_REVERSE_OPS)
+ {
+ op2Mask = rpPredictTreeRegUse(op2, predictReg, lockedRegs, rsvdRegs);
+ regMask = rpPredictTreeRegUse(op1, PREDICT_REG, lockedRegs, rsvdRegs | op2Mask);
+ }
+ else
+ {
+ regMask = rpPredictTreeRegUse(op1, PREDICT_REG, lockedRegs, rsvdRegs);
+ op2Mask = rpPredictTreeRegUse(op2, PREDICT_SCRATCH_REG, lockedRegs, rsvdRegs | regMask);
+ }
+ tree->gtUsedRegs = (regMaskSmall)(regMask | op2Mask);
+ goto RETURN_CHECK;
+
+ case GT_ARR_LENGTH:
+ goto GENERIC_UNARY;
+
+ default:
+#ifdef DEBUG
+ gtDispTree(tree);
+#endif
+ noway_assert(!"unexpected simple operator in reg use prediction");
+ break;
+ }
+ }
+
+ /* See what kind of a special operator we have here */
+
+ switch (oper)
+ {
+ GenTreePtr args;
+ GenTreeArgList* list;
+ regMaskTP keepMask;
+ unsigned regArgsNum;
+ int regIndex;
+ regMaskTP regArgMask;
+ regMaskTP curArgMask;
+
+ case GT_CALL:
+
+ {
+
+ /* initialize so we can just or in various bits */
+ tree->gtUsedRegs = RBM_NONE;
+
+#if GTF_CALL_REG_SAVE
+ /*
+ * Unless the GTF_CALL_REG_SAVE flag is set,
+ * we can't preserve the RBM_CALLEE_TRASH registers.
+ * (likewise we can't preserve the return registers)
+ * So we remove them from the lockedRegs set and
+ * record any of them in the keepMask
+ */
+
+ if (tree->gtFlags & GTF_CALL_REG_SAVE)
+ {
+ regMaskTP trashMask = genReturnRegForTree(tree);
+
+ keepMask = lockedRegs & trashMask;
+ lockedRegs &= ~trashMask;
+ }
+ else
+#endif
+ {
+ keepMask = lockedRegs & RBM_CALLEE_TRASH;
+ lockedRegs &= ~RBM_CALLEE_TRASH;
+ }
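+            // keepMask remembers the locked registers we just released around the call;
+            // they are OR'ed back into lockedRegs once the call has been processed
+            // (see "After the call restore the original value of lockedRegs" below).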
+
+ regArgsNum = 0;
+ regIndex = 0;
+
+ /* Is there an object pointer? */
+ if (tree->gtCall.gtCallObjp)
+ {
+ /* Evaluate the instance pointer first */
+
+ args = tree->gtCall.gtCallObjp;
+
+ /* the objPtr always goes to an integer register (through temp or directly) */
+ noway_assert(regArgsNum == 0);
+ regArgsNum++;
+
+ /* Must be passed in a register */
+
+ noway_assert(args->gtFlags & GTF_LATE_ARG);
+
+ /* Must be either a deferred reg arg node or a GT_ASG node */
+
+ noway_assert(args->IsArgPlaceHolderNode() || args->IsNothingNode() || (args->gtOper == GT_ASG) ||
+ args->OperIsCopyBlkOp() || (args->gtOper == GT_COMMA));
+
+ if (!args->IsArgPlaceHolderNode())
+ {
+ rpPredictTreeRegUse(args, PREDICT_NONE, lockedRegs, RBM_LASTUSE);
+ }
+ }
+ VARSET_TP VARSET_INIT_NOCOPY(startArgUseInPlaceVars, VarSetOps::UninitVal());
+ VarSetOps::Assign(this, startArgUseInPlaceVars, rpUseInPlace);
+
+ /* process argument list */
+ for (list = tree->gtCall.gtCallArgs; list; list = list->Rest())
+ {
+ args = list->Current();
+
+ if (args->gtFlags & GTF_LATE_ARG)
+ {
+ /* Must be either a Placeholder/NOP node or a GT_ASG node */
+
+ noway_assert(args->IsArgPlaceHolderNode() || args->IsNothingNode() || (args->gtOper == GT_ASG) ||
+ args->OperIsCopyBlkOp() || (args->gtOper == GT_COMMA));
+
+ if (!args->IsArgPlaceHolderNode())
+ {
+ rpPredictTreeRegUse(args, PREDICT_NONE, lockedRegs, RBM_LASTUSE);
+ }
+
+ regArgsNum++;
+ }
+ else
+ {
+#ifdef FEATURE_FIXED_OUT_ARGS
+ // We'll store this argument into the outgoing argument area
+ // It needs to be in a register to be stored.
+ //
+ predictReg = PREDICT_REG;
+
+#else // !FEATURE_FIXED_OUT_ARGS
+ // We'll generate a push for this argument
+ //
+ predictReg = PREDICT_NONE;
+ if (varTypeIsSmall(args->TypeGet()))
+ {
+ /* We may need to sign or zero extend a small type using a register */
+ predictReg = PREDICT_SCRATCH_REG;
+ }
+#endif
+
+ rpPredictTreeRegUse(args, predictReg, lockedRegs, RBM_LASTUSE);
+ }
+ VarSetOps::Assign(this, rpUseInPlace, startArgUseInPlaceVars);
+ tree->gtUsedRegs |= args->gtUsedRegs;
+ }
+
+ /* Is there a late argument list */
+
+ regIndex = 0;
+ regArgMask = RBM_NONE; // Set of argument registers that have already been setup.
+ args = NULL;
+
+ /* process the late argument list */
+ for (list = tree->gtCall.gtCallLateArgs; list; regIndex++)
+ {
+ // If the current argument being copied is a promoted struct local, set this pointer to its description.
+ LclVarDsc* promotedStructLocal = NULL;
+
+ curArgMask = RBM_NONE; // Set of argument registers that are going to be setup by this arg
+ tmpMask = RBM_NONE; // Set of additional temp registers that are need only to setup the current arg
+
+ assert(list->IsList());
+
+ args = list->Current();
+ list = list->Rest();
+
+                assert(!args->IsArgPlaceHolderNode()); // No placeholder nodes are in gtCallLateArgs
+
+ fgArgTabEntryPtr curArgTabEntry = gtArgEntryByNode(tree, args);
+ assert(curArgTabEntry);
+
+                regNumber regNum = curArgTabEntry->regNum; // first register used to pass this argument
+ unsigned numSlots =
+ curArgTabEntry->numSlots; // number of outgoing arg stack slots used by this argument
+
+ rpPredictReg argPredictReg;
+ regMaskTP avoidReg = RBM_NONE;
+
+ if (regNum != REG_STK)
+ {
+ argPredictReg = rpGetPredictForReg(regNum);
+ curArgMask |= genRegMask(regNum);
+ }
+ else
+ {
+ assert(numSlots > 0);
+ argPredictReg = PREDICT_NONE;
+#ifdef _TARGET_ARM_
+ // Force the predictor to choose a low register when regNum is REG_STK to reduce code bloat
+ avoidReg = (RBM_R12 | RBM_LR);
+#endif
+ }
+
+#ifdef _TARGET_ARM_
+ // For TYP_LONG or TYP_DOUBLE register arguments we need to add the second argument register
+ //
+ if ((regNum != REG_STK) && ((args->TypeGet() == TYP_LONG) || (args->TypeGet() == TYP_DOUBLE)))
+ {
+ // 64-bit longs and doubles require 2 consecutive argument registers
+ curArgMask |= genRegMask(REG_NEXT(regNum));
+ }
+ else if (args->TypeGet() == TYP_STRUCT)
+ {
+ GenTreePtr argx = args;
+ GenTreePtr lclVarTree = NULL;
+
+                        /* The GT_OBJ may be a child of a GT_COMMA */
+ while (argx->gtOper == GT_COMMA)
+ {
+ argx = argx->gtOp.gtOp2;
+ }
+ unsigned originalSize = 0;
+
+ if (argx->gtOper == GT_OBJ)
+ {
+ originalSize = info.compCompHnd->getClassSize(argx->gtObj.gtClass);
+
+ // Is it the address of a promoted struct local?
+ if (argx->gtObj.gtOp1->gtOper == GT_ADDR && argx->gtObj.gtOp1->gtOp.gtOp1->gtOper == GT_LCL_VAR)
+ {
+ lclVarTree = argx->gtObj.gtOp1->gtOp.gtOp1;
+ LclVarDsc* varDsc = &lvaTable[lclVarTree->gtLclVarCommon.gtLclNum];
+ if (varDsc->lvPromoted)
+ promotedStructLocal = varDsc;
+ }
+ }
+ else if (argx->gtOper == GT_LCL_VAR)
+ {
+ varDsc = lvaTable + argx->gtLclVarCommon.gtLclNum;
+ originalSize = varDsc->lvSize();
+
+ // Is it a promoted struct local?
+ if (varDsc->lvPromoted)
+ promotedStructLocal = varDsc;
+ }
+ else if (argx->gtOper == GT_MKREFANY)
+ {
+ originalSize = 2 * TARGET_POINTER_SIZE;
+ }
+ else
+ {
+ noway_assert(!"Can't predict unsupported TYP_STRUCT arg kind");
+ }
+
+                    // We only pass arguments differently if it is a struct local that is "independently"
+                    // promoted, which allows the field locals to be independently enregistered.
+ if (promotedStructLocal != NULL)
+ {
+ if (lvaGetPromotionType(promotedStructLocal) != PROMOTION_TYPE_INDEPENDENT)
+ promotedStructLocal = NULL;
+ }
+
+ unsigned slots = ((unsigned)(roundUp(originalSize, TARGET_POINTER_SIZE))) / REGSIZE_BYTES;
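+                    // For example, on ARM (TARGET_POINTER_SIZE == REGSIZE_BYTES == 4) a
+                    // 10-byte struct rounds up to 12 bytes and thus occupies 3 slots.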
+
+ // Are we passing a TYP_STRUCT in multiple integer registers?
+ // if so set up curArgMask to reflect this
+ // Also slots is updated to reflect the number of outgoing arg slots that we will write
+ if (regNum != REG_STK)
+ {
+ regNumber regLast = (curArgTabEntry->isHfaRegArg) ? LAST_FP_ARGREG : REG_ARG_LAST;
+ assert(genIsValidReg(regNum));
+ regNumber nextReg = REG_NEXT(regNum);
+ slots--;
+ while (slots > 0 && nextReg <= regLast)
+ {
+ curArgMask |= genRegMask(nextReg);
+ nextReg = REG_NEXT(nextReg);
+ slots--;
+ }
+ }
+
+ if ((promotedStructLocal != NULL) && (curArgMask != RBM_NONE))
+ {
+ // All or a portion of this struct will be placed in the argument registers indicated by
+ // "curArgMask". We build in knowledge of the order in which the code is generated here, so
+ // that the second arg to be evaluated interferes with the reg for the first, the third with
+ // the regs for the first and second, etc. But since we always place the stack slots before
+                        // placing the register slots we do not add interferences for any part of the struct that gets
+ // passed on the stack.
+
+ argPredictReg =
+                            PREDICT_NONE; // We will target the individual fields into registers but not the whole struct
+ regMaskTP prevArgMask = RBM_NONE;
+ for (unsigned i = 0; i < promotedStructLocal->lvFieldCnt; i++)
+ {
+ LclVarDsc* fieldVarDsc = &lvaTable[promotedStructLocal->lvFieldLclStart + i];
+ if (fieldVarDsc->lvTracked)
+ {
+ assert(lclVarTree != NULL);
+ if (prevArgMask != RBM_NONE)
+ {
+ rpRecordRegIntf(prevArgMask, VarSetOps::MakeSingleton(this, fieldVarDsc->lvVarIndex)
+ DEBUGARG("fieldVar/argReg"));
+ }
+ }
+                            // Now see how many registers this uses up.
+ unsigned firstRegOffset = fieldVarDsc->lvFldOffset / TARGET_POINTER_SIZE;
+ unsigned nextAfterLastRegOffset =
+ (fieldVarDsc->lvFldOffset + fieldVarDsc->lvExactSize + TARGET_POINTER_SIZE - 1) /
+ TARGET_POINTER_SIZE;
+ unsigned nextAfterLastArgRegOffset =
+ min(nextAfterLastRegOffset,
+ genIsValidIntReg(regNum) ? REG_NEXT(REG_ARG_LAST) : REG_NEXT(LAST_FP_ARGREG));
+
+ for (unsigned regOffset = firstRegOffset; regOffset < nextAfterLastArgRegOffset;
+ regOffset++)
+ {
+ prevArgMask |= genRegMask(regNumber(regNum + regOffset));
+ }
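+                            // For example, a field at lvFldOffset 4 with lvExactSize 8 has
+                            // firstRegOffset 1 and nextAfterLastRegOffset 3, so it occupies
+                            // register offsets 1 and 2 of the sequence starting at regNum.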
+
+ if (nextAfterLastRegOffset > nextAfterLastArgRegOffset)
+ {
+ break;
+ }
+
+ if ((fieldVarDsc->lvFldOffset % TARGET_POINTER_SIZE) == 0)
+ {
+ // Add the argument register used here as a preferred register for this fieldVarDsc
+ //
+ regNumber firstRegUsed = regNumber(regNum + firstRegOffset);
+ fieldVarDsc->setPrefReg(firstRegUsed, this);
+ }
+ }
+ compUpdateLifeVar</*ForCodeGen*/ false>(argx);
+ }
+
+ // If slots is greater than zero then part or all of this TYP_STRUCT
+ // argument is passed in the outgoing argument area. (except HFA arg)
+ //
+ if ((slots > 0) && !curArgTabEntry->isHfaRegArg)
+ {
+ // We will need a register to address the TYP_STRUCT
+ // Note that we can use an argument register in curArgMask as in
+ // codegen we pass the stack portion of the argument before we
+ // setup the register part.
+ //
+
+ // Force the predictor to choose a LOW_REG here to reduce code bloat
+ avoidReg = (RBM_R12 | RBM_LR);
+
+ assert(tmpMask == RBM_NONE);
+ tmpMask = rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, lockedRegs | regArgMask | avoidReg);
+
+ // If slots > 1 then we will need a second register to perform the load/store into the outgoing
+ // arg area
+ if (slots > 1)
+ {
+ tmpMask |= rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG,
+ lockedRegs | regArgMask | tmpMask | avoidReg);
+ }
+ }
+ } // (args->TypeGet() == TYP_STRUCT)
+#endif // _TARGET_ARM_
+
+ // If we have a promotedStructLocal we don't need to call rpPredictTreeRegUse(args, ...
+ // as we have already calculated the correct tmpMask and curArgMask values and
+                // by calling rpPredictTreeRegUse we would just add unnecessary register interferences.
+ //
+ if (promotedStructLocal == NULL)
+ {
+ /* Target the appropriate argument register */
+ tmpMask |= rpPredictTreeRegUse(args, argPredictReg, lockedRegs | regArgMask, RBM_LASTUSE);
+ }
+
+ // We mark OBJ(ADDR(LOCAL)) with GTF_VAR_DEATH since the local is required to live
+ // for the duration of the OBJ.
+ if (args->OperGet() == GT_OBJ && (args->gtFlags & GTF_VAR_DEATH))
+ {
+ GenTreePtr lclVarTree = fgIsIndirOfAddrOfLocal(args);
+ assert(lclVarTree != NULL); // Or else would not be marked with GTF_VAR_DEATH.
+ compUpdateLifeVar</*ForCodeGen*/ false>(lclVarTree);
+ }
+
+ regArgMask |= curArgMask;
+ args->gtUsedRegs |= (tmpMask | regArgMask);
+ tree->gtUsedRegs |= args->gtUsedRegs;
+ tree->gtCall.gtCallLateArgs->gtUsedRegs |= args->gtUsedRegs;
+
+ if (args->gtUsedRegs != RBM_NONE)
+ {
+ // Add register interference with the set of registers used or in use when we evaluated
+ // the current arg, with whatever is alive after the current arg
+ //
+ rpRecordRegIntf(args->gtUsedRegs, compCurLife DEBUGARG("register arg setup"));
+ }
+ VarSetOps::Assign(this, rpUseInPlace, startArgUseInPlaceVars);
+ }
+ assert(list == NULL);
+
+ regMaskTP callAddrMask;
+ callAddrMask = RBM_NONE;
+#if CPU_LOAD_STORE_ARCH
+ predictReg = PREDICT_SCRATCH_REG;
+#else
+ predictReg = PREDICT_NONE;
+#endif
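+            // On load/store architectures the call target must first be loaded into a
+            // scratch register; on other targets it can be consumed directly from memory.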
+
+ switch (tree->gtFlags & GTF_CALL_VIRT_KIND_MASK)
+ {
+ case GTF_CALL_VIRT_STUB:
+
+ // We only want to record an interference between the virtual stub
+ // param reg and anything that's live AFTER the call, but we've not
+ // yet processed the indirect target. So add RBM_VIRTUAL_STUB_PARAM
+ // to interferingRegs.
+ interferingRegs |= RBM_VIRTUAL_STUB_PARAM;
+#ifdef DEBUG
+ if (verbose)
+ printf("Adding interference with Virtual Stub Param\n");
+#endif
+ codeGen->regSet.rsSetRegsModified(RBM_VIRTUAL_STUB_PARAM);
+
+ if (tree->gtCall.gtCallType == CT_INDIRECT)
+ {
+ predictReg = PREDICT_REG_VIRTUAL_STUB_PARAM;
+ }
+ break;
+
+ case GTF_CALL_VIRT_VTABLE:
+ predictReg = PREDICT_SCRATCH_REG;
+ break;
+
+ case GTF_CALL_NONVIRT:
+ predictReg = PREDICT_SCRATCH_REG;
+ break;
+ }
+
+ if (tree->gtCall.gtCallType == CT_INDIRECT)
+ {
+#if defined(_TARGET_ARM_) || defined(_TARGET_AMD64_)
+ if (tree->gtCall.gtCallCookie)
+ {
+ codeGen->regSet.rsSetRegsModified(RBM_PINVOKE_COOKIE_PARAM | RBM_PINVOKE_TARGET_PARAM);
+
+ callAddrMask |= rpPredictTreeRegUse(tree->gtCall.gtCallCookie, PREDICT_REG_PINVOKE_COOKIE_PARAM,
+ lockedRegs | regArgMask, RBM_LASTUSE);
+
+ // Just in case we predict some other registers, force interference with our two special
+ // parameters: PINVOKE_COOKIE_PARAM & PINVOKE_TARGET_PARAM
+ callAddrMask |= (RBM_PINVOKE_COOKIE_PARAM | RBM_PINVOKE_TARGET_PARAM);
+
+ predictReg = PREDICT_REG_PINVOKE_TARGET_PARAM;
+ }
+#endif
+ callAddrMask |=
+ rpPredictTreeRegUse(tree->gtCall.gtCallAddr, predictReg, lockedRegs | regArgMask, RBM_LASTUSE);
+ }
+ else if (predictReg != PREDICT_NONE)
+ {
+ callAddrMask |= rpPredictRegPick(TYP_I_IMPL, predictReg, lockedRegs | regArgMask);
+ }
+
+ if (tree->gtFlags & GTF_CALL_UNMANAGED)
+ {
+ // Need a register for tcbReg
+ callAddrMask |=
+ rpPredictRegPick(TYP_I_IMPL, PREDICT_SCRATCH_REG, lockedRegs | regArgMask | callAddrMask);
+#if CPU_LOAD_STORE_ARCH
+ // Need an extra register for tmpReg
+ callAddrMask |=
+ rpPredictRegPick(TYP_I_IMPL, PREDICT_SCRATCH_REG, lockedRegs | regArgMask | callAddrMask);
+#endif
+ }
+
+ tree->gtUsedRegs |= callAddrMask;
+
+            /* After the call restore the original value of lockedRegs */
+ lockedRegs |= keepMask;
+
+ /* set the return register */
+ regMask = genReturnRegForTree(tree);
+
+ if (regMask & rsvdRegs)
+ {
+ // We will need to relocate the return register value
+ regMaskTP intRegMask = (regMask & RBM_ALLINT);
+#if FEATURE_FP_REGALLOC
+ regMaskTP floatRegMask = (regMask & RBM_ALLFLOAT);
+#endif
+ regMask = RBM_NONE;
+
+ if (intRegMask)
+ {
+ if (intRegMask == RBM_INTRET)
+ {
+ regMask |= rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, rsvdRegs | regMask);
+ }
+ else if (intRegMask == RBM_LNGRET)
+ {
+ regMask |= rpPredictRegPick(TYP_LONG, PREDICT_SCRATCH_REG, rsvdRegs | regMask);
+ }
+ else
+ {
+ noway_assert(!"unexpected return regMask");
+ }
+ }
+
+#if FEATURE_FP_REGALLOC
+ if (floatRegMask)
+ {
+ if (floatRegMask == RBM_FLOATRET)
+ {
+ regMask |= rpPredictRegPick(TYP_FLOAT, PREDICT_SCRATCH_REG, rsvdRegs | regMask);
+ }
+ else if (floatRegMask == RBM_DOUBLERET)
+ {
+ regMask |= rpPredictRegPick(TYP_DOUBLE, PREDICT_SCRATCH_REG, rsvdRegs | regMask);
+ }
+ else // HFA return case
+ {
+ for (unsigned f = 0; f < genCountBits(floatRegMask); f++)
+ {
+ regMask |= rpPredictRegPick(TYP_FLOAT, PREDICT_SCRATCH_REG, rsvdRegs | regMask);
+ }
+ }
+ }
+#endif
+ }
+
+ /* the return registers (if any) are killed */
+ tree->gtUsedRegs |= regMask;
+
+#if GTF_CALL_REG_SAVE
+ if (!(tree->gtFlags & GTF_CALL_REG_SAVE))
+#endif
+ {
+ /* the RBM_CALLEE_TRASH set are killed (i.e. EAX,ECX,EDX) */
+ tree->gtUsedRegs |= RBM_CALLEE_TRASH;
+ }
+ }
+
+#if defined(_TARGET_ARM_) && defined(PROFILING_SUPPORTED)
+ // Mark required registers for emitting tailcall profiler callback as used
+ if (compIsProfilerHookNeeded() && tree->gtCall.IsTailCall() && (tree->gtCall.gtCallType == CT_USER_FUNC))
+ {
+ tree->gtUsedRegs |= RBM_PROFILER_TAIL_USED;
+ }
+#endif
+ break;
+
+ case GT_ARR_ELEM:
+
+ // Figure out which registers can't be touched
+ unsigned dim;
+ for (dim = 0; dim < tree->gtArrElem.gtArrRank; dim++)
+ rsvdRegs |= tree->gtArrElem.gtArrInds[dim]->gtRsvdRegs;
+
+ regMask = rpPredictTreeRegUse(tree->gtArrElem.gtArrObj, PREDICT_REG, lockedRegs, rsvdRegs);
+
+ regMaskTP dimsMask;
+ dimsMask = 0;
+
+#if CPU_LOAD_STORE_ARCH
+ // We need a register to load the bounds of the MD array
+ regMask |= rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, lockedRegs | regMask);
+#endif
+
+ for (dim = 0; dim < tree->gtArrElem.gtArrRank; dim++)
+ {
+ /* We need scratch registers to compute index-lower_bound.
+ Also, gtArrInds[0]'s register will be used as the second
+ addressability register (besides gtArrObj's) */
+
+ regMaskTP dimMask = rpPredictTreeRegUse(tree->gtArrElem.gtArrInds[dim], PREDICT_SCRATCH_REG,
+ lockedRegs | regMask | dimsMask, rsvdRegs);
+ if (dim == 0)
+ regMask |= dimMask;
+
+ dimsMask |= dimMask;
+ }
+#ifdef _TARGET_XARCH_
+            // INS_imul doesn't have an immediate constant.
+ if (!jitIsScaleIndexMul(tree->gtArrElem.gtArrElemSize))
+ regMask |= rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, lockedRegs | regMask | dimsMask);
+#endif
+ tree->gtUsedRegs = (regMaskSmall)(regMask | dimsMask);
+ break;
+
+ case GT_CMPXCHG:
+ {
+#ifdef _TARGET_XARCH_
+ rsvdRegs |= RBM_EAX;
+#endif
+ if (tree->gtCmpXchg.gtOpLocation->OperGet() == GT_LCL_VAR)
+ {
+ regMask = rpPredictTreeRegUse(tree->gtCmpXchg.gtOpLocation, PREDICT_REG, lockedRegs, rsvdRegs);
+ }
+ else
+ {
+ regMask = rpPredictTreeRegUse(tree->gtCmpXchg.gtOpLocation, PREDICT_ADDR, lockedRegs, rsvdRegs);
+ }
+ op2Mask = rpPredictTreeRegUse(tree->gtCmpXchg.gtOpValue, PREDICT_REG, lockedRegs, rsvdRegs | regMask);
+
+#ifdef _TARGET_XARCH_
+ rsvdRegs &= ~RBM_EAX;
+ tmpMask = rpPredictTreeRegUse(tree->gtCmpXchg.gtOpComparand, PREDICT_REG_EAX, lockedRegs,
+ rsvdRegs | regMask | op2Mask);
+ tree->gtUsedRegs = (regMaskSmall)(RBM_EAX | regMask | op2Mask | tmpMask);
+ predictReg = PREDICT_REG_EAX; // When this is done the result is always in EAX.
+#else
+ tmpMask = 0;
+ tree->gtUsedRegs = (regMaskSmall)(regMask | op2Mask | tmpMask);
+#endif
+ }
+ break;
+
+ case GT_ARR_BOUNDS_CHECK:
+ {
+ regMaskTP opArrLenRsvd = rsvdRegs | tree->gtBoundsChk.gtIndex->gtRsvdRegs;
+ regMask = rpPredictTreeRegUse(tree->gtBoundsChk.gtArrLen, PREDICT_REG, lockedRegs, opArrLenRsvd);
+ rpPredictTreeRegUse(tree->gtBoundsChk.gtIndex, PREDICT_REG, lockedRegs | regMask, RBM_LASTUSE);
+
+ tree->gtUsedRegs =
+ (regMaskSmall)regMask | tree->gtBoundsChk.gtArrLen->gtUsedRegs | tree->gtBoundsChk.gtIndex->gtUsedRegs;
+ }
+ break;
+
+ default:
+ NO_WAY("unexpected special operator in reg use prediction");
+ break;
+ }
+
+RETURN_CHECK:
+
+#ifdef DEBUG
+ /* make sure we set them to something reasonable */
+ if (tree->gtUsedRegs & RBM_ILLEGAL)
+ noway_assert(!"used regs not set properly in reg use prediction");
+
+ if (regMask & RBM_ILLEGAL)
+        noway_assert(!"return value not set properly in reg use prediction");
+
+#endif
+
+ /*
+ * If the gtUsedRegs conflicts with lockedRegs
+     * then we are going to have to spill some registers
+     * into the non-trashed register set to keep them alive
+ */
+ regMaskTP spillMask;
+ spillMask = tree->gtUsedRegs & lockedRegs;
+
+ if (spillMask)
+ {
+ while (spillMask)
+ {
+ /* Find the next register that needs to be spilled */
+ tmpMask = genFindLowestBit(spillMask);
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Predict spill of %s before: ", getRegName(genRegNumFromMask(tmpMask)));
+ gtDispTree(tree, 0, NULL, true);
+ if ((tmpMask & regMask) == 0)
+ {
+ printf("Predict reload of %s after : ", getRegName(genRegNumFromMask(tmpMask)));
+ gtDispTree(tree, 0, NULL, true);
+ }
+ }
+#endif
+ /* In Codegen it will typically introduce a spill temp here */
+        /* rather than relocating the register to a non-trashed reg */
+ rpPredictSpillCnt++;
+
+ /* Remove it from the spillMask */
+ spillMask &= ~tmpMask;
+ }
+ }
+
+ /*
+     * If the return registers in regMask conflict with the lockedRegs
+ * then we allocate extra registers for the reload of the conflicting
+ * registers.
+ *
+ * Set spillMask to the set of locked registers that have to be reloaded here.
+ * reloadMask is set to the extra registers that are used to reload
+ * the spilled lockedRegs.
+ */
+
+ noway_assert(regMask != DUMMY_INIT(RBM_ILLEGAL));
+ spillMask = lockedRegs & regMask;
+
+ if (spillMask)
+ {
+ /* Remove the spillMask from regMask */
+ regMask &= ~spillMask;
+
+ regMaskTP reloadMask = RBM_NONE;
+ while (spillMask)
+ {
+ /* Get an extra register to hold it */
+ regMaskTP reloadReg = rpPredictRegPick(TYP_INT, PREDICT_REG, lockedRegs | regMask | reloadMask);
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Predict reload into %s after : ", getRegName(genRegNumFromMask(reloadReg)));
+ gtDispTree(tree, 0, NULL, true);
+ }
+#endif
+ reloadMask |= reloadReg;
+
+ /* Remove it from the spillMask */
+ spillMask &= ~genFindLowestBit(spillMask);
+ }
+
+ /* Update regMask to use the reloadMask */
+ regMask |= reloadMask;
+
+ /* update the gtUsedRegs mask */
+ tree->gtUsedRegs |= (regMaskSmall)regMask;
+ }
+
+ regMaskTP regUse = tree->gtUsedRegs;
+ regUse |= interferingRegs;
+
+ if (!VarSetOps::IsEmpty(this, compCurLife))
+ {
+ // Add interference between the current set of live variables and
+        // the set of temporary registers needed to evaluate the subtree
+ if (regUse)
+ {
+ rpRecordRegIntf(regUse, compCurLife DEBUGARG("tmp use"));
+ }
+ }
+
+ if (rpAsgVarNum != -1)
+ {
+ // Add interference between the registers used (if any)
+ // and the assignment target variable
+ if (regUse)
+ {
+ rpRecordRegIntf(regUse, VarSetOps::MakeSingleton(this, rpAsgVarNum) DEBUGARG("tgt var tmp use"));
+ }
+
+ // Add a variable interference from rpAsgVarNum (i.e. the enregistered left hand
+ // side of the assignment passed here using PREDICT_REG_VAR_Txx)
+        // to the set of currently live variables. This new interference will prevent us
+        // from using the register used here for enregistering a different live variable.
+ //
+ if (!VarSetOps::IsEmpty(this, compCurLife))
+ {
+ rpRecordVarIntf(rpAsgVarNum, compCurLife DEBUGARG("asg tgt live conflict"));
+ }
+ }
+
+    /* Do we need to restore the oldLastUseVars value? */
+ if (restoreLastUseVars)
+ {
+ /* If we used a GT_ASG targeted register then we need to add
+ * a variable interference between any new last use variables
+ * and the GT_ASG targeted register
+ */
+ if (!VarSetOps::Equal(this, rpLastUseVars, oldLastUseVars) && rpAsgVarNum != -1)
+ {
+ rpRecordVarIntf(rpAsgVarNum, VarSetOps::Diff(this, rpLastUseVars, oldLastUseVars)
+ DEBUGARG("asgn tgt last use conflict"));
+ }
+ VarSetOps::Assign(this, rpLastUseVars, oldLastUseVars);
+ }
+
+ return regMask;
+}
+#ifdef _PREFAST_
+#pragma warning(pop)
+#endif
+
+#endif // LEGACY_BACKEND
+
+/****************************************************************************/
+/* Returns true when we must create an EBP frame.
+   This is used to force most managed methods to have EBP-based frames,
+   which allows the ETW kernel stack walker to walk the stacks of managed code,
+   thereby allowing the kernel to perform lightweight profiling.
+ */
+bool Compiler::rpMustCreateEBPFrame(INDEBUG(const char** wbReason))
+{
+ bool result = false;
+#ifdef DEBUG
+ const char* reason = nullptr;
+#endif
+
+#if ETW_EBP_FRAMED
+ if (!result && (opts.MinOpts() || opts.compDbgCode))
+ {
+ INDEBUG(reason = "Debug Code");
+ result = true;
+ }
+ if (!result && (info.compMethodInfo->ILCodeSize > DEFAULT_MAX_INLINE_SIZE))
+ {
+ INDEBUG(reason = "IL Code Size");
+ result = true;
+ }
+ if (!result && (fgBBcount > 3))
+ {
+ INDEBUG(reason = "BasicBlock Count");
+ result = true;
+ }
+ if (!result && fgHasLoops)
+ {
+ INDEBUG(reason = "Method has Loops");
+ result = true;
+ }
+ if (!result && (optCallCount >= 2))
+ {
+ INDEBUG(reason = "Call Count");
+ result = true;
+ }
+ if (!result && (optIndirectCallCount >= 1))
+ {
+ INDEBUG(reason = "Indirect Call");
+ result = true;
+ }
+#endif // ETW_EBP_FRAMED
+
+    // The VM always wants to identify the containing frame of an InlinedCallFrame
+    // via the frame register, never the stack register, so we need a frame.
+ if (!result && (optNativeCallCount != 0))
+ {
+ INDEBUG(reason = "Uses PInvoke");
+ result = true;
+ }
+
+#ifdef _TARGET_ARM64_
+ // TODO-ARM64-NYI: This is temporary: force a frame pointer-based frame until genFnProlog can handle non-frame
+ // pointer frames.
+ if (!result)
+ {
+ INDEBUG(reason = "Temporary ARM64 force frame pointer");
+ result = true;
+ }
+#endif // _TARGET_ARM64_
+
+#ifdef DEBUG
+ if ((result == true) && (wbReason != nullptr))
+ {
+ *wbReason = reason;
+ }
+#endif
+
+ return result;
+}
+
+#ifdef LEGACY_BACKEND // We don't use any of the old register allocator functions when LSRA is used instead.
+
+/*****************************************************************************
+ *
+ * Predict which variables will be assigned to registers.
+ * This is x86 specific, only predicts the integer registers, and
+ * must be conservative: any register that is predicted to be enregistered
+ * must end up being enregistered.
+ *
+ * rpPredictTreeRegUse takes advantage of the LCL_VARs that are
+ * predicted to be enregistered to minimize calls to rpPredictRegPick.
+ *
+ */
+
+#ifdef _PREFAST_
+#pragma warning(push)
+#pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
+#endif
+regMaskTP Compiler::rpPredictAssignRegVars(regMaskTP regAvail)
+{
+ unsigned regInx;
+
+ if (rpPasses <= rpPassesPessimize)
+ {
+ // Assume that we won't have to reverse EBP enregistration
+ rpReverseEBPenreg = false;
+
+ // Set the default rpFrameType based upon codeGen->isFramePointerRequired()
+ if (codeGen->isFramePointerRequired() || codeGen->isFrameRequired())
+ rpFrameType = FT_EBP_FRAME;
+ else
+ rpFrameType = FT_ESP_FRAME;
+ }
+
+#if !ETW_EBP_FRAMED
+ // If we are using FPBASE as the frame register, we cannot also use it for
+ // a local var
+ if (rpFrameType == FT_EBP_FRAME)
+ {
+ regAvail &= ~RBM_FPBASE;
+ }
+#endif // !ETW_EBP_FRAMED
+
+ rpStkPredict = 0;
+ rpPredictAssignMask = regAvail;
+
+ raSetupArgMasks(&codeGen->intRegState);
+#if !FEATURE_STACK_FP_X87
+ raSetupArgMasks(&codeGen->floatRegState);
+#endif
+
+ // If there is a secret stub param, it is also live in
+ if (info.compPublishStubParam)
+ {
+ codeGen->intRegState.rsCalleeRegArgMaskLiveIn |= RBM_SECRET_STUB_PARAM;
+ }
+
+ if (regAvail == RBM_NONE)
+ {
+ unsigned lclNum;
+ LclVarDsc* varDsc;
+
+ for (lclNum = 0, varDsc = lvaTable; lclNum < lvaCount; lclNum++, varDsc++)
+ {
+#if FEATURE_STACK_FP_X87
+ if (!varDsc->IsFloatRegType())
+#endif
+ {
+ varDsc->lvRegNum = REG_STK;
+ if (isRegPairType(varDsc->lvType))
+ varDsc->lvOtherReg = REG_STK;
+ }
+ }
+ }
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nCompiler::rpPredictAssignRegVars pass #%d", rpPasses);
+ printf("\n Available registers = ");
+ dspRegMask(regAvail);
+ printf("\n");
+ }
+#endif
+
+ if (regAvail == RBM_NONE)
+ {
+ return RBM_NONE;
+ }
+
+ /* We cannot change the lvVarIndexes at this point, so we */
+    /* can only re-order the existing set of tracked variables, */
+    /* which will change the order in which we select the */
+ /* locals for enregistering. */
+
+ assert(lvaTrackedFixed); // We should have already set this to prevent us from adding any new tracked variables.
+
+ // Should not be set unless optimizing
+ noway_assert((lvaSortAgain == false) || (opts.MinOpts() == false));
+
+ if (lvaSortAgain)
+ lvaSortOnly();
+
+#ifdef DEBUG
+ fgDebugCheckBBlist();
+#endif
+
+ /* Initialize the weighted count of variables that could have */
+ /* been enregistered but weren't */
+ unsigned refCntStk = 0; // sum of ref counts for all stack based variables
+ unsigned refCntEBP = 0; // sum of ref counts for EBP enregistered variables
+ unsigned refCntWtdEBP = 0; // sum of wtd ref counts for EBP enregistered variables
+#if DOUBLE_ALIGN
+ unsigned refCntStkParam; // sum of ref counts for all stack based parameters
+ unsigned refCntWtdStkDbl; // sum of wtd ref counts for stack based doubles
+
+#if FEATURE_STACK_FP_X87
+ refCntStkParam = raCntStkParamDblStackFP;
+ refCntWtdStkDbl = raCntWtdStkDblStackFP;
+ refCntStk = raCntStkStackFP;
+#else
+ refCntStkParam = 0;
+ refCntWtdStkDbl = 0;
+ refCntStk = 0;
+#endif // FEATURE_STACK_FP_X87
+
+#endif // DOUBLE_ALIGN
+
+    /* Set of registers used to enregister variables in the prediction */
+ regMaskTP regUsed = RBM_NONE;
+
+ /*-------------------------------------------------------------------------
+ *
+ * Predict/Assign the enregistered locals in ref-count order
+ *
+ */
+
+ VARSET_TP VARSET_INIT_NOCOPY(unprocessedVars, VarSetOps::MakeFull(this));
+
+ unsigned FPRegVarLiveInCnt;
+ FPRegVarLiveInCnt = 0; // How many enregistered doubles are live on entry to the method
+
+ LclVarDsc* varDsc;
+ for (unsigned sortNum = 0; sortNum < lvaCount; sortNum++)
+ {
+ bool notWorthy = false;
+
+ unsigned varIndex;
+ bool isDouble;
+ regMaskTP regAvailForType;
+ var_types regType;
+ regMaskTP avoidReg;
+ unsigned customVarOrderSize;
+ regNumber customVarOrder[MAX_VAR_ORDER_SIZE];
+ bool firstHalf;
+ regNumber saveOtherReg;
+
+ varDsc = lvaRefSorted[sortNum];
+
+#if FEATURE_STACK_FP_X87
+ if (varTypeIsFloating(varDsc->TypeGet()))
+ {
+#ifdef DEBUG
+ if (lvaIsFieldOfDependentlyPromotedStruct(varDsc))
+ {
+                // Field local of a PROMOTION_TYPE_DEPENDENT struct should not
+                // be enregistered.
+ noway_assert(!varDsc->lvRegister);
+ }
+#endif
+ continue;
+ }
+#endif
+
+ /* Check the set of invariant things that would prevent enregistration */
+
+ /* Ignore the variable if it's not tracked */
+ if (!varDsc->lvTracked)
+ goto CANT_REG;
+
+ /* Get hold of the index and the interference mask for the variable */
+ varIndex = varDsc->lvVarIndex;
+
+ // Remove 'varIndex' from unprocessedVars
+ VarSetOps::RemoveElemD(this, unprocessedVars, varIndex);
+
+ // Skip the variable if it's marked as DoNotEnregister.
+
+ if (varDsc->lvDoNotEnregister)
+ goto CANT_REG;
+
+        /* TODO: For now, if we have a JMP then all register args go to the stack.
+         * TODO: Later consider extending the life of the argument or making a copy of it */
+
+ if (compJmpOpUsed && varDsc->lvIsRegArg)
+ goto CANT_REG;
+
+ /* Skip the variable if the ref count is zero */
+
+ if (varDsc->lvRefCnt == 0)
+ goto CANT_REG;
+
+ /* Ignore field of PROMOTION_TYPE_DEPENDENT type of promoted struct */
+
+ if (lvaIsFieldOfDependentlyPromotedStruct(varDsc))
+ {
+ goto CANT_REG;
+ }
+
+ /* Is the unweighted ref count too low to be interesting? */
+
+ if (!varDsc->lvIsStructField && // We do encourage enregistering field locals.
+ (varDsc->lvRefCnt <= 1))
+ {
+ /* Sometimes it's useful to enregister a variable with only one use */
+ /* arguments referenced in loops are one example */
+
+ if (varDsc->lvIsParam && varDsc->lvRefCntWtd > BB_UNITY_WEIGHT)
+ goto OK_TO_ENREGISTER;
+
+ /* If the variable has a preferred register set it may be useful to put it there */
+ if (varDsc->lvPrefReg && varDsc->lvIsRegArg)
+ goto OK_TO_ENREGISTER;
+
+ /* Keep going; the table is sorted by "weighted" ref count */
+ goto CANT_REG;
+ }
+
+ OK_TO_ENREGISTER:
+
+ if (varTypeIsFloating(varDsc->TypeGet()))
+ {
+ regType = varDsc->TypeGet();
+ regAvailForType = regAvail & RBM_ALLFLOAT;
+ }
+ else
+ {
+ regType = TYP_INT;
+ regAvailForType = regAvail & RBM_ALLINT;
+ }
+
+#ifdef _TARGET_ARM_
+ isDouble = (varDsc->TypeGet() == TYP_DOUBLE);
+
+ if (isDouble)
+ {
+ regAvailForType &= RBM_DBL_REGS; // We must restrict the set to the double registers
+ }
+#endif
+
+ /* If we don't have any registers available then skip the enregistration attempt */
+ if (regAvailForType == RBM_NONE)
+ goto NO_REG;
+
+ // On the pessimize passes don't even try to enregister LONGS
+ if (isRegPairType(varDsc->lvType))
+ {
+ if (rpPasses > rpPassesPessimize)
+ goto NO_REG;
+ else if (rpLostEnreg && (rpPasses == rpPassesPessimize))
+ goto NO_REG;
+ }
+
+ // Set of registers to avoid when performing register allocation
+ avoidReg = RBM_NONE;
+
+ if (!varDsc->lvIsRegArg)
+ {
+ /* For local variables,
+ * avoid the incoming arguments,
+ * but only if you conflict with them */
+
+ if (raAvoidArgRegMask != 0)
+ {
+ LclVarDsc* argDsc;
+ LclVarDsc* argsEnd = lvaTable + info.compArgsCount;
+
+ for (argDsc = lvaTable; argDsc < argsEnd; argDsc++)
+ {
+ if (!argDsc->lvIsRegArg)
+ continue;
+
+ bool isFloat = argDsc->IsFloatRegType();
+ regNumber inArgReg = argDsc->lvArgReg;
+ regMaskTP inArgBit = genRegMask(inArgReg);
+
+ // Is this inArgReg in the raAvoidArgRegMask set?
+
+ if (!(raAvoidArgRegMask & inArgBit))
+ continue;
+
+ noway_assert(argDsc->lvIsParam);
+ noway_assert(inArgBit & (isFloat ? RBM_FLTARG_REGS : RBM_ARG_REGS));
+
+ unsigned locVarIndex = varDsc->lvVarIndex;
+ unsigned argVarIndex = argDsc->lvVarIndex;
+
+ /* Does this variable interfere with the arg variable ? */
+ if (VarSetOps::IsMember(this, lvaVarIntf[locVarIndex], argVarIndex))
+ {
+ noway_assert(VarSetOps::IsMember(this, lvaVarIntf[argVarIndex], locVarIndex));
+ /* Yes, so try to avoid the incoming arg reg */
+ avoidReg |= inArgBit;
+ }
+ else
+ {
+ noway_assert(!VarSetOps::IsMember(this, lvaVarIntf[argVarIndex], locVarIndex));
+ }
+ }
+ }
+ }
+
+ // Now we will try to predict which register the variable
+ // could be enregistered in
+
+ customVarOrderSize = MAX_VAR_ORDER_SIZE;
+
+ raSetRegVarOrder(regType, customVarOrder, &customVarOrderSize, varDsc->lvPrefReg, avoidReg);
+
+ firstHalf = false;
+ saveOtherReg = DUMMY_INIT(REG_NA);
+
+ for (regInx = 0; regInx < customVarOrderSize; regInx++)
+ {
+ regNumber regNum = customVarOrder[regInx];
+ regMaskTP regBits = genRegMask(regNum);
+
+ /* Skip this register if it isn't available */
+ if ((regAvailForType & regBits) == 0)
+ continue;
+
+ /* Skip this register if it interferes with the variable */
+
+ if (VarSetOps::IsMember(this, raLclRegIntf[regNum], varIndex))
+ continue;
+
+ if (varTypeIsFloating(regType))
+ {
+#ifdef _TARGET_ARM_
+ if (isDouble)
+ {
+ regNumber regNext = REG_NEXT(regNum);
+ regBits |= genRegMask(regNext);
+
+ /* Skip if regNext interferes with the variable */
+ if (VarSetOps::IsMember(this, raLclRegIntf[regNext], varIndex))
+ continue;
+ }
+#endif
+ }
+
+ bool firstUseOfReg = ((regBits & (regUsed | codeGen->regSet.rsGetModifiedRegsMask())) == 0);
+ bool lessThanTwoRefWtd = (varDsc->lvRefCntWtd < (2 * BB_UNITY_WEIGHT));
+ bool calleeSavedReg = ((regBits & RBM_CALLEE_SAVED) != 0);
+
+ /* Skip this register if the weighted ref count is less than two
+               and we are considering an unused callee-saved register */
+
+ if (lessThanTwoRefWtd && // less than two references (weighted)
+ firstUseOfReg && // first use of this register
+ calleeSavedReg) // callee saved register
+ {
+ unsigned int totalRefCntWtd = varDsc->lvRefCntWtd;
+
+                // psc is an abbreviation for possibleSameColor
+ VARSET_TP VARSET_INIT_NOCOPY(pscVarSet, VarSetOps::Diff(this, unprocessedVars, lvaVarIntf[varIndex]));
+
+ VARSET_ITER_INIT(this, pscIndexIter, pscVarSet, pscIndex);
+ while (pscIndexIter.NextElem(this, &pscIndex))
+ {
+ LclVarDsc* pscVar = lvaTable + lvaTrackedToVarNum[pscIndex];
+ totalRefCntWtd += pscVar->lvRefCntWtd;
+ if (totalRefCntWtd > (2 * BB_UNITY_WEIGHT))
+ break;
+ }
+
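+                    // For example, if this variable's weighted count is 150 and one
+                    // unprocessed, non-interfering candidate adds another 100, then
+                    // totalRefCntWtd = 250 exceeds 2 * BB_UNITY_WEIGHT (assuming
+                    // BB_UNITY_WEIGHT == 100), so spilling the callee-saved register
+                    // is considered worthwhile.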
+ if (totalRefCntWtd <= (2 * BB_UNITY_WEIGHT))
+ {
+ notWorthy = true;
+ continue; // not worth spilling a callee saved register
+ }
+                    // Otherwise we will spill this callee-saved register,
+                    // because its uses, when combined with the uses of
+                    // other yet-to-be-processed candidates, exceed our threshold.
+ }
+
+ /* Looks good - mark the variable as living in the register */
+
+ if (isRegPairType(varDsc->lvType))
+ {
+ if (firstHalf == false)
+ {
+ /* Enregister the first half of the long */
+ varDsc->lvRegNum = regNum;
+ saveOtherReg = varDsc->lvOtherReg;
+ varDsc->lvOtherReg = REG_STK;
+ firstHalf = true;
+ }
+ else
+ {
+ /* Ensure 'well-formed' register pairs */
+ /* (those returned by gen[Pick|Grab]RegPair) */
+
+ if (regNum < varDsc->lvRegNum)
+ {
+ varDsc->lvOtherReg = varDsc->lvRegNum;
+ varDsc->lvRegNum = regNum;
+ }
+ else
+ {
+ varDsc->lvOtherReg = regNum;
+ }
+ firstHalf = false;
+ }
+ }
+ else
+ {
+ varDsc->lvRegNum = regNum;
+#ifdef _TARGET_ARM_
+ if (isDouble)
+ {
+ varDsc->lvOtherReg = REG_NEXT(regNum);
+ }
+#endif
+ }
+
+ if (regNum == REG_FPBASE)
+ {
+ refCntEBP += varDsc->lvRefCnt;
+ refCntWtdEBP += varDsc->lvRefCntWtd;
+#if DOUBLE_ALIGN
+ if (varDsc->lvIsParam)
+ {
+ refCntStkParam += varDsc->lvRefCnt;
+ }
+#endif
+ }
+
+ /* Record this register in the regUsed set */
+ regUsed |= regBits;
+
+ /* The register is now ineligible for all interfering variables */
+
+ VarSetOps::UnionD(this, raLclRegIntf[regNum], lvaVarIntf[varIndex]);
+
+#ifdef _TARGET_ARM_
+ if (isDouble)
+ {
+ regNumber secondHalf = REG_NEXT(regNum);
+ VARSET_ITER_INIT(this, iter, lvaVarIntf[varIndex], intfIndex);
+ while (iter.NextElem(this, &intfIndex))
+ {
+ VarSetOps::AddElemD(this, raLclRegIntf[secondHalf], intfIndex);
+ }
+ }
+#endif
+
+ /* If a register argument, remove its incoming register
+ * from the "avoid" list */
+
+ if (varDsc->lvIsRegArg)
+ {
+ raAvoidArgRegMask &= ~genRegMask(varDsc->lvArgReg);
+#ifdef _TARGET_ARM_
+ if (isDouble)
+ {
+ raAvoidArgRegMask &= ~genRegMask(REG_NEXT(varDsc->lvArgReg));
+ }
+#endif
+ }
+
+ /* A variable of TYP_LONG can take two registers */
+ if (firstHalf)
+ continue;
+
+ // Since we have successfully enregistered this variable it is
+ // now time to move on and consider the next variable
+ goto ENREG_VAR;
+ }
+
+ if (firstHalf)
+ {
+ noway_assert(isRegPairType(varDsc->lvType));
+
+ /* This TYP_LONG is partially enregistered */
+
+ noway_assert(saveOtherReg != DUMMY_INIT(REG_NA));
+
+ if (varDsc->lvDependReg && (saveOtherReg != REG_STK))
+ {
+ rpLostEnreg = true;
+ }
+
+ raAddToStkPredict(varDsc->lvRefCntWtd);
+ goto ENREG_VAR;
+ }
+
+ NO_REG:;
+ if (varDsc->lvDependReg)
+ {
+ rpLostEnreg = true;
+ }
+
+ if (!notWorthy)
+ {
+ /* Weighted count of variables that could have been enregistered but weren't */
+ raAddToStkPredict(varDsc->lvRefCntWtd);
+
+ if (isRegPairType(varDsc->lvType) && (varDsc->lvOtherReg == REG_STK))
+ raAddToStkPredict(varDsc->lvRefCntWtd);
+ }
+
+ CANT_REG:;
+ varDsc->lvRegister = false;
+
+ varDsc->lvRegNum = REG_STK;
+ if (isRegPairType(varDsc->lvType))
+ varDsc->lvOtherReg = REG_STK;
+
+ /* unweighted count of variables that were not enregistered */
+
+ refCntStk += varDsc->lvRefCnt;
+
+#if DOUBLE_ALIGN
+ if (varDsc->lvIsParam)
+ {
+ refCntStkParam += varDsc->lvRefCnt;
+ }
+ else
+ {
+ /* Is it a stack based double? */
+            /* Note that double params are excluded since they cannot be double-aligned */
+ if (varDsc->lvType == TYP_DOUBLE)
+ {
+ refCntWtdStkDbl += varDsc->lvRefCntWtd;
+ }
+ }
+#endif
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("; ");
+ gtDispLclVar((unsigned)(varDsc - lvaTable));
+ if (varDsc->lvTracked)
+ printf("T%02u", varDsc->lvVarIndex);
+ else
+ printf(" ");
+ printf(" (refcnt=%2u,refwtd=%s) not enregistered", varDsc->lvRefCnt, refCntWtd2str(varDsc->lvRefCntWtd));
+ if (varDsc->lvDoNotEnregister)
+ printf(", do-not-enregister");
+ printf("\n");
+ }
+#endif
+ continue;
+
+ ENREG_VAR:;
+
+ varDsc->lvRegister = true;
+
+ // Record the fact that we enregistered a stack arg when tail call is used.
+ if (compJmpOpUsed && !varDsc->lvIsRegArg)
+ {
+ rpMaskPInvokeEpilogIntf |= genRegMask(varDsc->lvRegNum);
+ if (isRegPairType(varDsc->lvType))
+ {
+ rpMaskPInvokeEpilogIntf |= genRegMask(varDsc->lvOtherReg);
+ }
+ }
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("; ");
+ gtDispLclVar((unsigned)(varDsc - lvaTable));
+ printf("T%02u (refcnt=%2u,refwtd=%s) predicted to be assigned to ", varIndex, varDsc->lvRefCnt,
+ refCntWtd2str(varDsc->lvRefCntWtd));
+ varDsc->PrintVarReg();
+#ifdef _TARGET_ARM_
+ if (isDouble)
+ {
+ printf(":%s", getRegName(varDsc->lvOtherReg));
+ }
+#endif
+ printf("\n");
+ }
+#endif
+ }
+
+#if ETW_EBP_FRAMED
+ noway_assert(refCntEBP == 0);
+#endif
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ if (refCntStk > 0)
+ printf("; refCntStk = %u\n", refCntStk);
+ if (refCntEBP > 0)
+ printf("; refCntEBP = %u\n", refCntEBP);
+ if (refCntWtdEBP > 0)
+ printf("; refCntWtdEBP = %u\n", refCntWtdEBP);
+#if DOUBLE_ALIGN
+ if (refCntStkParam > 0)
+ printf("; refCntStkParam = %u\n", refCntStkParam);
+ if (refCntWtdStkDbl > 0)
+ printf("; refCntWtdStkDbl = %u\n", refCntWtdStkDbl);
+#endif
+ }
+#endif
+
+ /* Determine how the EBP register should be used */
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#if DOUBLE_ALIGN
+
+ if (!codeGen->isFramePointerRequired())
+ {
+ noway_assert(getCanDoubleAlign() < COUNT_DOUBLE_ALIGN);
+
+ /*
+ First let us decide if we should use EBP to create a
+ double-aligned frame, instead of enregistering variables
+ */
+
+ if (getCanDoubleAlign() == MUST_DOUBLE_ALIGN)
+ {
+ rpFrameType = FT_DOUBLE_ALIGN_FRAME;
+ goto REVERSE_EBP_ENREG;
+ }
+
+ if (getCanDoubleAlign() == CAN_DOUBLE_ALIGN && (refCntWtdStkDbl > 0))
+ {
+ /* OK, there may be some benefit to double-aligning the frame */
+ /* But let us compare the benefits vs. the costs of this */
+
+ /*
+ One cost to consider is the benefit of smaller code
+ when using EBP as a frame pointer register
+
+ Each stack variable reference is an extra byte of code
+             if we use a double-aligned frame; parameters are
+             accessed via EBP for a double-aligned frame, so they
+ don't use an extra byte of code.
+
+ We pay one byte of code for each refCntStk and we pay
+ one byte or more for each refCntEBP but we save one
+ byte for each refCntStkParam.
+
+ Our savings are the elimination of a possible misaligned
+               access and a possible DCU split when an access crosses
+               a cache-line boundary.
+
+ We use the loop weighted value of
+ refCntWtdStkDbl * misaligned_weight (0, 4, 16)
+ to represent this savings.
+ */
+
+ // We also pay 7 extra bytes for the MOV EBP,ESP,
+ // LEA ESP,[EBP-0x10] and the AND ESP,-8 to double align ESP
+ const unsigned DBL_ALIGN_SETUP_SIZE = 7;
+
+ unsigned bytesUsed = refCntStk + refCntEBP - refCntStkParam + DBL_ALIGN_SETUP_SIZE;
+ unsigned misaligned_weight = 4;
+
+ if (compCodeOpt() == SMALL_CODE)
+ misaligned_weight = 0;
+
+ if (compCodeOpt() == FAST_CODE)
+ misaligned_weight *= 4;
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("; Double alignment:\n");
+                printf("; Bytes that could be saved by not using an EBP frame: %i\n", bytesUsed);
+ printf("; Sum of weighted ref counts for EBP enregistered variables: %i\n", refCntWtdEBP);
+ printf("; Sum of weighted ref counts for weighted stack based doubles: %i\n", refCntWtdStkDbl);
+ }
+#endif
+
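+            // For example, refCntStk = 20, refCntEBP = 5 and refCntStkParam = 3 give
+            // bytesUsed = 20 + 5 - 3 + 7 = 29, which is then compared against the
+            // loop-weighted count of stack-based doubles scaled by misaligned_weight.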
+ if (bytesUsed > ((refCntWtdStkDbl * misaligned_weight) / BB_UNITY_WEIGHT))
+ {
+ /* It's probably better to use EBP as a frame pointer */
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef DEBUG
+ if (verbose)
+ printf("; Predicting not to double-align ESP to save %d bytes of code.\n", bytesUsed);
+#endif
+ goto NO_DOUBLE_ALIGN;
+ }
+
+ /*
+ Another cost to consider is the benefit of using EBP to enregister
+ one or more integer variables
+
+ We pay one extra memory reference for each refCntWtdEBP
+
+ Our savings are the elimination of a possible misaligned
+               access and a possible DCU split when an access crosses
+               a cache-line boundary.
+
+ */
+
+ // <BUGNUM>
+ // VSW 346717: On P4 2 Proc XEON's, SciMark.FFT degrades if SciMark.FFT.transform_internal is
+ // not double aligned.
+ // Here are the numbers that make this not double-aligned.
+ // refCntWtdStkDbl = 0x164
+ // refCntWtdEBP = 0x1a4
+ // We think we do need to change the heuristic to be in favor of double-align.
+ // </BUGNUM>
+
+ if (refCntWtdEBP > refCntWtdStkDbl * 2)
+ {
+ /* It's probably better to use EBP to enregister integer variables */
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef DEBUG
+ if (verbose)
+ printf("; Predicting not to double-align ESP to allow EBP to be used to enregister variables\n");
+#endif
+ goto NO_DOUBLE_ALIGN;
+ }
+
+#ifdef DEBUG
+ if (verbose)
+ printf("; Predicting to create a double-aligned frame\n");
+#endif
+ /*
+ OK we passed all of the benefit tests
+ so we'll predict a double aligned frame
+ */
+
+ rpFrameType = FT_DOUBLE_ALIGN_FRAME;
+ goto REVERSE_EBP_ENREG;
+ }
+ }
+
+NO_DOUBLE_ALIGN:
+#endif // DOUBLE_ALIGN
+
+ if (!codeGen->isFramePointerRequired() && !codeGen->isFrameRequired())
+ {
+#ifdef _TARGET_XARCH_
+// clang-format off
+ /* If we are using EBP to enregister variables then
+ will we actually save bytes by setting up an EBP frame?
+
+ Each stack reference is an extra byte of code if we use
+ an ESP frame.
+
+ Here we measure the savings that we get by using EBP to
+ enregister variables vs. the cost in code size that we
+ pay when using an ESP based frame.
+
+ We pay one byte of code for each refCntStk
+ but we save one byte (or more) for each refCntEBP.
+
+ Our savings are the elimination of a stack memory read/write.
+ We use the loop weighted value of
+ refCntWtdEBP * mem_access_weight (0, 3, 6)
+ to represent this savings.
+ */
+
+ // We also pay 5 extra bytes for the MOV EBP,ESP and LEA ESP,[EBP-0x10]
+ // to set up an EBP frame in the prolog and epilog
+ #define EBP_FRAME_SETUP_SIZE 5
+ // clang-format on
+
+ if (refCntStk > (refCntEBP + EBP_FRAME_SETUP_SIZE))
+ {
+ unsigned bytesSaved = refCntStk - (refCntEBP + EBP_FRAME_SETUP_SIZE);
+ unsigned mem_access_weight = 3;
+
+ if (compCodeOpt() == SMALL_CODE)
+ mem_access_weight = 0;
+ else if (compCodeOpt() == FAST_CODE)
+ mem_access_weight *= 2;
+
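+            // For example, refCntStk = 30 and refCntEBP = 10 give
+            // bytesSaved = 30 - (10 + 5) = 15, which is then compared against the
+            // weighted EBP reference count scaled by mem_access_weight.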
+ if (bytesSaved > ((refCntWtdEBP * mem_access_weight) / BB_UNITY_WEIGHT))
+ {
+                    /* It's not a good idea to use EBP in our predictions */
+ CLANG_FORMAT_COMMENT_ANCHOR;
+#ifdef DEBUG
+ if (verbose && (refCntEBP > 0))
+ printf("; Predicting that it's not worth using EBP to enregister variables\n");
+#endif
+ rpFrameType = FT_EBP_FRAME;
+ goto REVERSE_EBP_ENREG;
+ }
+ }
+#endif // _TARGET_XARCH_
+
+ if ((rpFrameType == FT_NOT_SET) || (rpFrameType == FT_ESP_FRAME))
+ {
+#ifdef DEBUG
+ const char* reason;
+#endif
+ if (rpMustCreateEBPCalled == false)
+ {
+ rpMustCreateEBPCalled = true;
+ if (rpMustCreateEBPFrame(INDEBUG(&reason)))
+ {
+#ifdef DEBUG
+ if (verbose)
+ printf("; Decided to create an EBP based frame for ETW stackwalking (%s)\n", reason);
+#endif
+ codeGen->setFrameRequired(true);
+
+ rpFrameType = FT_EBP_FRAME;
+ goto REVERSE_EBP_ENREG;
+ }
+ }
+ }
+ }
+
+ goto EXIT;
+
+REVERSE_EBP_ENREG:
+
+ noway_assert(rpFrameType != FT_ESP_FRAME);
+
+ rpReverseEBPenreg = true;
+
+#if !ETW_EBP_FRAMED
+ if (refCntEBP > 0)
+ {
+ noway_assert(regUsed & RBM_FPBASE);
+
+ regUsed &= ~RBM_FPBASE;
+
+ /* variables that were enregistered in EBP become stack based variables */
+ raAddToStkPredict(refCntWtdEBP);
+
+ unsigned lclNum;
+
+ /* We're going to have to undo some predicted enregistered variables */
+ for (lclNum = 0, varDsc = lvaTable; lclNum < lvaCount; lclNum++, varDsc++)
+ {
+ /* Is this a register variable? */
+ if (varDsc->lvRegNum != REG_STK)
+ {
+ if (isRegPairType(varDsc->lvType))
+ {
+ /* Only one can be EBP */
+ if (varDsc->lvRegNum == REG_FPBASE || varDsc->lvOtherReg == REG_FPBASE)
+ {
+ if (varDsc->lvRegNum == REG_FPBASE)
+ varDsc->lvRegNum = varDsc->lvOtherReg;
+
+ varDsc->lvOtherReg = REG_STK;
+
+ if (varDsc->lvRegNum == REG_STK)
+ varDsc->lvRegister = false;
+
+ if (varDsc->lvDependReg)
+ rpLostEnreg = true;
+#ifdef DEBUG
+ if (verbose)
+ goto DUMP_MSG;
+#endif
+ }
+ }
+ else
+ {
+ if ((varDsc->lvRegNum == REG_FPBASE) && (!varDsc->IsFloatRegType()))
+ {
+ varDsc->lvRegNum = REG_STK;
+
+ varDsc->lvRegister = false;
+
+ if (varDsc->lvDependReg)
+ rpLostEnreg = true;
+#ifdef DEBUG
+ if (verbose)
+ {
+ DUMP_MSG:
+                            printf("; reversing enregistration of V%02u,T%02u (refcnt=%2u,refwtd=%4u%s)\n", lclNum,
+ varDsc->lvVarIndex, varDsc->lvRefCnt, varDsc->lvRefCntWtd / 2,
+ (varDsc->lvRefCntWtd & 1) ? ".5" : "");
+ }
+#endif
+ }
+ }
+ }
+ }
+ }
+#endif // ETW_EBP_FRAMED
+
+EXIT:;
+
+ unsigned lclNum;
+ for (lclNum = 0, varDsc = lvaTable; lclNum < lvaCount; lclNum++, varDsc++)
+ {
+ /* Clear the lvDependReg flag for next iteration of the predictor */
+ varDsc->lvDependReg = false;
+
+ // If we set rpLostEnreg and this is the first pessimize pass
+        // then reverse the enregistration of all TYP_LONG variables
+ if (rpLostEnreg && isRegPairType(varDsc->lvType) && (rpPasses == rpPassesPessimize))
+ {
+ varDsc->lvRegNum = REG_STK;
+ varDsc->lvOtherReg = REG_STK;
+ }
+ }
+
+#ifdef DEBUG
+ if (verbose && raNewBlocks)
+ {
+ printf("\nAdded FP register killing blocks:\n");
+ fgDispBasicBlocks();
+ printf("\n");
+ }
+#endif
+ noway_assert(rpFrameType != FT_NOT_SET);
+
+ /* return the set of registers used to enregister variables */
+ return regUsed;
+}
+#ifdef _PREFAST_
+#pragma warning(pop)
+#endif
+
+/*****************************************************************************
+ *
+ * Predict register use for every tree in the function. Note that we do this
+ * at different times (not to mention in a totally different way) for x86 vs
+ * RISC targets.
+ */
+void Compiler::rpPredictRegUse()
+{
+#ifdef DEBUG
+ if (verbose)
+ raDumpVarIntf();
+#endif
+
+ // We might want to adjust the ref counts based on interference
+ raAdjustVarIntf();
+
+ regMaskTP allAcceptableRegs = RBM_ALLINT;
+
+#if FEATURE_FP_REGALLOC
+ allAcceptableRegs |= raConfigRestrictMaskFP();
+#endif
+
+ allAcceptableRegs &= ~codeGen->regSet.rsMaskResvd; // Remove any register reserved for special purposes
+
+    /* For debuggable code, genJumpToThrowHlpBlk() generates an inline call
+       to acdHelper(). This is done implicitly, without creating a GT_CALL
+       node. Hence, this interference is handled implicitly by
+       restricting the registers used for enregistering variables */
+
+ if (opts.compDbgCode)
+ {
+ allAcceptableRegs &= RBM_CALLEE_SAVED;
+ }
+
+ /* Compute the initial regmask to use for the first pass */
+ regMaskTP regAvail = RBM_CALLEE_SAVED & allAcceptableRegs;
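+    // The first prediction pass colors only with callee-saved registers; later passes
+    // widen regAvail back to allAcceptableRegs (see "Calculate the new value to use for
+    // regAvail" below), presumably so the initial prediction favors registers that
+    // survive calls.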
+ regMaskTP regUsed;
+
+#if CPU_USES_BLOCK_MOVE
+ /* If we might need to generate a rep mov instruction */
+ /* remove ESI and EDI */
+ if (compBlkOpUsed)
+ regAvail &= ~(RBM_ESI | RBM_EDI);
+#endif
+
+#ifdef _TARGET_X86_
+    /* If we are using longs then we remove ESI to allow */
+    /* ESI:EBX to be saved across a call */
+ if (compLongUsed)
+ regAvail &= ~(RBM_ESI);
+#endif
+
+#ifdef _TARGET_ARM_
+    // For the first register allocation pass we don't want to color using r4,
+    // as we want to allow it to be used to color the internal temps instead
+    // when r0, r1, r2, and r3 are all in use.
+ //
+ regAvail &= ~(RBM_R4);
+#endif
+
+#if ETW_EBP_FRAMED
+    // We never have EBP available when ETW_EBP_FRAMED is defined
+ regAvail &= ~RBM_FPBASE;
+#else
+ /* If a frame pointer is required then we remove EBP */
+ if (codeGen->isFramePointerRequired() || codeGen->isFrameRequired())
+ regAvail &= ~RBM_FPBASE;
+#endif
+
+#ifdef DEBUG
+ BOOL fJitNoRegLoc = JitConfig.JitNoRegLoc();
+ if (fJitNoRegLoc)
+ regAvail = RBM_NONE;
+#endif
+
+ if ((opts.compFlags & CLFLG_REGVAR) == 0)
+ regAvail = RBM_NONE;
+
+#if FEATURE_STACK_FP_X87
+ VarSetOps::AssignNoCopy(this, optAllNonFPvars, VarSetOps::MakeEmpty(this));
+ VarSetOps::AssignNoCopy(this, optAllFloatVars, VarSetOps::MakeEmpty(this));
+
+ // Calculate the set of all tracked FP/non-FP variables
+ // into optAllFloatVars and optAllNonFPvars
+
+ unsigned lclNum;
+ LclVarDsc* varDsc;
+
+ for (lclNum = 0, varDsc = lvaTable; lclNum < lvaCount; lclNum++, varDsc++)
+ {
+ /* Ignore the variable if it's not tracked */
+
+ if (!varDsc->lvTracked)
+ continue;
+
+ /* Get hold of the index and the interference mask for the variable */
+
+ unsigned varNum = varDsc->lvVarIndex;
+
+ /* add to the set of all tracked FP/non-FP variables */
+
+ if (varDsc->IsFloatRegType())
+ VarSetOps::AddElemD(this, optAllFloatVars, varNum);
+ else
+ VarSetOps::AddElemD(this, optAllNonFPvars, varNum);
+ }
+#endif
+
+ for (unsigned i = 0; i < REG_COUNT; i++)
+ {
+ VarSetOps::AssignNoCopy(this, raLclRegIntf[i], VarSetOps::MakeEmpty(this));
+ }
+ for (unsigned i = 0; i < lvaTrackedCount; i++)
+ {
+ VarSetOps::AssignNoCopy(this, lvaVarPref[i], VarSetOps::MakeEmpty(this));
+ }
+
+ raNewBlocks = false;
+ rpPredictAssignAgain = false;
+ rpPasses = 0;
+
+ bool mustPredict = true;
+ unsigned stmtNum = 0;
+ unsigned oldStkPredict = DUMMY_INIT(~0);
+ VARSET_TP oldLclRegIntf[REG_COUNT];
+
+ for (unsigned i = 0; i < REG_COUNT; i++)
+ {
+ VarSetOps::AssignNoCopy(this, oldLclRegIntf[i], VarSetOps::MakeEmpty(this));
+ }
+
+ while (true)
+ {
+ /* Assign registers to variables using the variable/register interference
+ graph (raLclRegIntf[]) calculated in the previous pass */
+ regUsed = rpPredictAssignRegVars(regAvail);
+
+ mustPredict |= rpLostEnreg;
+
+#ifdef _TARGET_ARM_
+
+ // See if we previously reserved REG_R10 and try to make it available if we have a small frame now
+ //
+ if ((rpPasses == 0) && (codeGen->regSet.rsMaskResvd & RBM_OPT_RSVD))
+ {
+ if (compRsvdRegCheck(REGALLOC_FRAME_LAYOUT))
+ {
+ // We must keep reserving R10 in this case
+ codeGen->regSet.rsMaskResvd |= RBM_OPT_RSVD;
+ }
+ else
+ {
+ // We can release our reservation on R10 and use it to color registers
+ //
+ codeGen->regSet.rsMaskResvd &= ~RBM_OPT_RSVD;
+ allAcceptableRegs |= RBM_OPT_RSVD;
+ }
+ }
+#endif
+
+        /* Is our new prediction good enough? */
+ if (!mustPredict)
+ {
+            /* For small methods (12 or fewer stmts), we add an */
+            /* extra pass if we are predicting the use of some  */
+            /* of the caller saved registers.                   */
+            /* This fixes RAID perf bug 43440 VB Ackerman function */
+
+ if ((rpPasses == 1) && (stmtNum <= 12) && (regUsed & RBM_CALLEE_SAVED))
+ {
+ goto EXTRA_PASS;
+ }
+
+ /* If every variable was fully enregistered then we're done */
+ if (rpStkPredict == 0)
+ goto ALL_DONE;
+
+ // This was a successful prediction. Record it, in case it turns out to be the best one.
+ rpRecordPrediction();
+
+ if (rpPasses > 1)
+ {
+ noway_assert(oldStkPredict != (unsigned)DUMMY_INIT(~0));
+
+ // Be careful about overflow
+ unsigned highStkPredict = (rpStkPredict * 2 < rpStkPredict) ? ULONG_MAX : rpStkPredict * 2;
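+                // For illustration: with 32-bit unsigned math, rpStkPredict == 0x90000000
+                // doubles to 0x20000000, which is smaller than the original, so the
+                // wraparound is detected and highStkPredict is clamped to ULONG_MAX.
+                // The test below then accepts this pass's result unless the prediction
+                // improved to at most half of the previous pass's value.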
+ if (oldStkPredict < highStkPredict)
+ goto ALL_DONE;
+
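+                // Heuristic cut-offs, presumably: once the predicted stack cost is small
+                // relative to the number of passes already run, or we are about to hit
+                // the pass limit, further passes are unlikely to pay off.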
+ if (rpStkPredict < rpPasses * 8)
+ goto ALL_DONE;
+
+ if (rpPasses >= (rpPassesMax - 1))
+ goto ALL_DONE;
+ }
+
+ EXTRA_PASS:
+ /* We will do another pass */;
+ }
+
+#ifdef DEBUG
+ if (JitConfig.JitAssertOnMaxRAPasses())
+ {
+            noway_assert(rpPasses < rpPassesMax &&
+                         "This may not be a bug, but the dev team should look and see what is happening");
+ }
+#endif
+
+        // The "64" here had been "VARSET_SZ". It is unclear why this number was connected
+        // with the (max) size of a VARSET. That constant has since been eliminated, so we
+        // leave the literal here; we hope to phase out this code anyway, and keeping it
+        // preserves the existing behavior.
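+        // For example, if rpPassesMax - rpPassesPessimize is 3, this allows at most
+        // 3 + 64 = 67 prediction passes before we give up.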
+ if (rpPasses > (rpPassesMax - rpPassesPessimize) + 64)
+ {
+ NO_WAY("we seem to be stuck in an infinite loop. breaking out");
+ }
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ if (rpPasses > 0)
+ {
+ if (rpLostEnreg)
+ printf("\n; Another pass due to rpLostEnreg");
+ if (rpAddedVarIntf)
+ printf("\n; Another pass due to rpAddedVarIntf");
+ if ((rpPasses == 1) && rpPredictAssignAgain)
+ printf("\n; Another pass due to rpPredictAssignAgain");
+ }
+ printf("\n; Register predicting pass# %d\n", rpPasses + 1);
+ }
+#endif
+
+ /* Zero the variable/register interference graph */
+ for (unsigned i = 0; i < REG_COUNT; i++)
+ {
+ VarSetOps::ClearD(this, raLclRegIntf[i]);
+ }
+
+ // if there are PInvoke calls and compLvFrameListRoot is enregistered,
+ // it must not be in a register trashed by the callee
+ if (info.compCallUnmanaged != 0)
+ {
+ assert(!opts.ShouldUsePInvokeHelpers());
+ noway_assert(info.compLvFrameListRoot < lvaCount);
+
+ LclVarDsc* pinvokeVarDsc = &lvaTable[info.compLvFrameListRoot];
+
+ if (pinvokeVarDsc->lvTracked)
+ {
+ rpRecordRegIntf(RBM_CALLEE_TRASH, VarSetOps::MakeSingleton(this, pinvokeVarDsc->lvVarIndex)
+ DEBUGARG("compLvFrameListRoot"));
+
+            // We would prefer to have this enregistered in the PINVOKE_TCB register
+ pinvokeVarDsc->addPrefReg(RBM_PINVOKE_TCB, this);
+ }
+
+ // If we're using a single return block, the p/invoke epilog code trashes ESI and EDI (in the
+ // worst case). Make sure that the return value compiler temp that we create for the single
+ // return block knows about this interference.
+ if (genReturnLocal != BAD_VAR_NUM)
+ {
+ noway_assert(genReturnBB);
+ LclVarDsc* localTmp = &lvaTable[genReturnLocal];
+ if (localTmp->lvTracked)
+ {
+ rpRecordRegIntf(RBM_PINVOKE_TCB | RBM_PINVOKE_FRAME,
+ VarSetOps::MakeSingleton(this, localTmp->lvVarIndex) DEBUGARG("genReturnLocal"));
+ }
+ }
+ }
+
+#ifdef _TARGET_ARM_
+ if (compFloatingPointUsed)
+ {
+ bool hasMustInitFloat = false;
+
+            // If we have any must-init floating-point LclVars then we will add register
+            // interferences with RBM_SCRATCH for the arguments. This is so that if we need
+            // to reset initReg to REG_SCRATCH in Compiler::genFnProlog(), we won't home
+            // the arguments into REG_SCRATCH.
+
+ unsigned lclNum;
+ LclVarDsc* varDsc;
+
+ for (lclNum = 0, varDsc = lvaTable; lclNum < lvaCount; lclNum++, varDsc++)
+ {
+ if (varDsc->lvMustInit && varTypeIsFloating(varDsc->TypeGet()))
+ {
+ hasMustInitFloat = true;
+ break;
+ }
+ }
+
+ if (hasMustInitFloat)
+ {
+ for (lclNum = 0, varDsc = lvaTable; lclNum < lvaCount; lclNum++, varDsc++)
+ {
+                // If it is an incoming argument that is tracked and not floating-point
+ if (varDsc->lvIsParam && varDsc->lvTracked && !varTypeIsFloating(varDsc->TypeGet()))
+ {
+ rpRecordRegIntf(RBM_SCRATCH, VarSetOps::MakeSingleton(this, varDsc->lvVarIndex)
+ DEBUGARG("arg home with must-init fp"));
+ }
+ }
+ }
+ }
+#endif
+
+ stmtNum = 0;
+ rpAddedVarIntf = false;
+ rpLostEnreg = false;
+
+ /* Walk the basic blocks and predict reg use for each tree */
+
+ for (BasicBlock* block = fgFirstBB; block != NULL; block = block->bbNext)
+ {
+ GenTreePtr stmt;
+ compCurBB = block;
+ compCurLifeTree = NULL;
+ VarSetOps::Assign(this, compCurLife, block->bbLiveIn);
+
+ for (stmt = block->FirstNonPhiDef(); stmt != NULL; stmt = stmt->gtNext)
+ {
+ noway_assert(stmt->gtOper == GT_STMT);
+
+ rpPredictSpillCnt = 0;
+ VarSetOps::AssignNoCopy(this, rpLastUseVars, VarSetOps::MakeEmpty(this));
+ VarSetOps::AssignNoCopy(this, rpUseInPlace, VarSetOps::MakeEmpty(this));
+
+ GenTreePtr tree = stmt->gtStmt.gtStmtExpr;
+ stmtNum++;
+#ifdef DEBUG
+            if (verbose)
+ {
+ printf("\nRegister predicting BB%02u, stmt %d\n", block->bbNum, stmtNum);
+ gtDispTree(tree);
+ printf("\n");
+ }
+#endif
+ rpPredictTreeRegUse(tree, PREDICT_NONE, RBM_NONE, RBM_NONE);
+
+ noway_assert(rpAsgVarNum == -1);
+
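+                // Track the worst-case number of integer spill temps needed by any single
+                // statement; frame allocation presumably uses tmpIntSpillMax to reserve
+                // enough spill temp slots.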
+ if (rpPredictSpillCnt > tmpIntSpillMax)
+ tmpIntSpillMax = rpPredictSpillCnt;
+ }
+ }
+ rpPasses++;
+
+ /* Decide whether we need to set mustPredict */
+ mustPredict = false;
+
+ if (rpAddedVarIntf)
+ {
+ mustPredict = true;
+#ifdef DEBUG
+ if (verbose)
+ raDumpVarIntf();
+#endif
+ }
+
+ if (rpPasses == 1)
+ {
+ if ((opts.compFlags & CLFLG_REGVAR) == 0)
+ goto ALL_DONE;
+
+ if (rpPredictAssignAgain)
+ mustPredict = true;
+#ifdef DEBUG
+ if (fJitNoRegLoc)
+ goto ALL_DONE;
+#endif
+ }
+
+ /* Calculate the new value to use for regAvail */
+
+ regAvail = allAcceptableRegs;
+
+ /* If a frame pointer is required then we remove EBP */
+ if (codeGen->isFramePointerRequired() || codeGen->isFrameRequired())
+ regAvail &= ~RBM_FPBASE;
+
+#if ETW_EBP_FRAMED
+        // We never have EBP available when ETW_EBP_FRAMED is defined
+ regAvail &= ~RBM_FPBASE;
+#endif
+
+        // Once we have done more than rpPassesPessimize passes, we must continue to
+        // pessimize the interference graph by or-ing in the interferences from the previous pass
+
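+        // Because the union below only ever grows raLclRegIntf, each pessimizing pass can
+        // only add interference (never remove it), which should force the predictions to
+        // eventually stabilize.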
+ if (rpPasses > rpPassesPessimize)
+ {
+ for (unsigned regInx = 0; regInx < REG_COUNT; regInx++)
+ VarSetOps::UnionD(this, raLclRegIntf[regInx], oldLclRegIntf[regInx]);
+
+ /* If we reverse an EBP enregistration then keep it that way */
+ if (rpReverseEBPenreg)
+ regAvail &= ~RBM_FPBASE;
+ }
+
+#ifdef DEBUG
+ if (verbose)
+ raDumpRegIntf();
+#endif
+
+ /* Save the old variable/register interference graph */
+ for (unsigned i = 0; i < REG_COUNT; i++)
+ {
+ VarSetOps::Assign(this, oldLclRegIntf[i], raLclRegIntf[i]);
+ }
+ oldStkPredict = rpStkPredict;
+ } // end of while (true)
+
+ALL_DONE:;
+
+ // If we recorded a better feasible allocation than we ended up with, go back to using it.
+ rpUseRecordedPredictionIfBetter();
+
+#if DOUBLE_ALIGN
+ codeGen->setDoubleAlign(false);
+#endif
+
+ switch (rpFrameType)
+ {
+ default:
+ noway_assert(!"rpFrameType not set correctly!");
+ break;
+ case FT_ESP_FRAME:
+ noway_assert(!codeGen->isFramePointerRequired());
+ noway_assert(!codeGen->isFrameRequired());
+ codeGen->setFramePointerUsed(false);
+ break;
+ case FT_EBP_FRAME:
+ noway_assert((regUsed & RBM_FPBASE) == 0);
+ codeGen->setFramePointerUsed(true);
+ break;
+#if DOUBLE_ALIGN
+ case FT_DOUBLE_ALIGN_FRAME:
+ noway_assert((regUsed & RBM_FPBASE) == 0);
+ noway_assert(!codeGen->isFramePointerRequired());
+ codeGen->setFramePointerUsed(false);
+ codeGen->setDoubleAlign(true);
+ break;
+#endif
+ }
+
+ /* Record the set of registers that we need */
+ codeGen->regSet.rsClearRegsModified();
+ if (regUsed != RBM_NONE)
+ {
+ codeGen->regSet.rsSetRegsModified(regUsed);
+ }
+
+    /* We need genFullPtrRegMap if:
+ * The method is fully interruptible, or
+ * We are generating an EBP-less frame (for stack-pointer deltas)
+ */
+
+ genFullPtrRegMap = (genInterruptible || !codeGen->isFramePointerUsed());
+
+ raMarkStkVars();
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("# rpPasses was %u for %s\n", rpPasses, info.compFullName);
+ printf(" rpStkPredict was %u\n", rpStkPredict);
+ }
+#endif
+ rpRegAllocDone = true;
+}
+
+#endif // LEGACY_BACKEND
+
+/*****************************************************************************
+ *
+ * Mark all variables as to whether they live on the stack frame
+ * (part or whole), and if so what the base is (FP or SP).
+ */
+
+void Compiler::raMarkStkVars()
+{
+ unsigned lclNum;
+ LclVarDsc* varDsc;
+
+ for (lclNum = 0, varDsc = lvaTable; lclNum < lvaCount; lclNum++, varDsc++)
+ {
+ // For RyuJIT, lvOnFrame is set by LSRA, except in the case of zero-ref, which is set below.
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef LEGACY_BACKEND
+ varDsc->lvOnFrame = false;
+#endif // LEGACY_BACKEND
+
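+        // Fields of a dependently promoted struct are never enregistered; they are
+        // accessed through the parent struct's home on the frame, so they always
+        // count as stack-based here.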
+ if (lvaIsFieldOfDependentlyPromotedStruct(varDsc))
+ {
+ noway_assert(!varDsc->lvRegister);
+ goto ON_STK;
+ }
+
+ /* Fully enregistered variables don't need any frame space */
+
+ if (varDsc->lvRegister)
+ {
+ if (!isRegPairType(varDsc->TypeGet()))
+ {
+ goto NOT_STK;
+ }
+
+ /* For "large" variables make sure both halves are enregistered */
+
+ if (varDsc->lvRegNum != REG_STK && varDsc->lvOtherReg != REG_STK)
+ {
+ goto NOT_STK;
+ }
+ }
+ /* Unused variables typically don't get any frame space */
+ else if (varDsc->lvRefCnt == 0)
+ {
+ bool needSlot = false;
+
+ bool stkFixedArgInVarArgs =
+ info.compIsVarArgs && varDsc->lvIsParam && !varDsc->lvIsRegArg && lclNum != lvaVarargsHandleArg;
+
+            // If its address has been exposed, ignore lvRefCnt. However, exclude
+            // fixed arguments in a varargs method, as lvOnFrame shouldn't be set
+            // for them: we don't want to explicitly report them to the GC.
+
+ if (!stkFixedArgInVarArgs)
+ {
+ needSlot |= varDsc->lvAddrExposed;
+ }
+
+#if FEATURE_FIXED_OUT_ARGS
+
+ /* Is this the dummy variable representing GT_LCLBLK ? */
+ needSlot |= (lclNum == lvaOutgoingArgSpaceVar);
+
+#endif // FEATURE_FIXED_OUT_ARGS
+
+#ifdef DEBUGGING_SUPPORT
+
+#ifdef DEBUG
+ /* For debugging, note that we have to reserve space even for
+ unused variables if they are ever in scope. However, this is not
+ an issue as fgExtendDbgLifetimes() adds an initialization and
+ variables in scope will not have a zero ref-cnt.
+ */
+ if (opts.compDbgCode && !varDsc->lvIsParam && varDsc->lvTracked)
+ {
+ for (unsigned scopeNum = 0; scopeNum < info.compVarScopesCount; scopeNum++)
+ {
+ noway_assert(info.compVarScopes[scopeNum].vsdVarNum != lclNum);
+ }
+ }
+#endif
+ /*
+ For Debug Code, we have to reserve space even if the variable is never
+ in scope. We will also need to initialize it if it is a GC var.
+              So we set lvMustInit and artificially bump up the ref-cnt.
+ */
+
+ if (opts.compDbgCode && !stkFixedArgInVarArgs && lclNum < info.compLocalsCount)
+ {
+ needSlot |= true;
+
+ if (lvaTypeIsGC(lclNum))
+ {
+ varDsc->lvRefCnt = 1;
+ }
+
+ if (!varDsc->lvIsParam)
+ {
+ varDsc->lvMustInit = true;
+ }
+ }
+#endif // DEBUGGING_SUPPORT
+
+#ifndef LEGACY_BACKEND
+ varDsc->lvOnFrame = needSlot;
+#endif // !LEGACY_BACKEND
+ if (!needSlot)
+ {
+ /* Clear the lvMustInit flag in case it is set */
+ varDsc->lvMustInit = false;
+
+ goto NOT_STK;
+ }
+ }
+
+#ifndef LEGACY_BACKEND
+ if (!varDsc->lvOnFrame)
+ {
+ goto NOT_STK;
+ }
+#endif // !LEGACY_BACKEND
+
+ ON_STK:
+ /* The variable (or part of it) lives on the stack frame */
+
+ noway_assert((varDsc->lvType != TYP_UNDEF) && (varDsc->lvType != TYP_VOID) && (varDsc->lvType != TYP_UNKNOWN));
+#if FEATURE_FIXED_OUT_ARGS
+ noway_assert((lclNum == lvaOutgoingArgSpaceVar) || lvaLclSize(lclNum) != 0);
+#else // FEATURE_FIXED_OUT_ARGS
+ noway_assert(lvaLclSize(lclNum) != 0);
+#endif // FEATURE_FIXED_OUT_ARGS
+
+ varDsc->lvOnFrame = true; // Our prediction is that the final home for this local variable will be in the
+ // stack frame
+
+ NOT_STK:;
+ varDsc->lvFramePointerBased = codeGen->isFramePointerUsed();
+
+#if DOUBLE_ALIGN
+
+ if (codeGen->doDoubleAlign())
+ {
+ noway_assert(codeGen->isFramePointerUsed() == false);
+
+ /* All arguments are off of EBP with double-aligned frames */
+
+ if (varDsc->lvIsParam && !varDsc->lvIsRegArg)
+ {
+ varDsc->lvFramePointerBased = true;
+ }
+ }
+
+#endif
+
+ /* Some basic checks */
+
+ // It must be in a register, on frame, or have zero references.
+
+ noway_assert(varDsc->lvIsInReg() || varDsc->lvOnFrame || varDsc->lvRefCnt == 0);
+
+#ifndef LEGACY_BACKEND
+ // We can't have both lvRegister and lvOnFrame for RyuJIT
+ noway_assert(!varDsc->lvRegister || !varDsc->lvOnFrame);
+#else // LEGACY_BACKEND
+
+ /* If both lvRegister and lvOnFrame are set, it must be partially enregistered */
+ noway_assert(!varDsc->lvRegister || !varDsc->lvOnFrame ||
+ (varDsc->lvType == TYP_LONG && varDsc->lvOtherReg == REG_STK));
+#endif // LEGACY_BACKEND
+
+#ifdef DEBUG
+
+        // For varargs functions, there should be no direct references to
+        // parameter variables except for 'this' (because these were morphed
+        // in the importer), the 'arglist' parameter (which is not a GC
+        // pointer), and the return buffer argument (if we are returning a
+        // struct).
+        // This is important because we don't want to try to report them
+        // to the GC, as the frame offsets in these local variables would
+        // not be correct.
+
+ if (varDsc->lvIsParam && raIsVarargsStackArg(lclNum))
+ {
+ if (!varDsc->lvPromoted && !varDsc->lvIsStructField)
+ {
+ noway_assert(varDsc->lvRefCnt == 0 && !varDsc->lvRegister && !varDsc->lvOnFrame);
+ }
+ }
+#endif
+ }
+}
+
+#ifdef LEGACY_BACKEND
+void Compiler::rpRecordPrediction()
+{
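+    // Keep a copy of the enregistration decisions for the best (lowest weighted stack
+    // use) feasible prediction seen so far, so that a later, worse pass can be rolled
+    // back by rpUseRecordedPredictionIfBetter().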
+ if (rpBestRecordedPrediction == NULL || rpStkPredict < rpBestRecordedStkPredict)
+ {
+ if (rpBestRecordedPrediction == NULL)
+ {
+ rpBestRecordedPrediction =
+ reinterpret_cast<VarRegPrediction*>(compGetMemArrayA(lvaCount, sizeof(VarRegPrediction)));
+ }
+ for (unsigned k = 0; k < lvaCount; k++)
+ {
+ rpBestRecordedPrediction[k].m_isEnregistered = lvaTable[k].lvRegister;
+ rpBestRecordedPrediction[k].m_regNum = (regNumberSmall)lvaTable[k].GetRegNum();
+ rpBestRecordedPrediction[k].m_otherReg = (regNumberSmall)lvaTable[k].GetOtherReg();
+ }
+ rpBestRecordedStkPredict = rpStkPredict;
+ JITDUMP("Recorded a feasible reg prediction with weighted stack use count %d.\n", rpBestRecordedStkPredict);
+ }
+}
+
+void Compiler::rpUseRecordedPredictionIfBetter()
+{
+ JITDUMP("rpStkPredict is %d; previous feasible reg prediction is %d.\n", rpStkPredict,
+ rpBestRecordedPrediction != NULL ? rpBestRecordedStkPredict : 0);
+ if (rpBestRecordedPrediction != NULL && rpStkPredict > rpBestRecordedStkPredict)
+ {
+ JITDUMP("Reverting to a previously-recorded feasible reg prediction with weighted stack use count %d.\n",
+ rpBestRecordedStkPredict);
+
+ for (unsigned k = 0; k < lvaCount; k++)
+ {
+ lvaTable[k].lvRegister = rpBestRecordedPrediction[k].m_isEnregistered;
+ lvaTable[k].SetRegNum(static_cast<regNumber>(rpBestRecordedPrediction[k].m_regNum));
+ lvaTable[k].SetOtherReg(static_cast<regNumber>(rpBestRecordedPrediction[k].m_otherReg));
+ }
+ }
+}
+#endif // LEGACY_BACKEND