diff --git a/src/jit/codegencommon.cpp b/src/jit/codegencommon.cpp
new file mode 100755
index 0000000000..2710447ade
--- /dev/null
+++ b/src/jit/codegencommon.cpp
@@ -0,0 +1,11779 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX                                                                           XX
+XX                          Code Generator Common:                          XX
+XX   Methods common to all architectures and register allocation strategies XX
+XX                                                                           XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+// TODO-Cleanup: There are additional methods in CodeGen*.cpp that are almost
+// identical, and which should probably be moved here.
+
+#include "jitpch.h"
+#ifdef _MSC_VER
+#pragma hdrstop
+#endif
+#include "codegen.h"
+
+#include "gcinfo.h"
+#include "emit.h"
+
+#ifndef JIT32_GCENCODER
+#include "gcinfoencoder.h"
+#endif
+
+/*****************************************************************************/
+
+const BYTE genTypeSizes[] = {
+#define DEF_TP(tn, nm, jitType, verType, sz, sze, asze, st, al, tf, howUsed) sz,
+#include "typelist.h"
+#undef DEF_TP
+};
+
+const BYTE genTypeAlignments[] = {
+#define DEF_TP(tn, nm, jitType, verType, sz, sze, asze, st, al, tf, howUsed) al,
+#include "typelist.h"
+#undef DEF_TP
+};
+
+const BYTE genTypeStSzs[] = {
+#define DEF_TP(tn, nm, jitType, verType, sz, sze, asze, st, al, tf, howUsed) st,
+#include "typelist.h"
+#undef DEF_TP
+};
+
+const BYTE genActualTypes[] = {
+#define DEF_TP(tn, nm, jitType, verType, sz, sze, asze, st, al, tf, howUsed) jitType,
+#include "typelist.h"
+#undef DEF_TP
+};
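+
+// The DEF_TP "X-macro" pattern above expands each row of typelist.h into one
+// array element, which keeps all four tables in sync with the type list. As an
+// illustrative sketch (hypothetical rows, not the real typelist.h contents):
+//
+//   DEF_TP(INT , "int" , TYP_INT , ..., 4, ...) contributes "4," to genTypeSizes
+//   DEF_TP(LONG, "long", TYP_LONG, ..., 8, ...) contributes "8," to genTypeSizes
+//
+// so that genTypeSizes[TYP_INT] == 4 and genTypeSizes[TYP_LONG] == 8.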
+
+void CodeGenInterface::setFramePointerRequiredEH(bool value)
+{
+ m_cgFramePointerRequired = value;
+
+#ifndef JIT32_GCENCODER
+ if (value)
+ {
+ // EnumGcRefs will only enumerate slots in aborted frames
+ // if they are fully-interruptible. So if we have a catch
+ // or finally that will keep frame-vars alive, we need to
+ // force fully-interruptible.
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Method has EH, marking method as fully interruptible\n");
+ }
+#endif
+
+ m_cgInterruptible = true;
+ }
+#endif // JIT32_GCENCODER
+}
+
+/*****************************************************************************/
+CodeGenInterface* getCodeGenerator(Compiler* comp)
+{
+ return new (comp, CMK_Codegen) CodeGen(comp);
+}
+
+// CodeGenInterface constructor
+CodeGenInterface::CodeGenInterface(Compiler* theCompiler)
+ : gcInfo(theCompiler), regSet(theCompiler, gcInfo), compiler(theCompiler)
+{
+}
+
+/*****************************************************************************/
+
+CodeGen::CodeGen(Compiler* theCompiler) : CodeGenInterface(theCompiler)
+{
+#if defined(_TARGET_XARCH_) && !FEATURE_STACK_FP_X87
+ negBitmaskFlt = nullptr;
+ negBitmaskDbl = nullptr;
+ absBitmaskFlt = nullptr;
+ absBitmaskDbl = nullptr;
+ u8ToDblBitmask = nullptr;
+#endif // defined(_TARGET_XARCH_) && !FEATURE_STACK_FP_X87
+
+ regTracker.rsTrackInit(compiler, &regSet);
+ gcInfo.regSet = &regSet;
+ m_cgEmitter = new (compiler->getAllocator()) emitter();
+ m_cgEmitter->codeGen = this;
+ m_cgEmitter->gcInfo = &gcInfo;
+
+#ifdef DEBUG
+ setVerbose(compiler->verbose);
+#endif // DEBUG
+
+ compiler->tmpInit();
+
+#ifdef DEBUG
+#if defined(_TARGET_X86_) && defined(LEGACY_BACKEND)
+ // This appears to be x86-specific. It's attempting to make sure all offsets to temps
+ // are large. For ARM, this doesn't interact well with our decision about whether to use
+ // R10 or not as a reserved register.
+ if (regSet.rsStressRegs())
+ compiler->tmpIntSpillMax = (SCHAR_MAX / sizeof(int));
+#endif // defined(_TARGET_X86_) && defined(LEGACY_BACKEND)
+#endif // DEBUG
+
+ instInit();
+
+#ifdef LEGACY_BACKEND
+ // TODO-Cleanup: These used to be set in rsInit() - should they be moved to RegSet??
+ // They are also accessed by the register allocators and fgMorphLclVar().
+ intRegState.rsCurRegArgNum = 0;
+ floatRegState.rsCurRegArgNum = 0;
+#endif // LEGACY_BACKEND
+
+#ifdef LATE_DISASM
+ getDisAssembler().disInit(compiler);
+#endif
+
+#ifdef DEBUG
+ genTempLiveChg = true;
+ genTrnslLocalVarCount = 0;
+
+ // Shouldn't be used before it is set in genFnProlog()
+ compiler->compCalleeRegsPushed = UninitializedWord<unsigned>();
+
+#if defined(_TARGET_XARCH_) && !FEATURE_STACK_FP_X87
+ // Shouldn't be used before it is set in genFnProlog()
+ compiler->compCalleeFPRegsSavedMask = (regMaskTP)-1;
+#endif // defined(_TARGET_XARCH_) && !FEATURE_STACK_FP_X87
+#endif // DEBUG
+
+#ifdef _TARGET_AMD64_
+ // This will be set before final frame layout.
+ compiler->compVSQuirkStackPaddingNeeded = 0;
+
+ // Set to true if we perform the Quirk that fixes the PPP issue
+ compiler->compQuirkForPPPflag = false;
+#endif // _TARGET_AMD64_
+
+#ifdef LEGACY_BACKEND
+ genFlagsEqualToNone();
+#endif // LEGACY_BACKEND
+
+#ifdef DEBUGGING_SUPPORT
+ // Initialize the IP-mapping logic.
+ compiler->genIPmappingList = nullptr;
+ compiler->genIPmappingLast = nullptr;
+ compiler->genCallSite2ILOffsetMap = nullptr;
+#endif
+
+ /* Assume that we are not fully interruptible */
+
+ genInterruptible = false;
+#ifdef DEBUG
+ genInterruptibleUsed = false;
+ genCurDispOffset = (unsigned)-1;
+#endif
+}
+
+void CodeGenInterface::genMarkTreeInReg(GenTreePtr tree, regNumber reg)
+{
+ tree->gtRegNum = reg;
+ tree->gtFlags |= GTF_REG_VAL;
+}
+
+#if CPU_LONG_USES_REGPAIR
+void CodeGenInterface::genMarkTreeInRegPair(GenTreePtr tree, regPairNo regPair)
+{
+ tree->gtRegPair = regPair;
+ tree->gtFlags |= GTF_REG_VAL;
+}
+#endif
+
+#if defined(_TARGET_X86_) || defined(_TARGET_ARM_)
+
+//---------------------------------------------------------------------
+// genTotalFrameSize - return the "total" size of the stack frame, including local size
+// and callee-saved register size. There are a few things "missing" depending on the
+// platform. The function genCallerSPtoInitialSPdelta() includes those things.
+//
+// For ARM, this doesn't include the prespilled registers.
+//
+// For x86, this doesn't include the frame pointer if codeGen->isFramePointerUsed() is true.
+// It also doesn't include the pushed return address.
+//
+// Return value:
+// Frame size
+
+int CodeGenInterface::genTotalFrameSize()
+{
+ assert(!IsUninitialized(compiler->compCalleeRegsPushed));
+
+ int totalFrameSize = compiler->compCalleeRegsPushed * REGSIZE_BYTES + compiler->compLclFrameSize;
+
+ assert(totalFrameSize >= 0);
+ return totalFrameSize;
+}
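+
+// For example (illustrative, x86 where REGSIZE_BYTES == 4): with 3 callee-saved
+// registers pushed and compLclFrameSize == 32, genTotalFrameSize() returns
+// 3 * 4 + 32 = 44 bytes.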
+
+//---------------------------------------------------------------------
+// genSPtoFPdelta - return the offset from SP to the frame pointer.
+// This number is going to be positive, since SP must be at the lowest
+// address.
+//
+// There must be a frame pointer to call this function!
+
+int CodeGenInterface::genSPtoFPdelta()
+{
+ assert(isFramePointerUsed());
+
+ int delta;
+
+ delta = -genCallerSPtoInitialSPdelta() + genCallerSPtoFPdelta();
+
+ assert(delta >= 0);
+ return delta;
+}
+
+//---------------------------------------------------------------------
+// genCallerSPtoFPdelta - return the offset from Caller-SP to the frame pointer.
+// This number is going to be negative, since the Caller-SP is at a higher
+// address than the frame pointer.
+//
+// There must be a frame pointer to call this function!
+
+int CodeGenInterface::genCallerSPtoFPdelta()
+{
+ assert(isFramePointerUsed());
+ int callerSPtoFPdelta = 0;
+
+#if defined(_TARGET_ARM_)
+ // On ARM, we first push the prespill registers, then store LR, then R11 (FP), and point R11 at the saved R11.
+ callerSPtoFPdelta -= genCountBits(regSet.rsMaskPreSpillRegs(true)) * REGSIZE_BYTES;
+ callerSPtoFPdelta -= 2 * REGSIZE_BYTES;
+#elif defined(_TARGET_X86_)
+ // Thanks to ebp chaining, the difference between ebp-based addresses
+ // and caller-SP-relative addresses is just the 2 pointers:
+ // return address
+ // pushed ebp
+ callerSPtoFPdelta -= 2 * REGSIZE_BYTES;
+#else
+#error "Unknown _TARGET_"
+#endif // _TARGET_*
+
+ assert(callerSPtoFPdelta <= 0);
+ return callerSPtoFPdelta;
+}
+
+//---------------------------------------------------------------------
+// genCallerSPtoInitialSPdelta - return the offset from Caller-SP to Initial SP.
+//
+// This number will be negative.
+
+int CodeGenInterface::genCallerSPtoInitialSPdelta()
+{
+ int callerSPtoSPdelta = 0;
+
+#if defined(_TARGET_ARM_)
+ callerSPtoSPdelta -= genCountBits(regSet.rsMaskPreSpillRegs(true)) * REGSIZE_BYTES;
+ callerSPtoSPdelta -= genTotalFrameSize();
+#elif defined(_TARGET_X86_)
+ callerSPtoSPdelta -= genTotalFrameSize();
+ callerSPtoSPdelta -= REGSIZE_BYTES; // caller-pushed return address
+
+ // compCalleeRegsPushed does not account for the frame pointer
+ // TODO-Cleanup: shouldn't this be part of genTotalFrameSize?
+ if (isFramePointerUsed())
+ {
+ callerSPtoSPdelta -= REGSIZE_BYTES;
+ }
+#else
+#error "Unknown _TARGET_"
+#endif // _TARGET_*
+
+ assert(callerSPtoSPdelta <= 0);
+ return callerSPtoSPdelta;
+}
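+
+// For example (illustrative, x86 with a frame pointer): continuing the frame
+// above (3 callee-saved registers, 32 bytes of locals, genTotalFrameSize() == 44),
+// genCallerSPtoInitialSPdelta() = -(44 + 4 + 4) = -52 (return address plus the
+// pushed EBP), genCallerSPtoFPdelta() = -8, and so genSPtoFPdelta() = 52 - 8 = 44.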
+
+#endif // defined(_TARGET_X86_) || defined(_TARGET_ARM_)
+
+/*****************************************************************************
+ * Should we round simple operations (assignments, arithmetic operations, etc.)
+ */
+
+// inline
+// static
+bool CodeGen::genShouldRoundFP()
+{
+ RoundLevel roundLevel = getRoundFloatLevel();
+
+ switch (roundLevel)
+ {
+ case ROUND_NEVER:
+ case ROUND_CMP_CONST:
+ case ROUND_CMP:
+ return false;
+
+ default:
+ assert(roundLevel == ROUND_ALWAYS);
+ return true;
+ }
+}
+
+/*****************************************************************************
+ *
+ * Initialize some global variables.
+ */
+
+void CodeGen::genPrepForCompiler()
+{
+ unsigned varNum;
+ LclVarDsc* varDsc;
+
+ /* Figure out which non-register variables hold pointers */
+
+ VarSetOps::AssignNoCopy(compiler, gcInfo.gcTrkStkPtrLcls, VarSetOps::MakeEmpty(compiler));
+
+ // Figure out which variables live in registers.
+ // Also, initialize gcTrkStkPtrLcls to include all tracked variables that do not fully live
+ // in a register (i.e. they live on the stack for all or part of their lifetime).
+ // Note that lvRegister indicates that a lclVar is in a register for its entire lifetime.
+
+ VarSetOps::AssignNoCopy(compiler, compiler->raRegVarsMask, VarSetOps::MakeEmpty(compiler));
+
+ for (varNum = 0, varDsc = compiler->lvaTable; varNum < compiler->lvaCount; varNum++, varDsc++)
+ {
+ if (varDsc->lvTracked
+#ifndef LEGACY_BACKEND
+ || varDsc->lvIsRegCandidate()
+#endif // !LEGACY_BACKEND
+ )
+ {
+ if (varDsc->lvRegister
+#if FEATURE_STACK_FP_X87
+ && !varDsc->IsFloatRegType()
+#endif
+ )
+ {
+ VarSetOps::AddElemD(compiler, compiler->raRegVarsMask, varDsc->lvVarIndex);
+ }
+ else if (compiler->lvaIsGCTracked(varDsc) && (!varDsc->lvIsParam || varDsc->lvIsRegArg))
+ {
+ VarSetOps::AddElemD(compiler, gcInfo.gcTrkStkPtrLcls, varDsc->lvVarIndex);
+ }
+ }
+ }
+ VarSetOps::AssignNoCopy(compiler, genLastLiveSet, VarSetOps::MakeEmpty(compiler));
+ genLastLiveMask = RBM_NONE;
+#ifdef DEBUG
+ compiler->fgBBcountAtCodegen = compiler->fgBBcount;
+#endif
+}
+
+/*****************************************************************************
+ * To report exception handling information to the VM, we need the size of the exception
+ * handling regions. To compute that, we need to emit labels for the beginning block of
+ * an EH region, and the block that immediately follows a region. Go through the EH
+ * table and mark all these blocks with BBF_HAS_LABEL to make this happen.
+ *
+ * The beginning blocks of the EH regions already should have this flag set.
+ *
+ * No blocks should be added or removed after this.
+ *
+ * This code is closely coupled with genReportEH() in the sense that any block
+ * that this procedure determines needs a label must be selected using the same
+ * logic both here and in genReportEH(); so, basically, any time there is a
+ * change in the way we handle EH reporting, we have to keep the logic of these
+ * two methods 'in sync'.
+ */
+
+void CodeGen::genPrepForEHCodegen()
+{
+ assert(!compiler->fgSafeBasicBlockCreation);
+
+ EHblkDsc* HBtab;
+ EHblkDsc* HBtabEnd;
+
+ bool anyFinallys = false;
+
+ for (HBtab = compiler->compHndBBtab, HBtabEnd = compiler->compHndBBtab + compiler->compHndBBtabCount;
+ HBtab < HBtabEnd; HBtab++)
+ {
+ assert(HBtab->ebdTryBeg->bbFlags & BBF_HAS_LABEL);
+ assert(HBtab->ebdHndBeg->bbFlags & BBF_HAS_LABEL);
+
+ if (HBtab->ebdTryLast->bbNext != nullptr)
+ {
+ HBtab->ebdTryLast->bbNext->bbFlags |= BBF_HAS_LABEL;
+ }
+
+ if (HBtab->ebdHndLast->bbNext != nullptr)
+ {
+ HBtab->ebdHndLast->bbNext->bbFlags |= BBF_HAS_LABEL;
+ }
+
+ if (HBtab->HasFilter())
+ {
+ assert(HBtab->ebdFilter->bbFlags & BBF_HAS_LABEL);
+ // The block after the last block of the filter is
+ // the handler begin block, which we already asserted
+ // has BBF_HAS_LABEL set.
+ }
+
+#ifdef _TARGET_AMD64_
+ if (HBtab->HasFinallyHandler())
+ {
+ anyFinallys = true;
+ }
+#endif // _TARGET_AMD64_
+ }
+
+#ifdef _TARGET_AMD64_
+ if (anyFinallys)
+ {
+ for (BasicBlock* block = compiler->fgFirstBB; block != nullptr; block = block->bbNext)
+ {
+ if (block->bbJumpKind == BBJ_CALLFINALLY)
+ {
+ BasicBlock* bbToLabel = block->bbNext;
+ if (block->isBBCallAlwaysPair())
+ {
+ bbToLabel = bbToLabel->bbNext; // skip the BBJ_ALWAYS
+ }
+ if (bbToLabel != nullptr)
+ {
+ bbToLabel->bbFlags |= BBF_HAS_LABEL;
+ }
+ } // block is BBJ_CALLFINALLY
+ } // for each block
+ } // if (anyFinallys)
+#endif // _TARGET_AMD64_
+}
+
+void CodeGenInterface::genUpdateLife(GenTreePtr tree)
+{
+ compiler->compUpdateLife</*ForCodeGen*/ true>(tree);
+}
+
+void CodeGenInterface::genUpdateLife(VARSET_VALARG_TP newLife)
+{
+ compiler->compUpdateLife</*ForCodeGen*/ true>(newLife);
+}
+
+#ifdef LEGACY_BACKEND
+// Returns the liveSet after tree has executed.
+// "tree" MUST occur in the current statement, AFTER the most recent
+// update of compiler->compCurLifeTree and compiler->compCurLife.
+//
+VARSET_VALRET_TP CodeGen::genUpdateLiveSetForward(GenTreePtr tree)
+{
+ VARSET_TP VARSET_INIT(compiler, startLiveSet, compiler->compCurLife);
+ GenTreePtr startNode;
+ assert(tree != compiler->compCurLifeTree);
+ if (compiler->compCurLifeTree == nullptr)
+ {
+ assert(compiler->compCurStmt != nullptr);
+ startNode = compiler->compCurStmt->gtStmt.gtStmtList;
+ }
+ else
+ {
+ startNode = compiler->compCurLifeTree->gtNext;
+ }
+ return compiler->fgUpdateLiveSet(startLiveSet, startNode, tree);
+}
+
+// Determine the registers that are live after "second" has been evaluated,
+// but which are not live after "first".
+// PRECONDITIONS:
+// 1. "first" must occur after compiler->compCurLifeTree in execution order for the current statement
+// 2. "second" must occur after "first" in the current statement
+//
+regMaskTP CodeGen::genNewLiveRegMask(GenTreePtr first, GenTreePtr second)
+{
+ // First, compute the liveset after "first"
+ VARSET_TP firstLiveSet = genUpdateLiveSetForward(first);
+ // Now, update the set forward from "first" to "second"
+ VARSET_TP secondLiveSet = compiler->fgUpdateLiveSet(firstLiveSet, first->gtNext, second);
+ regMaskTP newLiveMask = genLiveMask(VarSetOps::Diff(compiler, secondLiveSet, firstLiveSet));
+ return newLiveMask;
+}
+#endif
+
+// Return the register mask for the given register variable
+// inline
+regMaskTP CodeGenInterface::genGetRegMask(const LclVarDsc* varDsc)
+{
+ regMaskTP regMask = RBM_NONE;
+
+ assert(varDsc->lvIsInReg());
+
+ if (varTypeIsFloating(varDsc->TypeGet()))
+ {
+ regMask = genRegMaskFloat(varDsc->lvRegNum, varDsc->TypeGet());
+ }
+ else
+ {
+ regMask = genRegMask(varDsc->lvRegNum);
+ if (isRegPairType(varDsc->lvType))
+ {
+ regMask |= genRegMask(varDsc->lvOtherReg);
+ }
+ }
+ return regMask;
+}
+
+// Return the register mask for the given lclVar or regVar tree node
+// inline
+regMaskTP CodeGenInterface::genGetRegMask(GenTreePtr tree)
+{
+ assert(tree->gtOper == GT_LCL_VAR || tree->gtOper == GT_REG_VAR);
+
+ regMaskTP regMask = RBM_NONE;
+ const LclVarDsc* varDsc = compiler->lvaTable + tree->gtLclVarCommon.gtLclNum;
+ if (varDsc->lvPromoted)
+ {
+ for (unsigned i = varDsc->lvFieldLclStart; i < varDsc->lvFieldLclStart + varDsc->lvFieldCnt; ++i)
+ {
+ noway_assert(compiler->lvaTable[i].lvIsStructField);
+ if (compiler->lvaTable[i].lvIsInReg())
+ {
+ regMask |= genGetRegMask(&compiler->lvaTable[i]);
+ }
+ }
+ }
+ else if (varDsc->lvIsInReg())
+ {
+ regMask = genGetRegMask(varDsc);
+ }
+ return regMask;
+}
+
+//------------------------------------------------------------------------
+// genGetRegPairFromMask: Given a register mask with exactly two bits set,
+// return the two registers specified by the mask.
+//
+// Arguments:
+// regPairMask: a register mask that has exactly two bits set
+// Output arguments:
+// pLoReg: the address at which to write the first (lower) register
+// pHiReg: the address at which to write the second (higher) register
+//
+void CodeGenInterface::genGetRegPairFromMask(regMaskTP regPairMask, regNumber* pLoReg, regNumber* pHiReg)
+{
+ assert(genCountBits(regPairMask) == 2);
+
+ regMaskTP loMask = genFindLowestBit(regPairMask); // set loMask to a one-bit mask
+ regMaskTP hiMask = regPairMask - loMask; // set hiMask to the other bit that was in regPairMask
+
+ regNumber loReg = genRegNumFromMask(loMask); // set loReg from loMask
+ regNumber hiReg = genRegNumFromMask(hiMask); // set hiReg from hiMask
+
+ *pLoReg = loReg;
+ *pHiReg = hiReg;
+}
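+
+// Illustrative usage (a sketch; x86 register numbering assumed, where RBM_EAX
+// is the lowest-order bit):
+//
+// regNumber loReg, hiReg;
+// genGetRegPairFromMask(RBM_EAX | RBM_EDX, &loReg, &hiReg);
+// // loReg == REG_EAX (lowest set bit), hiReg == REG_EDX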
+
+// The given lclVar is either going live (being born) or dying.
+// It might be both going live and dying (that is, it is a dead store) under MinOpts.
+// Update regSet.rsMaskVars accordingly.
+// inline
+void CodeGenInterface::genUpdateRegLife(const LclVarDsc* varDsc, bool isBorn, bool isDying DEBUGARG(GenTreePtr tree))
+{
+#if FEATURE_STACK_FP_X87
+ // The stack fp reg vars are handled elsewhere
+ if (varTypeIsFloating(varDsc->TypeGet()))
+ return;
+#endif
+
+ regMaskTP regMask = genGetRegMask(varDsc);
+
+#ifdef DEBUG
+ if (compiler->verbose)
+ {
+ printf("\t\t\t\t\t\t\tV%02u in reg ", (varDsc - compiler->lvaTable));
+ varDsc->PrintVarReg();
+ printf(" is becoming %s ", (isDying) ? "dead" : "live");
+ Compiler::printTreeID(tree);
+ printf("\n");
+ }
+#endif // DEBUG
+
+ if (isDying)
+ {
+ // We'd like to be able to assert the following, however if we are walking
+ // through a qmark/colon tree, we may encounter multiple last-use nodes.
+ // assert((regSet.rsMaskVars & regMask) == regMask);
+ regSet.RemoveMaskVars(regMask);
+ }
+ else
+ {
+ assert((regSet.rsMaskVars & regMask) == 0);
+ regSet.AddMaskVars(regMask);
+ }
+}
+
+// Gets a register mask that represents the kill set for a helper call, since
+// not all JIT helper calls follow the standard ABI on the target architecture.
+//
+// TODO-CQ: Currently this list is incomplete (not all helpers calls are
+// enumerated) and not 100% accurate (some killsets are bigger than
+// what they really are).
+// There's some work to be done in several places in the JIT to
+// accurately track the registers that are getting killed by
+// helper calls:
+// a) LSRA needs several changes to accommodate more precise killsets
+// for every helper call it sees (both explicitly [easy] and
+// implicitly [hard])
+// b) Currently for AMD64, when we generate code for a helper call
+// we're independently over-pessimizing the killsets of the call
+// (independently from LSRA) and this needs changes
+// both in CodeGenAmd64.cpp and emitx86.cpp.
+//
+// The best solution for this problem would be to try to centralize
+// the killset information in a single place but then make the
+// corresponding changes so every code generation phase is in sync
+// about this.
+//
+// The interim solution is to only add known helper calls that don't
+// follow the AMD64 ABI and actually trash registers that are supposed to be non-volatile.
+regMaskTP Compiler::compHelperCallKillSet(CorInfoHelpFunc helper)
+{
+ switch (helper)
+ {
+ case CORINFO_HELP_ASSIGN_BYREF:
+#if defined(_TARGET_AMD64_)
+ return RBM_RSI | RBM_RDI | RBM_CALLEE_TRASH;
+#elif defined(_TARGET_ARM64_)
+ return RBM_CALLEE_TRASH_NOGC;
+#else
+ NYI("Model kill set for CORINFO_HELP_ASSIGN_BYREF on target arch");
+ return RBM_CALLEE_TRASH;
+#endif
+
+ case CORINFO_HELP_PROF_FCN_ENTER:
+#ifdef _TARGET_AMD64_
+ return RBM_PROFILER_ENTER_TRASH;
+#else
+ unreached();
+#endif
+ case CORINFO_HELP_PROF_FCN_LEAVE:
+ case CORINFO_HELP_PROF_FCN_TAILCALL:
+#ifdef _TARGET_AMD64_
+ return RBM_PROFILER_LEAVE_TRASH;
+#else
+ unreached();
+#endif
+
+ case CORINFO_HELP_STOP_FOR_GC:
+ return RBM_STOP_FOR_GC_TRASH;
+
+ case CORINFO_HELP_INIT_PINVOKE_FRAME:
+ return RBM_INIT_PINVOKE_FRAME_TRASH;
+
+ default:
+ return RBM_CALLEE_TRASH;
+ }
+}
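+
+// Illustrative usage (a sketch): code that emits a helper call can treat the
+// returned registers as killed, e.g.:
+//
+// regMaskTP killMask = compHelperCallKillSet(CORINFO_HELP_ASSIGN_BYREF);
+// // On AMD64 this is RBM_RSI | RBM_RDI | RBM_CALLEE_TRASH, since the byref
+// // write-barrier helper also trashes RSI and RDI (see above).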
+
+//
+// Gets a register mask that represents the kill set for "NO GC" helper calls since
+// not all JIT Helper calls follow the standard ABI on the target architecture.
+//
+// Note: This list may not be complete; helpers not listed below default to the
+// standard NOGC kill set (RBM_CALLEE_TRASH_NOGC).
+//
+regMaskTP Compiler::compNoGCHelperCallKillSet(CorInfoHelpFunc helper)
+{
+ assert(emitter::emitNoGChelper(helper));
+#ifdef _TARGET_AMD64_
+ switch (helper)
+ {
+ case CORINFO_HELP_PROF_FCN_ENTER:
+ return RBM_PROFILER_ENTER_TRASH;
+
+ case CORINFO_HELP_PROF_FCN_LEAVE:
+ case CORINFO_HELP_PROF_FCN_TAILCALL:
+ return RBM_PROFILER_LEAVE_TRASH;
+
+ case CORINFO_HELP_ASSIGN_BYREF:
+ // this helper doesn't trash RSI and RDI
+ return RBM_CALLEE_TRASH_NOGC & ~(RBM_RSI | RBM_RDI);
+
+ default:
+ return RBM_CALLEE_TRASH_NOGC;
+ }
+#else
+ return RBM_CALLEE_TRASH_NOGC;
+#endif
+}
+
+// Update liveness: always var liveness (compCurLife) and, if "ForCodeGen" is
+// true, also reg liveness (regSet.rsMaskVars), when the given lclVar (or
+// indir(addr(local))) or regVar node is going live (being born) or dying.
+template <bool ForCodeGen>
+void Compiler::compUpdateLifeVar(GenTreePtr tree, VARSET_TP* pLastUseVars)
+{
+ GenTreePtr indirAddrLocal = fgIsIndirOfAddrOfLocal(tree);
+ assert(tree->OperIsNonPhiLocal() || indirAddrLocal != nullptr);
+
+ // Get the local var tree -- if "tree" is "Ldobj(addr(x))", or "ind(addr(x))" this is "x", else it's "tree".
+ GenTreePtr lclVarTree = indirAddrLocal;
+ if (lclVarTree == nullptr)
+ {
+ lclVarTree = tree;
+ }
+ unsigned int lclNum = lclVarTree->gtLclVarCommon.gtLclNum;
+ LclVarDsc* varDsc = lvaTable + lclNum;
+
+#ifdef DEBUG
+#if !defined(_TARGET_AMD64_)
+ // There are no addr nodes on ARM and we are experimenting with encountering vars in 'random' order.
+ // Struct fields are not traversed in a consistent order, so ignore them when
+ // verifying that we see the var nodes in execution order
+ if (ForCodeGen)
+ {
+ if (tree->OperIsIndir())
+ {
+ assert(indirAddrLocal != NULL);
+ }
+ else if (tree->gtNext != NULL && tree->gtNext->gtOper == GT_ADDR &&
+ ((tree->gtNext->gtNext == NULL || !tree->gtNext->gtNext->OperIsIndir())))
+ {
+ assert(tree->IsLocal()); // Can only take the address of a local.
+ // The ADDR might occur in a context where the address it contributes is eventually
+ // dereferenced, so we can't say that this is not a use or def.
+ }
+#if 0
+ // TODO-ARM64-Bug?: These asserts don't seem right for ARM64: I don't understand why we have to assert
+ // two consecutive lclvars (in execution order) can only be observed if the first one is a struct field.
+ // It seems to me this is code only applicable to the legacy JIT and not RyuJIT (and therefore why it was
+ // ifdef'ed out for AMD64).
+ else if (!varDsc->lvIsStructField)
+ {
+ GenTreePtr prevTree;
+ for (prevTree = tree->gtPrev;
+ prevTree != NULL && prevTree != compCurLifeTree;
+ prevTree = prevTree->gtPrev)
+ {
+ if ((prevTree->gtOper == GT_LCL_VAR) || (prevTree->gtOper == GT_REG_VAR))
+ {
+ LclVarDsc * prevVarDsc = lvaTable + prevTree->gtLclVarCommon.gtLclNum;
+
+ // These are the only things for which this method MUST be called
+ assert(prevVarDsc->lvIsStructField);
+ }
+ }
+ assert(prevTree == compCurLifeTree);
+ }
+#endif // 0
+ }
+#endif // !_TARGET_AMD64_
+#endif // DEBUG
+
+ compCurLifeTree = tree;
+ VARSET_TP VARSET_INIT(this, newLife, compCurLife);
+
+ // By the time we reach codegen, a struct local may no longer be TYP_STRUCT,
+ // so we also have to check lvPromoted, for the case where its fields are
+ // being tracked.
+ if (!varDsc->lvTracked && !varDsc->lvPromoted)
+ {
+ return;
+ }
+
+ // If it's "x <op>= ..." then variable "x" must have had a previous, original,
+ // site to be born.
+ bool isBorn = ((tree->gtFlags & GTF_VAR_DEF) != 0 && (tree->gtFlags & GTF_VAR_USEASG) == 0);
+ bool isDying = ((tree->gtFlags & GTF_VAR_DEATH) != 0);
+#ifndef LEGACY_BACKEND
+ bool spill = ((tree->gtFlags & GTF_SPILL) != 0);
+#endif // !LEGACY_BACKEND
+
+#ifndef LEGACY_BACKEND
+ // For RyuJIT backend, since all tracked vars are register candidates, but not all are in registers at all times,
+ // we maintain two separate sets of variables - the total set of variables that are either
+ // born or dying here, and the subset of those that are on the stack
+ VARSET_TP VARSET_INIT_NOCOPY(stackVarDeltaSet, VarSetOps::MakeEmpty(this));
+#endif // !LEGACY_BACKEND
+
+ if (isBorn || isDying)
+ {
+ // If hasDeadTrackedFieldVars is true, then, for a LDOBJ(ADDR(<promoted struct local>)),
+ // *deadTrackedFieldVars indicates which tracked field vars are dying.
+ bool hasDeadTrackedFieldVars = false;
+ VARSET_TP* deadTrackedFieldVars = nullptr;
+ VARSET_TP VARSET_INIT_NOCOPY(varDeltaSet, VarSetOps::MakeEmpty(this));
+
+ if (varDsc->lvTracked)
+ {
+ VarSetOps::AddElemD(this, varDeltaSet, varDsc->lvVarIndex);
+ if (ForCodeGen)
+ {
+#ifndef LEGACY_BACKEND
+ if (isBorn && varDsc->lvIsRegCandidate() && tree->gtHasReg())
+ {
+ codeGen->genUpdateVarReg(varDsc, tree);
+ }
+#endif // !LEGACY_BACKEND
+ if (varDsc->lvIsInReg()
+#ifndef LEGACY_BACKEND
+ && tree->gtRegNum != REG_NA
+#endif // !LEGACY_BACKEND
+ )
+ {
+ codeGen->genUpdateRegLife(varDsc, isBorn, isDying DEBUGARG(tree));
+ }
+#ifndef LEGACY_BACKEND
+ else
+ {
+ VarSetOps::AddElemD(this, stackVarDeltaSet, varDsc->lvVarIndex);
+ }
+#endif // !LEGACY_BACKEND
+ }
+ }
+ else if (varDsc->lvPromoted)
+ {
+ if (indirAddrLocal != nullptr && isDying)
+ {
+ assert(!isBorn); // GTF_VAR_DEATH only set for LDOBJ last use.
+ hasDeadTrackedFieldVars = GetPromotedStructDeathVars()->Lookup(indirAddrLocal, &deadTrackedFieldVars);
+ if (hasDeadTrackedFieldVars)
+ {
+ VarSetOps::Assign(this, varDeltaSet, *deadTrackedFieldVars);
+ }
+ }
+
+ for (unsigned i = varDsc->lvFieldLclStart; i < varDsc->lvFieldLclStart + varDsc->lvFieldCnt; ++i)
+ {
+ LclVarDsc* fldVarDsc = &(lvaTable[i]);
+ noway_assert(fldVarDsc->lvIsStructField);
+ if (fldVarDsc->lvTracked)
+ {
+ unsigned fldVarIndex = fldVarDsc->lvVarIndex;
+ noway_assert(fldVarIndex < lvaTrackedCount);
+ if (!hasDeadTrackedFieldVars)
+ {
+ VarSetOps::AddElemD(this, varDeltaSet, fldVarIndex);
+ if (ForCodeGen)
+ {
+ // We repeat this call here and below to avoid the VarSetOps::IsMember
+ // test in this, the common case, where we have no deadTrackedFieldVars.
+ if (fldVarDsc->lvIsInReg())
+ {
+#ifndef LEGACY_BACKEND
+ if (isBorn)
+ {
+ codeGen->genUpdateVarReg(fldVarDsc, tree);
+ }
+#endif // !LEGACY_BACKEND
+ codeGen->genUpdateRegLife(fldVarDsc, isBorn, isDying DEBUGARG(tree));
+ }
+#ifndef LEGACY_BACKEND
+ else
+ {
+ VarSetOps::AddElemD(this, stackVarDeltaSet, fldVarIndex);
+ }
+#endif // !LEGACY_BACKEND
+ }
+ }
+ else if (ForCodeGen && VarSetOps::IsMember(this, varDeltaSet, fldVarIndex))
+ {
+ if (lvaTable[i].lvIsInReg())
+ {
+#ifndef LEGACY_BACKEND
+ if (isBorn)
+ {
+ codeGen->genUpdateVarReg(fldVarDsc, tree);
+ }
+#endif // !LEGACY_BACKEND
+ codeGen->genUpdateRegLife(fldVarDsc, isBorn, isDying DEBUGARG(tree));
+ }
+#ifndef LEGACY_BACKEND
+ else
+ {
+ VarSetOps::AddElemD(this, stackVarDeltaSet, fldVarIndex);
+ }
+#endif // !LEGACY_BACKEND
+ }
+ }
+ }
+ }
+
+ // First, update the live set
+ if (isDying)
+ {
+ // We'd like to be able to assert the following, however if we are walking
+ // through a qmark/colon tree, we may encounter multiple last-use nodes.
+ // assert (VarSetOps::IsSubset(compiler, regVarDeltaSet, newLife));
+ VarSetOps::DiffD(this, newLife, varDeltaSet);
+ if (pLastUseVars != nullptr)
+ {
+ VarSetOps::Assign(this, *pLastUseVars, varDeltaSet);
+ }
+ }
+ else
+ {
+ // This shouldn't be in newLife, unless this is debug code, in which
+ // case we keep vars live everywhere, OR the variable is address-exposed,
+ // OR this block is part of a try block, in which case it may be live at the handler
+ // Could add a check that, if it's in newLife, that it's also in
+ // fgGetHandlerLiveVars(compCurBB), but seems excessive
+ //
+ // For a dead store, it can be the case that we set both isBorn and isDying to true.
+ // (We don't eliminate dead stores under MinOpts, so we can't assume they're always
+ // eliminated.) If it's both, we handled it above.
+ VarSetOps::UnionD(this, newLife, varDeltaSet);
+ }
+ }
+
+ if (!VarSetOps::Equal(this, compCurLife, newLife))
+ {
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\t\t\t\t\t\t\tLive vars: ");
+ dumpConvertedVarSet(this, compCurLife);
+ printf(" => ");
+ dumpConvertedVarSet(this, newLife);
+ printf("\n");
+ }
+#endif // DEBUG
+
+ VarSetOps::Assign(this, compCurLife, newLife);
+
+ if (ForCodeGen)
+ {
+#ifndef LEGACY_BACKEND
+
+ // Only add vars to gcInfo.gcVarPtrSetCur if they are currently on the stack,
+ // since gcInfo.gcTrkStkPtrLcls includes all TRACKED vars that EVER live on
+ // the stack (i.e. are not always in a register).
+ VARSET_TP VARSET_INIT_NOCOPY(gcTrkStkDeltaSet,
+ VarSetOps::Intersection(this, codeGen->gcInfo.gcTrkStkPtrLcls,
+ stackVarDeltaSet));
+ if (!VarSetOps::IsEmpty(this, gcTrkStkDeltaSet))
+ {
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\t\t\t\t\t\t\tGCvars: ");
+ dumpConvertedVarSet(this, codeGen->gcInfo.gcVarPtrSetCur);
+ printf(" => ");
+ }
+#endif // DEBUG
+
+ if (isBorn)
+ {
+ VarSetOps::UnionD(this, codeGen->gcInfo.gcVarPtrSetCur, gcTrkStkDeltaSet);
+ }
+ else
+ {
+ VarSetOps::DiffD(this, codeGen->gcInfo.gcVarPtrSetCur, gcTrkStkDeltaSet);
+ }
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ dumpConvertedVarSet(this, codeGen->gcInfo.gcVarPtrSetCur);
+ printf("\n");
+ }
+#endif // DEBUG
+ }
+
+#else // LEGACY_BACKEND
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ VARSET_TP VARSET_INIT_NOCOPY(gcVarPtrSetNew,
+ VarSetOps::Intersection(this, newLife, codeGen->gcInfo.gcTrkStkPtrLcls));
+ if (!VarSetOps::Equal(this, codeGen->gcInfo.gcVarPtrSetCur, gcVarPtrSetNew))
+ {
+ printf("\t\t\t\t\t\t\tGCvars: ");
+ dumpConvertedVarSet(this, codeGen->gcInfo.gcVarPtrSetCur);
+ printf(" => ");
+ dumpConvertedVarSet(this, gcVarPtrSetNew);
+ printf("\n");
+ }
+ }
+#endif // DEBUG
+
+ VarSetOps::AssignNoCopy(this, codeGen->gcInfo.gcVarPtrSetCur,
+ VarSetOps::Intersection(this, newLife, codeGen->gcInfo.gcTrkStkPtrLcls));
+
+#endif // LEGACY_BACKEND
+
+#ifdef DEBUGGING_SUPPORT
+ codeGen->siUpdate();
+#endif
+ }
+ }
+
+#ifndef LEGACY_BACKEND
+ if (ForCodeGen && spill)
+ {
+ assert(!varDsc->lvPromoted);
+ codeGen->genSpillVar(tree);
+ if (VarSetOps::IsMember(this, codeGen->gcInfo.gcTrkStkPtrLcls, varDsc->lvVarIndex))
+ {
+ if (!VarSetOps::IsMember(this, codeGen->gcInfo.gcVarPtrSetCur, varDsc->lvVarIndex))
+ {
+ VarSetOps::AddElemD(this, codeGen->gcInfo.gcVarPtrSetCur, varDsc->lvVarIndex);
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\t\t\t\t\t\t\tVar V%02u becoming live\n", varDsc - lvaTable);
+ }
+#endif // DEBUG
+ }
+ }
+ }
+#endif // !LEGACY_BACKEND
+}
+
+// Need an explicit instantiation.
+template void Compiler::compUpdateLifeVar<false>(GenTreePtr tree, VARSET_TP* pLastUseVars);
+
+template <bool ForCodeGen>
+void Compiler::compChangeLife(VARSET_VALARG_TP newLife DEBUGARG(GenTreePtr tree))
+{
+ LclVarDsc* varDsc;
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ if (tree != nullptr)
+ {
+ Compiler::printTreeID(tree);
+ }
+ printf("Change life %s ", VarSetOps::ToString(this, compCurLife));
+ dumpConvertedVarSet(this, compCurLife);
+ printf(" -> %s ", VarSetOps::ToString(this, newLife));
+ dumpConvertedVarSet(this, newLife);
+ printf("\n");
+ }
+#endif // DEBUG
+
+ /* We should only be called when the live set has actually changed */
+
+ noway_assert(!VarSetOps::Equal(this, compCurLife, newLife));
+
+ if (!ForCodeGen)
+ {
+ VarSetOps::Assign(this, compCurLife, newLife);
+ return;
+ }
+
+ /* Figure out which variables are becoming live/dead at this point */
+
+ // deadSet = compCurLife - newLife
+ VARSET_TP VARSET_INIT(this, deadSet, compCurLife);
+ VarSetOps::DiffD(this, deadSet, newLife);
+
+ // bornSet = newLife - compCurLife
+ VARSET_TP VARSET_INIT(this, bornSet, newLife);
+ VarSetOps::DiffD(this, bornSet, compCurLife);
+
+ /* Can't simultaneously become live and dead at the same time */
+
+ // (deadSet UNION bornSet) != EMPTY
+ noway_assert(!VarSetOps::IsEmpty(this, VarSetOps::Union(this, deadSet, bornSet)));
+ // (deadSet INTERSECTION bornSet) == EMPTY
+ noway_assert(VarSetOps::IsEmpty(this, VarSetOps::Intersection(this, deadSet, bornSet)));
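+
+ // For example (illustrative): if compCurLife = {V01, V02} and newLife = {V02, V03},
+ // then deadSet = {V01} and bornSet = {V03}; the two sets are disjoint and at
+ // least one of them is non-empty, as the asserts above require.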
+
+#ifdef LEGACY_BACKEND
+ // In the LEGACY_BACKEND case, we only consider variables that are fully
+ // enregistered, and there may be none.
+ VarSetOps::IntersectionD(this, deadSet, raRegVarsMask);
+ VarSetOps::IntersectionD(this, bornSet, raRegVarsMask);
+ // And all gcTrkStkPtrLcls that are now live will be on the stack
+ VarSetOps::AssignNoCopy(this, codeGen->gcInfo.gcVarPtrSetCur,
+ VarSetOps::Intersection(this, newLife, codeGen->gcInfo.gcTrkStkPtrLcls));
+#endif // LEGACY_BACKEND
+
+ VarSetOps::Assign(this, compCurLife, newLife);
+
+ // Handle the dying vars first, then the newly live vars.
+ // This is because, in the RyuJIT backend case, they may occupy registers that
+ // will be occupied by another var that is newly live.
+ VARSET_ITER_INIT(this, deadIter, deadSet, deadVarIndex);
+ while (deadIter.NextElem(this, &deadVarIndex))
+ {
+ unsigned varNum = lvaTrackedToVarNum[deadVarIndex];
+ varDsc = lvaTable + varNum;
+ bool isGCRef = (varDsc->TypeGet() == TYP_REF);
+ bool isByRef = (varDsc->TypeGet() == TYP_BYREF);
+
+ if (varDsc->lvIsInReg())
+ {
+ // TODO-Cleanup: Move the code from compUpdateLifeVar to genUpdateRegLife that updates the
+ // gc sets
+ regMaskTP regMask = varDsc->lvRegMask();
+ if (isGCRef)
+ {
+ codeGen->gcInfo.gcRegGCrefSetCur &= ~regMask;
+ }
+ else if (isByRef)
+ {
+ codeGen->gcInfo.gcRegByrefSetCur &= ~regMask;
+ }
+ codeGen->genUpdateRegLife(varDsc, false /*isBorn*/, true /*isDying*/ DEBUGARG(tree));
+ }
+#ifndef LEGACY_BACKEND
+ // This isn't in a register, so update the gcVarPtrSetCur.
+ // (Note that in the LEGACY_BACKEND case gcVarPtrSetCur is updated above unconditionally
+ // for all gcTrkStkPtrLcls in newLife, because none of them ever live in a register.)
+ else if (isGCRef || isByRef)
+ {
+ VarSetOps::RemoveElemD(this, codeGen->gcInfo.gcVarPtrSetCur, deadVarIndex);
+ JITDUMP("\t\t\t\t\t\t\tV%02u becoming dead\n", varNum);
+ }
+#endif // !LEGACY_BACKEND
+ }
+
+ VARSET_ITER_INIT(this, bornIter, bornSet, bornVarIndex);
+ while (bornIter.NextElem(this, &bornVarIndex))
+ {
+ unsigned varNum = lvaTrackedToVarNum[bornVarIndex];
+ varDsc = lvaTable + varNum;
+ bool isGCRef = (varDsc->TypeGet() == TYP_REF);
+ bool isByRef = (varDsc->TypeGet() == TYP_BYREF);
+
+ if (varDsc->lvIsInReg())
+ {
+#ifndef LEGACY_BACKEND
+#ifdef DEBUG
+ if (VarSetOps::IsMember(this, codeGen->gcInfo.gcVarPtrSetCur, bornVarIndex))
+ {
+ JITDUMP("\t\t\t\t\t\t\tRemoving V%02u from gcVarPtrSetCur\n", varNum);
+ }
+#endif // DEBUG
+ VarSetOps::RemoveElemD(this, codeGen->gcInfo.gcVarPtrSetCur, bornVarIndex);
+#endif // !LEGACY_BACKEND
+ codeGen->genUpdateRegLife(varDsc, true /*isBorn*/, false /*isDying*/ DEBUGARG(tree));
+ regMaskTP regMask = varDsc->lvRegMask();
+ if (isGCRef)
+ {
+ codeGen->gcInfo.gcRegGCrefSetCur |= regMask;
+ }
+ else if (isByRef)
+ {
+ codeGen->gcInfo.gcRegByrefSetCur |= regMask;
+ }
+ }
+#ifndef LEGACY_BACKEND
+ // This isn't in a register, so update the gcVarPtrSetCur
+ else if (lvaIsGCTracked(varDsc))
+ {
+ VarSetOps::AddElemD(this, codeGen->gcInfo.gcVarPtrSetCur, bornVarIndex);
+ JITDUMP("\t\t\t\t\t\t\tV%02u becoming live\n", varNum);
+ }
+#endif // !LEGACY_BACKEND
+ }
+
+#ifdef DEBUGGING_SUPPORT
+ codeGen->siUpdate();
+#endif
+}
+
+// Need an explicit instantiation.
+template void Compiler::compChangeLife<true>(VARSET_VALARG_TP newLife DEBUGARG(GenTreePtr tree));
+
+#ifdef LEGACY_BACKEND
+
+/*****************************************************************************
+ *
+ * Get the mask of integer registers that contain 'live' enregistered
+ * local variables after "tree".
+ *
+ * The output is the mask of integer registers that are currently
+ * alive and holding the enregistered local variables.
+ */
+regMaskTP CodeGenInterface::genLiveMask(GenTreePtr tree)
+{
+ regMaskTP liveMask = regSet.rsMaskVars;
+
+ GenTreePtr nextNode;
+ if (compiler->compCurLifeTree == nullptr)
+ {
+ assert(compiler->compCurStmt != nullptr);
+ nextNode = compiler->compCurStmt->gtStmt.gtStmtList;
+ }
+ else
+ {
+ nextNode = compiler->compCurLifeTree->gtNext;
+ }
+
+ // Theoretically, we should always be able to find "tree" by walking
+ // forward in execution order. But unfortunately, there is at least
+ // one case (addressing) where a node may be evaluated out of order.
+ // So, we have to handle that case.
+ bool outOfOrder = false;
+ for (; nextNode != tree->gtNext; nextNode = nextNode->gtNext)
+ {
+ if (nextNode == nullptr)
+ {
+ outOfOrder = true;
+ break;
+ }
+ if (nextNode->gtOper == GT_LCL_VAR || nextNode->gtOper == GT_REG_VAR)
+ {
+ bool isBorn = ((tree->gtFlags & GTF_VAR_DEF) != 0 && (tree->gtFlags & GTF_VAR_USEASG) == 0);
+ bool isDying = ((nextNode->gtFlags & GTF_VAR_DEATH) != 0);
+ if (isBorn || isDying)
+ {
+ regMaskTP regMask = genGetRegMask(nextNode);
+ if (regMask != RBM_NONE)
+ {
+ if (isBorn)
+ {
+ liveMask |= regMask;
+ }
+ else
+ {
+ liveMask &= ~(regMask);
+ }
+ }
+ }
+ }
+ }
+ if (outOfOrder)
+ {
+ assert(compiler->compCurLifeTree != nullptr);
+ liveMask = regSet.rsMaskVars;
+ // We were unable to find "tree" by traversing forward. We must now go
+ // backward from compiler->compCurLifeTree instead. We have to start with compiler->compCurLifeTree,
+ // since regSet.rsMaskVars reflects its completed execution
+ for (nextNode = compiler->compCurLifeTree; nextNode != tree; nextNode = nextNode->gtPrev)
+ {
+ assert(nextNode != nullptr);
+
+ if (nextNode->gtOper == GT_LCL_VAR || nextNode->gtOper == GT_REG_VAR)
+ {
+ bool isBorn = ((tree->gtFlags & GTF_VAR_DEF) != 0 && (tree->gtFlags & GTF_VAR_USEASG) == 0);
+ bool isDying = ((nextNode->gtFlags & GTF_VAR_DEATH) != 0);
+ if (isBorn || isDying)
+ {
+ regMaskTP regMask = genGetRegMask(nextNode);
+ if (regMask != RBM_NONE)
+ {
+ // We're going backward - so things born are removed
+ // and vice versa
+ if (isBorn)
+ {
+ liveMask &= ~(regMask);
+ }
+ else
+ {
+ liveMask |= regMask;
+ }
+ }
+ }
+ }
+ }
+ }
+ return liveMask;
+}
+
+/*****************************************************************************
+ *
+ * Get the mask of integer registers that contain 'live' enregistered
+ * local variables.
+
+ * The input is a liveSet which contains a set of local
+ * variables that are currently alive
+ *
+ * The output is the mask of x86 integer registers that are currently
+ * alive and holding the enregistered local variables
+ */
+
+regMaskTP CodeGenInterface::genLiveMask(VARSET_VALARG_TP liveSet)
+{
+ // Check for the zero LiveSet mask
+ if (VarSetOps::IsEmpty(compiler, liveSet))
+ {
+ return RBM_NONE;
+ }
+
+ // Check if our liveSet matches the one we have cached: genLastLiveSet -> genLastLiveMask
+ if (VarSetOps::Equal(compiler, liveSet, genLastLiveSet))
+ {
+ return genLastLiveMask;
+ }
+
+ regMaskTP liveMask = 0;
+
+ VARSET_ITER_INIT(compiler, iter, liveSet, varIndex);
+ while (iter.NextElem(compiler, &varIndex))
+ {
+
+ // If the variable is not enregistered, then it can't contribute to the liveMask
+ if (!VarSetOps::IsMember(compiler, compiler->raRegVarsMask, varIndex))
+ {
+ continue;
+ }
+
+ // Find the variable in compiler->lvaTable
+ unsigned varNum = compiler->lvaTrackedToVarNum[varIndex];
+ LclVarDsc* varDsc = compiler->lvaTable + varNum;
+
+#if !FEATURE_FP_REGALLOC
+ // If the variable is a floating point type, then it can't contribute to the liveMask
+ if (varDsc->IsFloatRegType())
+ {
+ continue;
+ }
+#endif
+
+ noway_assert(compiler->lvaTable[varNum].lvRegister);
+ regMaskTP regBit;
+
+ if (varTypeIsFloating(varDsc->TypeGet()))
+ {
+ regBit = genRegMaskFloat(varDsc->lvRegNum, varDsc->TypeGet());
+ }
+ else
+ {
+ regBit = genRegMask(varDsc->lvRegNum);
+
+ // For longs we may have two regs
+ if (isRegPairType(varDsc->lvType) && varDsc->lvOtherReg != REG_STK)
+ {
+ regBit |= genRegMask(varDsc->lvOtherReg);
+ }
+ }
+
+ noway_assert(regBit != 0);
+
+ // We should not already have any of these bits set
+ noway_assert((liveMask & regBit) == 0);
+
+ // Update the liveMask with the register bits that are live
+ liveMask |= regBit;
+ }
+
+ // cache the last mapping between gtLiveSet -> liveMask
+ VarSetOps::Assign(compiler, genLastLiveSet, liveSet);
+ genLastLiveMask = liveMask;
+
+ return liveMask;
+}
+
+#endif
+
+/*****************************************************************************
+ *
+ * Generate a spill.
+ */
+void CodeGenInterface::spillReg(var_types type, TempDsc* tmp, regNumber reg)
+{
+ getEmitter()->emitIns_S_R(ins_Store(type), emitActualTypeSize(type), reg, tmp->tdTempNum(), 0);
+}
+
+/*****************************************************************************
+ *
+ * Generate a reload.
+ */
+void CodeGenInterface::reloadReg(var_types type, TempDsc* tmp, regNumber reg)
+{
+ getEmitter()->emitIns_R_S(ins_Load(type), emitActualTypeSize(type), reg, tmp->tdTempNum(), 0);
+}
+
+#ifdef LEGACY_BACKEND
+#if defined(_TARGET_ARM_) || defined(_TARGET_AMD64_)
+void CodeGenInterface::reloadFloatReg(var_types type, TempDsc* tmp, regNumber reg)
+{
+ var_types tmpType = tmp->tdTempType();
+ getEmitter()->emitIns_R_S(ins_FloatLoad(type), emitActualTypeSize(tmpType), reg, tmp->tdTempNum(), 0);
+}
+#endif
+#endif // LEGACY_BACKEND
+
+// inline
+regNumber CodeGenInterface::genGetThisArgReg(GenTreePtr call)
+{
+ noway_assert(call->IsCall());
+ return REG_ARG_0;
+}
+
+//----------------------------------------------------------------------
+// getSpillTempDsc: get the TempDsc corresponding to a spilled tree.
+//
+// Arguments:
+// tree - spilled GenTree node
+//
+// Return Value:
+// TempDsc corresponding to tree
+TempDsc* CodeGenInterface::getSpillTempDsc(GenTree* tree)
+{
+ // tree must be in spilled state.
+ assert((tree->gtFlags & GTF_SPILLED) != 0);
+
+ // Get the tree's SpillDsc.
+ RegSet::SpillDsc* prevDsc;
+ RegSet::SpillDsc* spillDsc = regSet.rsGetSpillInfo(tree, tree->gtRegNum, &prevDsc);
+ assert(spillDsc != nullptr);
+
+ // Get the temp desc.
+ TempDsc* temp = regSet.rsGetSpillTempWord(tree->gtRegNum, spillDsc, prevDsc);
+ return temp;
+}
+
+#ifdef _TARGET_XARCH_
+
+#ifdef _TARGET_AMD64_
+// Returns relocation type hint for an addr.
+// Note that there are no reloc hints on x86.
+//
+// Arguments
+// addr - data address
+//
+// Returns
+// relocation type hint
+//
+unsigned short CodeGenInterface::genAddrRelocTypeHint(size_t addr)
+{
+ return compiler->eeGetRelocTypeHint((void*)addr);
+}
+#endif //_TARGET_AMD64_
+
+// Return true if an absolute indirect data address can be encoded as an IP-relative
+// offset. Note that this method should be used only when the caller knows that
+// the address is an icon value that VM has given and there is no GenTree node
+// representing it. Otherwise, one should always use FitsInAddrBase().
+//
+// Arguments
+// addr - an absolute indirect data address
+//
+// Returns
+// true if indir data addr could be encoded as IP-relative offset.
+//
+bool CodeGenInterface::genDataIndirAddrCanBeEncodedAsPCRelOffset(size_t addr)
+{
+#ifdef _TARGET_AMD64_
+ return genAddrRelocTypeHint(addr) == IMAGE_REL_BASED_REL32;
+#else
+ // x86: PC-relative addressing is available only for control flow instructions (jmp and call)
+ return false;
+#endif
+}
+
+// Return true if an indirect code address can be encoded as IP-relative offset.
+// Note that this method should be used only when the caller knows that the
+// address is an icon value that VM has given and there is no GenTree node
+// representing it. Otherwise, one should always use FitsInAddrBase().
+//
+// Arguments
+// addr - an absolute indirect code address
+//
+// Returns
+// true if indir code addr could be encoded as IP-relative offset.
+//
+bool CodeGenInterface::genCodeIndirAddrCanBeEncodedAsPCRelOffset(size_t addr)
+{
+#ifdef _TARGET_AMD64_
+ return genAddrRelocTypeHint(addr) == IMAGE_REL_BASED_REL32;
+#else
+ // x86: PC-relative addressing is available only for control flow instructions (jmp and call)
+ return true;
+#endif
+}
+
+// Return true if an indirect code address can be encoded as 32-bit displacement
+// relative to zero. Note that this method should be used only when the caller
+// knows that the address is an icon value that VM has given and there is no
+// GenTree node representing it. Otherwise, one should always use FitsInAddrBase().
+//
+// Arguments
+// addr - absolute indirect code address
+//
+// Returns
+// true if absolute indir code addr could be encoded as 32-bit displacement relative to zero.
+//
+bool CodeGenInterface::genCodeIndirAddrCanBeEncodedAsZeroRelOffset(size_t addr)
+{
+ return GenTreeIntConCommon::FitsInI32((ssize_t)addr);
+}
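+
+// For example (illustrative): addr == 0x7ffff000 fits in a signed 32-bit
+// displacement, so this returns true; addr == 0x100000000 does not, so it
+// returns false.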
+
+// Return true if an absolute indirect code address needs a relocation recorded with VM.
+//
+// Arguments
+// addr - an absolute indirect code address
+//
+// Returns
+// true if indir code addr needs a relocation recorded with VM
+//
+bool CodeGenInterface::genCodeIndirAddrNeedsReloc(size_t addr)
+{
+ // If generating relocatable ngen code, then all code addr should go through relocation
+ if (compiler->opts.compReloc)
+ {
+ return true;
+ }
+
+#ifdef _TARGET_AMD64_
+ // If code addr could be encoded as 32-bit offset relative to IP, we need to record a relocation.
+ if (genCodeIndirAddrCanBeEncodedAsPCRelOffset(addr))
+ {
+ return true;
+ }
+
+ // It is possible that the code indir addr can be encoded as a 32-bit displacement
+ // relative to zero, but we don't need to emit a relocation in that case.
+ return false;
+#else //_TARGET_X86_
+ // On x86 there is no need for recording relocations during jitting,
+ // because all addrs fit within 32-bits.
+ return false;
+#endif //_TARGET_X86_
+}
+
+// Return true if a direct code address needs to be marked as relocatable.
+//
+// Arguments
+// addr - absolute direct code address
+//
+// Returns
+// true if direct code addr needs a relocation recorded with VM
+//
+bool CodeGenInterface::genCodeAddrNeedsReloc(size_t addr)
+{
+ // If generating relocatable ngen code, then all code addr should go through relocation
+ if (compiler->opts.compReloc)
+ {
+ return true;
+ }
+
+#ifdef _TARGET_AMD64_
+ // By default all direct code addresses go through relocation so that VM will setup
+ // a jump stub if addr cannot be encoded as pc-relative offset.
+ return true;
+#else //_TARGET_X86_
+ // On x86 there is no need for recording relocations during jitting,
+ // because all addrs fit within 32-bits.
+ return false;
+#endif //_TARGET_X86_
+}
+#endif //_TARGET_XARCH_
+
+/*****************************************************************************
+ *
+ * The following can be used to create basic blocks that serve as labels for
+ * the emitter. Use with caution - these are not real basic blocks!
+ *
+ */
+
+// inline
+BasicBlock* CodeGen::genCreateTempLabel()
+{
+#ifdef DEBUG
+ // These blocks don't affect FP
+ compiler->fgSafeBasicBlockCreation = true;
+#endif
+
+ BasicBlock* block = compiler->bbNewBasicBlock(BBJ_NONE);
+
+#ifdef DEBUG
+ compiler->fgSafeBasicBlockCreation = false;
+#endif
+
+ block->bbFlags |= BBF_JMP_TARGET | BBF_HAS_LABEL;
+
+ // Use coldness of current block, as this label will
+ // be contained in it.
+ block->bbFlags |= (compiler->compCurBB->bbFlags & BBF_COLD);
+
+#ifdef DEBUG
+ block->bbTgtStkDepth = genStackLevel / sizeof(int);
+#endif
+ return block;
+}
+
+// inline
+void CodeGen::genDefineTempLabel(BasicBlock* label)
+{
+#ifdef DEBUG
+ if (compiler->opts.dspCode)
+ {
+ printf("\n L_M%03u_BB%02u:\n", Compiler::s_compMethodsCount, label->bbNum);
+ }
+#endif
+
+ label->bbEmitCookie =
+ getEmitter()->emitAddLabel(gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur);
+
+ /* gcInfo.gcRegGCrefSetCur does not account for redundant load-suppression
+ of GC vars, and the emitter will not know about them */
+
+ regTracker.rsTrackRegClrPtr();
+}
+
+/*****************************************************************************
+ *
+ * Adjust the stack pointer by the given value; assumes that this follows
+ * a call so only callee-saved registers (and registers that may hold a
+ * return value) are used at this point.
+ */
+
+void CodeGen::genAdjustSP(ssize_t delta)
+{
+#ifdef _TARGET_X86_
+ if (delta == sizeof(int))
+ inst_RV(INS_pop, REG_ECX, TYP_INT);
+ else
+#endif
+ inst_RV_IV(INS_add, REG_SPBASE, delta, EA_PTRSIZE);
+}
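+
+// For example (illustrative): on x86, genAdjustSP(4) emits "pop ecx" (one byte,
+// versus three for "add esp, 4"; ECX is trashable here since this follows a
+// call). Any other delta emits "add esp, delta" (or "add rsp, delta" on x64).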
+
+#ifdef _TARGET_ARM_
+// return size
+// alignmentWB is out param
+unsigned CodeGenInterface::InferOpSizeAlign(GenTreePtr op, unsigned* alignmentWB)
+{
+ unsigned alignment = 0;
+ unsigned opSize = 0;
+
+ if (op->gtType == TYP_STRUCT || op->OperIsCopyBlkOp())
+ {
+ opSize = InferStructOpSizeAlign(op, &alignment);
+ }
+ else
+ {
+ alignment = genTypeAlignments[op->TypeGet()];
+ opSize = genTypeSizes[op->TypeGet()];
+ }
+
+ assert(opSize != 0);
+ assert(alignment != 0);
+
+ (*alignmentWB) = alignment;
+ return opSize;
+}
+// return size
+// alignmentWB is out param
+unsigned CodeGenInterface::InferStructOpSizeAlign(GenTreePtr op, unsigned* alignmentWB)
+{
+ unsigned alignment = 0;
+ unsigned opSize = 0;
+
+ while (op->gtOper == GT_COMMA)
+ {
+ op = op->gtOp.gtOp2;
+ }
+
+ if (op->gtOper == GT_OBJ)
+ {
+ CORINFO_CLASS_HANDLE clsHnd = op->AsObj()->gtClass;
+ opSize = compiler->info.compCompHnd->getClassSize(clsHnd);
+ alignment = roundUp(compiler->info.compCompHnd->getClassAlignmentRequirement(clsHnd), TARGET_POINTER_SIZE);
+ }
+ else if (op->gtOper == GT_LCL_VAR)
+ {
+ unsigned varNum = op->gtLclVarCommon.gtLclNum;
+ LclVarDsc* varDsc = compiler->lvaTable + varNum;
+ assert(varDsc->lvType == TYP_STRUCT);
+ opSize = varDsc->lvSize();
+ if (varDsc->lvStructDoubleAlign)
+ {
+ alignment = TARGET_POINTER_SIZE * 2;
+ }
+ else
+ {
+ alignment = TARGET_POINTER_SIZE;
+ }
+ }
+ else if (op->OperIsCopyBlkOp())
+ {
+ GenTreePtr op2 = op->gtOp.gtOp2;
+
+ if (op2->OperGet() == GT_CNS_INT)
+ {
+ if (op2->IsIconHandle(GTF_ICON_CLASS_HDL))
+ {
+ CORINFO_CLASS_HANDLE clsHnd = (CORINFO_CLASS_HANDLE)op2->gtIntCon.gtIconVal;
+ opSize = roundUp(compiler->info.compCompHnd->getClassSize(clsHnd), TARGET_POINTER_SIZE);
+ alignment =
+ roundUp(compiler->info.compCompHnd->getClassAlignmentRequirement(clsHnd), TARGET_POINTER_SIZE);
+ }
+ else
+ {
+ opSize = op2->gtIntCon.gtIconVal;
+ GenTreePtr op1 = op->gtOp.gtOp1;
+ assert(op1->OperGet() == GT_LIST);
+ GenTreePtr dstAddr = op1->gtOp.gtOp1;
+ if (dstAddr->OperGet() == GT_ADDR)
+ {
+ InferStructOpSizeAlign(dstAddr->gtOp.gtOp1, &alignment);
+ }
+ else
+ {
+ assert(!"Unhandle dstAddr node");
+ alignment = TARGET_POINTER_SIZE;
+ }
+ }
+ }
+ else
+ {
+ noway_assert(!"Variable sized COPYBLK register arg!");
+ opSize = 0;
+ alignment = TARGET_POINTER_SIZE;
+ }
+ }
+ else if (op->gtOper == GT_MKREFANY)
+ {
+ opSize = TARGET_POINTER_SIZE * 2;
+ alignment = TARGET_POINTER_SIZE;
+ }
+ else if (op->IsArgPlaceHolderNode())
+ {
+ CORINFO_CLASS_HANDLE clsHnd = op->gtArgPlace.gtArgPlaceClsHnd;
+ assert(clsHnd != 0);
+ opSize = roundUp(compiler->info.compCompHnd->getClassSize(clsHnd), TARGET_POINTER_SIZE);
+ alignment = roundUp(compiler->info.compCompHnd->getClassAlignmentRequirement(clsHnd), TARGET_POINTER_SIZE);
+ }
+ else
+ {
+ assert(!"Unhandled gtOper");
+ opSize = TARGET_POINTER_SIZE;
+ alignment = TARGET_POINTER_SIZE;
+ }
+
+ assert(opSize != 0);
+ assert(alignment != 0);
+
+ (*alignmentWB) = alignment;
+ return opSize;
+}
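+
+// For example (illustrative): for a GT_LCL_VAR of a 12-byte struct local
+// without lvStructDoubleAlign, this returns opSize = 12 with *alignmentWB = 4
+// (TARGET_POINTER_SIZE); with lvStructDoubleAlign set, *alignmentWB would be 8.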
+
+#endif // _TARGET_ARM_
+
+/*****************************************************************************
+ *
+ * Take an address expression and try to find the best set of components to
+ * form an address mode; returns true if this is successful.
+ *
+ * TODO-Cleanup: The RyuJIT backend never uses this to actually generate code.
+ * Refactor this code so that the underlying analysis can be used in
+ * the RyuJIT Backend to do lowering, instead of having to call this method with the
+ * option to not generate the code.
+ *
+ * 'fold' specifies if it is OK to fold the array index which hangs off
+ * a GT_NOP node.
+ *
+ * If successful, the parameters will be set to the following values:
+ *
+ * *rv1Ptr ... base operand
+ * *rv2Ptr ... optional operand
+ * *revPtr ... true if rv2 is before rv1 in the evaluation order
+ * #if SCALED_ADDR_MODES
+ * *mulPtr ... optional multiplier (2/4/8) for rv2
+ * Note that for [reg1 + reg2] and [reg1 + reg2 + icon], *mulPtr == 0.
+ * #endif
+ * *cnsPtr ... integer constant [optional]
+ *
+ * The 'mode' parameter may have one of the following values:
+ *
+ * #if LEA_AVAILABLE
+ * +1 ... we're trying to compute a value via 'LEA'
+ * #endif
+ *
+ * 0 ... we're trying to form an address mode
+ *
+ * -1 ... we're generating code for an address mode,
+ * and thus the address must already form an
+ * address mode (without any further work)
+ *
+ * IMPORTANT NOTE: This routine doesn't generate any code, it merely
+ * identifies the components that might be used to
+ * form an address mode later on.
+ */
+
+bool CodeGen::genCreateAddrMode(GenTreePtr addr,
+ int mode,
+ bool fold,
+ regMaskTP regMask,
+ bool* revPtr,
+ GenTreePtr* rv1Ptr,
+ GenTreePtr* rv2Ptr,
+#if SCALED_ADDR_MODES
+ unsigned* mulPtr,
+#endif
+ unsigned* cnsPtr,
+ bool nogen)
+{
+#ifndef LEGACY_BACKEND
+ assert(nogen == true);
+#endif // !LEGACY_BACKEND
+
+ /*
+ The following indirections are valid address modes on x86/x64:
+
+ [ icon] * not handled here
+ [reg ] * not handled here
+ [reg + icon]
+ [reg2 + reg1 ]
+ [reg2 + reg1 + icon]
+ [reg2 + 2 * reg1 ]
+ [reg2 + 4 * reg1 ]
+ [reg2 + 8 * reg1 ]
+ [ 2 * reg1 + icon]
+ [ 4 * reg1 + icon]
+ [ 8 * reg1 + icon]
+ [reg2 + 2 * reg1 + icon]
+ [reg2 + 4 * reg1 + icon]
+ [reg2 + 8 * reg1 + icon]
+
+ The following indirections are valid address modes on arm64:
+
+ [reg]
+ [reg + icon]
+ [reg2 + reg1]
+ [reg2 + reg1 * natural-scale]
+
+ */
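+
+ /* For example (illustrative): for an address tree computing "base + 8*index + 24"
+ (say, an array element access), a successful call sets *rv1Ptr to the base
+ tree, *rv2Ptr to the index tree, *mulPtr to 8 and *cnsPtr to 24, matching
+ the [reg2 + 8 * reg1 + icon] form above. */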
+
+ /* All indirect address modes require the address to be an addition */
+
+ if (addr->gtOper != GT_ADD)
+ {
+ return false;
+ }
+
+ // Can't use indirect addressing mode as we need to check for overflow.
+ // Also, can't use 'lea' as it doesn't set the flags.
+
+ if (addr->gtOverflow())
+ {
+ return false;
+ }
+
+ GenTreePtr rv1 = nullptr;
+ GenTreePtr rv2 = nullptr;
+
+ GenTreePtr op1;
+ GenTreePtr op2;
+
+ ssize_t cns;
+#if SCALED_ADDR_MODES
+ unsigned mul;
+#endif
+
+ GenTreePtr tmp;
+
+ /* What order are the sub-operands to be evaluated */
+
+ if (addr->gtFlags & GTF_REVERSE_OPS)
+ {
+ op1 = addr->gtOp.gtOp2;
+ op2 = addr->gtOp.gtOp1;
+ }
+ else
+ {
+ op1 = addr->gtOp.gtOp1;
+ op2 = addr->gtOp.gtOp2;
+ }
+
+ bool rev = false; // Is op2 first in the evaluation order?
+
+ /*
+ A complex address mode can combine the following operands:
+
+ op1 ... base address
+ op2 ... optional scaled index
+#if SCALED_ADDR_MODES
+ mul ... optional multiplier (2/4/8) for op2
+#endif
+ cns ... optional displacement
+
+ Here we try to find such a set of operands and arrange for these
+ to sit in registers.
+ */
+
+ cns = 0;
+#if SCALED_ADDR_MODES
+ mul = 0;
+#endif
+
+AGAIN:
+ /* We come back to 'AGAIN' if we have an add of a constant, and we are folding that
+ constant, or we have gone through a GT_NOP or GT_COMMA node. We never come back
+ here if we find a scaled index.
+ */
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#if SCALED_ADDR_MODES
+ assert(mul == 0);
+#endif
+
+#ifdef LEGACY_BACKEND
+ /* Check both operands as far as being register variables */
+
+ if (mode != -1)
+ {
+ if (op1->gtOper == GT_LCL_VAR)
+ genMarkLclVar(op1);
+ if (op2->gtOper == GT_LCL_VAR)
+ genMarkLclVar(op2);
+ }
+#endif // LEGACY_BACKEND
+
+ /* Special case: keep constants as 'op2' */
+
+ if (op1->IsCnsIntOrI())
+ {
+ // Presumably op2 is assumed to not be a constant (shouldn't happen if we've done constant folding)?
+ tmp = op1;
+ op1 = op2;
+ op2 = tmp;
+ }
+
+ /* Check for an addition of a constant */
+
+ if (op2->IsIntCnsFitsInI32() && (op2->gtType != TYP_REF) && FitsIn<INT32>(cns + op2->gtIntConCommon.IconValue()))
+ {
+ /* We're adding a constant */
+
+ cns += op2->gtIntConCommon.IconValue();
+
+#ifdef LEGACY_BACKEND
+ /* Can (and should) we use "add reg, icon" ? */
+
+ if ((op1->gtFlags & GTF_REG_VAL) && mode == 1 && !nogen)
+ {
+ regNumber reg1 = op1->gtRegNum;
+
+ if ((regMask == 0 || (regMask & genRegMask(reg1))) && genRegTrashable(reg1, addr))
+ {
+ // In case genMarkLclVar(op1) bashed it above and it is
+ // the last use of the variable.
+
+ genUpdateLife(op1);
+
+ /* 'reg1' is trashable, so add "icon" into it */
+
+ genIncRegBy(reg1, cns, addr, addr->TypeGet());
+
+ genUpdateLife(addr);
+ return true;
+ }
+ }
+#endif // LEGACY_BACKEND
+
+#ifdef _TARGET_ARM64_
+ if (cns == 0)
+#endif
+ {
+ /* Inspect the operand the constant is being added to */
+
+ switch (op1->gtOper)
+ {
+ case GT_ADD:
+
+ if (op1->gtOverflow())
+ {
+ break;
+ }
+
+ op2 = op1->gtOp.gtOp2;
+ op1 = op1->gtOp.gtOp1;
+
+ goto AGAIN;
+
+#if SCALED_ADDR_MODES && !defined(_TARGET_ARM64_)
+ // TODO-ARM64-CQ: For now we don't try to create a scaled index on ARM64.
+ case GT_MUL:
+ if (op1->gtOverflow())
+ {
+ return false; // Need overflow check
+ }
+
+ __fallthrough;
+
+ case GT_LSH:
+
+ mul = op1->GetScaledIndex();
+ if (mul)
+ {
+ /* We can use "[mul*rv2 + icon]" */
+
+ rv1 = nullptr;
+ rv2 = op1->gtOp.gtOp1;
+
+ goto FOUND_AM;
+ }
+ break;
+#endif
+
+ default:
+ break;
+ }
+ }
+
+ /* The best we can do is "[rv1 + icon]" */
+
+ rv1 = op1;
+ rv2 = nullptr;
+
+ goto FOUND_AM;
+ }
+
+ /* op2 is not a constant. So keep on trying.
+ Does op1 or op2 already sit in a register? */
+
+ if (op1->gtFlags & GTF_REG_VAL)
+ {
+ /* op1 is sitting in a register */
+ }
+ else if (op2->gtFlags & GTF_REG_VAL)
+ {
+ /* op2 is sitting in a register. Keep the enregistered value as op1 */
+
+ tmp = op1;
+ op1 = op2;
+ op2 = tmp;
+
+ noway_assert(rev == false);
+ rev = true;
+ }
+ else
+ {
+ /* Neither op1 nor op2 are sitting in a register right now */
+
+ switch (op1->gtOper)
+ {
+#ifndef _TARGET_ARM64_
+ // TODO-ARM64-CQ: For now we don't try to create a scaled index on ARM64.
+ case GT_ADD:
+
+ if (op1->gtOverflow())
+ {
+ break;
+ }
+
+ if (op1->gtOp.gtOp2->IsIntCnsFitsInI32() && FitsIn<INT32>(cns + op1->gtOp.gtOp2->gtIntCon.gtIconVal))
+ {
+ cns += op1->gtOp.gtOp2->gtIntCon.gtIconVal;
+ op1 = op1->gtOp.gtOp1;
+
+ goto AGAIN;
+ }
+
+ break;
+
+#if SCALED_ADDR_MODES
+
+ case GT_MUL:
+
+ if (op1->gtOverflow())
+ {
+ break;
+ }
+
+ __fallthrough;
+
+ case GT_LSH:
+
+ mul = op1->GetScaledIndex();
+ if (mul)
+ {
+ /* 'op1' is a scaled value */
+
+ rv1 = op2;
+ rv2 = op1->gtOp.gtOp1;
+
+ int argScale;
+ while ((rv2->gtOper == GT_MUL || rv2->gtOper == GT_LSH) && (argScale = rv2->GetScaledIndex()) != 0)
+ {
+ if (jitIsScaleIndexMul(argScale * mul))
+ {
+ mul = mul * argScale;
+ rv2 = rv2->gtOp.gtOp1;
+ }
+ else
+ {
+ break;
+ }
+ }
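+
+                    // For example, a nested scaling such as (i * 4) * 2 folds here into a
+                    // single scale of 8, provided jitIsScaleIndexMul accepts the product.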
+
+ noway_assert(rev == false);
+ rev = true;
+
+ goto FOUND_AM;
+ }
+ break;
+
+#endif // SCALED_ADDR_MODES
+#endif // !_TARGET_ARM64_
+
+ case GT_NOP:
+
+ if (!nogen)
+ {
+ break;
+ }
+
+ op1 = op1->gtOp.gtOp1;
+ goto AGAIN;
+
+ case GT_COMMA:
+
+ if (!nogen)
+ {
+ break;
+ }
+
+ op1 = op1->gtOp.gtOp2;
+ goto AGAIN;
+
+ default:
+ break;
+ }
+
+ noway_assert(op2);
+ switch (op2->gtOper)
+ {
+#ifndef _TARGET_ARM64_
+ // TODO-ARM64-CQ: For now we don't try to create a scaled index on ARM64.
+ case GT_ADD:
+
+ if (op2->gtOverflow())
+ {
+ break;
+ }
+
+ if (op2->gtOp.gtOp2->IsIntCnsFitsInI32() && FitsIn<INT32>(cns + op2->gtOp.gtOp2->gtIntCon.gtIconVal))
+ {
+ cns += op2->gtOp.gtOp2->gtIntCon.gtIconVal;
+ op2 = op2->gtOp.gtOp1;
+
+ goto AGAIN;
+ }
+
+ break;
+
+#if SCALED_ADDR_MODES
+
+ case GT_MUL:
+
+ if (op2->gtOverflow())
+ {
+ break;
+ }
+
+ __fallthrough;
+
+ case GT_LSH:
+
+ mul = op2->GetScaledIndex();
+ if (mul)
+ {
+                // 'op2' is a scaled value... is its argument also scaled?
+ int argScale;
+ rv2 = op2->gtOp.gtOp1;
+ while ((rv2->gtOper == GT_MUL || rv2->gtOper == GT_LSH) && (argScale = rv2->GetScaledIndex()) != 0)
+ {
+ if (jitIsScaleIndexMul(argScale * mul))
+ {
+ mul = mul * argScale;
+ rv2 = rv2->gtOp.gtOp1;
+ }
+ else
+ {
+ break;
+ }
+ }
+
+ rv1 = op1;
+
+ goto FOUND_AM;
+ }
+ break;
+
+#endif // SCALED_ADDR_MODES
+#endif // !_TARGET_ARM64_
+
+ case GT_NOP:
+
+ if (!nogen)
+ {
+ break;
+ }
+
+ op2 = op2->gtOp.gtOp1;
+ goto AGAIN;
+
+ case GT_COMMA:
+
+ if (!nogen)
+ {
+ break;
+ }
+
+ op2 = op2->gtOp.gtOp2;
+ goto AGAIN;
+
+ default:
+ break;
+ }
+
+ goto ADD_OP12;
+ }
+
+ /* op1 is in a register.
+ Is op2 an addition or a scaled value? */
+
+ noway_assert(op2);
+
+#ifndef _TARGET_ARM64_
+ // TODO-ARM64-CQ: For now we don't try to create a scaled index on ARM64.
+ switch (op2->gtOper)
+ {
+ case GT_ADD:
+
+ if (op2->gtOverflow())
+ {
+ break;
+ }
+
+ if (op2->gtOp.gtOp2->IsIntCnsFitsInI32() && FitsIn<INT32>(cns + op2->gtOp.gtOp2->gtIntCon.gtIconVal))
+ {
+ cns += op2->gtOp.gtOp2->gtIntCon.gtIconVal;
+ op2 = op2->gtOp.gtOp1;
+ goto AGAIN;
+ }
+
+ break;
+
+#if SCALED_ADDR_MODES
+
+ case GT_MUL:
+
+ if (op2->gtOverflow())
+ {
+ break;
+ }
+
+ __fallthrough;
+
+ case GT_LSH:
+
+ mul = op2->GetScaledIndex();
+ if (mul)
+ {
+ rv1 = op1;
+ rv2 = op2->gtOp.gtOp1;
+ int argScale;
+ while ((rv2->gtOper == GT_MUL || rv2->gtOper == GT_LSH) && (argScale = rv2->GetScaledIndex()) != 0)
+ {
+ if (jitIsScaleIndexMul(argScale * mul))
+ {
+ mul = mul * argScale;
+ rv2 = rv2->gtOp.gtOp1;
+ }
+ else
+ {
+ break;
+ }
+ }
+
+ goto FOUND_AM;
+ }
+ break;
+
+#endif // SCALED_ADDR_MODES
+
+ default:
+ break;
+ }
+#endif // !_TARGET_ARM64_
+
+ADD_OP12:
+
+    /* The best we can do is "[rv1 + rv2]" or "[rv1 + rv2 + cns]" */
+
+ rv1 = op1;
+ rv2 = op2;
+#ifdef _TARGET_ARM64_
+ assert(cns == 0);
+#endif
+
+FOUND_AM:
+
+#ifdef LEGACY_BACKEND
+ /* Check for register variables */
+
+ if (mode != -1)
+ {
+ if (rv1 && rv1->gtOper == GT_LCL_VAR)
+ genMarkLclVar(rv1);
+ if (rv2 && rv2->gtOper == GT_LCL_VAR)
+ genMarkLclVar(rv2);
+ }
+#endif // LEGACY_BACKEND
+
+ if (rv2)
+ {
+ /* Make sure a GC address doesn't end up in 'rv2' */
+
+ if (varTypeIsGC(rv2->TypeGet()))
+ {
+ noway_assert(rv1 && !varTypeIsGC(rv1->TypeGet()));
+
+ tmp = rv1;
+ rv1 = rv2;
+ rv2 = tmp;
+
+ rev = !rev;
+ }
+
+ /* Special case: constant array index (that is range-checked) */
+
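+        // For example (illustrative): if rv2 turns out to be the constant 2 and the
+        // scale is 4, the code below folds the scaled index into the displacement:
+        // cns += 8 and rv2 becomes nullptr, leaving just "[rv1 + cns]".
+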
+ if (fold)
+ {
+ ssize_t tmpMul;
+ GenTreePtr index;
+
+ if ((rv2->gtOper == GT_MUL || rv2->gtOper == GT_LSH) && (rv2->gtOp.gtOp2->IsCnsIntOrI()))
+ {
+ /* For valuetype arrays where we can't use the scaled address
+ mode, rv2 will point to the scaled index. So we have to do
+ more work */
+
+ tmpMul = compiler->optGetArrayRefScaleAndIndex(rv2, &index DEBUGARG(false));
+ if (mul)
+ {
+ tmpMul *= mul;
+ }
+ }
+ else
+ {
+                /* May be a simple array. rv2 points to the actual index */
+
+ index = rv2;
+ tmpMul = mul;
+ }
+
+ /* Get hold of the array index and see if it's a constant */
+ if (index->IsIntCnsFitsInI32())
+ {
+ /* Get hold of the index value */
+ ssize_t ixv = index->AsIntConCommon()->IconValue();
+
+#if SCALED_ADDR_MODES
+ /* Scale the index if necessary */
+ if (tmpMul)
+ {
+ ixv *= tmpMul;
+ }
+#endif
+
+ if (FitsIn<INT32>(cns + ixv))
+ {
+ /* Add the scaled index to the offset value */
+
+ cns += ixv;
+
+#if SCALED_ADDR_MODES
+ /* There is no scaled operand any more */
+ mul = 0;
+#endif
+ rv2 = nullptr;
+ }
+ }
+ }
+ }
+
+ // We shouldn't have [rv2*1 + cns] - this is equivalent to [rv1 + cns]
+ noway_assert(rv1 || mul != 1);
+
+ noway_assert(FitsIn<INT32>(cns));
+
+ /* Success - return the various components to the caller */
+
+ *revPtr = rev;
+ *rv1Ptr = rv1;
+ *rv2Ptr = rv2;
+#if SCALED_ADDR_MODES
+ *mulPtr = mul;
+#endif
+ *cnsPtr = (unsigned)cns;
+
+ return true;
+}
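+
+// A hedged usage sketch (hypothetical caller, for illustration only; assumes
+// SCALED_ADDR_MODES so the mulPtr argument is present):
+//
+//     bool       rev;
+//     GenTreePtr rv1, rv2;
+//     unsigned   mul, cns;
+//     if (genCreateAddrMode(addr, -1, true, RBM_NONE, &rev, &rv1, &rv2, &mul, &cns, true))
+//     {
+//         // 'addr' decomposes as [rv1 + mul*rv2 + cns] (rv2 may be null, mul may be 0)
+//     }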
+
+/*****************************************************************************
+* The condition to use for (the jmp/set for) the given type of operation
+*
+* On amd64, this routine should be used only when no GenTree is available and
+* jumps must be generated based on integer comparisons. When a GenTree is
+* available, always use its overloaded version.
+*
+*/
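+
+// For example (illustrative, per the tables below): genJumpKindForOper(GT_LT, CK_SIGNED)
+// yields EJ_jl on xarch and EJ_lt on ARM, while genJumpKindForOper(GT_LT, CK_UNSIGNED)
+// yields EJ_jb and EJ_lo respectively.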
+
+// static
+emitJumpKind CodeGen::genJumpKindForOper(genTreeOps cmp, CompareKind compareKind)
+{
+ const static BYTE genJCCinsSigned[] = {
+#if defined(_TARGET_XARCH_)
+ EJ_je, // GT_EQ
+ EJ_jne, // GT_NE
+ EJ_jl, // GT_LT
+ EJ_jle, // GT_LE
+ EJ_jge, // GT_GE
+ EJ_jg, // GT_GT
+#elif defined(_TARGET_ARMARCH_)
+ EJ_eq, // GT_EQ
+ EJ_ne, // GT_NE
+ EJ_lt, // GT_LT
+ EJ_le, // GT_LE
+ EJ_ge, // GT_GE
+ EJ_gt, // GT_GT
+#endif
+ };
+
+ const static BYTE genJCCinsUnsigned[] = /* unsigned comparison */
+ {
+#if defined(_TARGET_XARCH_)
+ EJ_je, // GT_EQ
+ EJ_jne, // GT_NE
+ EJ_jb, // GT_LT
+ EJ_jbe, // GT_LE
+ EJ_jae, // GT_GE
+ EJ_ja, // GT_GT
+#elif defined(_TARGET_ARMARCH_)
+ EJ_eq, // GT_EQ
+ EJ_ne, // GT_NE
+ EJ_lo, // GT_LT
+ EJ_ls, // GT_LE
+ EJ_hs, // GT_GE
+ EJ_hi, // GT_GT
+#endif
+ };
+
+ const static BYTE genJCCinsLogical[] = /* logical operation */
+ {
+#if defined(_TARGET_XARCH_)
+ EJ_je, // GT_EQ (Z == 1)
+ EJ_jne, // GT_NE (Z == 0)
+ EJ_js, // GT_LT (S == 1)
+ EJ_NONE, // GT_LE
+ EJ_jns, // GT_GE (S == 0)
+ EJ_NONE, // GT_GT
+#elif defined(_TARGET_ARMARCH_)
+ EJ_eq, // GT_EQ (Z == 1)
+ EJ_ne, // GT_NE (Z == 0)
+ EJ_mi, // GT_LT (N == 1)
+ EJ_NONE, // GT_LE
+ EJ_pl, // GT_GE (N == 0)
+ EJ_NONE, // GT_GT
+#endif
+ };
+
+#if defined(_TARGET_XARCH_)
+ assert(genJCCinsSigned[GT_EQ - GT_EQ] == EJ_je);
+ assert(genJCCinsSigned[GT_NE - GT_EQ] == EJ_jne);
+ assert(genJCCinsSigned[GT_LT - GT_EQ] == EJ_jl);
+ assert(genJCCinsSigned[GT_LE - GT_EQ] == EJ_jle);
+ assert(genJCCinsSigned[GT_GE - GT_EQ] == EJ_jge);
+ assert(genJCCinsSigned[GT_GT - GT_EQ] == EJ_jg);
+
+ assert(genJCCinsUnsigned[GT_EQ - GT_EQ] == EJ_je);
+ assert(genJCCinsUnsigned[GT_NE - GT_EQ] == EJ_jne);
+ assert(genJCCinsUnsigned[GT_LT - GT_EQ] == EJ_jb);
+ assert(genJCCinsUnsigned[GT_LE - GT_EQ] == EJ_jbe);
+ assert(genJCCinsUnsigned[GT_GE - GT_EQ] == EJ_jae);
+ assert(genJCCinsUnsigned[GT_GT - GT_EQ] == EJ_ja);
+
+ assert(genJCCinsLogical[GT_EQ - GT_EQ] == EJ_je);
+ assert(genJCCinsLogical[GT_NE - GT_EQ] == EJ_jne);
+ assert(genJCCinsLogical[GT_LT - GT_EQ] == EJ_js);
+ assert(genJCCinsLogical[GT_GE - GT_EQ] == EJ_jns);
+#elif defined(_TARGET_ARMARCH_)
+ assert(genJCCinsSigned[GT_EQ - GT_EQ] == EJ_eq);
+ assert(genJCCinsSigned[GT_NE - GT_EQ] == EJ_ne);
+ assert(genJCCinsSigned[GT_LT - GT_EQ] == EJ_lt);
+ assert(genJCCinsSigned[GT_LE - GT_EQ] == EJ_le);
+ assert(genJCCinsSigned[GT_GE - GT_EQ] == EJ_ge);
+ assert(genJCCinsSigned[GT_GT - GT_EQ] == EJ_gt);
+
+ assert(genJCCinsUnsigned[GT_EQ - GT_EQ] == EJ_eq);
+ assert(genJCCinsUnsigned[GT_NE - GT_EQ] == EJ_ne);
+ assert(genJCCinsUnsigned[GT_LT - GT_EQ] == EJ_lo);
+ assert(genJCCinsUnsigned[GT_LE - GT_EQ] == EJ_ls);
+ assert(genJCCinsUnsigned[GT_GE - GT_EQ] == EJ_hs);
+ assert(genJCCinsUnsigned[GT_GT - GT_EQ] == EJ_hi);
+
+ assert(genJCCinsLogical[GT_EQ - GT_EQ] == EJ_eq);
+ assert(genJCCinsLogical[GT_NE - GT_EQ] == EJ_ne);
+ assert(genJCCinsLogical[GT_LT - GT_EQ] == EJ_mi);
+ assert(genJCCinsLogical[GT_GE - GT_EQ] == EJ_pl);
+#else
+ assert(!"unknown arch");
+#endif
+ assert(GenTree::OperIsCompare(cmp));
+
+ emitJumpKind result = EJ_COUNT;
+
+ if (compareKind == CK_UNSIGNED)
+ {
+ result = (emitJumpKind)genJCCinsUnsigned[cmp - GT_EQ];
+ }
+ else if (compareKind == CK_SIGNED)
+ {
+ result = (emitJumpKind)genJCCinsSigned[cmp - GT_EQ];
+ }
+ else if (compareKind == CK_LOGICAL)
+ {
+ result = (emitJumpKind)genJCCinsLogical[cmp - GT_EQ];
+ }
+ assert(result != EJ_COUNT);
+ return result;
+}
+
+/*****************************************************************************
+ *
+ * Generate an exit sequence for a return from a method (note: when compiling
+ * for speed there might be multiple exit points).
+ */
+
+void CodeGen::genExitCode(BasicBlock* block)
+{
+#ifdef DEBUGGING_SUPPORT
+    /* We just wrote the first instruction of the epilog - inform the debugger.
+       Note that this may result in a duplicate IPmapping entry, and
+       that this is ok. */
+
+ // For non-optimized debuggable code, there is only one epilog.
+ genIPmappingAdd((IL_OFFSETX)ICorDebugInfo::EPILOG, true);
+#endif // DEBUGGING_SUPPORT
+
+ bool jmpEpilog = ((block->bbFlags & BBF_HAS_JMP) != 0);
+ if (compiler->getNeedsGSSecurityCookie())
+ {
+ genEmitGSCookieCheck(jmpEpilog);
+
+ if (jmpEpilog)
+ {
+            // Dev10 642944 -
+            // The GS cookie check created a temp label that has no live
+            // incoming GC registers; we need to fix that.
+
+ unsigned varNum;
+ LclVarDsc* varDsc;
+
+ /* Figure out which register parameters hold pointers */
+
+ for (varNum = 0, varDsc = compiler->lvaTable; varNum < compiler->lvaCount && varDsc->lvIsRegArg;
+ varNum++, varDsc++)
+ {
+ noway_assert(varDsc->lvIsParam);
+
+ gcInfo.gcMarkRegPtrVal(varDsc->lvArgReg, varDsc->TypeGet());
+ }
+
+ getEmitter()->emitThisGCrefRegs = getEmitter()->emitInitGCrefRegs = gcInfo.gcRegGCrefSetCur;
+ getEmitter()->emitThisByrefRegs = getEmitter()->emitInitByrefRegs = gcInfo.gcRegByrefSetCur;
+ }
+ }
+
+ genReserveEpilog(block);
+}
+
+/*****************************************************************************
+ *
+ * Generate code for an out-of-line exception.
+ * For debuggable code, we generate the 'throw' inline.
+ * For non-dbg code, we share the helper blocks created by fgAddCodeRef().
+ */
+
+void CodeGen::genJumpToThrowHlpBlk(emitJumpKind jumpKind, SpecialCodeKind codeKind, GenTreePtr failBlk)
+{
+ if (!compiler->opts.compDbgCode)
+ {
+ /* For non-debuggable code, find and use the helper block for
+ raising the exception. The block may be shared by other trees too. */
+
+ BasicBlock* tgtBlk;
+
+ if (failBlk)
+ {
+ /* We already know which block to jump to. Use that. */
+
+ noway_assert(failBlk->gtOper == GT_LABEL);
+ tgtBlk = failBlk->gtLabel.gtLabBB;
+ noway_assert(
+ tgtBlk ==
+ compiler->fgFindExcptnTarget(codeKind, compiler->bbThrowIndex(compiler->compCurBB))->acdDstBlk);
+ }
+ else
+ {
+ /* Find the helper-block which raises the exception. */
+
+ Compiler::AddCodeDsc* add =
+ compiler->fgFindExcptnTarget(codeKind, compiler->bbThrowIndex(compiler->compCurBB));
+ PREFIX_ASSUME_MSG((add != nullptr), ("ERROR: failed to find exception throw block"));
+ tgtBlk = add->acdDstBlk;
+ }
+
+ noway_assert(tgtBlk);
+
+        // Jump to the exception-throwing block on error.
+
+ inst_JMP(jumpKind, tgtBlk);
+ }
+ else
+ {
+ /* The code to throw the exception will be generated inline, and
+ we will jump around it in the normal non-exception case */
+
+ BasicBlock* tgtBlk = nullptr;
+ emitJumpKind reverseJumpKind = emitter::emitReverseJumpKind(jumpKind);
+ if (reverseJumpKind != jumpKind)
+ {
+ tgtBlk = genCreateTempLabel();
+ inst_JMP(reverseJumpKind, tgtBlk);
+ }
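+
+        // For example (illustrative): a check that would have jumped to the shared
+        // throw block with EJ_jo instead jumps around the inline helper call below
+        // with the reversed kind EJ_jno.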
+
+ genEmitHelperCall(compiler->acdHelper(codeKind), 0, EA_UNKNOWN);
+
+ /* Define the spot for the normal non-exception case to jump to */
+ if (tgtBlk != nullptr)
+ {
+ assert(reverseJumpKind != jumpKind);
+ genDefineTempLabel(tgtBlk);
+ }
+ }
+}
+
+/*****************************************************************************
+ *
+ * The last operation done was generating code for "tree" and that would
+ * have set the flags. Check if the operation caused an overflow.
+ */
+
+// inline
+void CodeGen::genCheckOverflow(GenTreePtr tree)
+{
+ // Overflow-check should be asked for this tree
+ noway_assert(tree->gtOverflow());
+
+ const var_types type = tree->TypeGet();
+
+    // Overflow checks can only occur for the non-small types (i.e. TYP_INT, TYP_LONG)
+ noway_assert(!varTypeIsSmall(type));
+
+ emitJumpKind jumpKind;
+
+#ifdef _TARGET_ARM64_
+ if (tree->OperGet() == GT_MUL)
+ {
+ jumpKind = EJ_ne;
+ }
+ else
+#endif
+ {
+ bool isUnsignedOverflow = ((tree->gtFlags & GTF_UNSIGNED) != 0);
+
+#if defined(_TARGET_XARCH_)
+
+ jumpKind = isUnsignedOverflow ? EJ_jb : EJ_jo;
+
+#elif defined(_TARGET_ARMARCH_)
+
+ jumpKind = isUnsignedOverflow ? EJ_lo : EJ_vs;
+
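+        // On ARM, unsigned overflow is detected via the carry flag: an unsigned
+        // subtraction overflows when the carry is clear (EJ_lo), while an unsigned
+        // addition overflows when the carry is set (EJ_hs).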
+ if (jumpKind == EJ_lo)
+ {
+ if ((tree->OperGet() != GT_SUB) && (tree->gtOper != GT_ASG_SUB))
+ {
+ jumpKind = EJ_hs;
+ }
+ }
+
+#endif // defined(_TARGET_ARMARCH_)
+ }
+
+    // Jump to the block which will throw the exception.
+
+ genJumpToThrowHlpBlk(jumpKind, SCK_OVERFLOW);
+}
+
+#if FEATURE_EH_FUNCLETS
+
+/*****************************************************************************
+ *
+ * Update the current funclet as needed by calling genUpdateCurrentFunclet().
+ * For non-BBF_FUNCLET_BEG blocks, it asserts that the current funclet
+ * is up-to-date.
+ *
+ */
+
+void CodeGen::genUpdateCurrentFunclet(BasicBlock* block)
+{
+ if (block->bbFlags & BBF_FUNCLET_BEG)
+ {
+ compiler->funSetCurrentFunc(compiler->funGetFuncIdx(block));
+ if (compiler->funCurrentFunc()->funKind == FUNC_FILTER)
+ {
+ assert(compiler->ehGetDsc(compiler->funCurrentFunc()->funEHIndex)->ebdFilter == block);
+ }
+ else
+ {
+ // We shouldn't see FUNC_ROOT
+ assert(compiler->funCurrentFunc()->funKind == FUNC_HANDLER);
+ assert(compiler->ehGetDsc(compiler->funCurrentFunc()->funEHIndex)->ebdHndBeg == block);
+ }
+ }
+ else
+ {
+ assert(compiler->compCurrFuncIdx <= compiler->compFuncInfoCount);
+ if (compiler->funCurrentFunc()->funKind == FUNC_FILTER)
+ {
+ assert(compiler->ehGetDsc(compiler->funCurrentFunc()->funEHIndex)->InFilterRegionBBRange(block));
+ }
+ else if (compiler->funCurrentFunc()->funKind == FUNC_ROOT)
+ {
+ assert(!block->hasHndIndex());
+ }
+ else
+ {
+ assert(compiler->funCurrentFunc()->funKind == FUNC_HANDLER);
+ assert(compiler->ehGetDsc(compiler->funCurrentFunc()->funEHIndex)->InHndRegionBBRange(block));
+ }
+ }
+}
+#endif // FEATURE_EH_FUNCLETS
+
+/*****************************************************************************
+ *
+ * Generate code for the function.
+ */
+
+void CodeGen::genGenerateCode(void** codePtr, ULONG* nativeSizeOfCode)
+{
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("*************** In genGenerateCode()\n");
+ compiler->fgDispBasicBlocks(compiler->verboseTrees);
+ }
+#endif
+
+ unsigned codeSize;
+ unsigned prologSize;
+ unsigned epilogSize;
+
+ void* consPtr;
+
+#ifdef DEBUG
+ genInterruptibleUsed = true;
+
+#if STACK_PROBES
+ genNeedPrologStackProbe = false;
+#endif
+
+ compiler->fgDebugCheckBBlist();
+#endif // DEBUG
+
+ /* This is the real thing */
+
+ genPrepForCompiler();
+
+ /* Prepare the emitter */
+ getEmitter()->Init();
+#ifdef DEBUG
+ VarSetOps::AssignNoCopy(compiler, genTempOldLife, VarSetOps::MakeEmpty(compiler));
+#endif
+
+#ifdef DEBUG
+ if (compiler->opts.disAsmSpilled && regSet.rsNeededSpillReg)
+ {
+ compiler->opts.disAsm = true;
+ }
+
+ if (compiler->opts.disAsm)
+ {
+ printf("; Assembly listing for method %s\n", compiler->info.compFullName);
+
+ printf("; Emitting ");
+
+ if (compiler->compCodeOpt() == Compiler::SMALL_CODE)
+ {
+ printf("SMALL_CODE");
+ }
+ else if (compiler->compCodeOpt() == Compiler::FAST_CODE)
+ {
+ printf("FAST_CODE");
+ }
+ else
+ {
+ printf("BLENDED_CODE");
+ }
+
+ printf(" for ");
+
+ if (compiler->info.genCPU == CPU_X86)
+ {
+ printf("generic X86 CPU");
+ }
+ else if (compiler->info.genCPU == CPU_X86_PENTIUM_4)
+ {
+ printf("Pentium 4");
+ }
+ else if (compiler->info.genCPU == CPU_X64)
+ {
+ if (compiler->canUseAVX())
+ {
+ printf("X64 CPU with AVX");
+ }
+ else
+ {
+ printf("X64 CPU with SSE2");
+ }
+ }
+
+ else if (compiler->info.genCPU == CPU_ARM)
+ {
+ printf("generic ARM CPU");
+ }
+
+ printf("\n");
+
+ if ((compiler->opts.compFlags & CLFLG_MAXOPT) == CLFLG_MAXOPT)
+ {
+ printf("; optimized code\n");
+ }
+ else if (compiler->opts.compDbgCode)
+ {
+ printf("; debuggable code\n");
+ }
+ else if (compiler->opts.MinOpts())
+ {
+ printf("; compiler->opts.MinOpts() is true\n");
+ }
+ else
+ {
+ printf("; unknown optimization flags\n");
+ }
+
+#if DOUBLE_ALIGN
+ if (compiler->genDoubleAlign())
+ printf("; double-aligned frame\n");
+ else
+#endif
+ printf("; %s based frame\n", isFramePointerUsed() ? STR_FPBASE : STR_SPBASE);
+
+ if (genInterruptible)
+ {
+ printf("; fully interruptible\n");
+ }
+ else
+ {
+ printf("; partially interruptible\n");
+ }
+
+ if (compiler->fgHaveProfileData())
+ {
+ printf("; with IBC profile data\n");
+ }
+
+ if (compiler->fgProfileData_ILSizeMismatch)
+ {
+ printf("; discarded IBC profile data due to mismatch in ILSize\n");
+ }
+ }
+#endif // DEBUG
+
+#ifndef LEGACY_BACKEND
+
+ // For RyuJIT backend, we compute the final frame layout before code generation. This is because LSRA
+ // has already computed exactly the maximum concurrent number of spill temps of each type that are
+ // required during code generation. So, there is nothing left to estimate: we can be precise in the frame
+ // layout. This helps us generate smaller code, and allocate, after code generation, a smaller amount of
+ // memory from the VM.
+
+ genFinalizeFrame();
+
+ unsigned maxTmpSize = compiler->tmpSize; // This is precise after LSRA has pre-allocated the temps.
+
+#else // LEGACY_BACKEND
+
+ // Estimate the frame size: first, estimate the number of spill temps needed by taking the register
+ // predictor spill temp estimates and stress levels into consideration. Then, compute the tentative
+ // frame layout using conservative callee-save register estimation (namely, guess they'll all be used
+ // and thus saved on the frame).
+
+ // Compute the maximum estimated spill temp size.
+ unsigned maxTmpSize = sizeof(double) + sizeof(float) + sizeof(__int64) + sizeof(void*);
+
+ maxTmpSize += (compiler->tmpDoubleSpillMax * sizeof(double)) + (compiler->tmpIntSpillMax * sizeof(int));
+
+#ifdef DEBUG
+
+ /* When StressRegs is >=1, there will be a bunch of spills not predicted by
+ the predictor (see logic in rsPickReg). It will be very hard to teach
+ the predictor about the behavior of rsPickReg for StressRegs >= 1, so
+ instead let's make maxTmpSize large enough so that we won't be wrong.
+ This means that at StressRegs >= 1, we will not be testing the logic
+       that sets the maxTmpSize value.
+ */
+
+ if (regSet.rsStressRegs() >= 1)
+ {
+ maxTmpSize += (REG_TMP_ORDER_COUNT * REGSIZE_BYTES);
+ }
+
+    // The JIT uses 2 passes when assigning stack variable (i.e. args, temps, and locals) locations in
+    // varDsc->lvStkOffs. During the 1st pass (in genGenerateCode), it estimates the maximum possible
+    // size for stack temps and puts it in maxTmpSize. Then it calculates varDsc->lvStkOffs for each
+    // variable based on this estimate.
+ // However during stress mode, we might spill more temps on the stack, which might grow the
+ // size of the temp area.
+ // This might cause varDsc->lvStkOffs to change during the 2nd pass (in emitEndCodeGen).
+ // If the change of varDsc->lvStkOffs crosses the threshold for the instruction size,
+ // we will then have a mismatched estimated code size (during the 1st pass) and the actual emitted code size
+ // (during the 2nd pass).
+ // Also, if STRESS_UNSAFE_BUFFER_CHECKS is turned on, we might reorder the stack variable locations,
+ // which could cause the mismatch too.
+ //
+    // The following code simply bumps maxTmpSize up to at least BYTE_MAX+1 under stress mode, so that
+    // we don't run into code size problems during stress.
+
+ if (getJitStressLevel() != 0)
+ {
+ if (maxTmpSize < BYTE_MAX + 1)
+ {
+ maxTmpSize = BYTE_MAX + 1;
+ }
+ }
+#endif // DEBUG
+
+ /* Estimate the offsets of locals/arguments and size of frame */
+
+ unsigned lclSize = compiler->lvaFrameSize(Compiler::TENTATIVE_FRAME_LAYOUT);
+
+#ifdef DEBUG
+ //
+ // Display the local frame offsets that we have tentatively decided upon
+ //
+ if (verbose)
+ {
+ compiler->lvaTableDump();
+ }
+#endif // DEBUG
+
+#endif // LEGACY_BACKEND
+
+ getEmitter()->emitBegFN(isFramePointerUsed()
+#if defined(DEBUG)
+ ,
+ (compiler->compCodeOpt() != Compiler::SMALL_CODE) &&
+ !(compiler->opts.eeFlags & CORJIT_FLG_PREJIT)
+#endif
+#ifdef LEGACY_BACKEND
+ ,
+ lclSize
+#endif // LEGACY_BACKEND
+ ,
+ maxTmpSize);
+
+ /* Now generate code for the function */
+ genCodeForBBlist();
+
+#ifndef LEGACY_BACKEND
+#ifdef DEBUG
+ // After code generation, dump the frame layout again. It should be the same as before code generation, if code
+ // generation hasn't touched it (it shouldn't!).
+ if (verbose)
+ {
+ compiler->lvaTableDump();
+ }
+#endif // DEBUG
+#endif // !LEGACY_BACKEND
+
+ /* We can now generate the function prolog and epilog */
+
+ genGeneratePrologsAndEpilogs();
+
+ /* Bind jump distances */
+
+ getEmitter()->emitJumpDistBind();
+
+ /* The code is now complete and final; it should not change after this. */
+
+ /* Compute the size of the code sections that we are going to ask the VM
+ to allocate. Note that this might not be precisely the size of the
+ code we emit, though it's fatal if we emit more code than the size we
+ compute here.
+ (Note: an example of a case where we emit less code would be useful.)
+ */
+
+ getEmitter()->emitComputeCodeSizes();
+
+#ifdef DEBUG
+
+ // Code to test or stress our ability to run a fallback compile.
+ // We trigger the fallback here, before asking the VM for any memory,
+ // because if not, we will leak mem, as the current codebase can't free
+ // the mem after the emitter asks the VM for it. As this is only a stress
+ // mode, we only want the functionality, and don't care about the relative
+ // ugliness of having the failure here.
+ if (!compiler->jitFallbackCompile)
+ {
+ // Use COMPlus_JitNoForceFallback=1 to prevent NOWAY assert testing from happening,
+ // especially that caused by enabling JIT stress.
+ if (!JitConfig.JitNoForceFallback())
+ {
+ if (JitConfig.JitForceFallback() || compiler->compStressCompile(Compiler::STRESS_GENERIC_VARN, 5))
+ {
+ NO_WAY_NOASSERT("Stress failure");
+ }
+ }
+ }
+
+#endif // DEBUG
+
+ /* We've finished collecting all the unwind information for the function. Now reserve
+ space for it from the VM.
+ */
+
+ compiler->unwindReserve();
+
+#if DISPLAY_SIZES
+
+ size_t dataSize = getEmitter()->emitDataSize();
+
+#endif // DISPLAY_SIZES
+
+ void* coldCodePtr;
+
+    bool trackedStackPtrsContig; // are tracked stk-ptrs contiguous?
+
+#ifdef _TARGET_AMD64_
+ trackedStackPtrsContig = false;
+#elif defined(_TARGET_ARM_)
+ // On arm due to prespilling of arguments, tracked stk-ptrs may not be contiguous
+ trackedStackPtrsContig = !compiler->opts.compDbgEnC && !compiler->compIsProfilerHookNeeded();
+#elif defined(_TARGET_ARM64_)
+    // Incoming vararg registers are homed on the top of the stack. Tracked vars may not be contiguous.
+ trackedStackPtrsContig = !compiler->opts.compDbgEnC && !compiler->info.compIsVarArgs;
+#else
+ trackedStackPtrsContig = !compiler->opts.compDbgEnC;
+#endif
+
+#ifdef DEBUG
+ /* We're done generating code for this function */
+ compiler->compCodeGenDone = true;
+#endif
+
+ compiler->EndPhase(PHASE_GENERATE_CODE);
+
+ codeSize = getEmitter()->emitEndCodeGen(compiler, trackedStackPtrsContig, genInterruptible, genFullPtrRegMap,
+ (compiler->info.compRetType == TYP_REF), compiler->compHndBBtabCount,
+ &prologSize, &epilogSize, codePtr, &coldCodePtr, &consPtr);
+
+ compiler->EndPhase(PHASE_EMIT_CODE);
+
+#ifdef DEBUG
+ if (compiler->opts.disAsm)
+ {
+ printf("; Total bytes of code %d, prolog size %d for method %s\n", codeSize, prologSize,
+ compiler->info.compFullName);
+ printf("; ============================================================\n");
+ printf(""); // in our logic this causes a flush
+ }
+
+ if (verbose)
+ {
+ printf("*************** After end code gen, before unwindEmit()\n");
+ getEmitter()->emitDispIGlist(true);
+ }
+#endif
+
+#if EMIT_TRACK_STACK_DEPTH
+ /* Check our max stack level. Needed for fgAddCodeRef().
+ We need to relax the assert as our estimation won't include code-gen
+ stack changes (which we know don't affect fgAddCodeRef()) */
+ noway_assert(getEmitter()->emitMaxStackDepth <=
+ (compiler->fgPtrArgCntMax + compiler->compHndBBtabCount + // Return address for locally-called finallys
+ genTypeStSz(TYP_LONG) + // longs/doubles may be transferred via stack, etc
+ (compiler->compTailCallUsed ? 4 : 0))); // CORINFO_HELP_TAILCALL args
+#endif
+
+ *nativeSizeOfCode = codeSize;
+ compiler->info.compNativeCodeSize = (UNATIVE_OFFSET)codeSize;
+
+ // printf("%6u bytes of code generated for %s.%s\n", codeSize, compiler->info.compFullName);
+
+ // Make sure that the x86 alignment and cache prefetch optimization rules
+ // were obeyed.
+
+ // Don't start a method in the last 7 bytes of a 16-byte alignment area
+ // unless we are generating SMALL_CODE
+ // noway_assert( (((unsigned)(*codePtr) % 16) <= 8) || (compiler->compCodeOpt() == SMALL_CODE));
+
+ /* Now that the code is issued, we can finalize and emit the unwind data */
+
+ compiler->unwindEmit(*codePtr, coldCodePtr);
+
+#ifdef DEBUGGING_SUPPORT
+
+ /* Finalize the line # tracking logic after we know the exact block sizes/offsets */
+
+ genIPmappingGen();
+
+ /* Finalize the Local Var info in terms of generated code */
+
+ genSetScopeInfo();
+
+#endif // DEBUGGING_SUPPORT
+
+#ifdef LATE_DISASM
+ unsigned finalHotCodeSize;
+ unsigned finalColdCodeSize;
+ if (compiler->fgFirstColdBlock != nullptr)
+ {
+ // We did some hot/cold splitting. The hot section is always padded out to the
+ // size we thought it would be, but the cold section is not.
+ assert(codeSize <= compiler->info.compTotalHotCodeSize + compiler->info.compTotalColdCodeSize);
+ assert(compiler->info.compTotalHotCodeSize > 0);
+ assert(compiler->info.compTotalColdCodeSize > 0);
+ finalHotCodeSize = compiler->info.compTotalHotCodeSize;
+ finalColdCodeSize = codeSize - finalHotCodeSize;
+ }
+ else
+ {
+ // No hot/cold splitting
+ assert(codeSize <= compiler->info.compTotalHotCodeSize);
+ assert(compiler->info.compTotalHotCodeSize > 0);
+ assert(compiler->info.compTotalColdCodeSize == 0);
+ finalHotCodeSize = codeSize;
+ finalColdCodeSize = 0;
+ }
+ getDisAssembler().disAsmCode((BYTE*)*codePtr, finalHotCodeSize, (BYTE*)coldCodePtr, finalColdCodeSize);
+#endif // LATE_DISASM
+
+ /* Report any exception handlers to the VM */
+
+ genReportEH();
+
+#ifdef JIT32_GCENCODER
+#ifdef DEBUG
+ void* infoPtr =
+#endif // DEBUG
+#endif
+ // Create and store the GC info for this method.
+ genCreateAndStoreGCInfo(codeSize, prologSize, epilogSize DEBUGARG(codePtr));
+
+#ifdef DEBUG
+ FILE* dmpf = jitstdout;
+
+ compiler->opts.dmpHex = false;
+ if (!strcmp(compiler->info.compMethodName, "<name of method you want the hex dump for"))
+ {
+ FILE* codf;
+ errno_t ec = fopen_s(&codf, "C:\\JIT.COD", "at"); // NOTE: file append mode
+        if (ec == 0) // fopen_s returns 0 on success; only then is codf valid
+ {
+ assert(codf);
+ dmpf = codf;
+ compiler->opts.dmpHex = true;
+ }
+ }
+ if (compiler->opts.dmpHex)
+ {
+ size_t consSize = getEmitter()->emitDataSize();
+ size_t infoSize = compiler->compInfoBlkSize;
+
+ fprintf(dmpf, "Generated code for %s:\n", compiler->info.compFullName);
+ fprintf(dmpf, "\n");
+
+ if (codeSize)
+ {
+ fprintf(dmpf, " Code at %p [%04X bytes]\n", dspPtr(*codePtr), codeSize);
+ }
+ if (consSize)
+ {
+ fprintf(dmpf, " Const at %p [%04X bytes]\n", dspPtr(consPtr), consSize);
+ }
+#ifdef JIT32_GCENCODER
+ if (infoSize)
+ fprintf(dmpf, " Info at %p [%04X bytes]\n", dspPtr(infoPtr), infoSize);
+#endif // JIT32_GCENCODER
+
+ fprintf(dmpf, "\n");
+
+ if (codeSize)
+ {
+ hexDump(dmpf, "Code", (BYTE*)*codePtr, codeSize);
+ }
+ if (consSize)
+ {
+ hexDump(dmpf, "Const", (BYTE*)consPtr, consSize);
+ }
+#ifdef JIT32_GCENCODER
+ if (infoSize)
+ hexDump(dmpf, "Info", (BYTE*)infoPtr, infoSize);
+#endif // JIT32_GCENCODER
+
+ fflush(dmpf);
+ }
+
+ if (dmpf != jitstdout)
+ {
+ fclose(dmpf);
+ }
+
+#endif // DEBUG
+
+ /* Tell the emitter that we're done with this function */
+
+ getEmitter()->emitEndFN();
+
+ /* Shut down the spill logic */
+
+ regSet.rsSpillDone();
+
+ /* Shut down the temp logic */
+
+ compiler->tmpDone();
+
+#if DISPLAY_SIZES
+
+ grossVMsize += compiler->info.compILCodeSize;
+ totalNCsize += codeSize + dataSize + compiler->compInfoBlkSize;
+ grossNCsize += codeSize + dataSize;
+
+#endif // DISPLAY_SIZES
+
+ compiler->EndPhase(PHASE_EMIT_GCEH);
+}
+
+/*****************************************************************************
+ *
+ * Report EH clauses to the VM
+ */
+
+void CodeGen::genReportEH()
+{
+ if (compiler->compHndBBtabCount == 0)
+ {
+ return;
+ }
+
+#ifdef DEBUG
+ if (compiler->opts.dspEHTable)
+ {
+ printf("*************** EH table for %s\n", compiler->info.compFullName);
+ }
+#endif // DEBUG
+
+ unsigned XTnum;
+ EHblkDsc* HBtab;
+ EHblkDsc* HBtabEnd;
+
+ unsigned EHCount = compiler->compHndBBtabCount;
+
+#if FEATURE_EH_FUNCLETS
+ // Count duplicated clauses. This uses the same logic as below, where we actually generate them for reporting to the
+ // VM.
+ unsigned duplicateClauseCount = 0;
+ unsigned enclosingTryIndex;
+ for (XTnum = 0; XTnum < compiler->compHndBBtabCount; XTnum++)
+ {
+ for (enclosingTryIndex = compiler->ehTrueEnclosingTryIndexIL(XTnum); // find the true enclosing try index,
+ // ignoring 'mutual protect' trys
+ enclosingTryIndex != EHblkDsc::NO_ENCLOSING_INDEX;
+ enclosingTryIndex = compiler->ehGetEnclosingTryIndex(enclosingTryIndex))
+ {
+ ++duplicateClauseCount;
+ }
+ }
+ EHCount += duplicateClauseCount;
+
+#if FEATURE_EH_CALLFINALLY_THUNKS
+ unsigned clonedFinallyCount = 0;
+
+    // We don't keep track of how many cloned finallys there are. So, go through and count.
+ // We do a quick pass first through the EH table to see if there are any try/finally
+ // clauses. If there aren't, we don't need to look for BBJ_CALLFINALLY.
+
+ bool anyFinallys = false;
+ for (HBtab = compiler->compHndBBtab, HBtabEnd = compiler->compHndBBtab + compiler->compHndBBtabCount;
+ HBtab < HBtabEnd; HBtab++)
+ {
+ if (HBtab->HasFinallyHandler())
+ {
+ anyFinallys = true;
+ break;
+ }
+ }
+ if (anyFinallys)
+ {
+ for (BasicBlock* block = compiler->fgFirstBB; block != nullptr; block = block->bbNext)
+ {
+ if (block->bbJumpKind == BBJ_CALLFINALLY)
+ {
+ ++clonedFinallyCount;
+ }
+ }
+
+ EHCount += clonedFinallyCount;
+ }
+#endif // FEATURE_EH_CALLFINALLY_THUNKS
+
+#endif // FEATURE_EH_FUNCLETS
+
+#ifdef DEBUG
+ if (compiler->opts.dspEHTable)
+ {
+#if FEATURE_EH_FUNCLETS
+#if FEATURE_EH_CALLFINALLY_THUNKS
+ printf("%d EH table entries, %d duplicate clauses, %d cloned finallys, %d total EH entries reported to VM\n",
+ compiler->compHndBBtabCount, duplicateClauseCount, clonedFinallyCount, EHCount);
+ assert(compiler->compHndBBtabCount + duplicateClauseCount + clonedFinallyCount == EHCount);
+#else // !FEATURE_EH_CALLFINALLY_THUNKS
+ printf("%d EH table entries, %d duplicate clauses, %d total EH entries reported to VM\n",
+ compiler->compHndBBtabCount, duplicateClauseCount, EHCount);
+ assert(compiler->compHndBBtabCount + duplicateClauseCount == EHCount);
+#endif // !FEATURE_EH_CALLFINALLY_THUNKS
+#else // !FEATURE_EH_FUNCLETS
+ printf("%d EH table entries, %d total EH entries reported to VM\n", compiler->compHndBBtabCount, EHCount);
+ assert(compiler->compHndBBtabCount == EHCount);
+#endif // !FEATURE_EH_FUNCLETS
+ }
+#endif // DEBUG
+
+ // Tell the VM how many EH clauses to expect.
+ compiler->eeSetEHcount(EHCount);
+
+ XTnum = 0; // This is the index we pass to the VM
+
+ for (HBtab = compiler->compHndBBtab, HBtabEnd = compiler->compHndBBtab + compiler->compHndBBtabCount;
+ HBtab < HBtabEnd; HBtab++)
+ {
+ UNATIVE_OFFSET tryBeg, tryEnd, hndBeg, hndEnd, hndTyp;
+
+ tryBeg = compiler->ehCodeOffset(HBtab->ebdTryBeg);
+ hndBeg = compiler->ehCodeOffset(HBtab->ebdHndBeg);
+
+ tryEnd = (HBtab->ebdTryLast == compiler->fgLastBB) ? compiler->info.compNativeCodeSize
+ : compiler->ehCodeOffset(HBtab->ebdTryLast->bbNext);
+ hndEnd = (HBtab->ebdHndLast == compiler->fgLastBB) ? compiler->info.compNativeCodeSize
+ : compiler->ehCodeOffset(HBtab->ebdHndLast->bbNext);
+
+ if (HBtab->HasFilter())
+ {
+ hndTyp = compiler->ehCodeOffset(HBtab->ebdFilter);
+ }
+ else
+ {
+ hndTyp = HBtab->ebdTyp;
+ }
+
+ CORINFO_EH_CLAUSE_FLAGS flags = ToCORINFO_EH_CLAUSE_FLAGS(HBtab->ebdHandlerType);
+
+ // Note that we reuse the CORINFO_EH_CLAUSE type, even though the names of
+ // the fields aren't accurate.
+
+ CORINFO_EH_CLAUSE clause;
+ clause.ClassToken = hndTyp; /* filter offset is passed back here for filter-based exception handlers */
+ clause.Flags = flags;
+ clause.TryOffset = tryBeg;
+ clause.TryLength = tryEnd;
+ clause.HandlerOffset = hndBeg;
+ clause.HandlerLength = hndEnd;
+
+ assert(XTnum < EHCount);
+
+ // Tell the VM about this EH clause.
+ compiler->eeSetEHinfo(XTnum, &clause);
+
+ ++XTnum;
+ }
+
+#if FEATURE_EH_FUNCLETS
+ // Now output duplicated clauses.
+ //
+ // If a funclet has been created by moving a handler out of a try region that it was originally nested
+ // within, then we need to report a "duplicate" clause representing the fact that an exception in that
+ // handler can be caught by the 'try' it has been moved out of. This is because the original 'try' region
+ // descriptor can only specify a single, contiguous protected range, but the funclet we've moved out is
+ // no longer contiguous with the original 'try' region. The new EH descriptor will have the same handler
+ // region as the enclosing try region's handler region. This is the sense in which it is duplicated:
+ // there is now a "duplicate" clause with the same handler region as another, but a different 'try'
+ // region.
+ //
+ // For example, consider this (capital letters represent an unknown code sequence, numbers identify a
+ // try or handler region):
+ //
+ // A
+ // try (1) {
+ // B
+ // try (2) {
+ // C
+ // } catch (3) {
+ // D
+ // } catch (4) {
+ // E
+ // }
+ // F
+ // } catch (5) {
+ // G
+ // }
+ // H
+ //
+ // Here, we have try region (1) BCDEF protected by catch (5) G, and region (2) C protected
+ // by catch (3) D and catch (4) E. Note that catch (4) E does *NOT* protect the code "D".
+ // This is an example of 'mutually protect' regions. First, we move handlers (3) and (4)
+ // to the end of the code. However, (3) and (4) are nested inside, and protected by, try (1). Again
+ // note that (3) is not nested inside (4), despite ebdEnclosingTryIndex indicating that.
+ // The code "D" and "E" won't be contiguous with the protected region for try (1) (which
+ // will, after moving catch (3) AND (4), be BCF). Thus, we need to add a new EH descriptor
+ // representing try (1) protecting the new funclets catch (3) and (4).
+ // The code will be generated as follows:
+ //
+ // ABCFH // "main" code
+ // D // funclet
+ // E // funclet
+ // G // funclet
+ //
+ // The EH regions are:
+ //
+ // C -> D
+ // C -> E
+ // BCF -> G
+ // D -> G // "duplicate" clause
+ // E -> G // "duplicate" clause
+ //
+ // Note that we actually need to generate one of these additional "duplicate" clauses for every
+ // region the funclet is nested in. Take this example:
+ //
+ // A
+ // try (1) {
+ // B
+ // try (2,3) {
+ // C
+ // try (4) {
+ // D
+ // try (5,6) {
+ // E
+ // } catch {
+ // F
+ // } catch {
+ // G
+ // }
+ // H
+ // } catch {
+ // I
+ // }
+ // J
+ // } catch {
+ // K
+ // } catch {
+ // L
+ // }
+ // M
+ // } catch {
+ // N
+ // }
+ // O
+ //
+ // When we pull out funclets, we get the following generated code:
+ //
+ // ABCDEHJMO // "main" function
+ // F // funclet
+ // G // funclet
+ // I // funclet
+ // K // funclet
+ // L // funclet
+ // N // funclet
+ //
+ // And the EH regions we report to the VM are (in order; main clauses
+ // first in most-to-least nested order, funclets ("duplicated clauses")
+ // last, in most-to-least nested) are:
+ //
+ // E -> F
+ // E -> G
+ // DEH -> I
+ // CDEHJ -> K
+ // CDEHJ -> L
+ // BCDEHJM -> N
+ // F -> I // funclet clause #1 for F
+ // F -> K // funclet clause #2 for F
+ // F -> L // funclet clause #3 for F
+ // F -> N // funclet clause #4 for F
+ // G -> I // funclet clause #1 for G
+ // G -> K // funclet clause #2 for G
+ // G -> L // funclet clause #3 for G
+ // G -> N // funclet clause #4 for G
+ // I -> K // funclet clause #1 for I
+ // I -> L // funclet clause #2 for I
+ // I -> N // funclet clause #3 for I
+ // K -> N // funclet clause #1 for K
+ // L -> N // funclet clause #1 for L
+ //
+ // So whereas the IL had 6 EH clauses, we need to report 19 EH clauses to the VM.
+ // Note that due to the nature of 'mutually protect' clauses, it would be incorrect
+ // to add a clause "F -> G" because F is NOT protected by G, but we still have
+ // both "F -> K" and "F -> L" because F IS protected by both of those handlers.
+ //
+ // The overall ordering of the clauses is still the same most-to-least nesting
+ // after front-to-back start offset. Because we place the funclets at the end
+ // these new clauses should also go at the end by this ordering.
+ //
+
+ if (duplicateClauseCount > 0)
+ {
+ unsigned reportedDuplicateClauseCount = 0; // How many duplicated clauses have we reported?
+ unsigned XTnum2;
+ for (XTnum2 = 0, HBtab = compiler->compHndBBtab; XTnum2 < compiler->compHndBBtabCount; XTnum2++, HBtab++)
+ {
+ unsigned enclosingTryIndex;
+
+ EHblkDsc* fletTab = compiler->ehGetDsc(XTnum2);
+
+ for (enclosingTryIndex = compiler->ehTrueEnclosingTryIndexIL(XTnum2); // find the true enclosing try index,
+ // ignoring 'mutual protect' trys
+ enclosingTryIndex != EHblkDsc::NO_ENCLOSING_INDEX;
+ enclosingTryIndex = compiler->ehGetEnclosingTryIndex(enclosingTryIndex))
+ {
+ // The funclet we moved out is nested in a try region, so create a new EH descriptor for the funclet
+ // that will have the enclosing try protecting the funclet.
+
+ noway_assert(XTnum2 < enclosingTryIndex); // the enclosing region must be less nested, and hence have a
+ // greater EH table index
+
+ EHblkDsc* encTab = compiler->ehGetDsc(enclosingTryIndex);
+
+ // The try region is the handler of the funclet. Note that for filters, we don't protect the
+ // filter region, only the filter handler region. This is because exceptions in filters never
+ // escape; the VM swallows them.
+
+ BasicBlock* bbTryBeg = fletTab->ebdHndBeg;
+ BasicBlock* bbTryLast = fletTab->ebdHndLast;
+
+ BasicBlock* bbHndBeg = encTab->ebdHndBeg; // The handler region is the same as the enclosing try
+ BasicBlock* bbHndLast = encTab->ebdHndLast;
+
+ UNATIVE_OFFSET tryBeg, tryEnd, hndBeg, hndEnd, hndTyp;
+
+ tryBeg = compiler->ehCodeOffset(bbTryBeg);
+ hndBeg = compiler->ehCodeOffset(bbHndBeg);
+
+ tryEnd = (bbTryLast == compiler->fgLastBB) ? compiler->info.compNativeCodeSize
+ : compiler->ehCodeOffset(bbTryLast->bbNext);
+ hndEnd = (bbHndLast == compiler->fgLastBB) ? compiler->info.compNativeCodeSize
+ : compiler->ehCodeOffset(bbHndLast->bbNext);
+
+ if (encTab->HasFilter())
+ {
+ hndTyp = compiler->ehCodeOffset(encTab->ebdFilter);
+ }
+ else
+ {
+ hndTyp = encTab->ebdTyp;
+ }
+
+ CORINFO_EH_CLAUSE_FLAGS flags = ToCORINFO_EH_CLAUSE_FLAGS(encTab->ebdHandlerType);
+
+ // Tell the VM this is an extra clause caused by moving funclets out of line.
+ // It seems weird this is from the CorExceptionFlag enum in corhdr.h,
+ // not the CORINFO_EH_CLAUSE_FLAGS enum in corinfo.h.
+ flags = (CORINFO_EH_CLAUSE_FLAGS)(flags | COR_ILEXCEPTION_CLAUSE_DUPLICATED);
+
+ // Note that the JIT-EE interface reuses the CORINFO_EH_CLAUSE type, even though the names of
+ // the fields aren't really accurate. For example, we set "TryLength" to the offset of the
+ // instruction immediately after the 'try' body. So, it really could be more accurately named
+ // "TryEndOffset".
+
+ CORINFO_EH_CLAUSE clause;
+ clause.ClassToken = hndTyp; /* filter offset is passed back here for filter-based exception handlers */
+ clause.Flags = flags;
+ clause.TryOffset = tryBeg;
+ clause.TryLength = tryEnd;
+ clause.HandlerOffset = hndBeg;
+ clause.HandlerLength = hndEnd;
+
+ assert(XTnum < EHCount);
+
+ // Tell the VM about this EH clause (a duplicated clause).
+ compiler->eeSetEHinfo(XTnum, &clause);
+
+ ++XTnum;
+ ++reportedDuplicateClauseCount;
+
+#ifndef DEBUG
+ if (duplicateClauseCount == reportedDuplicateClauseCount)
+ {
+ break; // we've reported all of them; no need to continue looking
+ }
+#endif // !DEBUG
+
+ } // for each 'true' enclosing 'try'
+ } // for each EH table entry
+
+ assert(duplicateClauseCount == reportedDuplicateClauseCount);
+ } // if (duplicateClauseCount > 0)
+
+#if FEATURE_EH_CALLFINALLY_THUNKS
+ if (anyFinallys)
+ {
+ unsigned reportedClonedFinallyCount = 0;
+ for (BasicBlock* block = compiler->fgFirstBB; block != nullptr; block = block->bbNext)
+ {
+ if (block->bbJumpKind == BBJ_CALLFINALLY)
+ {
+ UNATIVE_OFFSET hndBeg, hndEnd;
+
+ hndBeg = compiler->ehCodeOffset(block);
+
+ // How big is it? The BBJ_ALWAYS has a null bbEmitCookie! Look for the block after, which must be
+ // a label or jump target, since the BBJ_CALLFINALLY doesn't fall through.
+ BasicBlock* bbLabel = block->bbNext;
+ if (block->isBBCallAlwaysPair())
+ {
+ bbLabel = bbLabel->bbNext; // skip the BBJ_ALWAYS
+ }
+ if (bbLabel == nullptr)
+ {
+ hndEnd = compiler->info.compNativeCodeSize;
+ }
+ else
+ {
+ assert(bbLabel->bbEmitCookie != nullptr);
+ hndEnd = compiler->ehCodeOffset(bbLabel);
+ }
+
+ CORINFO_EH_CLAUSE clause;
+ clause.ClassToken = 0; // unused
+ clause.Flags = (CORINFO_EH_CLAUSE_FLAGS)(CORINFO_EH_CLAUSE_FINALLY | COR_ILEXCEPTION_CLAUSE_DUPLICATED);
+ clause.TryOffset = hndBeg;
+ clause.TryLength = hndBeg;
+ clause.HandlerOffset = hndBeg;
+ clause.HandlerLength = hndEnd;
+
+ assert(XTnum < EHCount);
+
+ // Tell the VM about this EH clause (a cloned finally clause).
+ compiler->eeSetEHinfo(XTnum, &clause);
+
+ ++XTnum;
+ ++reportedClonedFinallyCount;
+
+#ifndef DEBUG
+ if (clonedFinallyCount == reportedClonedFinallyCount)
+ {
+ break; // we're done; no need to keep looking
+ }
+#endif // !DEBUG
+ } // block is BBJ_CALLFINALLY
+ } // for each block
+
+ assert(clonedFinallyCount == reportedClonedFinallyCount);
+ } // if (anyFinallys)
+#endif // FEATURE_EH_CALLFINALLY_THUNKS
+
+#endif // FEATURE_EH_FUNCLETS
+
+ assert(XTnum == EHCount);
+}
+
+void CodeGen::genGCWriteBarrier(GenTreePtr tgt, GCInfo::WriteBarrierForm wbf)
+{
+#ifndef LEGACY_BACKEND
+ noway_assert(tgt->gtOper == GT_STOREIND);
+#else // LEGACY_BACKEND
+ noway_assert(tgt->gtOper == GT_IND || tgt->gtOper == GT_CLS_VAR); // enforced by gcIsWriteBarrierCandidate
+#endif // LEGACY_BACKEND
+
+ /* Call the proper vm helper */
+ int helper = CORINFO_HELP_ASSIGN_REF;
+#ifdef DEBUG
+ if (wbf == GCInfo::WBF_NoBarrier_CheckNotHeapInDebug)
+ {
+ helper = CORINFO_HELP_ASSIGN_REF_ENSURE_NONHEAP;
+ }
+ else
+#endif
+ if (tgt->gtOper != GT_CLS_VAR)
+ {
+ if (wbf != GCInfo::WBF_BarrierUnchecked) // This overrides the tests below.
+ {
+ if (tgt->gtFlags & GTF_IND_TGTANYWHERE)
+ {
+ helper = CORINFO_HELP_CHECKED_ASSIGN_REF;
+ }
+ else if (tgt->gtOp.gtOp1->TypeGet() == TYP_I_IMPL)
+ {
+ helper = CORINFO_HELP_CHECKED_ASSIGN_REF;
+ }
+ }
+ }
+ assert(((helper == CORINFO_HELP_ASSIGN_REF_ENSURE_NONHEAP) && (wbf == GCInfo::WBF_NoBarrier_CheckNotHeapInDebug)) ||
+ ((helper == CORINFO_HELP_CHECKED_ASSIGN_REF) &&
+ (wbf == GCInfo::WBF_BarrierChecked || wbf == GCInfo::WBF_BarrierUnknown)) ||
+ ((helper == CORINFO_HELP_ASSIGN_REF) &&
+ (wbf == GCInfo::WBF_BarrierUnchecked || wbf == GCInfo::WBF_BarrierUnknown)));
+
+#ifdef FEATURE_COUNT_GC_WRITE_BARRIERS
+ // We classify the "tgt" trees as follows:
+ // If "tgt" is of the form (where [ x ] indicates an optional x, and { x1, ..., xn } means "one of the x_i forms"):
+    //    IND [-> ADDR -> IND] -> { GT_LCL_VAR, GT_REG_VAR, ADD({GT_LCL_VAR, GT_REG_VAR}, X), ADD(X, {GT_LCL_VAR,
+    //    GT_REG_VAR}) }
+    // then let "v" be the GT_LCL_VAR or GT_REG_VAR.
+    //   * If "v" is the return buffer argument, classify as CWBKind_RetBuf.
+    //   * If "v" is another by-ref argument, classify as CWBKind_ByRefArg.
+    //   * Otherwise, classify as CWBKind_OtherByRefLocal.
+    // If "tgt" is of the form IND -> ADDR -> GT_LCL_VAR, classify as CWBKind_AddrOfLocal.
+ // Otherwise, classify as CWBKind_Unclassified.
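+    //
+    // For example (illustrative): a store through a by-ref out parameter,
+    // IND(LCL_VAR<byref param>), classifies as CWBKind_ByRefArg.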
+
+ CheckedWriteBarrierKinds wbKind = CWBKind_Unclassified;
+ if (tgt->gtOper == GT_IND)
+ {
+ GenTreePtr lcl = NULL;
+
+ GenTreePtr indArg = tgt->gtOp.gtOp1;
+ if (indArg->gtOper == GT_ADDR && indArg->gtOp.gtOp1->gtOper == GT_IND)
+ {
+ indArg = indArg->gtOp.gtOp1->gtOp.gtOp1;
+ }
+ if (indArg->gtOper == GT_LCL_VAR || indArg->gtOper == GT_REG_VAR)
+ {
+ lcl = indArg;
+ }
+ else if (indArg->gtOper == GT_ADD)
+ {
+ if (indArg->gtOp.gtOp1->gtOper == GT_LCL_VAR || indArg->gtOp.gtOp1->gtOper == GT_REG_VAR)
+ {
+ lcl = indArg->gtOp.gtOp1;
+ }
+ else if (indArg->gtOp.gtOp2->gtOper == GT_LCL_VAR || indArg->gtOp.gtOp2->gtOper == GT_REG_VAR)
+ {
+ lcl = indArg->gtOp.gtOp2;
+ }
+ }
+ if (lcl != NULL)
+ {
+ wbKind = CWBKind_OtherByRefLocal; // Unclassified local variable.
+ unsigned lclNum = 0;
+ if (lcl->gtOper == GT_LCL_VAR)
+ lclNum = lcl->gtLclVarCommon.gtLclNum;
+ else
+ {
+ assert(lcl->gtOper == GT_REG_VAR);
+ lclNum = lcl->gtRegVar.gtLclNum;
+ }
+ if (lclNum == compiler->info.compRetBuffArg)
+ {
+ wbKind = CWBKind_RetBuf; // Ret buff. Can happen if the struct exceeds the size limit.
+ }
+ else
+ {
+ LclVarDsc* varDsc = &compiler->lvaTable[lclNum];
+ if (varDsc->lvIsParam && varDsc->lvType == TYP_BYREF)
+ {
+ wbKind = CWBKind_ByRefArg; // Out (or in/out) arg
+ }
+ }
+ }
+ else
+ {
+ // We should have eliminated the barrier for this case.
+ assert(!(indArg->gtOper == GT_ADDR && indArg->gtOp.gtOp1->gtOper == GT_LCL_VAR));
+ }
+ }
+
+ if (helper == CORINFO_HELP_CHECKED_ASSIGN_REF)
+ {
+#if 0
+#ifdef DEBUG
+ // Enable this to sample the unclassified trees.
+ static int unclassifiedBarrierSite = 0;
+ if (wbKind == CWBKind_Unclassified)
+ {
+ unclassifiedBarrierSite++;
+ printf("unclassifiedBarrierSite = %d:\n", unclassifiedBarrierSite); compiler->gtDispTree(tgt); printf(""); printf("\n");
+ }
+#endif // DEBUG
+#endif // 0
+ genStackLevel += 4;
+ inst_IV(INS_push, wbKind);
+ genEmitHelperCall(helper,
+ 4, // argSize
+ EA_PTRSIZE); // retSize
+ genStackLevel -= 4;
+ }
+ else
+ {
+ genEmitHelperCall(helper,
+ 0, // argSize
+ EA_PTRSIZE); // retSize
+ }
+
+#else // !FEATURE_COUNT_GC_WRITE_BARRIERS
+ genEmitHelperCall(helper,
+ 0, // argSize
+ EA_PTRSIZE); // retSize
+#endif // !FEATURE_COUNT_GC_WRITE_BARRIERS
+}
+
+/*
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX Prolog / Epilog XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+/*****************************************************************************
+ *
+ * Generates code for moving incoming register arguments to their
+ * assigned location, in the function prolog.
+ */
+
+#ifdef _PREFAST_
+#pragma warning(push)
+#pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
+#endif
+void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg, bool* pXtraRegClobbered, RegState* regState)
+{
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("*************** In genFnPrologCalleeRegArgs() for %s regs\n", regState->rsIsFloat ? "float" : "int");
+ }
+#endif
+
+#ifdef _TARGET_ARM64_
+ if (compiler->info.compIsVarArgs)
+ {
+ // We've already saved all int registers at the top of stack in the prolog.
+ // No need further action.
+ return;
+ }
+#endif
+
+ unsigned argMax; // maximum argNum value plus 1, (including the RetBuffArg)
+ unsigned argNum; // current argNum, always in [0..argMax-1]
+ unsigned fixedRetBufIndex; // argNum value used by the fixed return buffer argument (ARM64)
+ unsigned regArgNum; // index into the regArgTab[] table
+ regMaskTP regArgMaskLive = regState->rsCalleeRegArgMaskLiveIn;
+ bool doingFloat = regState->rsIsFloat;
+
+ // We should be generating the prolog block when we are called
+ assert(compiler->compGeneratingProlog);
+
+ // We expect to have some registers of the type we are doing, that are LiveIn, otherwise we don't need to be called.
+ noway_assert(regArgMaskLive != 0);
+
+ // If a method has 3 args (and no fixed return buffer) then argMax is 3 and valid indexes are 0,1,2
+    // If a method has a fixed return buffer (on ARM64) then argMax gets set to 9 and valid indexes are 0-8
+ //
+ // The regArgTab can always have unused entries,
+    // for example, if an architecture always increments the arg register number but uses either
+    // an integer register or a floating point register to hold the next argument,
+    // then with a mix of float and integer args you could have:
+ //
+ // sampleMethod(int i, float x, int j, float y, int k, float z);
+ // r0, r2 and r4 as valid integer arguments with argMax as 5
+    //      and f1, f3 and f5 as valid floating point arguments with argMax as 6
+ // The first one is doingFloat==false and the second one is doingFloat==true
+ //
+ // If a fixed return buffer (in r8) was also present then the first one would become:
+ // r0, r2, r4 and r8 as valid integer arguments with argMax as 9
+ //
+
+ argMax = regState->rsCalleeRegArgCount;
+ fixedRetBufIndex = (unsigned)-1; // Invalid value
+
+ // If necessary we will select a correct xtraReg for circular floating point args later.
+ if (doingFloat)
+ {
+ xtraReg = REG_NA;
+ noway_assert(argMax <= MAX_FLOAT_REG_ARG);
+ }
+ else // we are doing the integer registers
+ {
+ noway_assert(argMax <= MAX_REG_ARG);
+ if (hasFixedRetBuffReg())
+ {
+ fixedRetBufIndex = theFixedRetBuffArgNum();
+ // We have an additional integer register argument when hasFixedRetBuffReg() is true
+ argMax = fixedRetBufIndex + 1;
+ assert(argMax == (MAX_REG_ARG + 1));
+ }
+ }
+
+ //
+ // Construct a table with the register arguments, for detecting circular and
+ // non-circular dependencies between the register arguments. A dependency is when
+ // an argument register Rn needs to be moved to register Rm that is also an argument
+ // register. The table is constructed in the order the arguments are passed in
+ // registers: the first register argument is in regArgTab[0], the second in
+ // regArgTab[1], etc. Note that on ARM, a TYP_DOUBLE takes two entries, starting
+ // at an even index. The regArgTab is indexed from 0 to argMax - 1.
+    // Note that due to an extra argument register for ARM64 (i.e. theFixedRetBuffReg())
+ // we have increased the allocated size of the regArgTab[] by one.
+ //
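+    // For example (illustrative): if argument 'a' arrives in r0 but is assigned to
+    // r1, while 'b' arrives in r1 but is assigned to r0, the two regArgTab entries
+    // form a circular dependency, which is broken using xtraReg (or a swap).
+    //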
+ struct regArgElem
+ {
+ unsigned varNum; // index into compiler->lvaTable[] for this register argument
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ var_types type; // the Jit type of this regArgTab entry
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ unsigned trashBy; // index into this regArgTab[] table of the register that will be copied to this register.
+ // That is, for regArgTab[x].trashBy = y, argument register number 'y' will be copied to
+ // argument register number 'x'. Only used when circular = true.
+ char slot; // 0 means the register is not used for a register argument
+ // 1 means the first part of a register argument
+ // 2, 3 or 4 means the second,third or fourth part of a multireg argument
+ bool stackArg; // true if the argument gets homed to the stack
+ bool processed; // true after we've processed the argument (and it is in its final location)
+ bool circular; // true if this register participates in a circular dependency loop.
+
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+
+ // For UNIX AMD64 struct passing, the type of the register argument slot can differ from
+ // the type of the lclVar in ways that are not ascertainable from lvType.
+ // So, for that case we retain the type of the register in the regArgTab.
+
+ var_types getRegType(Compiler* compiler)
+ {
+ return type; // UNIX_AMD64 implementation
+ }
+
+#else // !FEATURE_UNIX_AMD64_STRUCT_PASSING
+
+ // In other cases, we simply use the type of the lclVar to determine the type of the register.
+ var_types getRegType(Compiler* compiler)
+ {
+ LclVarDsc varDsc = compiler->lvaTable[varNum];
+ // Check if this is an HFA register arg and return the HFA type
+ if (varDsc.lvIsHfaRegArg())
+ {
+ return varDsc.GetHfaType();
+ }
+ return varDsc.lvType;
+ }
+
+#endif // !FEATURE_UNIX_AMD64_STRUCT_PASSING
+ } regArgTab[max(MAX_REG_ARG + 1, MAX_FLOAT_REG_ARG)] = {};
+
+ unsigned varNum;
+ LclVarDsc* varDsc;
+ for (varNum = 0, varDsc = compiler->lvaTable; varNum < compiler->lvaCount; varNum++, varDsc++)
+ {
+ // Is this variable a register arg?
+ if (!varDsc->lvIsParam)
+ {
+ continue;
+ }
+
+ if (!varDsc->lvIsRegArg)
+ {
+ continue;
+ }
+
+ // When we have a promoted struct we have two possible LclVars that can represent the incoming argument
+ // in the regArgTab[], either the original TYP_STRUCT argument or the introduced lvStructField.
+        // We will use the lvStructField if we have a TYPE_INDEPENDENT promoted struct field;
+        // otherwise we use the original TYP_STRUCT argument.
+ //
+ if (varDsc->lvPromoted || varDsc->lvIsStructField)
+ {
+ LclVarDsc* parentVarDsc = varDsc;
+ if (varDsc->lvIsStructField)
+ {
+ assert(!varDsc->lvPromoted);
+ parentVarDsc = &compiler->lvaTable[varDsc->lvParentLcl];
+ }
+
+ Compiler::lvaPromotionType promotionType = compiler->lvaGetPromotionType(parentVarDsc);
+
+ if (promotionType == Compiler::PROMOTION_TYPE_INDEPENDENT)
+ {
+ noway_assert(parentVarDsc->lvFieldCnt == 1); // We only handle one field here
+
+ // For register arguments that are independent promoted structs we put the promoted field varNum in the
+ // regArgTab[]
+ if (varDsc->lvPromoted)
+ {
+ continue;
+ }
+ }
+ else
+ {
+ // For register arguments that are not independent promoted structs we put the parent struct varNum in
+ // the regArgTab[]
+ if (varDsc->lvIsStructField)
+ {
+ continue;
+ }
+ }
+ }
+
+ var_types regType = varDsc->TypeGet();
+ // Change regType to the HFA type when we have a HFA argument
+ if (varDsc->lvIsHfaRegArg())
+ {
+ regType = varDsc->GetHfaType();
+ }
+
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ if (!varTypeIsStruct(regType))
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ {
+            // A struct might be passed partially in XMM registers for System V calls,
+            // so a single arg might use both register files.
+ if (isFloatRegType(regType) != doingFloat)
+ {
+ continue;
+ }
+ }
+
+ int slots = 0;
+
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ if (varTypeIsStruct(varDsc))
+ {
+ CORINFO_CLASS_HANDLE typeHnd = varDsc->lvVerTypeInfo.GetClassHandle();
+ assert(typeHnd != nullptr);
+ SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc;
+ compiler->eeGetSystemVAmd64PassStructInRegisterDescriptor(typeHnd, &structDesc);
+ if (!structDesc.passedInRegisters)
+ {
+ // The var is not passed in registers.
+ continue;
+ }
+
+ unsigned firstRegSlot = 0;
+ for (unsigned slotCounter = 0; slotCounter < structDesc.eightByteCount; slotCounter++)
+ {
+ regNumber regNum = varDsc->lvRegNumForSlot(slotCounter);
+ var_types regType;
+
+#ifdef FEATURE_SIMD
+ // Assumption 1:
+ // RyuJit backend depends on the assumption that on 64-Bit targets Vector3 size is rounded off
+ // to TARGET_POINTER_SIZE and hence Vector3 locals on stack can be treated as TYP_SIMD16 for
+ // reading and writing purposes. Hence while homing a Vector3 type arg on stack we should
+ // home entire 16-bytes so that the upper-most 4-bytes will be zeroed when written to stack.
+ //
+ // Assumption 2:
+ // RyuJit backend is making another implicit assumption that Vector3 type args when passed in
+ // registers or on stack, the upper most 4-bytes will be zero.
+ //
+                // For P/Invoke return and Reverse P/Invoke argument passing, the native compiler doesn't
+                // guarantee that the upper 4 bytes of a Vector3 type struct are zero initialized, and hence
+                // assumption 2 is invalid.
+                //
+                // RyuJIT x64 Windows: arguments are treated as passed by ref and hence read/written just 12
+                // bytes. In case of Vector3 returns, the caller allocates a zero-initialized Vector3 local and
+                // passes it as the retBuf arg, and the callee writes only 12 bytes to retBuf. For this reason,
+                // there is no need to clear the upper 4 bytes of Vector3 type args.
+                //
+                // RyuJIT x64 Unix: arguments are treated as passed by value and read/written as if TYP_SIMD16.
+                // Vector3 values are returned in two return registers and the caller assembles them into a
+                // single xmm reg. Hence RyuJIT explicitly generates code to clear the upper 4 bytes of Vector3
+                // type args in the prolog and of the Vector3 type return value of a call.
+
+ if (varDsc->lvType == TYP_SIMD12)
+ {
+ regType = TYP_DOUBLE;
+ }
+ else
+#endif
+ {
+ regType = compiler->GetEightByteType(structDesc, slotCounter);
+ }
+
+ regArgNum = genMapRegNumToRegArgNum(regNum, regType);
+
+ if ((!doingFloat && (structDesc.IsIntegralSlot(slotCounter))) ||
+ (doingFloat && (structDesc.IsSseSlot(slotCounter))))
+ {
+ // Store the reg for the first slot.
+ if (slots == 0)
+ {
+ firstRegSlot = regArgNum;
+ }
+
+ // Bingo - add it to our table
+ noway_assert(regArgNum < argMax);
+ noway_assert(regArgTab[regArgNum].slot == 0); // we better not have added it already (there better
+ // not be multiple vars representing this argument
+ // register)
+ regArgTab[regArgNum].varNum = varNum;
+ regArgTab[regArgNum].slot = (char)(slotCounter + 1);
+ regArgTab[regArgNum].type = regType;
+ slots++;
+ }
+ }
+
+ if (slots == 0)
+ {
+ continue; // Nothing to do for this regState set.
+ }
+
+ regArgNum = firstRegSlot;
+ }
+ else
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ {
+ // Bingo - add it to our table
+ regArgNum = genMapRegNumToRegArgNum(varDsc->lvArgReg, regType);
+
+ noway_assert(regArgNum < argMax);
+ // We better not have added it already (there better not be multiple vars representing this argument
+ // register)
+ noway_assert(regArgTab[regArgNum].slot == 0);
+
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ // Set the register type.
+ regArgTab[regArgNum].type = regType;
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+
+ regArgTab[regArgNum].varNum = varNum;
+ regArgTab[regArgNum].slot = 1;
+
+ slots = 1;
+
+#if FEATURE_MULTIREG_ARGS
+ if (compiler->lvaIsMultiregStruct(varDsc))
+ {
+ if (varDsc->lvIsHfaRegArg())
+ {
+ // We have an HFA argument, set slots to the number of registers used
+ slots = varDsc->lvHfaSlots();
+ }
+ else
+ {
+ // Currently all non-HFA multireg structs are two registers in size (i.e. two slots)
+ assert(varDsc->lvSize() == (2 * TARGET_POINTER_SIZE));
+ // We have a non-HFA multireg argument, set slots to two
+ slots = 2;
+ }
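+
+                // For illustration (hypothetical): an HFA of four floats passed in s0-s3 would
+                // set slots = 4, while a 16-byte non-HFA struct passed in x0/x1 sets slots = 2.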
+
+ // Note that regArgNum+1 represents an argument index not an actual argument register.
+ // see genMapRegArgNumToRegNum(unsigned argNum, var_types type)
+
+ // This is the setup for the rest of a multireg struct arg
+
+ for (int i = 1; i < slots; i++)
+ {
+ noway_assert((regArgNum + i) < argMax);
+
+ // We better not have added it already (there better not be multiple vars representing this argument
+ // register)
+ noway_assert(regArgTab[regArgNum + i].slot == 0);
+
+ regArgTab[regArgNum + i].varNum = varNum;
+ regArgTab[regArgNum + i].slot = (char)(i + 1);
+ }
+ }
+#endif // FEATURE_MULTIREG_ARGS
+ }
+
+#ifdef _TARGET_ARM_
+ int lclSize = compiler->lvaLclSize(varNum);
+
+ if (lclSize > REGSIZE_BYTES)
+ {
+ unsigned maxRegArgNum = doingFloat ? MAX_FLOAT_REG_ARG : MAX_REG_ARG;
+ slots = lclSize / REGSIZE_BYTES;
+ if (regArgNum + slots > maxRegArgNum)
+ {
+ slots = maxRegArgNum - regArgNum;
+ }
+ }
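+
+        // For illustration (hypothetical): a 12-byte struct whose first register is r1 yields
+        // slots = 3 (r1, r2, r3); if its first register were r3, slots would be clipped to 1
+        // and the remainder of the struct would be passed on the stack.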
+ C_ASSERT((char)MAX_REG_ARG == MAX_REG_ARG);
+ assert(slots < INT8_MAX);
+ for (char i = 1; i < slots; i++)
+ {
+ regArgTab[regArgNum + i].varNum = varNum;
+ regArgTab[regArgNum + i].slot = i + 1;
+ }
+#endif // _TARGET_ARM_
+
+ for (int i = 0; i < slots; i++)
+ {
+ regType = regArgTab[regArgNum + i].getRegType(compiler);
+ regNumber regNum = genMapRegArgNumToRegNum(regArgNum + i, regType);
+
+#if !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+            // On System V targets, lvArgReg could be an INT or FLOAT reg, so the following assertion
+            // wouldn't hold there: the type of the register depends on the classification of the first
+            // eightbyte of the struct. For information on classification refer to the System V x86_64
+            // ABI at: http://www.x86-64.org/documentation/abi.pdf
+
+            assert((i > 0) || (regNum == varDsc->lvArgReg));
+#endif // !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ // Is the arg dead on entry to the method ?
+
+ if ((regArgMaskLive & genRegMask(regNum)) == 0)
+ {
+ if (varDsc->lvTrackedNonStruct())
+ {
+ noway_assert(!VarSetOps::IsMember(compiler, compiler->fgFirstBB->bbLiveIn, varDsc->lvVarIndex));
+ }
+ else
+ {
+#ifdef _TARGET_X86_
+ noway_assert(varDsc->lvType == TYP_STRUCT);
+#else // !_TARGET_X86_
+#ifndef LEGACY_BACKEND
+ // For LSRA, it may not be in regArgMaskLive if it has a zero
+ // refcnt. This is in contrast with the non-LSRA case in which all
+ // non-tracked args are assumed live on entry.
+ noway_assert((varDsc->lvRefCnt == 0) || (varDsc->lvType == TYP_STRUCT) ||
+ (varDsc->lvAddrExposed && compiler->info.compIsVarArgs));
+#else // LEGACY_BACKEND
+ noway_assert(
+ varDsc->lvType == TYP_STRUCT ||
+ (varDsc->lvAddrExposed && (compiler->info.compIsVarArgs || compiler->opts.compUseSoftFP)));
+#endif // LEGACY_BACKEND
+#endif // !_TARGET_X86_
+ }
+ // Mark it as processed and be done with it
+ regArgTab[regArgNum + i].processed = true;
+ goto NON_DEP;
+ }
+
+#ifdef _TARGET_ARM_
+            // On ARM, when the varDsc is a struct arg (or pre-spilled due to varargs) the initReg/xtraReg
+            // could be equal to lvArgReg. The pre-spilled registers are not considered live either, since
+            // they've already been spilled.
+ //
+ if ((regSet.rsMaskPreSpillRegs(false) & genRegMask(regNum)) == 0)
+#endif // _TARGET_ARM_
+ {
+ noway_assert(xtraReg != varDsc->lvArgReg + i);
+ noway_assert(regArgMaskLive & genRegMask(regNum));
+ }
+
+ regArgTab[regArgNum + i].processed = false;
+
+ /* mark stack arguments since we will take care of those first */
+ regArgTab[regArgNum + i].stackArg = (varDsc->lvIsInReg()) ? false : true;
+
+ /* If it goes on the stack or in a register that doesn't hold
+ * an argument anymore -> CANNOT form a circular dependency */
+
+ if (varDsc->lvIsInReg() && (genRegMask(regNum) & regArgMaskLive))
+ {
+ /* will trash another argument -> possible dependency
+ * We may need several passes after the table is constructed
+ * to decide on that */
+
+ /* Maybe the argument stays in the register (IDEAL) */
+
+ if ((i == 0) && (varDsc->lvRegNum == regNum))
+ {
+ goto NON_DEP;
+ }
+
+#if !defined(_TARGET_64BIT_)
+ if ((i == 1) && varTypeIsStruct(varDsc) && (varDsc->lvOtherReg == regNum))
+ {
+ goto NON_DEP;
+ }
+ if ((i == 1) && (genActualType(varDsc->TypeGet()) == TYP_LONG) && (varDsc->lvOtherReg == regNum))
+ {
+ goto NON_DEP;
+ }
+
+ if ((i == 1) && (genActualType(varDsc->TypeGet()) == TYP_DOUBLE) &&
+ (REG_NEXT(varDsc->lvRegNum) == regNum))
+ {
+ goto NON_DEP;
+ }
+#endif // !defined(_TARGET_64BIT_)
+ regArgTab[regArgNum + i].circular = true;
+ }
+ else
+ {
+ NON_DEP:
+ regArgTab[regArgNum + i].circular = false;
+
+ /* mark the argument register as free */
+ regArgMaskLive &= ~genRegMask(regNum);
+ }
+ }
+ }
+
+ /* Find the circular dependencies for the argument registers, if any.
+ * A circular dependency is a set of registers R1, R2, ..., Rn
+ * such that R1->R2 (that is, R1 needs to be moved to R2), R2->R3, ..., Rn->R1 */
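+
+    /* For illustration (hypothetical): if argument 'a' arrives in r0 but is allocated to r1,
+     * while argument 'b' arrives in r1 but is allocated to r0, then r0->r1 and r1->r0 form a
+     * two-element cycle that the sieve below will leave marked as circular. */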
+
+ bool change = true;
+ if (regArgMaskLive)
+ {
+ /* Possible circular dependencies still exist; the previous pass was not enough
+ * to filter them out. Use a "sieve" strategy to find all circular dependencies. */
+
+ while (change)
+ {
+ change = false;
+
+ for (argNum = 0; argNum < argMax; argNum++)
+ {
+ // If we already marked the argument as non-circular then continue
+
+ if (!regArgTab[argNum].circular)
+ {
+ continue;
+ }
+
+ if (regArgTab[argNum].slot == 0) // Not a register argument
+ {
+ continue;
+ }
+
+ varNum = regArgTab[argNum].varNum;
+ noway_assert(varNum < compiler->lvaCount);
+ varDsc = compiler->lvaTable + varNum;
+ noway_assert(varDsc->lvIsParam && varDsc->lvIsRegArg);
+
+ /* cannot possibly have stack arguments */
+ noway_assert(varDsc->lvIsInReg());
+ noway_assert(!regArgTab[argNum].stackArg);
+
+ var_types regType = regArgTab[argNum].getRegType(compiler);
+ regNumber regNum = genMapRegArgNumToRegNum(argNum, regType);
+
+ regNumber destRegNum = REG_NA;
+ if (regArgTab[argNum].slot == 1)
+ {
+ destRegNum = varDsc->lvRegNum;
+ }
+#if FEATURE_MULTIREG_ARGS && defined(FEATURE_SIMD) && defined(_TARGET_AMD64_)
+ else
+ {
+ assert(regArgTab[argNum].slot == 2);
+ assert(argNum > 0);
+ assert(regArgTab[argNum - 1].slot == 1);
+ assert(regArgTab[argNum - 1].varNum == varNum);
+ assert((varDsc->lvType == TYP_SIMD12) || (varDsc->lvType == TYP_SIMD16));
+ regArgMaskLive &= ~genRegMask(regNum);
+ regArgTab[argNum].circular = false;
+ change = true;
+ continue;
+ }
+#elif !defined(_TARGET_64BIT_)
+ else if (regArgTab[argNum].slot == 2 && genActualType(varDsc->TypeGet()) == TYP_LONG)
+ {
+ destRegNum = varDsc->lvOtherReg;
+ }
+ else
+ {
+ assert(regArgTab[argNum].slot == 2);
+ assert(varDsc->TypeGet() == TYP_DOUBLE);
+ destRegNum = REG_NEXT(varDsc->lvRegNum);
+ }
+#endif // !defined(_TARGET_64BIT_)
+ noway_assert(destRegNum != REG_NA);
+ if (genRegMask(destRegNum) & regArgMaskLive)
+ {
+ /* we are trashing a live argument register - record it */
+ unsigned destRegArgNum = genMapRegNumToRegArgNum(destRegNum, regType);
+ noway_assert(destRegArgNum < argMax);
+ regArgTab[destRegArgNum].trashBy = argNum;
+ }
+ else
+ {
+ /* argument goes to a free register */
+ regArgTab[argNum].circular = false;
+ change = true;
+
+ /* mark the argument register as free */
+ regArgMaskLive &= ~genRegMask(regNum);
+ }
+ }
+ }
+ }
+
+ /* At this point, everything that has the "circular" flag
+ * set to "true" forms a circular dependency */
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef DEBUG
+ if (regArgMaskLive)
+ {
+ if (verbose)
+ {
+            printf("Circular dependencies found while homing the incoming arguments.\n");
+ }
+ }
+#endif
+
+ // LSRA allocates registers to incoming parameters in order and will not overwrite
+ // a register still holding a live parameter.
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifndef LEGACY_BACKEND
+ noway_assert(((regArgMaskLive & RBM_FLTARG_REGS) == 0) &&
+ "Homing of float argument registers with circular dependencies not implemented.");
+#endif // !LEGACY_BACKEND
+
+ /* Now move the arguments to their locations.
+ * First consider ones that go on the stack since they may
+ * free some registers. */
+
+ regArgMaskLive = regState->rsCalleeRegArgMaskLiveIn; // reset the live in to what it was at the start
+ for (argNum = 0; argNum < argMax; argNum++)
+ {
+ emitAttr size;
+
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ // If this is the wrong register file, just continue.
+ if (regArgTab[argNum].type == TYP_UNDEF)
+ {
+ // This could happen if the reg in regArgTab[argNum] is of the other register file -
+ // for System V register passed structs where the first reg is GPR and the second an XMM reg.
+ // The next register file processing will process it.
+ continue;
+ }
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+        // If the arg has already been processed (e.g. it was dead on entry to the method), skip it
+
+ if (regArgTab[argNum].processed)
+ {
+ continue;
+ }
+
+ if (regArgTab[argNum].slot == 0) // Not a register argument
+ {
+ continue;
+ }
+
+ varNum = regArgTab[argNum].varNum;
+ noway_assert(varNum < compiler->lvaCount);
+ varDsc = compiler->lvaTable + varNum;
+
+#ifndef _TARGET_64BIT_
+ // If not a stack arg go to the next one
+ if (varDsc->lvType == TYP_LONG)
+ {
+ if (regArgTab[argNum].slot == 1 && !regArgTab[argNum].stackArg)
+ {
+ continue;
+ }
+ else if (varDsc->lvOtherReg != REG_STK)
+ {
+ continue;
+ }
+ }
+ else
+#endif // !_TARGET_64BIT_
+ {
+ // If not a stack arg go to the next one
+ if (!regArgTab[argNum].stackArg)
+ {
+ continue;
+ }
+ }
+
+#if defined(_TARGET_ARM_)
+ if (varDsc->lvType == TYP_DOUBLE)
+ {
+ if (regArgTab[argNum].slot == 2)
+ {
+ // We handled the entire double when processing the first half (slot == 1)
+ continue;
+ }
+ }
+#endif
+
+ noway_assert(regArgTab[argNum].circular == false);
+
+ noway_assert(varDsc->lvIsParam);
+ noway_assert(varDsc->lvIsRegArg);
+ noway_assert(varDsc->lvIsInReg() == false ||
+ (varDsc->lvType == TYP_LONG && varDsc->lvOtherReg == REG_STK && regArgTab[argNum].slot == 2));
+
+ var_types storeType = TYP_UNDEF;
+ unsigned slotSize = TARGET_POINTER_SIZE;
+
+ if (varTypeIsStruct(varDsc))
+ {
+ storeType = TYP_I_IMPL; // Default store type for a struct type is a pointer sized integer
+#if FEATURE_MULTIREG_ARGS
+ // Must be <= MAX_PASS_MULTIREG_BYTES or else it wouldn't be passed in registers
+ noway_assert(varDsc->lvSize() <= MAX_PASS_MULTIREG_BYTES);
+#endif // FEATURE_MULTIREG_ARGS
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ storeType = regArgTab[argNum].type;
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+ if (varDsc->lvIsHfaRegArg())
+ {
+#ifdef _TARGET_ARM_
+ // On ARM32 the storeType for HFA args is always TYP_FLOAT
+ storeType = TYP_FLOAT;
+ slotSize = (unsigned)emitActualTypeSize(storeType);
+#else // _TARGET_ARM64_
+ storeType = genActualType(varDsc->GetHfaType());
+ slotSize = (unsigned)emitActualTypeSize(storeType);
+#endif // _TARGET_ARM64_
+ }
+ }
+ else // Not a struct type
+ {
+ storeType = genActualType(varDsc->TypeGet());
+ }
+ size = emitActualTypeSize(storeType);
+#ifdef _TARGET_X86_
+ noway_assert(genTypeSize(storeType) == TARGET_POINTER_SIZE);
+#endif //_TARGET_X86_
+
+ regNumber srcRegNum = genMapRegArgNumToRegNum(argNum, storeType);
+
+        // Stack argument - if the ref count is 0, we don't care about it
+
+ if (!varDsc->lvOnFrame)
+ {
+ noway_assert(varDsc->lvRefCnt == 0);
+ }
+ else
+ {
+ // Since slot is typically 1, baseOffset is typically 0
+ int baseOffset = (regArgTab[argNum].slot - 1) * slotSize;
+
+ getEmitter()->emitIns_S_R(ins_Store(storeType), size, srcRegNum, varNum, baseOffset);
+
+#ifndef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ // Check if we are writing past the end of the struct
+ if (varTypeIsStruct(varDsc))
+ {
+ assert(varDsc->lvSize() >= baseOffset + (unsigned)size);
+ }
+#endif // !FEATURE_UNIX_AMD64_STRUCT_PASSING
+
+ if (regArgTab[argNum].slot == 1)
+ {
+ psiMoveToStack(varNum);
+ }
+ }
+
+ /* mark the argument as processed */
+
+ regArgTab[argNum].processed = true;
+ regArgMaskLive &= ~genRegMask(srcRegNum);
+
+#if defined(_TARGET_ARM_)
+ if (storeType == TYP_DOUBLE)
+ {
+ regArgTab[argNum + 1].processed = true;
+ regArgMaskLive &= ~genRegMask(REG_NEXT(srcRegNum));
+ }
+#endif
+ }
+
+ /* Process any circular dependencies */
+ if (regArgMaskLive)
+ {
+ unsigned begReg, destReg, srcReg;
+ unsigned varNumDest, varNumSrc;
+ LclVarDsc* varDscDest;
+ LclVarDsc* varDscSrc;
+ instruction insCopy = INS_mov;
+
+ if (doingFloat)
+ {
+#if defined(FEATURE_HFA) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ insCopy = ins_Copy(TYP_DOUBLE);
+ // Compute xtraReg here when we have a float argument
+ assert(xtraReg == REG_NA);
+
+ regMaskTP fpAvailMask;
+
+ fpAvailMask = RBM_FLT_CALLEE_TRASH & ~regArgMaskLive;
+#if defined(FEATURE_HFA)
+ fpAvailMask &= RBM_ALLDOUBLE;
+#else
+#if !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+#error Error. Wrong architecture.
+#endif // !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+#endif // defined(FEATURE_HFA)
+
+ if (fpAvailMask == RBM_NONE)
+ {
+ fpAvailMask = RBM_ALLFLOAT & ~regArgMaskLive;
+#if defined(FEATURE_HFA)
+ fpAvailMask &= RBM_ALLDOUBLE;
+#else
+#if !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+#error Error. Wrong architecture.
+#endif // !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+#endif // defined(FEATURE_HFA)
+ }
+
+ assert(fpAvailMask != RBM_NONE);
+
+ // We pick the lowest avail register number
+ regMaskTP tempMask = genFindLowestBit(fpAvailMask);
+ xtraReg = genRegNumFromMask(tempMask);
+#elif defined(_TARGET_X86_)
+ // This case shouldn't occur on x86 since NYI gets converted to an assert
+ NYI("Homing circular FP registers via xtraReg");
+#endif
+ }
+
+ for (argNum = 0; argNum < argMax; argNum++)
+ {
+ // If not a circular dependency then continue
+ if (!regArgTab[argNum].circular)
+ {
+ continue;
+ }
+
+ // If already processed the dependency then continue
+
+ if (regArgTab[argNum].processed)
+ {
+ continue;
+ }
+
+ if (regArgTab[argNum].slot == 0) // Not a register argument
+ {
+ continue;
+ }
+
+ destReg = begReg = argNum;
+ srcReg = regArgTab[argNum].trashBy;
+
+ varNumDest = regArgTab[destReg].varNum;
+ noway_assert(varNumDest < compiler->lvaCount);
+ varDscDest = compiler->lvaTable + varNumDest;
+ noway_assert(varDscDest->lvIsParam && varDscDest->lvIsRegArg);
+
+ noway_assert(srcReg < argMax);
+ varNumSrc = regArgTab[srcReg].varNum;
+ noway_assert(varNumSrc < compiler->lvaCount);
+ varDscSrc = compiler->lvaTable + varNumSrc;
+ noway_assert(varDscSrc->lvIsParam && varDscSrc->lvIsRegArg);
+
+ emitAttr size = EA_PTRSIZE;
+
+#ifdef _TARGET_XARCH_
+ //
+ // The following code relies upon the target architecture having an
+ // 'xchg' instruction which directly swaps the values held in two registers.
+ // On the ARM architecture we do not have such an instruction.
+ //
+ if (destReg == regArgTab[srcReg].trashBy)
+ {
+ /* only 2 registers form the circular dependency - use "xchg" */
+
+ varNum = regArgTab[argNum].varNum;
+ noway_assert(varNum < compiler->lvaCount);
+ varDsc = compiler->lvaTable + varNum;
+ noway_assert(varDsc->lvIsParam && varDsc->lvIsRegArg);
+
+ noway_assert(genTypeSize(genActualType(varDscSrc->TypeGet())) <= REGSIZE_BYTES);
+
+ /* Set "size" to indicate GC if one and only one of
+ * the operands is a pointer
+ * RATIONALE: If both are pointers, nothing changes in
+ * the GC pointer tracking. If only one is a pointer we
+ * have to "swap" the registers in the GC reg pointer mask
+ */
+
+ if (varTypeGCtype(varDscSrc->TypeGet()) != varTypeGCtype(varDscDest->TypeGet()))
+ {
+ size = EA_GCREF;
+ }
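+
+                /* For illustration (hypothetical): swapping a TYP_REF argument with a TYP_INT
+                   argument is reported as EA_GCREF so the emitter's GC tracking moves the
+                   "holds a GC pointer" status to the other register; if both were TYP_REF,
+                   the GC reg pointer mask would be unchanged. */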
+
+ noway_assert(varDscDest->lvArgReg == varDscSrc->lvRegNum);
+
+ getEmitter()->emitIns_R_R(INS_xchg, size, varDscSrc->lvRegNum, varDscSrc->lvArgReg);
+ regTracker.rsTrackRegTrash(varDscSrc->lvRegNum);
+ regTracker.rsTrackRegTrash(varDscSrc->lvArgReg);
+
+ /* mark both arguments as processed */
+ regArgTab[destReg].processed = true;
+ regArgTab[srcReg].processed = true;
+
+ regArgMaskLive &= ~genRegMask(varDscSrc->lvArgReg);
+ regArgMaskLive &= ~genRegMask(varDscDest->lvArgReg);
+
+ psiMoveToReg(varNumSrc);
+ psiMoveToReg(varNumDest);
+ }
+ else
+#endif // _TARGET_XARCH_
+ {
+ var_types destMemType = varDscDest->TypeGet();
+
+#ifdef _TARGET_ARM_
+ bool cycleAllDouble = true; // assume the best
+
+ unsigned iter = begReg;
+ do
+ {
+ if (compiler->lvaTable[regArgTab[iter].varNum].TypeGet() != TYP_DOUBLE)
+ {
+ cycleAllDouble = false;
+ break;
+ }
+ iter = regArgTab[iter].trashBy;
+ } while (iter != begReg);
+
+ // We may treat doubles as floats for ARM because we could have partial circular
+ // dependencies of a float with a lo/hi part of the double. We mark the
+ // trashBy values for each slot of the double, so let the circular dependency
+ // logic work its way out for floats rather than doubles. If a cycle has all
+ // doubles, then optimize so that instead of two vmov.f32's to move a double,
+ // we can use one vmov.f64.
+ //
+ if (!cycleAllDouble && destMemType == TYP_DOUBLE)
+ {
+ destMemType = TYP_FLOAT;
+ }
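+
+                // For illustration (hypothetical): if a double arriving in s0/s1 (d0) moves to
+                // s2/s3 (d1) while a float arriving in s2 moves to s0, the cycle s0->s2->s0
+                // involves only half of the double, so cycleAllDouble is false and the double
+                // is shuffled as two TYP_FLOAT moves; a pure cycle d0->d1->d0 would instead be
+                // moved with vmov.f64 copies.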
+#endif // _TARGET_ARM_
+
+ if (destMemType == TYP_REF)
+ {
+ size = EA_GCREF;
+ }
+ else if (destMemType == TYP_BYREF)
+ {
+ size = EA_BYREF;
+ }
+ else if (destMemType == TYP_DOUBLE)
+ {
+ size = EA_8BYTE;
+ }
+ else if (destMemType == TYP_FLOAT)
+ {
+ size = EA_4BYTE;
+ }
+
+                /* move the dest reg (begReg) into the extra reg */
+
+ assert(xtraReg != REG_NA);
+
+ regNumber begRegNum = genMapRegArgNumToRegNum(begReg, destMemType);
+
+ getEmitter()->emitIns_R_R(insCopy, size, xtraReg, begRegNum);
+
+ regTracker.rsTrackRegCopy(xtraReg, begRegNum);
+
+ *pXtraRegClobbered = true;
+
+ psiMoveToReg(varNumDest, xtraReg);
+
+ /* start moving everything to its right place */
+
+ while (srcReg != begReg)
+ {
+ /* mov dest, src */
+
+ regNumber destRegNum = genMapRegArgNumToRegNum(destReg, destMemType);
+ regNumber srcRegNum = genMapRegArgNumToRegNum(srcReg, destMemType);
+
+ getEmitter()->emitIns_R_R(insCopy, size, destRegNum, srcRegNum);
+
+ regTracker.rsTrackRegCopy(destRegNum, srcRegNum);
+
+ /* mark 'src' as processed */
+ noway_assert(srcReg < argMax);
+ regArgTab[srcReg].processed = true;
+#ifdef _TARGET_ARM_
+ if (size == EA_8BYTE)
+ regArgTab[srcReg + 1].processed = true;
+#endif
+ regArgMaskLive &= ~genMapArgNumToRegMask(srcReg, destMemType);
+
+ /* move to the next pair */
+ destReg = srcReg;
+ srcReg = regArgTab[srcReg].trashBy;
+
+ varDscDest = varDscSrc;
+ destMemType = varDscDest->TypeGet();
+#ifdef _TARGET_ARM_
+ if (!cycleAllDouble && destMemType == TYP_DOUBLE)
+ {
+ destMemType = TYP_FLOAT;
+ }
+#endif
+ varNumSrc = regArgTab[srcReg].varNum;
+ noway_assert(varNumSrc < compiler->lvaCount);
+ varDscSrc = compiler->lvaTable + varNumSrc;
+ noway_assert(varDscSrc->lvIsParam && varDscSrc->lvIsRegArg);
+
+ if (destMemType == TYP_REF)
+ {
+ size = EA_GCREF;
+ }
+ else if (destMemType == TYP_DOUBLE)
+ {
+ size = EA_8BYTE;
+ }
+ else
+ {
+ size = EA_4BYTE;
+ }
+ }
+
+ /* take care of the beginning register */
+
+ noway_assert(srcReg == begReg);
+
+                /* move the saved value of the beginning register (now in xtraReg) into its final destination */
+
+ regNumber destRegNum = genMapRegArgNumToRegNum(destReg, destMemType);
+
+ getEmitter()->emitIns_R_R(insCopy, size, destRegNum, xtraReg);
+
+ regTracker.rsTrackRegCopy(destRegNum, xtraReg);
+
+ psiMoveToReg(varNumSrc);
+
+ /* mark the beginning register as processed */
+
+ regArgTab[srcReg].processed = true;
+#ifdef _TARGET_ARM_
+ if (size == EA_8BYTE)
+ regArgTab[srcReg + 1].processed = true;
+#endif
+ regArgMaskLive &= ~genMapArgNumToRegMask(srcReg, destMemType);
+ }
+ }
+ }
+
+ /* Finally take care of the remaining arguments that must be enregistered */
+ while (regArgMaskLive)
+ {
+ regMaskTP regArgMaskLiveSave = regArgMaskLive;
+
+ for (argNum = 0; argNum < argMax; argNum++)
+ {
+ /* If already processed go to the next one */
+ if (regArgTab[argNum].processed)
+ {
+ continue;
+ }
+
+            if (regArgTab[argNum].slot == 0) // Not a register argument
+            {
+                continue;
+            }
+
+ varNum = regArgTab[argNum].varNum;
+ noway_assert(varNum < compiler->lvaCount);
+ varDsc = compiler->lvaTable + varNum;
+ var_types regType = regArgTab[argNum].getRegType(compiler);
+ regNumber regNum = genMapRegArgNumToRegNum(argNum, regType);
+
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ if (regType == TYP_UNDEF)
+ {
+ // This could happen if the reg in regArgTab[argNum] is of the other register file -
+ // for System V register passed structs where the first reg is GPR and the second an XMM reg.
+ // The next register file processing will process it.
+ regArgMaskLive &= ~genRegMask(regNum);
+ continue;
+ }
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+
+ noway_assert(varDsc->lvIsParam && varDsc->lvIsRegArg);
+#ifndef _TARGET_64BIT_
+#ifndef _TARGET_ARM_
+            // On 32-bit targets other than ARM, every argument handled here is expected to be
+            // pointer sized (smaller types have been widened to their actual type). On ARM the
+            // assert doesn't apply, since a long can be passed in a register pair.
+            noway_assert(genTypeSize(genActualType(varDsc->TypeGet())) == sizeof(void*));
+#endif // !_TARGET_ARM_
+#endif // !_TARGET_64BIT_
+
+ noway_assert(varDsc->lvIsInReg() && !regArgTab[argNum].circular);
+
+ /* Register argument - hopefully it stays in the same register */
+ regNumber destRegNum = REG_NA;
+ var_types destMemType = varDsc->TypeGet();
+
+ if (regArgTab[argNum].slot == 1)
+ {
+ destRegNum = varDsc->lvRegNum;
+
+#ifdef _TARGET_ARM_
+ if (genActualType(destMemType) == TYP_DOUBLE && regArgTab[argNum + 1].processed)
+ {
+ // The second half of the double has already been processed! Treat this as a single.
+ destMemType = TYP_FLOAT;
+ }
+#endif // _TARGET_ARM_
+ }
+#ifndef _TARGET_64BIT_
+ else if (regArgTab[argNum].slot == 2 && genActualType(destMemType) == TYP_LONG)
+ {
+#ifndef LEGACY_BACKEND
+ assert(genActualType(varDsc->TypeGet()) == TYP_LONG || genActualType(varDsc->TypeGet()) == TYP_DOUBLE);
+ if (genActualType(varDsc->TypeGet()) == TYP_DOUBLE)
+ {
+ destRegNum = regNum;
+ }
+ else
+#endif // !LEGACY_BACKEND
+ destRegNum = varDsc->lvOtherReg;
+
+ assert(destRegNum != REG_STK);
+ }
+ else
+ {
+ assert(regArgTab[argNum].slot == 2);
+ assert(destMemType == TYP_DOUBLE);
+
+ // For doubles, we move the entire double using the argNum representing
+ // the first half of the double. There are two things we won't do:
+ // (1) move the double when the 1st half of the destination is free but the
+ // 2nd half is occupied, and (2) move the double when the 2nd half of the
+ // destination is free but the 1st half is occupied. Here we consider the
+ // case where the first half can't be moved initially because its target is
+ // still busy, but the second half can be moved. We wait until the entire
+ // double can be moved, if possible. For example, we have F0/F1 double moving to F2/F3,
+ // and F2 single moving to F16. When we process F0, its target F2 is busy,
+ // so we skip it on the first pass. When we process F1, its target F3 is
+ // available. However, we want to move F0/F1 all at once, so we skip it here.
+ // We process F2, which frees up F2. The next pass through, we process F0 and
+ // F2/F3 are empty, so we move it. Note that if half of a double is involved
+ // in a circularity with a single, then we will have already moved that half
+ // above, so we go ahead and move the remaining half as a single.
+ // Because there are no circularities left, we are guaranteed to terminate.
+
+ assert(argNum > 0);
+ assert(regArgTab[argNum - 1].slot == 1);
+
+ if (!regArgTab[argNum - 1].processed)
+ {
+                    // The first half of the double hasn't been processed yet; skip this half so
+                    // that the whole double can be processed together
+ continue;
+ }
+
+ // The first half of the double has been processed but the second half hasn't!
+ // This could happen for double F2/F3 moving to F0/F1, and single F0 moving to F2.
+ // In that case, there is a F0/F2 loop that is not a double-only loop. The circular
+ // dependency logic above will move them as singles, leaving just F3 to move. Treat
+ // it as a single to finish the shuffling.
+
+ destMemType = TYP_FLOAT;
+ destRegNum = REG_NEXT(varDsc->lvRegNum);
+ }
+#endif // !_TARGET_64BIT_
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) && defined(FEATURE_SIMD)
+ else
+ {
+ assert(regArgTab[argNum].slot == 2);
+ assert(argNum > 0);
+ assert(regArgTab[argNum - 1].slot == 1);
+ assert((varDsc->lvType == TYP_SIMD12) || (varDsc->lvType == TYP_SIMD16));
+ destRegNum = varDsc->lvRegNum;
+ noway_assert(regNum != destRegNum);
+ continue;
+ }
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) && defined(FEATURE_SIMD)
+ noway_assert(destRegNum != REG_NA);
+ if (destRegNum != regNum)
+ {
+ /* Cannot trash a currently live register argument.
+ * Skip this one until its target will be free
+ * which is guaranteed to happen since we have no circular dependencies. */
+
+ regMaskTP destMask = genRegMask(destRegNum);
+#ifdef _TARGET_ARM_
+ // Don't process the double until both halves of the destination are clear.
+ if (genActualType(destMemType) == TYP_DOUBLE)
+ {
+ assert((destMask & RBM_DBL_REGS) != 0);
+ destMask |= genRegMask(REG_NEXT(destRegNum));
+ }
+#endif
+
+ if (destMask & regArgMaskLive)
+ {
+ continue;
+ }
+
+ /* Move it to the new register */
+
+ emitAttr size = emitActualTypeSize(destMemType);
+
+ getEmitter()->emitIns_R_R(ins_Copy(destMemType), size, destRegNum, regNum);
+
+ psiMoveToReg(varNum);
+ }
+
+ /* mark the argument as processed */
+
+ assert(!regArgTab[argNum].processed);
+ regArgTab[argNum].processed = true;
+ regArgMaskLive &= ~genRegMask(regNum);
+#if FEATURE_MULTIREG_ARGS
+ int argRegCount = 1;
+#ifdef _TARGET_ARM_
+ if (genActualType(destMemType) == TYP_DOUBLE)
+ {
+ argRegCount = 2;
+ }
+#endif
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) && defined(FEATURE_SIMD)
+ if (varTypeIsStruct(varDsc) && argNum < (argMax - 1) && regArgTab[argNum + 1].slot == 2)
+ {
+ argRegCount = 2;
+ int nextArgNum = argNum + 1;
+ regNumber nextRegNum = genMapRegArgNumToRegNum(nextArgNum, regArgTab[nextArgNum].getRegType(compiler));
+ noway_assert(regArgTab[nextArgNum].varNum == varNum);
+ // Emit a shufpd with a 0 immediate, which preserves the 0th element of the dest reg
+ // and moves the 0th element of the src reg into the 1st element of the dest reg.
+ getEmitter()->emitIns_R_R_I(INS_shufpd, emitActualTypeSize(varDsc->lvType), destRegNum, nextRegNum, 0);
+ // Set destRegNum to regNum so that we skip the setting of the register below,
+ // but mark argNum as processed and clear regNum from the live mask.
+ destRegNum = regNum;
+ }
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) && defined(FEATURE_SIMD)
+ // Mark the rest of the argument registers corresponding to this multi-reg type as
+ // being processed and no longer live.
+ for (int regSlot = 1; regSlot < argRegCount; regSlot++)
+ {
+ int nextArgNum = argNum + regSlot;
+ assert(!regArgTab[nextArgNum].processed);
+ regArgTab[nextArgNum].processed = true;
+ regNumber nextRegNum = genMapRegArgNumToRegNum(nextArgNum, regArgTab[nextArgNum].getRegType(compiler));
+ regArgMaskLive &= ~genRegMask(nextRegNum);
+ }
+#endif // FEATURE_MULTIREG_ARGS
+ }
+
+ noway_assert(regArgMaskLiveSave != regArgMaskLive); // if it doesn't change, we have an infinite loop
+ }
+}
+#ifdef _PREFAST_
+#pragma warning(pop)
+#endif
+
+/*****************************************************************************
+ * If any incoming stack arguments live in registers, load them.
+ */
+void CodeGen::genEnregisterIncomingStackArgs()
+{
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("*************** In genEnregisterIncomingStackArgs()\n");
+ }
+#endif
+
+ assert(compiler->compGeneratingProlog);
+
+ unsigned varNum = 0;
+
+ for (LclVarDsc *varDsc = compiler->lvaTable; varNum < compiler->lvaCount; varNum++, varDsc++)
+ {
+ /* Is this variable a parameter? */
+
+ if (!varDsc->lvIsParam)
+ {
+ continue;
+ }
+
+        /* If it's a register argument then it's already been taken care of.
+           But, on ARM, when running under a profiler we would have pre-spilled a register
+           argument, and here we need to load it from its pre-spilled location.
+        */
+ bool isPrespilledForProfiling = false;
+#if defined(_TARGET_ARM_) && defined(PROFILING_SUPPORTED)
+ isPrespilledForProfiling =
+ compiler->compIsProfilerHookNeeded() && compiler->lvaIsPreSpilled(varNum, regSet.rsMaskPreSpillRegs(false));
+#endif
+
+ if (varDsc->lvIsRegArg && !isPrespilledForProfiling)
+ {
+ continue;
+ }
+
+ /* Has the parameter been assigned to a register? */
+
+ if (!varDsc->lvIsInReg())
+ {
+ continue;
+ }
+
+ var_types type = genActualType(varDsc->TypeGet());
+
+#if FEATURE_STACK_FP_X87
+ // Floating point locals are loaded onto the x86-FPU in the next section
+ if (varTypeIsFloating(type))
+ continue;
+#endif
+
+ /* Is the variable dead on entry */
+
+ if (!VarSetOps::IsMember(compiler, compiler->fgFirstBB->bbLiveIn, varDsc->lvVarIndex))
+ {
+ continue;
+ }
+
+ /* Load the incoming parameter into the register */
+
+ /* Figure out the home offset of the incoming argument */
+
+ regNumber regNum;
+ regNumber otherReg;
+
+#ifndef LEGACY_BACKEND
+#ifdef _TARGET_ARM_
+ if (type == TYP_LONG)
+ {
+ regPairNo regPair = varDsc->lvArgInitRegPair;
+ regNum = genRegPairLo(regPair);
+ otherReg = genRegPairHi(regPair);
+ }
+ else
+#endif // _TARGET_ARM
+ {
+ regNum = varDsc->lvArgInitReg;
+ otherReg = REG_NA;
+ }
+#else // LEGACY_BACKEND
+ regNum = varDsc->lvRegNum;
+ if (type == TYP_LONG)
+ {
+ otherReg = varDsc->lvOtherReg;
+ }
+ else
+ {
+ otherReg = REG_NA;
+ }
+#endif // LEGACY_BACKEND
+
+ assert(regNum != REG_STK);
+
+#ifndef _TARGET_64BIT_
+ if (type == TYP_LONG)
+ {
+ /* long - at least the low half must be enregistered */
+
+ getEmitter()->emitIns_R_S(ins_Load(TYP_INT), EA_4BYTE, regNum, varNum, 0);
+ regTracker.rsTrackRegTrash(regNum);
+
+ /* Is the upper half also enregistered? */
+
+ if (otherReg != REG_STK)
+ {
+ getEmitter()->emitIns_R_S(ins_Load(TYP_INT), EA_4BYTE, otherReg, varNum, sizeof(int));
+ regTracker.rsTrackRegTrash(otherReg);
+ }
+ }
+ else
+#endif // _TARGET_64BIT_
+ {
+ /* Loading a single register - this is the easy/common case */
+
+ getEmitter()->emitIns_R_S(ins_Load(type), emitTypeSize(type), regNum, varNum, 0);
+ regTracker.rsTrackRegTrash(regNum);
+ }
+
+ psiMoveToReg(varNum);
+ }
+}
+
+/*-------------------------------------------------------------------------
+ *
+ * We have to decide whether we're going to use block initialization
+ * in the prolog before we assign final stack offsets. This is because
+ * when using block initialization we may need additional callee-saved
+ * registers which need to be saved on the frame, thus increasing the
+ * frame size.
+ *
+ * We'll count the number of locals we have to initialize,
+ * and if there are lots of them we'll use block initialization.
+ * Thus, the local variable table must have accurate register location
+ * information for enregistered locals for their register state on entry
+ * to the function.
+ *
+ * At the same time we set lvMustInit for locals (enregistered or on stack)
+ * that must be initialized (e.g. when compInitMem is set, for untracked
+ * GC pointers, or when DFA is disabled).
+ */
+void CodeGen::genCheckUseBlockInit()
+{
+#ifndef LEGACY_BACKEND // this is called before codegen in RyuJIT backend
+ assert(!compiler->compGeneratingProlog);
+#else // LEGACY_BACKEND
+ assert(compiler->compGeneratingProlog);
+#endif // LEGACY_BACKEND
+
+ unsigned initStkLclCnt = 0; // The number of int-sized stack local variables that need to be initialized (variables
+ // larger than int count for more than 1).
+ unsigned largeGcStructs = 0; // The number of "large" structs with GC pointers. Used as part of the heuristic to
+ // determine whether to use block init.
+
+ unsigned varNum;
+ LclVarDsc* varDsc;
+
+ for (varNum = 0, varDsc = compiler->lvaTable; varNum < compiler->lvaCount; varNum++, varDsc++)
+ {
+ if (varDsc->lvIsParam)
+ {
+ continue;
+ }
+
+ if (!varDsc->lvIsInReg() && !varDsc->lvOnFrame)
+ {
+ noway_assert(varDsc->lvRefCnt == 0);
+ continue;
+ }
+
+ if (varNum == compiler->lvaInlinedPInvokeFrameVar || varNum == compiler->lvaStubArgumentVar)
+ {
+ continue;
+ }
+
+#if FEATURE_FIXED_OUT_ARGS
+ if (varNum == compiler->lvaPInvokeFrameRegSaveVar)
+ {
+ continue;
+ }
+ if (varNum == compiler->lvaOutgoingArgSpaceVar)
+ {
+ continue;
+ }
+#endif
+
+#if FEATURE_EH_FUNCLETS
+ // There's no need to force 0-initialization of the PSPSym, it will be
+ // initialized with a real value in the prolog
+ if (varNum == compiler->lvaPSPSym)
+ {
+ continue;
+ }
+#endif
+
+ if (compiler->lvaIsFieldOfDependentlyPromotedStruct(varDsc))
+ {
+ // For Compiler::PROMOTION_TYPE_DEPENDENT type of promotion, the whole struct should have been
+ // initialized by the parent struct. No need to set the lvMustInit bit in the
+ // field locals.
+ continue;
+ }
+
+ if (compiler->info.compInitMem || varTypeIsGC(varDsc->TypeGet()) || (varDsc->lvStructGcCount > 0) ||
+ varDsc->lvMustInit)
+ {
+ if (varDsc->lvTracked)
+ {
+ /* For uninitialized use of tracked variables, the liveness
+ * will bubble to the top (compiler->fgFirstBB) in fgInterBlockLocalVarLiveness()
+ */
+ if (varDsc->lvMustInit ||
+ VarSetOps::IsMember(compiler, compiler->fgFirstBB->bbLiveIn, varDsc->lvVarIndex))
+ {
+ /* This var must be initialized */
+
+ varDsc->lvMustInit = 1;
+
+                    /* If the variable lives on the stack it will be initialized
+                     * using rep stos - compute the total size to be zeroed */
+
+ if (varDsc->lvOnFrame)
+ {
+ if (!varDsc->lvRegister)
+ {
+#ifndef LEGACY_BACKEND
+ if (!varDsc->lvIsInReg())
+#endif // !LEGACY_BACKEND
+ {
+ // Var is completely on the stack, in the legacy JIT case, or
+ // on the stack at entry, in the RyuJIT case.
+ initStkLclCnt += (unsigned)roundUp(compiler->lvaLclSize(varNum)) / sizeof(int);
+ }
+ }
+ else
+ {
+ // Var is partially enregistered
+ noway_assert(genTypeSize(varDsc->TypeGet()) > sizeof(int) && varDsc->lvOtherReg == REG_STK);
+ initStkLclCnt += genTypeStSz(TYP_INT);
+ }
+ }
+ }
+ }
+
+ /* With compInitMem, all untracked vars will have to be init'ed */
+ /* VSW 102460 - Do not force initialization of compiler generated temps,
+ unless they are untracked GC type or structs that contain GC pointers */
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#if FEATURE_SIMD
+ // TODO-1stClassStructs
+ // This is here to duplicate previous behavior, where TYP_SIMD8 locals
+ // were not being re-typed correctly.
+ if ((!varDsc->lvTracked || (varDsc->lvType == TYP_STRUCT) || (varDsc->lvType == TYP_SIMD8)) &&
+#else // !FEATURE_SIMD
+ if ((!varDsc->lvTracked || (varDsc->lvType == TYP_STRUCT)) &&
+#endif // !FEATURE_SIMD
+ varDsc->lvOnFrame &&
+ (!varDsc->lvIsTemp || varTypeIsGC(varDsc->TypeGet()) || (varDsc->lvStructGcCount > 0)))
+ {
+ varDsc->lvMustInit = true;
+
+ initStkLclCnt += (unsigned)roundUp(compiler->lvaLclSize(varNum)) / sizeof(int);
+ }
+
+ continue;
+ }
+
+ /* Ignore if not a pointer variable or value class with a GC field */
+
+ if (!compiler->lvaTypeIsGC(varNum))
+ {
+ continue;
+ }
+
+#if CAN_DISABLE_DFA
+ /* If we don't know lifetimes of variables, must be conservative */
+
+ if (compiler->opts.MinOpts())
+ {
+ varDsc->lvMustInit = true;
+ noway_assert(!varDsc->lvRegister);
+ }
+ else
+#endif // CAN_DISABLE_DFA
+ {
+ if (!varDsc->lvTracked)
+ {
+ varDsc->lvMustInit = true;
+ }
+ }
+
+ /* Is this a 'must-init' stack pointer local? */
+
+ if (varDsc->lvMustInit && varDsc->lvOnFrame)
+ {
+ initStkLclCnt += varDsc->lvStructGcCount;
+ }
+
+ if ((compiler->lvaLclSize(varNum) > (3 * sizeof(void*))) && (largeGcStructs <= 4))
+ {
+ largeGcStructs++;
+ }
+ }
+
+ /* Don't forget about spill temps that hold pointers */
+
+ if (!TRACK_GC_TEMP_LIFETIMES)
+ {
+ assert(compiler->tmpAllFree());
+ for (TempDsc* tempThis = compiler->tmpListBeg(); tempThis != nullptr; tempThis = compiler->tmpListNxt(tempThis))
+ {
+ if (varTypeIsGC(tempThis->tdTempType()))
+ {
+ initStkLclCnt++;
+ }
+ }
+ }
+
+    // After debugging this further it was found that this logic is incorrect:
+    // it incorrectly assumes the stack slots are always 4 bytes (not necessarily the case),
+    // and it also double counts variables (we saw this in the debugger) around line 4829.
+    // Even though this doesn't pose a correctness problem, it can improperly decide to
+    // zero init the stack using a block operation instead of on a 'case by case' basis.
+ genInitStkLclCnt = initStkLclCnt;
+
+ /* If we have more than 4 untracked locals, use block initialization */
+    /* TODO-Review: If we have large structs, bias toward not using block initialization since
+       we waste all the other slots. Really need to compute the correct cost
+       and compare that against zeroing the slots individually */
+
+ genUseBlockInit = (genInitStkLclCnt > (largeGcStructs + 4));
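+
+    // For illustration (hypothetical numbers): with 12 untracked int-sized slots to zero and
+    // one large GC struct, genInitStkLclCnt = 12 > (1 + 4), so block init is chosen; with
+    // only 3 such slots we would zero them individually instead.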
+
+ if (genUseBlockInit)
+ {
+ regMaskTP maskCalleeRegArgMask = intRegState.rsCalleeRegArgMaskLiveIn;
+
+ // If there is a secret stub param, don't count it, as it will no longer
+ // be live when we do block init.
+ if (compiler->info.compPublishStubParam)
+ {
+ maskCalleeRegArgMask &= ~RBM_SECRET_STUB_PARAM;
+ }
+
+#ifdef _TARGET_XARCH_
+        // If we're going to use "REP STOS", remember that we will trash EDI.
+        // For fastcall we will have to save ECX and EAX, so reserve two
+        // extra callee-saved registers. This is better than pushing eax/ecx,
+        // because in the latter case we would mess up already-computed
+        // offsets on the stack (for ESP frames).
+ regSet.rsSetRegsModified(RBM_EDI);
+
+#ifdef UNIX_AMD64_ABI
+        // For register arguments we may have to save RCX (and RDI on Amd64 System V OSes).
+        // In such cases use the R12 and R13 registers instead.
+ if (maskCalleeRegArgMask & RBM_RCX)
+ {
+ regSet.rsSetRegsModified(RBM_R12);
+ }
+
+ if (maskCalleeRegArgMask & RBM_RDI)
+ {
+ regSet.rsSetRegsModified(RBM_R13);
+ }
+#else // !UNIX_AMD64_ABI
+ if (maskCalleeRegArgMask & RBM_ECX)
+ {
+ regSet.rsSetRegsModified(RBM_ESI);
+ }
+#endif // !UNIX_AMD64_ABI
+
+ if (maskCalleeRegArgMask & RBM_EAX)
+ {
+ regSet.rsSetRegsModified(RBM_EBX);
+ }
+
+#endif // _TARGET_XARCH_
+#ifdef _TARGET_ARM_
+ //
+        // On ARM, if we are using a block init to initialize, then we
+        // must force-spill R4/R5/R6 so that we can use them during
+        // the zero-initialization process.
+ //
+ int forceSpillRegCount = genCountBits(maskCalleeRegArgMask & ~regSet.rsMaskPreSpillRegs(false)) - 1;
+ if (forceSpillRegCount > 0)
+ regSet.rsSetRegsModified(RBM_R4);
+ if (forceSpillRegCount > 1)
+ regSet.rsSetRegsModified(RBM_R5);
+ if (forceSpillRegCount > 2)
+ regSet.rsSetRegsModified(RBM_R6);
+#endif // _TARGET_ARM_
+ }
+}
+
+/*-----------------------------------------------------------------------------
+ *
+ * Push any callee-saved registers we have used
+ */
+
+#if defined(_TARGET_ARM64_)
+void CodeGen::genPushCalleeSavedRegisters(regNumber initReg, bool* pInitRegZeroed)
+#else
+void CodeGen::genPushCalleeSavedRegisters()
+#endif
+{
+ assert(compiler->compGeneratingProlog);
+
+#if defined(_TARGET_XARCH_) && !FEATURE_STACK_FP_X87
+ // x86/x64 doesn't support push of xmm/ymm regs, therefore consider only integer registers for pushing onto stack
+ // here. Space for float registers to be preserved is stack allocated and saved as part of prolog sequence and not
+ // here.
+ regMaskTP rsPushRegs = regSet.rsGetModifiedRegsMask() & RBM_INT_CALLEE_SAVED;
+#else // !defined(_TARGET_XARCH_) || FEATURE_STACK_FP_X87
+ regMaskTP rsPushRegs = regSet.rsGetModifiedRegsMask() & RBM_CALLEE_SAVED;
+#endif
+
+#if ETW_EBP_FRAMED
+ if (!isFramePointerUsed() && regSet.rsRegsModified(RBM_FPBASE))
+ {
+ noway_assert(!"Used register RBM_FPBASE as a scratch register!");
+ }
+#endif
+
+#ifdef _TARGET_XARCH_
+ // On X86/X64 we have already pushed the FP (frame-pointer) prior to calling this method
+ if (isFramePointerUsed())
+ {
+ rsPushRegs &= ~RBM_FPBASE;
+ }
+#endif
+
+#ifdef _TARGET_ARMARCH_
+ // On ARM we push the FP (frame-pointer) here along with all other callee saved registers
+ if (isFramePointerUsed())
+ rsPushRegs |= RBM_FPBASE;
+
+ //
+    // It may be possible to skip pushing/popping lr for leaf methods. However, such an optimization would require
+ // changes in GC suspension architecture.
+ //
+ // We would need to guarantee that a tight loop calling a virtual leaf method can be suspended for GC. Today, we
+ // generate partially interruptible code for both the method that contains the tight loop with the call and the leaf
+ // method. GC suspension depends on return address hijacking in this case. Return address hijacking depends
+ // on the return address to be saved on the stack. If we skipped pushing/popping lr, the return address would never
+ // be saved on the stack and the GC suspension would time out.
+ //
+    // So if we wanted to skip pushing/popping lr for leaf frames, we would also need to do one of
+ // the following to make GC suspension work in the above scenario:
+ // - Make return address hijacking work even when lr is not saved on the stack.
+ // - Generate fully interruptible code for loops that contains calls
+ // - Generate fully interruptible code for leaf methods
+ //
+ // Given the limited benefit from this optimization (<10k for mscorlib NGen image), the extra complexity
+ // is not worth it.
+ //
+ rsPushRegs |= RBM_LR; // We must save the return address (in the LR register)
+
+ regSet.rsMaskCalleeSaved = rsPushRegs;
+#endif // _TARGET_ARMARCH_
+
+#ifdef DEBUG
+ if (compiler->compCalleeRegsPushed != genCountBits(rsPushRegs))
+ {
+ printf("Error: unexpected number of callee-saved registers to push. Expected: %d. Got: %d ",
+ compiler->compCalleeRegsPushed, genCountBits(rsPushRegs));
+ dspRegMask(rsPushRegs);
+ printf("\n");
+ assert(compiler->compCalleeRegsPushed == genCountBits(rsPushRegs));
+ }
+#endif // DEBUG
+
+#if defined(_TARGET_ARM_)
+ regMaskTP maskPushRegsFloat = rsPushRegs & RBM_ALLFLOAT;
+ regMaskTP maskPushRegsInt = rsPushRegs & ~maskPushRegsFloat;
+
+ maskPushRegsInt |= genStackAllocRegisterMask(compiler->compLclFrameSize, maskPushRegsFloat);
+
+ assert(FitsIn<int>(maskPushRegsInt));
+ inst_IV(INS_push, (int)maskPushRegsInt);
+ compiler->unwindPushMaskInt(maskPushRegsInt);
+
+ if (maskPushRegsFloat != 0)
+ {
+ genPushFltRegs(maskPushRegsFloat);
+ compiler->unwindPushMaskFloat(maskPushRegsFloat);
+ }
+#elif defined(_TARGET_ARM64_)
+    // See the documents "ARM64 JIT Frame Layout" and/or "ARM64 Exception Data" for more details on requirements
+    // and options. Case numbers in comments here refer to those documents.
+ //
+ // For most frames, generate, e.g.:
+ // stp fp, lr, [sp,-0x80]! // predecrement SP with full frame size, and store FP/LR pair. Store pair
+ // // ensures stack stays aligned.
+ // stp r19, r20, [sp, 0x60] // store at positive offset from SP established above, into callee-saved area
+ // // at top of frame (highest addresses).
+ // stp r21, r22, [sp, 0x70]
+ //
+ // Notes:
+ // 1. We don't always need to save FP. If FP isn't saved, then LR is saved with the other callee-saved registers
+ // at the top of the frame.
+ // 2. If we save FP, then the first store is FP, LR.
+ // 3. General-purpose registers are 8 bytes, floating-point registers are 16 bytes, but FP/SIMD registers only
+ // preserve their lower 8 bytes, by calling convention.
+ // 4. For frames with varargs, we spill the integer register arguments to the stack, so all the arguments are
+ // consecutive.
+ // 5. We allocate the frame here; no further changes to SP are allowed (except in the body, for localloc).
+
+ int totalFrameSize = genTotalFrameSize();
+
+ int offset; // This will be the starting place for saving the callee-saved registers, in increasing order.
+
+ regMaskTP maskSaveRegsFloat = rsPushRegs & RBM_ALLFLOAT;
+ regMaskTP maskSaveRegsInt = rsPushRegs & ~maskSaveRegsFloat;
+
+ if (compiler->info.compIsVarArgs)
+ {
+ assert(maskSaveRegsFloat == RBM_NONE);
+ }
+
+ int frameType = 0; // This number is arbitrary, is defined below, and corresponds to one of the frame styles we
+ // generate based on various sizes.
+ int calleeSaveSPDelta = 0;
+ int calleeSaveSPDeltaUnaligned = 0;
+
+ if (isFramePointerUsed())
+ {
+ // We need to save both FP and LR.
+
+ assert((maskSaveRegsInt & RBM_FP) != 0);
+ assert((maskSaveRegsInt & RBM_LR) != 0);
+
+ if ((compiler->lvaOutgoingArgSpaceSize == 0) && (totalFrameSize < 512))
+ {
+ // Case #1.
+ //
+ // Generate:
+ // stp fp,lr,[sp,#-framesz]!
+ //
+ // The (totalFrameSize < 512) condition ensures that both the predecrement
+ // and the postincrement of SP can occur with STP.
+ //
+ // After saving callee-saved registers, we establish the frame pointer with:
+ // mov fp,sp
+ // We do this *after* saving callee-saved registers, so the prolog/epilog unwind codes mostly match.
+
+ frameType = 1;
+
+ getEmitter()->emitIns_R_R_R_I(INS_stp, EA_PTRSIZE, REG_FP, REG_LR, REG_SPBASE, -totalFrameSize,
+ INS_OPTS_PRE_INDEX);
+ compiler->unwindSaveRegPairPreindexed(REG_FP, REG_LR, -totalFrameSize);
+
+ maskSaveRegsInt &= ~(RBM_FP | RBM_LR); // We've already saved FP/LR
+ offset = (int)compiler->compLclFrameSize + 2 * REGSIZE_BYTES; // 2 for FP/LR
+ }
+ else if (totalFrameSize <= 512)
+ {
+ // Case #2.
+ //
+ // Generate:
+ // sub sp,sp,#framesz
+ // stp fp,lr,[sp,#outsz] // note that by necessity, #outsz <= #framesz - 16, so #outsz <= 496.
+ //
+ // The (totalFrameSize <= 512) condition ensures the callee-saved registers can all be saved using STP with
+ // signed offset encoding.
+ //
+ // After saving callee-saved registers, we establish the frame pointer with:
+ // add fp,sp,#outsz
+ // We do this *after* saving callee-saved registers, so the prolog/epilog unwind codes mostly match.
+
+ frameType = 2;
+
+ assert(compiler->lvaOutgoingArgSpaceSize + 2 * REGSIZE_BYTES <= (unsigned)totalFrameSize);
+
+ getEmitter()->emitIns_R_R_I(INS_sub, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, totalFrameSize);
+ compiler->unwindAllocStack(totalFrameSize);
+
+ getEmitter()->emitIns_R_R_R_I(INS_stp, EA_PTRSIZE, REG_FP, REG_LR, REG_SPBASE,
+ compiler->lvaOutgoingArgSpaceSize);
+ compiler->unwindSaveRegPair(REG_FP, REG_LR, compiler->lvaOutgoingArgSpaceSize);
+
+ maskSaveRegsInt &= ~(RBM_FP | RBM_LR); // We've already saved FP/LR
+ offset = (int)compiler->compLclFrameSize + 2 * REGSIZE_BYTES; // 2 for FP/LR
+ }
+ else
+ {
+ // Case 5 or 6.
+ //
+ // First, the callee-saved registers will be saved, and the callee-saved register code must use pre-index
+ // to subtract from SP as the first instruction. It must also leave space for varargs registers to be
+ // stored. For example:
+ // stp r19,r20,[sp,#-96]!
+ // stp d8,d9,[sp,#16]
+ // ... save varargs incoming integer registers ...
+ // Note that all SP alterations must be 16-byte aligned. We have already calculated any alignment to be
+ // lower on the stack than the callee-saved registers (see lvaAlignFrame() for how we calculate alignment).
+ // So, if there is an odd number of callee-saved registers, we use (for example, with just one saved
+ // register):
+ // sub sp,sp,#16
+ // str r19,[sp,#8]
+ // This is one additional instruction, but it centralizes the aligned space. Otherwise, it might be
+ // possible to have two 8-byte alignment padding words, one below the callee-saved registers, and one
+ // above them. If that is preferable, we could implement it.
+ // Note that any varargs saved space will always be 16-byte aligned, since there are 8 argument registers.
+ //
+ // Then, define #remainingFrameSz = #framesz - (callee-saved size + varargs space + possible alignment
+ // padding from above).
+ // Note that #remainingFrameSz must not be zero, since we still need to save FP,SP.
+ //
+ // Generate:
+ // sub sp,sp,#remainingFrameSz
+ // or, for large frames:
+ // mov rX, #remainingFrameSz // maybe multiple instructions
+ // sub sp,sp,rX
+ //
+ // followed by:
+ // stp fp,lr,[sp,#outsz]
+ // add fp,sp,#outsz
+ //
+ // However, we need to handle the case where #outsz is larger than the constant signed offset encoding can
+ // handle. And, once again, we might need to deal with #outsz that is not aligned to 16-bytes (i.e.,
+ // STACK_ALIGN). So, in the case of large #outsz we will have an additional SP adjustment, using one of the
+ // following sequences:
+ //
+ // Define #remainingFrameSz2 = #remainingFrameSz - #outsz.
+ //
+ // sub sp,sp,#remainingFrameSz2 // if #remainingFrameSz2 is 16-byte aligned
+ // stp fp,lr,[sp]
+ // mov fp,sp
+ // sub sp,sp,#outsz // in this case, #outsz must also be 16-byte aligned
+ //
+ // Or:
+ //
+ // sub sp,sp,roundUp(#remainingFrameSz2,16) // if #remainingFrameSz2 is not 16-byte aligned (it is
+ // // always guaranteed to be 8 byte aligned).
+ // stp fp,lr,[sp,#8] // it will always be #8 in the unaligned case
+ // add fp,sp,#8
+ // sub sp,sp,#outsz - #8
+ //
+ // (As usual, for a large constant "#outsz - #8", we might need multiple instructions:
+ // mov rX, #outsz - #8 // maybe multiple instructions
+ // sub sp,sp,rX
+ // )
+
+ frameType = 3;
+
+ calleeSaveSPDeltaUnaligned =
+ totalFrameSize - compiler->compLclFrameSize - 2 * REGSIZE_BYTES; // 2 for FP, LR which we'll save later.
+ assert(calleeSaveSPDeltaUnaligned >= 0);
+ assert((calleeSaveSPDeltaUnaligned % 8) == 0); // It better at least be 8 byte aligned.
+ calleeSaveSPDelta = AlignUp((UINT)calleeSaveSPDeltaUnaligned, STACK_ALIGN);
+
+ offset = calleeSaveSPDelta - calleeSaveSPDeltaUnaligned;
+ assert((offset == 0) || (offset == REGSIZE_BYTES)); // At most one alignment slot between SP and where we
+ // store the callee-saved registers.
+
+ // We'll take care of these later, but callee-saved regs code shouldn't see them.
+ maskSaveRegsInt &= ~(RBM_FP | RBM_LR);
+ }
+ }
+ else
+ {
+ // No frame pointer (no chaining).
+ assert((maskSaveRegsInt & RBM_FP) == 0);
+ assert((maskSaveRegsInt & RBM_LR) != 0);
+
+ // Note that there is no pre-indexed save_lrpair unwind code variant, so we can't allocate the frame using 'stp'
+ // if we only have one callee-saved register plus LR to save.
+
+ NYI("Frame without frame pointer");
+ offset = 0;
+ }
+
+ assert(frameType != 0);
+
+ genSaveCalleeSavedRegistersHelp(maskSaveRegsInt | maskSaveRegsFloat, offset, -calleeSaveSPDelta);
+
+ offset += genCountBits(maskSaveRegsInt | maskSaveRegsFloat) * REGSIZE_BYTES;
+
+ // For varargs, home the incoming arg registers last. Note that there is nothing to unwind here,
+ // so we just report "NOP" unwind codes. If there's no more frame setup after this, we don't
+ // need to add codes at all.
+
+ if (compiler->info.compIsVarArgs)
+ {
+ // There are 8 general-purpose registers to home, thus 'offset' must be 16-byte aligned here.
+ assert((offset % 16) == 0);
+ for (regNumber reg1 = REG_ARG_FIRST; reg1 < REG_ARG_LAST; reg1 = REG_NEXT(REG_NEXT(reg1)))
+ {
+ regNumber reg2 = REG_NEXT(reg1);
+ // stp REG, REG + 1, [SP, #offset]
+ getEmitter()->emitIns_R_R_R_I(INS_stp, EA_PTRSIZE, reg1, reg2, REG_SPBASE, offset);
+ compiler->unwindNop();
+ offset += 2 * REGSIZE_BYTES;
+ }
+ }
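+
+    // For illustration (hypothetical): if offset were 80 on entry to the varargs loop above,
+    // it would emit:
+    //   stp x0, x1, [sp, #80]
+    //   stp x2, x3, [sp, #96]
+    //   stp x4, x5, [sp, #112]
+    //   stp x6, x7, [sp, #128]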
+
+ if (frameType == 1)
+ {
+ getEmitter()->emitIns_R_R(INS_mov, EA_PTRSIZE, REG_FPBASE, REG_SPBASE);
+ compiler->unwindSetFrameReg(REG_FPBASE, 0);
+ }
+ else if (frameType == 2)
+ {
+ getEmitter()->emitIns_R_R_I(INS_add, EA_PTRSIZE, REG_FPBASE, REG_SPBASE, compiler->lvaOutgoingArgSpaceSize);
+ compiler->unwindSetFrameReg(REG_FPBASE, compiler->lvaOutgoingArgSpaceSize);
+ }
+ else if (frameType == 3)
+ {
+ int remainingFrameSz = totalFrameSize - calleeSaveSPDelta;
+ assert(remainingFrameSz > 0);
+ assert((remainingFrameSz % 16) == 0); // this is guaranteed to be 16-byte aligned because each component --
+ // totalFrameSize and calleeSaveSPDelta -- is 16-byte aligned.
+
+ if (compiler->lvaOutgoingArgSpaceSize >= 504)
+ {
+ // We can't do "stp fp,lr,[sp,#outsz]" because #outsz is too big.
+ // If compiler->lvaOutgoingArgSpaceSize is not aligned, we need to align the SP adjustment.
+ assert(remainingFrameSz > (int)compiler->lvaOutgoingArgSpaceSize);
+ int spAdjustment2Unaligned = remainingFrameSz - compiler->lvaOutgoingArgSpaceSize;
+ int spAdjustment2 = (int)roundUp((size_t)spAdjustment2Unaligned, STACK_ALIGN);
+ int alignmentAdjustment2 = spAdjustment2 - spAdjustment2Unaligned;
+ assert((alignmentAdjustment2 == 0) || (alignmentAdjustment2 == 8));
+
+ genPrologSaveRegPair(REG_FP, REG_LR, alignmentAdjustment2, -spAdjustment2, false, initReg, pInitRegZeroed);
+ offset += spAdjustment2;
+
+ // Now subtract off the #outsz (or the rest of the #outsz if it was unaligned, and the above "sub" included
+ // some of it)
+
+ int spAdjustment3 = compiler->lvaOutgoingArgSpaceSize - alignmentAdjustment2;
+ assert(spAdjustment3 > 0);
+ assert((spAdjustment3 % 16) == 0);
+
+ getEmitter()->emitIns_R_R_I(INS_add, EA_PTRSIZE, REG_FPBASE, REG_SPBASE, alignmentAdjustment2);
+ compiler->unwindSetFrameReg(REG_FPBASE, alignmentAdjustment2);
+
+ genStackPointerAdjustment(-spAdjustment3, initReg, pInitRegZeroed);
+ offset += spAdjustment3;
+ }
+ else
+ {
+ genPrologSaveRegPair(REG_FP, REG_LR, compiler->lvaOutgoingArgSpaceSize, -remainingFrameSz, false, initReg,
+ pInitRegZeroed);
+ offset += remainingFrameSz;
+
+ getEmitter()->emitIns_R_R_I(INS_add, EA_PTRSIZE, REG_FPBASE, REG_SPBASE, compiler->lvaOutgoingArgSpaceSize);
+ compiler->unwindSetFrameReg(REG_FPBASE, compiler->lvaOutgoingArgSpaceSize);
+ }
+ }
+
+ assert(offset == totalFrameSize);
+
+#elif defined(_TARGET_XARCH_)
+ // Push backwards so we match the order we will pop them in the epilog
+ // and all the other code that expects it to be in this order.
+ for (regNumber reg = REG_INT_LAST; rsPushRegs != RBM_NONE; reg = REG_PREV(reg))
+ {
+ regMaskTP regBit = genRegMask(reg);
+
+ if ((regBit & rsPushRegs) != 0)
+ {
+ inst_RV(INS_push, reg, TYP_REF);
+ compiler->unwindPush(reg);
+
+ if (!doubleAlignOrFramePointerUsed())
+ {
+ psiAdjustStackLevel(REGSIZE_BYTES);
+ }
+
+ rsPushRegs &= ~regBit;
+ }
+ }
+
+#else
+ assert(!"Unknown TARGET");
+#endif // _TARGET_*
+}
+
+/*-----------------------------------------------------------------------------
+ *
+ * Probe the stack and allocate the local stack frame: subtract from SP.
+ * On ARM64, this only does the probing; allocating the frame is done when callee-saved registers are saved.
+ */
+
+void CodeGen::genAllocLclFrame(unsigned frameSize, regNumber initReg, bool* pInitRegZeroed, regMaskTP maskArgRegsLiveIn)
+{
+ assert(compiler->compGeneratingProlog);
+
+ if (frameSize == 0)
+ {
+ return;
+ }
+
+ const size_t pageSize = compiler->eeGetPageSize();
+
+#ifdef _TARGET_ARM_
+ assert(!compiler->info.compPublishStubParam || (REG_SECRET_STUB_PARAM != initReg));
+#endif // _TARGET_ARM_
+
+#ifdef _TARGET_XARCH_
+ if (frameSize == REGSIZE_BYTES)
+ {
+ // Frame size is the same as register size.
+ inst_RV(INS_push, REG_EAX, TYP_I_IMPL);
+ }
+ else
+#endif // _TARGET_XARCH_
+ if (frameSize < pageSize)
+ {
+#ifndef _TARGET_ARM64_
+ // Frame size is (0x0008..0x1000)
+ inst_RV_IV(INS_sub, REG_SPBASE, frameSize, EA_PTRSIZE);
+#endif // !_TARGET_ARM64_
+ }
+ else if (frameSize < compiler->getVeryLargeFrameSize())
+ {
+ // Frame size is (0x1000..0x3000)
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#if CPU_LOAD_STORE_ARCH
+ instGen_Set_Reg_To_Imm(EA_PTRSIZE, initReg, -(ssize_t)pageSize);
+ getEmitter()->emitIns_R_R_R(INS_ldr, EA_4BYTE, initReg, REG_SPBASE, initReg);
+ regTracker.rsTrackRegTrash(initReg);
+ *pInitRegZeroed = false; // The initReg does not contain zero
+#else
+ getEmitter()->emitIns_AR_R(INS_TEST, EA_PTRSIZE, REG_EAX, REG_SPBASE, -(int)pageSize);
+#endif
+
+ if (frameSize >= 0x2000)
+ {
+#if CPU_LOAD_STORE_ARCH
+ instGen_Set_Reg_To_Imm(EA_PTRSIZE, initReg, -2 * (ssize_t)pageSize);
+ getEmitter()->emitIns_R_R_R(INS_ldr, EA_4BYTE, initReg, REG_SPBASE, initReg);
+ regTracker.rsTrackRegTrash(initReg);
+#else
+ getEmitter()->emitIns_AR_R(INS_TEST, EA_PTRSIZE, REG_EAX, REG_SPBASE, -2 * (int)pageSize);
+#endif
+ }
+
+#ifdef _TARGET_ARM64_
+ compiler->unwindPadding();
+#else // !_TARGET_ARM64_
+#if CPU_LOAD_STORE_ARCH
+ instGen_Set_Reg_To_Imm(EA_PTRSIZE, initReg, frameSize);
+ compiler->unwindPadding();
+ getEmitter()->emitIns_R_R_R(INS_sub, EA_4BYTE, REG_SPBASE, REG_SPBASE, initReg);
+#else
+ inst_RV_IV(INS_sub, REG_SPBASE, frameSize, EA_PTRSIZE);
+#endif
+#endif // !_TARGET_ARM64_
+ }
+ else
+ {
+ // Frame size >= 0x3000
+ assert(frameSize >= compiler->getVeryLargeFrameSize());
+
+ // Emit the following sequence to 'tickle' the pages.
+ // Note it is important that stack pointer not change until this is
+ // complete since the tickles could cause a stack overflow, and we
+ // need to be able to crawl the stack afterward (which means the
+ // stack pointer needs to be known).
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef _TARGET_XARCH_
+ bool pushedStubParam = false;
+ if (compiler->info.compPublishStubParam && (REG_SECRET_STUB_PARAM == initReg))
+ {
+ // push register containing the StubParam
+ inst_RV(INS_push, REG_SECRET_STUB_PARAM, TYP_I_IMPL);
+ pushedStubParam = true;
+ }
+#endif // _TARGET_XARCH_
+
+ instGen_Set_Reg_To_Zero(EA_PTRSIZE, initReg);
+
+ //
+ // Can't have a label inside the ReJIT padding area
+ //
+ genPrologPadForReJit();
+
+#if CPU_LOAD_STORE_ARCH
+
+ // TODO-ARM64-Bug?: set the availMask properly!
+ regMaskTP availMask =
+ (regSet.rsGetModifiedRegsMask() & RBM_ALLINT) | RBM_R12 | RBM_LR; // Set of available registers
+ availMask &= ~maskArgRegsLiveIn; // Remove all of the incoming argument registers as they are currently live
+ availMask &= ~genRegMask(initReg); // Remove the pre-calculated initReg
+
+ regNumber rOffset = initReg;
+ regNumber rLimit;
+ regNumber rTemp;
+ regMaskTP tempMask;
+
+ // We pick the next lowest register number for rTemp
+ noway_assert(availMask != RBM_NONE);
+ tempMask = genFindLowestBit(availMask);
+ rTemp = genRegNumFromMask(tempMask);
+ availMask &= ~tempMask;
+
+ // We pick the next lowest register number for rLimit
+ noway_assert(availMask != RBM_NONE);
+ tempMask = genFindLowestBit(availMask);
+ rLimit = genRegNumFromMask(tempMask);
+ availMask &= ~tempMask;
+
+ // TODO-LdStArch-Bug?: review this. The first time we load from [sp+0] which will always succeed. That doesn't
+ // make sense.
+ // TODO-ARM64-CQ: we could probably use ZR on ARM64 instead of rTemp.
+ //
+ // mov rLimit, -frameSize
+ // loop:
+ // ldr rTemp, [sp+rOffset]
+ // sub rOffset, 0x1000 // Note that 0x1000 on ARM32 uses the funky Thumb immediate encoding
+ // cmp rOffset, rLimit
+ // jge loop
+ noway_assert((ssize_t)(int)frameSize == (ssize_t)frameSize); // make sure framesize safely fits within an int
+ instGen_Set_Reg_To_Imm(EA_PTRSIZE, rLimit, -(int)frameSize);
+ getEmitter()->emitIns_R_R_R(INS_ldr, EA_4BYTE, rTemp, REG_SPBASE, rOffset);
+ regTracker.rsTrackRegTrash(rTemp);
+#if defined(_TARGET_ARM_)
+ getEmitter()->emitIns_R_I(INS_sub, EA_PTRSIZE, rOffset, pageSize);
+#elif defined(_TARGET_ARM64_)
+ getEmitter()->emitIns_R_R_I(INS_sub, EA_PTRSIZE, rOffset, rOffset, pageSize);
+#endif // _TARGET_ARM64_
+ getEmitter()->emitIns_R_R(INS_cmp, EA_PTRSIZE, rOffset, rLimit);
+ getEmitter()->emitIns_J(INS_bhi, NULL, -4);
+
+#else // !CPU_LOAD_STORE_ARCH
+
+ // Code size for each instruction. We need this because the
+ // backward branch is hard-coded with the number of bytes to branch.
+ // The encoding differs based on the architecture and what register is
+ // used (namely, using RAX has a smaller encoding).
+ //
+ // loop:
+ // For x86
+ // test [esp + eax], eax 3
+ // sub eax, 0x1000 5
+ // cmp EAX, -frameSize 5
+ // jge loop 2
+ //
+ // For AMD64 using RAX
+ // test [rsp + rax], rax 4
+ // sub rax, 0x1000 6
+ // cmp rax, -frameSize 6
+ // jge loop 2
+ //
+ // For AMD64 using RBP
+ // test [rsp + rbp], rbp 4
+ // sub rbp, 0x1000 7
+ // cmp rbp, -frameSize 7
+ // jge loop 2
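+            //
+            // E.g. on x86 with EAX the loop body totals 3 + 5 + 5 + 2 = 15 bytes, which gives the -15
+            // backward branch displacement used below; the AMD64 totals are 18 (RAX) and 20 (RBP).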
+
+ getEmitter()->emitIns_R_ARR(INS_TEST, EA_PTRSIZE, initReg, REG_SPBASE, initReg, 0);
+ inst_RV_IV(INS_sub, initReg, pageSize, EA_PTRSIZE);
+ inst_RV_IV(INS_cmp, initReg, -((ssize_t)frameSize), EA_PTRSIZE);
+
+ int bytesForBackwardJump;
+#ifdef _TARGET_AMD64_
+ assert((initReg == REG_EAX) || (initReg == REG_EBP)); // We use RBP as initReg for EH funclets.
+ bytesForBackwardJump = ((initReg == REG_EAX) ? -18 : -20);
+#else // !_TARGET_AMD64_
+ assert(initReg == REG_EAX);
+ bytesForBackwardJump = -15;
+#endif // !_TARGET_AMD64_
+
+ inst_IV(INS_jge, bytesForBackwardJump); // Branch backwards to start of loop
+
+#endif // !CPU_LOAD_STORE_ARCH
+
+ *pInitRegZeroed = false; // The initReg does not contain zero
+
+#ifdef _TARGET_XARCH_
+ if (pushedStubParam)
+ {
+ // pop eax
+ inst_RV(INS_pop, REG_SECRET_STUB_PARAM, TYP_I_IMPL);
+ regTracker.rsTrackRegTrash(REG_SECRET_STUB_PARAM);
+ }
+#endif // _TARGET_XARCH_
+
+#if CPU_LOAD_STORE_ARCH
+ compiler->unwindPadding();
+#endif
+
+#if CPU_LOAD_STORE_ARCH
+#ifndef _TARGET_ARM64_
+ inst_RV_RV(INS_add, REG_SPBASE, rLimit, TYP_I_IMPL);
+#endif // !_TARGET_ARM64_
+#else
+ // sub esp, frameSize 6
+ inst_RV_IV(INS_sub, REG_SPBASE, frameSize, EA_PTRSIZE);
+#endif
+ }
+
+#ifndef _TARGET_ARM64_
+ compiler->unwindAllocStack(frameSize);
+
+ if (!doubleAlignOrFramePointerUsed())
+ {
+ psiAdjustStackLevel(frameSize);
+ }
+#endif // !_TARGET_ARM64_
+}
+
+#if defined(_TARGET_ARM_)
+
+void CodeGen::genPushFltRegs(regMaskTP regMask)
+{
+    assert(regMask != 0); // Don't call unless we have some registers to push
+    assert((regMask & RBM_ALLFLOAT) == regMask); // Only floating point registers should be in regMask
+
+ regNumber lowReg = genRegNumFromMask(genFindLowestBit(regMask));
+ int slots = genCountBits(regMask);
+ // regMask should be contiguously set
+ regMaskTP tmpMask = ((regMask >> lowReg) + 1); // tmpMask should have a single bit set
+ assert((tmpMask & (tmpMask - 1)) == 0);
+ assert(lowReg == REG_F16); // Currently we expect to start at F16 in the unwind codes
+
+ // Our calling convention requires that we only use vpush for TYP_DOUBLE registers
+ noway_assert(floatRegCanHoldType(lowReg, TYP_DOUBLE));
+ noway_assert((slots % 2) == 0);
+
+ getEmitter()->emitIns_R_I(INS_vpush, EA_8BYTE, lowReg, slots / 2);
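+
+    // For example (illustrative): a regMask covering s16..s31 has lowReg == REG_F16 and slots == 16,
+    // so the line above emits a single "vpush {d8-d15}".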
+}
+
+void CodeGen::genPopFltRegs(regMaskTP regMask)
+{
+    assert(regMask != 0); // Don't call unless we have some registers to pop
+    assert((regMask & RBM_ALLFLOAT) == regMask); // Only floating point registers should be in regMask
+
+ regNumber lowReg = genRegNumFromMask(genFindLowestBit(regMask));
+ int slots = genCountBits(regMask);
+ // regMask should be contiguously set
+ regMaskTP tmpMask = ((regMask >> lowReg) + 1); // tmpMask should have a single bit set
+ assert((tmpMask & (tmpMask - 1)) == 0);
+
+ // Our calling convention requires that we only use vpop for TYP_DOUBLE registers
+ noway_assert(floatRegCanHoldType(lowReg, TYP_DOUBLE));
+ noway_assert((slots % 2) == 0);
+
+ getEmitter()->emitIns_R_I(INS_vpop, EA_8BYTE, lowReg, slots / 2);
+}
+
+/*-----------------------------------------------------------------------------
+ *
+ * If we have a jmp call, then the argument registers cannot be used in the
+ * epilog. So return the current call's argument registers as the argument
+ * registers for the jmp call.
+ */
+regMaskTP CodeGen::genJmpCallArgMask()
+{
+ assert(compiler->compGeneratingEpilog);
+
+ regMaskTP argMask = RBM_NONE;
+ for (unsigned varNum = 0; varNum < compiler->info.compArgsCount; ++varNum)
+ {
+ const LclVarDsc& desc = compiler->lvaTable[varNum];
+ if (desc.lvIsRegArg)
+ {
+ argMask |= genRegMask(desc.lvArgReg);
+ }
+ }
+ return argMask;
+}
+
+/*-----------------------------------------------------------------------------
+ *
+ * Free the local stack frame: add to SP.
+ * If epilog unwind hasn't been started, and we generate code, we start unwind
+ * and set *pUnwindStarted = true.
+ */
+
+void CodeGen::genFreeLclFrame(unsigned frameSize, /* IN OUT */ bool* pUnwindStarted, bool jmpEpilog)
+{
+ assert(compiler->compGeneratingEpilog);
+
+ if (frameSize == 0)
+ return;
+
+ // Add 'frameSize' to SP.
+ //
+ // Unfortunately, we can't just use:
+ //
+ // inst_RV_IV(INS_add, REG_SPBASE, frameSize, EA_PTRSIZE);
+ //
+ // because we need to generate proper unwind codes for each instruction generated,
+ // and large frame sizes might generate a temp register load which might
+ // need an unwind code. We don't want to generate a "NOP" code for this
+ // temp register load; we want the unwind codes to start after that.
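+    //
+    // For a large frame the expansion looks like this (illustrative, ARM):
+    //
+    //     movw rTmp, #frameSize_lo
+    //     movt rTmp, #frameSize_hi ; no unwind codes wanted for the immediate load
+    //     add  sp, sp, rTmp        ; the unwind codes must start here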
+
+ if (arm_Valid_Imm_For_Instr(INS_add, frameSize, INS_FLAGS_DONT_CARE))
+ {
+ if (!*pUnwindStarted)
+ {
+ compiler->unwindBegEpilog();
+ *pUnwindStarted = true;
+ }
+
+ getEmitter()->emitIns_R_I(INS_add, EA_PTRSIZE, REG_SPBASE, frameSize, INS_FLAGS_DONT_CARE);
+ }
+ else
+ {
+ regMaskTP grabMask = RBM_INT_CALLEE_TRASH;
+ if (jmpEpilog)
+ {
+ // Do not use argument registers as scratch registers in the jmp epilog.
+ grabMask &= ~genJmpCallArgMask();
+ }
+#ifndef LEGACY_BACKEND
+ regNumber tmpReg;
+ tmpReg = REG_TMP_0;
+#else // LEGACY_BACKEND
+ regNumber tmpReg = regSet.rsGrabReg(grabMask);
+#endif // LEGACY_BACKEND
+ instGen_Set_Reg_To_Imm(EA_PTRSIZE, tmpReg, frameSize);
+ if (*pUnwindStarted)
+ {
+ compiler->unwindPadding();
+ }
+
+ // We're going to generate an unwindable instruction, so check again if
+ // we need to start the unwind codes.
+
+ if (!*pUnwindStarted)
+ {
+ compiler->unwindBegEpilog();
+ *pUnwindStarted = true;
+ }
+
+ getEmitter()->emitIns_R_R(INS_add, EA_PTRSIZE, REG_SPBASE, tmpReg, INS_FLAGS_DONT_CARE);
+ }
+
+ compiler->unwindAllocStack(frameSize);
+}
+
+/*-----------------------------------------------------------------------------
+ *
+ * Returns register mask to push/pop to allocate a small stack frame,
+ * instead of using "sub sp" / "add sp". Returns RBM_NONE if either frame size
+ * is zero, or if we should use "sub sp" / "add sp" instead of push/pop.
+ */
+regMaskTP CodeGen::genStackAllocRegisterMask(unsigned frameSize, regMaskTP maskCalleeSavedFloat)
+{
+ assert(compiler->compGeneratingProlog || compiler->compGeneratingEpilog);
+
+    // We can't do this optimization with callee-saved floating point registers, because
+    // the stack would then be allocated in the wrong spot.
+ if (maskCalleeSavedFloat != RBM_NONE)
+ return RBM_NONE;
+
+    // Allocate space for small frames by pushing extra registers. This generates smaller and faster code
+    // than the extra "sub sp,XXX" / "add sp,XXX" would.
+    // R0 and R1 may be used for the return value. Keep things simple and just skip the optimization
+    // for the 3*REGSIZE_BYTES and 4*REGSIZE_BYTES cases. They are less common and they have more
+    // significant negative side-effects (more memory bus traffic).
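+    //
+    // For example, a one-register-sized frame can be allocated with "push {r3}" in the prolog and freed
+    // with "pop {r3}" in the epilog (the value pushed and popped is dead), instead of emitting
+    // "sub sp, sp, #4" / "add sp, sp, #4".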
+ switch (frameSize)
+ {
+ case REGSIZE_BYTES:
+ return RBM_R3;
+ case 2 * REGSIZE_BYTES:
+ return RBM_R2 | RBM_R3;
+ default:
+ return RBM_NONE;
+ }
+}
+
+#endif // _TARGET_ARM_
+
+#if !FEATURE_STACK_FP_X87
+
+/*****************************************************************************
+ *
+ * initFltRegs -- The mask of float regs to be zeroed.
+ * initDblRegs -- The mask of double regs to be zeroed.
+ * initReg -- A zero initialized integer reg to copy from.
+ *
+ * Does a best effort to copy between VFP/xmm regs if one is already
+ * initialized to 0 (ARM only); otherwise it copies from the integer
+ * register, which is slower.
+ */
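+
+// For example (illustrative), an ARM prolog that must zero s0, s1 and d2 might emit:
+//     vmov s0, rZero          ; copy zero from the integer init register
+//     vmov s1, s0             ; later float regs copy from the first zeroed float reg
+//     vcvt.f64.f32 d2, s0     ; a double reg can be zeroed by converting an already-zeroed float reg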
+void CodeGen::genZeroInitFltRegs(const regMaskTP& initFltRegs, const regMaskTP& initDblRegs, const regNumber& initReg)
+{
+ assert(compiler->compGeneratingProlog);
+
+ // The first float/double reg that is initialized to 0. So they can be used to
+ // initialize the remaining registers.
+ regNumber fltInitReg = REG_NA;
+ regNumber dblInitReg = REG_NA;
+
+ // Iterate through float/double registers and initialize them to 0 or
+ // copy from already initialized register of the same type.
+ regMaskTP regMask = genRegMask(REG_FP_FIRST);
+ for (regNumber reg = REG_FP_FIRST; reg <= REG_FP_LAST; reg = REG_NEXT(reg), regMask <<= 1)
+ {
+ if (regMask & initFltRegs)
+ {
+ // Do we have a float register already set to 0?
+ if (fltInitReg != REG_NA)
+ {
+ // Copy from float.
+ inst_RV_RV(ins_Copy(TYP_FLOAT), reg, fltInitReg, TYP_FLOAT);
+ }
+ else
+ {
+#ifdef _TARGET_ARM_
+ // Do we have a double register initialized to 0?
+ if (dblInitReg != REG_NA)
+ {
+ // Copy from double.
+ inst_RV_RV(INS_vcvt_d2f, reg, dblInitReg, TYP_FLOAT);
+ }
+ else
+ {
+ // Copy from int.
+ inst_RV_RV(INS_vmov_i2f, reg, initReg, TYP_FLOAT, EA_4BYTE);
+ }
+#elif defined(_TARGET_XARCH_)
+                // "xorpd xmmreg, xmmreg" is the fastest way to set a float register to zero, faster than
+                // moving the constant 0.0f. Though we only need to initialize the low 32 bits, we use
+                // xorpd to zero all 64 bits of the xmm register so that it can also be used to zero
+                // initialize xmm registers that hold double values.
+ inst_RV_RV(INS_xorpd, reg, reg, TYP_DOUBLE);
+ dblInitReg = reg;
+#elif defined(_TARGET_ARM64_)
+ NYI("Initialize floating-point register to zero");
+#else // _TARGET_*
+#error Unsupported or unset target architecture
+#endif
+ fltInitReg = reg;
+ }
+ }
+ else if (regMask & initDblRegs)
+ {
+ // Do we have a double register already set to 0?
+ if (dblInitReg != REG_NA)
+ {
+ // Copy from double.
+ inst_RV_RV(ins_Copy(TYP_DOUBLE), reg, dblInitReg, TYP_DOUBLE);
+ }
+ else
+ {
+#ifdef _TARGET_ARM_
+ // Do we have a float register initialized to 0?
+ if (fltInitReg != REG_NA)
+ {
+ // Copy from float.
+ inst_RV_RV(INS_vcvt_f2d, reg, fltInitReg, TYP_DOUBLE);
+ }
+ else
+ {
+ // Copy from int.
+ inst_RV_RV_RV(INS_vmov_i2d, reg, initReg, initReg, EA_8BYTE);
+ }
+#elif defined(_TARGET_XARCH_)
+                // "xorpd xmmreg, xmmreg" is the fastest way to set a double register to zero, faster than
+                // moving the constant 0.0. The lower 32 bits of 'reg' can then also be used to zero
+                // initialize xmm registers that hold float values.
+ inst_RV_RV(INS_xorpd, reg, reg, TYP_DOUBLE);
+ fltInitReg = reg;
+#elif defined(_TARGET_ARM64_)
+ // We will just zero out the entire vector register. This sets it to a double zero value
+ getEmitter()->emitIns_R_I(INS_movi, EA_16BYTE, reg, 0x00, INS_OPTS_16B);
+#else // _TARGET_*
+#error Unsupported or unset target architecture
+#endif
+ dblInitReg = reg;
+ }
+ }
+ }
+}
+#endif // !FEATURE_STACK_FP_X87
+
+/*-----------------------------------------------------------------------------
+ *
+ * Restore any callee-saved registers we have used
+ */
+
+#if defined(_TARGET_ARM_)
+
+bool CodeGen::genCanUsePopToReturn(regMaskTP maskPopRegsInt, bool jmpEpilog)
+{
+ assert(compiler->compGeneratingEpilog);
+
+ if (!jmpEpilog && regSet.rsMaskPreSpillRegs(true) == RBM_NONE)
+ return true;
+ else
+ return false;
+}
+
+void CodeGen::genPopCalleeSavedRegisters(bool jmpEpilog)
+{
+ assert(compiler->compGeneratingEpilog);
+
+ regMaskTP maskPopRegs = regSet.rsGetModifiedRegsMask() & RBM_CALLEE_SAVED;
+ regMaskTP maskPopRegsFloat = maskPopRegs & RBM_ALLFLOAT;
+ regMaskTP maskPopRegsInt = maskPopRegs & ~maskPopRegsFloat;
+
+ // First, pop float registers
+
+ if (maskPopRegsFloat != RBM_NONE)
+ {
+ genPopFltRegs(maskPopRegsFloat);
+ compiler->unwindPopMaskFloat(maskPopRegsFloat);
+ }
+
+ // Next, pop integer registers
+
+ if (!jmpEpilog)
+ {
+ regMaskTP maskStackAlloc = genStackAllocRegisterMask(compiler->compLclFrameSize, maskPopRegsFloat);
+ maskPopRegsInt |= maskStackAlloc;
+ }
+
+ if (isFramePointerUsed())
+ {
+ assert(!regSet.rsRegsModified(RBM_FPBASE));
+ maskPopRegsInt |= RBM_FPBASE;
+ }
+
+ if (genCanUsePopToReturn(maskPopRegsInt, jmpEpilog))
+ {
+ maskPopRegsInt |= RBM_PC;
+ // Record the fact that we use a pop to the PC to perform the return
+ genUsedPopToReturn = true;
+ }
+ else
+ {
+ maskPopRegsInt |= RBM_LR;
+ // Record the fact that we did not use a pop to the PC to perform the return
+ genUsedPopToReturn = false;
+ }
+
+ assert(FitsIn<int>(maskPopRegsInt));
+ inst_IV(INS_pop, (int)maskPopRegsInt);
+ compiler->unwindPopMaskInt(maskPopRegsInt);
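+
+    // For example (illustrative): with r4-r6 and r11 saved, the pop above is "pop {r4-r6,r11,pc}" when a
+    // pop-to-PC return is possible, or "pop {r4-r6,r11,lr}" (followed later by "bx lr") when it is not.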
+}
+
+#elif defined(_TARGET_ARM64_)
+
+void CodeGen::genPopCalleeSavedRegistersAndFreeLclFrame(bool jmpEpilog)
+{
+ assert(compiler->compGeneratingEpilog);
+
+ regMaskTP rsRestoreRegs = regSet.rsGetModifiedRegsMask() & RBM_CALLEE_SAVED;
+
+ if (isFramePointerUsed())
+ {
+ rsRestoreRegs |= RBM_FPBASE;
+ }
+
+ rsRestoreRegs |= RBM_LR; // We must save/restore the return address (in the LR register)
+
+ regMaskTP regsToRestoreMask = rsRestoreRegs;
+
+ int totalFrameSize = genTotalFrameSize();
+
+ int calleeSaveSPOffset; // This will be the starting place for restoring the callee-saved registers, in decreasing
+ // order.
+ int frameType = 0; // An indicator of what type of frame we are popping.
+ int calleeSaveSPDelta = 0;
+ int calleeSaveSPDeltaUnaligned = 0;
+
+ if (isFramePointerUsed())
+ {
+ if ((compiler->lvaOutgoingArgSpaceSize == 0) && (totalFrameSize < 512))
+ {
+ frameType = 1;
+ if (compiler->compLocallocUsed)
+ {
+ // Restore sp from fp
+ // mov sp, fp
+ inst_RV_RV(INS_mov, REG_SPBASE, REG_FPBASE);
+ compiler->unwindSetFrameReg(REG_FPBASE, 0);
+ }
+
+ regsToRestoreMask &= ~(RBM_FP | RBM_LR); // We'll restore FP/LR at the end, and post-index SP.
+
+            // Compute the callee-save SP offset: the callee-saved registers sit at the top of the local
+            // frame, while FP/LR are saved at the bottom of the stack.
+ calleeSaveSPOffset = compiler->compLclFrameSize + 2 * REGSIZE_BYTES;
+ }
+ else if (totalFrameSize <= 512)
+ {
+ frameType = 2;
+ if (compiler->compLocallocUsed)
+ {
+ // Restore sp from fp
+ // sub sp, fp, #outsz
+ getEmitter()->emitIns_R_R_I(INS_sub, EA_PTRSIZE, REG_SPBASE, REG_FPBASE,
+ compiler->lvaOutgoingArgSpaceSize);
+ compiler->unwindSetFrameReg(REG_FPBASE, compiler->lvaOutgoingArgSpaceSize);
+ }
+
+ regsToRestoreMask &= ~(RBM_FP | RBM_LR); // We'll restore FP/LR at the end, and post-index SP.
+
+            // Compute the callee-save SP offset: the callee-saved registers sit at the top of the local
+            // frame, while FP/LR are saved at the bottom of the stack.
+ calleeSaveSPOffset = compiler->compLclFrameSize + 2 * REGSIZE_BYTES;
+ }
+ else
+ {
+ frameType = 3;
+
+ calleeSaveSPDeltaUnaligned = totalFrameSize - compiler->compLclFrameSize -
+ 2 * REGSIZE_BYTES; // 2 for FP, LR which we'll restore later.
+ assert(calleeSaveSPDeltaUnaligned >= 0);
+            assert((calleeSaveSPDeltaUnaligned % 8) == 0); // It had better be at least 8-byte aligned.
+ calleeSaveSPDelta = AlignUp((UINT)calleeSaveSPDeltaUnaligned, STACK_ALIGN);
+
+ regsToRestoreMask &= ~(RBM_FP | RBM_LR); // We'll restore FP/LR at the end, and (hopefully) post-index SP.
+
+ int remainingFrameSz = totalFrameSize - calleeSaveSPDelta;
+ assert(remainingFrameSz > 0);
+
+ if (compiler->lvaOutgoingArgSpaceSize >= 504)
+ {
+ // We can't do "ldp fp,lr,[sp,#outsz]" because #outsz is too big.
+ // If compiler->lvaOutgoingArgSpaceSize is not aligned, we need to align the SP adjustment.
+ assert(remainingFrameSz > (int)compiler->lvaOutgoingArgSpaceSize);
+ int spAdjustment2Unaligned = remainingFrameSz - compiler->lvaOutgoingArgSpaceSize;
+ int spAdjustment2 = (int)roundUp((size_t)spAdjustment2Unaligned, STACK_ALIGN);
+ int alignmentAdjustment2 = spAdjustment2 - spAdjustment2Unaligned;
+ assert((alignmentAdjustment2 == 0) || (alignmentAdjustment2 == REGSIZE_BYTES));
+
+ if (compiler->compLocallocUsed)
+ {
+ // Restore sp from fp. No need to update sp after this since we've set up fp before adjusting sp in
+ // prolog.
+ // sub sp, fp, #alignmentAdjustment2
+ getEmitter()->emitIns_R_R_I(INS_sub, EA_PTRSIZE, REG_SPBASE, REG_FPBASE, alignmentAdjustment2);
+ compiler->unwindSetFrameReg(REG_FPBASE, alignmentAdjustment2);
+ }
+ else
+ {
+ // Generate:
+ // add sp,sp,#outsz ; if #outsz is not 16-byte aligned, we need to be more
+ // ; careful
+ int spAdjustment3 = compiler->lvaOutgoingArgSpaceSize - alignmentAdjustment2;
+ assert(spAdjustment3 > 0);
+ assert((spAdjustment3 % 16) == 0);
+ genStackPointerAdjustment(spAdjustment3, REG_IP0, nullptr);
+ }
+
+ // Generate:
+ // ldp fp,lr,[sp]
+ // add sp,sp,#remainingFrameSz
+ genEpilogRestoreRegPair(REG_FP, REG_LR, alignmentAdjustment2, spAdjustment2, REG_IP0, nullptr);
+ }
+ else
+ {
+ if (compiler->compLocallocUsed)
+ {
+ // Restore sp from fp
+ // sub sp, fp, #outsz
+ getEmitter()->emitIns_R_R_I(INS_sub, EA_PTRSIZE, REG_SPBASE, REG_FPBASE,
+ compiler->lvaOutgoingArgSpaceSize);
+ compiler->unwindSetFrameReg(REG_FPBASE, compiler->lvaOutgoingArgSpaceSize);
+ }
+
+ // Generate:
+ // ldp fp,lr,[sp,#outsz]
+ // add sp,sp,#remainingFrameSz ; might need to load this constant in a scratch register if
+ // ; it's large
+
+ genEpilogRestoreRegPair(REG_FP, REG_LR, compiler->lvaOutgoingArgSpaceSize, remainingFrameSz, REG_IP0,
+ nullptr);
+ }
+
+            // Unlike frameType=1 and frameType=2, which restore SP at the end, frameType=3 has already
+            // adjusted SP above to remove the local frame.
+ // There is at most one alignment slot between SP and where we store the callee-saved registers.
+ calleeSaveSPOffset = calleeSaveSPDelta - calleeSaveSPDeltaUnaligned;
+ assert((calleeSaveSPOffset == 0) || (calleeSaveSPOffset == REGSIZE_BYTES));
+ }
+ }
+ else
+ {
+ // No frame pointer (no chaining).
+ NYI("Frame without frame pointer");
+ calleeSaveSPOffset = 0;
+ }
+
+ genRestoreCalleeSavedRegistersHelp(regsToRestoreMask, calleeSaveSPOffset, calleeSaveSPDelta);
+
+ if (frameType == 1)
+ {
+ // Generate:
+ // ldp fp,lr,[sp],#framesz
+
+ getEmitter()->emitIns_R_R_R_I(INS_ldp, EA_PTRSIZE, REG_FP, REG_LR, REG_SPBASE, totalFrameSize,
+ INS_OPTS_POST_INDEX);
+ compiler->unwindSaveRegPairPreindexed(REG_FP, REG_LR, -totalFrameSize);
+ }
+ else if (frameType == 2)
+ {
+ // Generate:
+    // ldp fp,lr,[sp,#outsz]
+ // add sp,sp,#framesz
+
+ getEmitter()->emitIns_R_R_R_I(INS_ldp, EA_PTRSIZE, REG_FP, REG_LR, REG_SPBASE,
+ compiler->lvaOutgoingArgSpaceSize);
+ compiler->unwindSaveRegPair(REG_FP, REG_LR, compiler->lvaOutgoingArgSpaceSize);
+
+ getEmitter()->emitIns_R_R_I(INS_add, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, totalFrameSize);
+ compiler->unwindAllocStack(totalFrameSize);
+ }
+ else if (frameType == 3)
+ {
+ // Nothing to do after restoring callee-saved registers.
+ }
+ else
+ {
+ unreached();
+ }
+}
+
+#elif defined(_TARGET_XARCH_) && !FEATURE_STACK_FP_X87
+
+void CodeGen::genPopCalleeSavedRegisters(bool jmpEpilog)
+{
+ assert(compiler->compGeneratingEpilog);
+
+ unsigned popCount = 0;
+ if (regSet.rsRegsModified(RBM_EBX))
+ {
+ popCount++;
+ inst_RV(INS_pop, REG_EBX, TYP_I_IMPL);
+ }
+ if (regSet.rsRegsModified(RBM_FPBASE))
+ {
+ // EBP cannot be directly modified for EBP frame and double-aligned frames
+ assert(!doubleAlignOrFramePointerUsed());
+
+ popCount++;
+ inst_RV(INS_pop, REG_EBP, TYP_I_IMPL);
+ }
+
+#ifndef UNIX_AMD64_ABI
+    // Under the System V AMD64 calling convention, RSI and RDI are volatile (caller-saved)
+    // registers, so they are not saved/restored for UNIX_AMD64_ABI.
+ if (regSet.rsRegsModified(RBM_ESI))
+ {
+ popCount++;
+ inst_RV(INS_pop, REG_ESI, TYP_I_IMPL);
+ }
+ if (regSet.rsRegsModified(RBM_EDI))
+ {
+ popCount++;
+ inst_RV(INS_pop, REG_EDI, TYP_I_IMPL);
+ }
+#endif // !defined(UNIX_AMD64_ABI)
+
+#ifdef _TARGET_AMD64_
+ if (regSet.rsRegsModified(RBM_R12))
+ {
+ popCount++;
+ inst_RV(INS_pop, REG_R12, TYP_I_IMPL);
+ }
+ if (regSet.rsRegsModified(RBM_R13))
+ {
+ popCount++;
+ inst_RV(INS_pop, REG_R13, TYP_I_IMPL);
+ }
+ if (regSet.rsRegsModified(RBM_R14))
+ {
+ popCount++;
+ inst_RV(INS_pop, REG_R14, TYP_I_IMPL);
+ }
+ if (regSet.rsRegsModified(RBM_R15))
+ {
+ popCount++;
+ inst_RV(INS_pop, REG_R15, TYP_I_IMPL);
+ }
+#endif // _TARGET_AMD64_
+
+ // Amd64/x86 doesn't support push/pop of xmm registers.
+ // These will get saved to stack separately after allocating
+ // space on stack in prolog sequence. PopCount is essentially
+ // tracking the count of integer registers pushed.
+
+ noway_assert(compiler->compCalleeRegsPushed == popCount);
+}
+
+#elif defined(_TARGET_X86_)
+
+void CodeGen::genPopCalleeSavedRegisters(bool jmpEpilog)
+{
+ assert(compiler->compGeneratingEpilog);
+
+ unsigned popCount = 0;
+
+ /* NOTE: The EBP-less frame code below depends on the fact that
+ all of the pops are generated right at the start and
+ each takes one byte of machine code.
+ */
+
+ if (regSet.rsRegsModified(RBM_FPBASE))
+ {
+ // EBP cannot be directly modified for EBP frame and double-aligned frames
+ noway_assert(!doubleAlignOrFramePointerUsed());
+
+ inst_RV(INS_pop, REG_EBP, TYP_I_IMPL);
+ popCount++;
+ }
+ if (regSet.rsRegsModified(RBM_EBX))
+ {
+ popCount++;
+ inst_RV(INS_pop, REG_EBX, TYP_I_IMPL);
+ }
+ if (regSet.rsRegsModified(RBM_ESI))
+ {
+ popCount++;
+ inst_RV(INS_pop, REG_ESI, TYP_I_IMPL);
+ }
+ if (regSet.rsRegsModified(RBM_EDI))
+ {
+ popCount++;
+ inst_RV(INS_pop, REG_EDI, TYP_I_IMPL);
+ }
+ noway_assert(compiler->compCalleeRegsPushed == popCount);
+}
+
+#endif // _TARGET_*
+
+// We need a register with value zero. Zero the initReg, if necessary, and set *pInitRegZeroed if so.
+// Return the register to use. On ARM64, we never touch the initReg, and always just return REG_ZR.
+regNumber CodeGen::genGetZeroReg(regNumber initReg, bool* pInitRegZeroed)
+{
+#ifdef _TARGET_ARM64_
+ return REG_ZR;
+#else // !_TARGET_ARM64_
+ if (*pInitRegZeroed == false)
+ {
+ instGen_Set_Reg_To_Zero(EA_PTRSIZE, initReg);
+ *pInitRegZeroed = true;
+ }
+ return initReg;
+#endif // !_TARGET_ARM64_
+}
+
+/*-----------------------------------------------------------------------------
+ *
+ * Do we have any untracked pointer locals at all,
+ * or do we need to initialize memory for locspace?
+ *
+ * untrLclHi - (Untracked locals High-Offset) The upper bound offset at which the zero init code will end
+ * initializing memory (not inclusive).
+ * untrLclLo - (Untracked locals Low-Offset) The lower bound at which the zero init code will start zero
+ * initializing memory.
+ * initReg - A scratch register (that gets set to zero on some platforms).
+ * pInitRegZeroed - Sets a flag that tells the callee whether or not the initReg register got zeroed.
+ */
+void CodeGen::genZeroInitFrame(int untrLclHi, int untrLclLo, regNumber initReg, bool* pInitRegZeroed)
+{
+ assert(compiler->compGeneratingProlog);
+
+ if (genUseBlockInit)
+ {
+ assert(untrLclHi > untrLclLo);
+#ifdef _TARGET_ARMARCH_
+ /*
+ Generate the following code:
+
+ For cnt less than 10
+
+ mov rZero1, 0
+ mov rZero2, 0
+ mov rCnt, <cnt>
+ stm <rZero1,rZero2>,[rAddr!]
+ <optional> stm <rZero1,rZero2>,[rAddr!]
+ <optional> stm <rZero1,rZero2>,[rAddr!]
+ <optional> stm <rZero1,rZero2>,[rAddr!]
+ <optional> str rZero1,[rAddr]
+
+ For rCnt greater than or equal to 10
+
+ mov rZero1, 0
+ mov rZero2, 0
+ mov rCnt, <cnt/2>
+ sub rAddr, sp, OFFS
+
+ loop:
+ stm <rZero1,rZero2>,[rAddr!]
+ sub rCnt,rCnt,1
+ jnz loop
+
+ <optional> str rZero1,[rAddr] // When cnt is odd
+
+ NOTE: for ARM64, the instruction is stp, not stm. And we can use ZR instead of allocating registers.
+ */
+
+ regNumber rAddr;
+ regNumber rCnt = REG_NA; // Invalid
+ regMaskTP regMask;
+
+ regMaskTP availMask = regSet.rsGetModifiedRegsMask() | RBM_INT_CALLEE_TRASH; // Set of available registers
+ availMask &= ~intRegState.rsCalleeRegArgMaskLiveIn; // Remove all of the incoming argument registers as they are
+ // currently live
+ availMask &= ~genRegMask(initReg); // Remove the pre-calculated initReg as we will zero it and maybe use it for
+ // a large constant.
+
+#if defined(_TARGET_ARM_)
+
+ if (compiler->compLocallocUsed)
+ {
+ availMask &= ~RBM_SAVED_LOCALLOC_SP; // Remove the register reserved when we have a localloc frame
+ }
+
+ regNumber rZero1; // We're going to use initReg for rZero1
+ regNumber rZero2;
+
+ // We pick the next lowest register number for rZero2
+ noway_assert(availMask != RBM_NONE);
+ regMask = genFindLowestBit(availMask);
+ rZero2 = genRegNumFromMask(regMask);
+ availMask &= ~regMask;
+ assert((genRegMask(rZero2) & intRegState.rsCalleeRegArgMaskLiveIn) ==
+ 0); // rZero2 is not a live incoming argument reg
+
+ // We pick the next lowest register number for rAddr
+ noway_assert(availMask != RBM_NONE);
+ regMask = genFindLowestBit(availMask);
+ rAddr = genRegNumFromMask(regMask);
+ availMask &= ~regMask;
+
+#else // !defined(_TARGET_ARM_)
+
+ regNumber rZero1 = REG_ZR;
+ rAddr = initReg;
+ *pInitRegZeroed = false;
+
+#endif // !defined(_TARGET_ARM_)
+
+ bool useLoop = false;
+ unsigned uCntBytes = untrLclHi - untrLclLo;
+ assert((uCntBytes % sizeof(int)) == 0); // The smallest stack slot is always 4 bytes.
+ unsigned uCntSlots = uCntBytes / REGSIZE_BYTES; // How many register sized stack slots we're going to use.
+
+ // When uCntSlots is 9 or less, we will emit a sequence of stm/stp instructions inline.
+ // When it is 10 or greater, we will emit a loop containing a stm/stp instruction.
+ // In both of these cases the stm/stp instruction will write two zeros to memory
+ // and we will use a single str instruction at the end whenever we have an odd count.
+ if (uCntSlots >= 10)
+ useLoop = true;
+
+ if (useLoop)
+ {
+ // We pick the next lowest register number for rCnt
+ noway_assert(availMask != RBM_NONE);
+ regMask = genFindLowestBit(availMask);
+ rCnt = genRegNumFromMask(regMask);
+ availMask &= ~regMask;
+ }
+
+ assert((genRegMask(rAddr) & intRegState.rsCalleeRegArgMaskLiveIn) ==
+ 0); // rAddr is not a live incoming argument reg
+#if defined(_TARGET_ARM_)
+ if (arm_Valid_Imm_For_Add(untrLclLo, INS_FLAGS_DONT_CARE))
+#else // !_TARGET_ARM_
+ if (emitter::emitIns_valid_imm_for_add(untrLclLo, EA_PTRSIZE))
+#endif // !_TARGET_ARM_
+ {
+ getEmitter()->emitIns_R_R_I(INS_add, EA_PTRSIZE, rAddr, genFramePointerReg(), untrLclLo);
+ }
+ else
+ {
+ // Load immediate into the InitReg register
+ instGen_Set_Reg_To_Imm(EA_PTRSIZE, initReg, (ssize_t)untrLclLo);
+ getEmitter()->emitIns_R_R_R(INS_add, EA_PTRSIZE, rAddr, genFramePointerReg(), initReg);
+ *pInitRegZeroed = false;
+ }
+
+ if (useLoop)
+ {
+ noway_assert(uCntSlots >= 2);
+ assert((genRegMask(rCnt) & intRegState.rsCalleeRegArgMaskLiveIn) ==
+ 0); // rCnt is not a live incoming argument reg
+ instGen_Set_Reg_To_Imm(EA_PTRSIZE, rCnt, (ssize_t)uCntSlots / 2);
+ }
+
+#if defined(_TARGET_ARM_)
+ rZero1 = genGetZeroReg(initReg, pInitRegZeroed);
+ instGen_Set_Reg_To_Zero(EA_PTRSIZE, rZero2);
+ ssize_t stmImm = (ssize_t)(genRegMask(rZero1) | genRegMask(rZero2));
+#endif // _TARGET_ARM_
+
+ if (!useLoop)
+ {
+ while (uCntBytes >= REGSIZE_BYTES * 2)
+ {
+#ifdef _TARGET_ARM_
+ getEmitter()->emitIns_R_I(INS_stm, EA_PTRSIZE, rAddr, stmImm);
+#else // !_TARGET_ARM_
+ getEmitter()->emitIns_R_R_R_I(INS_stp, EA_PTRSIZE, REG_ZR, REG_ZR, rAddr, 2 * REGSIZE_BYTES,
+ INS_OPTS_POST_INDEX);
+#endif // !_TARGET_ARM_
+ uCntBytes -= REGSIZE_BYTES * 2;
+ }
+ }
+ else // useLoop is true
+ {
+#ifdef _TARGET_ARM_
+ getEmitter()->emitIns_R_I(INS_stm, EA_PTRSIZE, rAddr, stmImm); // zero stack slots
+ getEmitter()->emitIns_R_I(INS_sub, EA_PTRSIZE, rCnt, 1, INS_FLAGS_SET);
+#else // !_TARGET_ARM_
+ getEmitter()->emitIns_R_R_R_I(INS_stp, EA_PTRSIZE, REG_ZR, REG_ZR, rAddr, 2 * REGSIZE_BYTES,
+ INS_OPTS_POST_INDEX); // zero stack slots
+ getEmitter()->emitIns_R_R_I(INS_subs, EA_PTRSIZE, rCnt, rCnt, 1);
+#endif // !_TARGET_ARM_
+ getEmitter()->emitIns_J(INS_bhi, NULL, -3);
+ uCntBytes %= REGSIZE_BYTES * 2;
+ }
+
+ if (uCntBytes >= REGSIZE_BYTES) // check and zero the last register-sized stack slot (odd number)
+ {
+#ifdef _TARGET_ARM_
+ getEmitter()->emitIns_R_R_I(INS_str, EA_PTRSIZE, rZero1, rAddr, 0);
+#else // _TARGET_ARM_
+ if ((uCntBytes - REGSIZE_BYTES) == 0)
+ {
+ getEmitter()->emitIns_R_R_I(INS_str, EA_PTRSIZE, REG_ZR, rAddr, 0);
+ }
+ else
+ {
+ getEmitter()->emitIns_R_R_I(INS_str, EA_PTRSIZE, REG_ZR, rAddr, REGSIZE_BYTES, INS_OPTS_POST_INDEX);
+ }
+#endif // !_TARGET_ARM_
+ uCntBytes -= REGSIZE_BYTES;
+ }
+#ifdef _TARGET_ARM64_
+ if (uCntBytes > 0)
+ {
+ assert(uCntBytes == sizeof(int));
+ getEmitter()->emitIns_R_R_I(INS_str, EA_4BYTE, REG_ZR, rAddr, 0);
+ uCntBytes -= sizeof(int);
+ }
+#endif // _TARGET_ARM64_
+ noway_assert(uCntBytes == 0);
+
+#elif defined(_TARGET_XARCH_)
+ /*
+ Generate the following code:
+
+ lea edi, [ebp/esp-OFFS]
+ mov ecx, <size>
+ xor eax, eax
+ rep stosd
+ */
+
+ noway_assert(regSet.rsRegsModified(RBM_EDI));
+
+#ifdef UNIX_AMD64_ABI
+    // For register arguments we may have to save RCX and RDI on Amd64 System V OSes
+ if (intRegState.rsCalleeRegArgMaskLiveIn & RBM_RCX)
+ {
+ noway_assert(regSet.rsRegsModified(RBM_R12));
+ inst_RV_RV(INS_mov, REG_R12, REG_RCX);
+ regTracker.rsTrackRegTrash(REG_R12);
+ }
+
+ if (intRegState.rsCalleeRegArgMaskLiveIn & RBM_RDI)
+ {
+ noway_assert(regSet.rsRegsModified(RBM_R13));
+ inst_RV_RV(INS_mov, REG_R13, REG_RDI);
+ regTracker.rsTrackRegTrash(REG_R13);
+ }
+#else // !UNIX_AMD64_ABI
+ // For register arguments we may have to save ECX
+ if (intRegState.rsCalleeRegArgMaskLiveIn & RBM_ECX)
+ {
+ noway_assert(regSet.rsRegsModified(RBM_ESI));
+ inst_RV_RV(INS_mov, REG_ESI, REG_ECX);
+ regTracker.rsTrackRegTrash(REG_ESI);
+ }
+#endif // !UNIX_AMD64_ABI
+
+ noway_assert((intRegState.rsCalleeRegArgMaskLiveIn & RBM_EAX) == 0);
+
+ getEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, REG_EDI, genFramePointerReg(), untrLclLo);
+ regTracker.rsTrackRegTrash(REG_EDI);
+
+ inst_RV_IV(INS_mov, REG_ECX, (untrLclHi - untrLclLo) / sizeof(int), EA_4BYTE);
+ instGen_Set_Reg_To_Zero(EA_PTRSIZE, REG_EAX);
+ instGen(INS_r_stosd);
+
+#ifdef UNIX_AMD64_ABI
+ // Move back the argument registers
+ if (intRegState.rsCalleeRegArgMaskLiveIn & RBM_RCX)
+ {
+ inst_RV_RV(INS_mov, REG_RCX, REG_R12);
+ }
+
+ if (intRegState.rsCalleeRegArgMaskLiveIn & RBM_RDI)
+ {
+ inst_RV_RV(INS_mov, REG_RDI, REG_R13);
+ }
+#else // !UNIX_AMD64_ABI
+ // Move back the argument registers
+ if (intRegState.rsCalleeRegArgMaskLiveIn & RBM_ECX)
+ {
+ inst_RV_RV(INS_mov, REG_ECX, REG_ESI);
+ }
+#endif // !UNIX_AMD64_ABI
+
+#else // _TARGET_*
+#error Unsupported or unset target architecture
+#endif // _TARGET_*
+ }
+ else if (genInitStkLclCnt > 0)
+ {
+ assert((genRegMask(initReg) & intRegState.rsCalleeRegArgMaskLiveIn) ==
+ 0); // initReg is not a live incoming argument reg
+
+ /* Initialize any lvMustInit vars on the stack */
+
+ LclVarDsc* varDsc;
+ unsigned varNum;
+
+ for (varNum = 0, varDsc = compiler->lvaTable; varNum < compiler->lvaCount; varNum++, varDsc++)
+ {
+ if (!varDsc->lvMustInit)
+ {
+ continue;
+ }
+
+ // TODO-Review: I'm not sure that we're correctly handling the mustInit case for
+ // partially-enregistered vars in the case where we don't use a block init.
+ noway_assert(varDsc->lvIsInReg() || varDsc->lvOnFrame);
+
+ // lvMustInit can only be set for GC types or TYP_STRUCT types
+ // or when compInitMem is true
+ // or when in debug code
+
+ noway_assert(varTypeIsGC(varDsc->TypeGet()) || (varDsc->TypeGet() == TYP_STRUCT) ||
+ compiler->info.compInitMem || compiler->opts.compDbgCode);
+
+#ifdef _TARGET_64BIT_
+ if (!varDsc->lvOnFrame)
+ {
+ continue;
+ }
+#else // !_TARGET_64BIT_
+ if (varDsc->lvRegister)
+ {
+ if (varDsc->lvOnFrame)
+ {
+ /* This is a partially enregistered TYP_LONG var */
+ noway_assert(varDsc->lvOtherReg == REG_STK);
+ noway_assert(varDsc->lvType == TYP_LONG);
+
+ noway_assert(compiler->info.compInitMem);
+
+ getEmitter()->emitIns_S_R(ins_Store(TYP_INT), EA_4BYTE, genGetZeroReg(initReg, pInitRegZeroed),
+ varNum, sizeof(int));
+ }
+ continue;
+ }
+#endif // !_TARGET_64BIT_
+
+ if ((varDsc->TypeGet() == TYP_STRUCT) && !compiler->info.compInitMem &&
+ (varDsc->lvExactSize >= TARGET_POINTER_SIZE))
+ {
+ // We only initialize the GC variables in the TYP_STRUCT
+ const unsigned slots = (unsigned)compiler->lvaLclSize(varNum) / REGSIZE_BYTES;
+ const BYTE* gcPtrs = compiler->lvaGetGcLayout(varNum);
+
+ for (unsigned i = 0; i < slots; i++)
+ {
+ if (gcPtrs[i] != TYPE_GC_NONE)
+ {
+ getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE,
+ genGetZeroReg(initReg, pInitRegZeroed), varNum, i * REGSIZE_BYTES);
+ }
+ }
+ }
+ else
+ {
+ regNumber zeroReg = genGetZeroReg(initReg, pInitRegZeroed);
+
+                // Zero out the whole thing, rounded up to a multiple of the smallest stack slot size
+                // (sizeof(int)).
+ unsigned lclSize = (unsigned)roundUp(compiler->lvaLclSize(varNum), sizeof(int));
+ unsigned i;
+ for (i = 0; i + REGSIZE_BYTES <= lclSize; i += REGSIZE_BYTES)
+ {
+ getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, zeroReg, varNum, i);
+ }
+
+#ifdef _TARGET_64BIT_
+ assert(i == lclSize || (i + sizeof(int) == lclSize));
+ if (i != lclSize)
+ {
+ getEmitter()->emitIns_S_R(ins_Store(TYP_INT), EA_4BYTE, zeroReg, varNum, i);
+ i += sizeof(int);
+ }
+#endif // _TARGET_64BIT_
+ assert(i == lclSize);
+ }
+ }
+
+ if (!TRACK_GC_TEMP_LIFETIMES)
+ {
+ assert(compiler->tmpAllFree());
+ for (TempDsc* tempThis = compiler->tmpListBeg(); tempThis != nullptr;
+ tempThis = compiler->tmpListNxt(tempThis))
+ {
+ if (!varTypeIsGC(tempThis->tdTempType()))
+ {
+ continue;
+ }
+
+ // printf("initialize untracked spillTmp [EBP-%04X]\n", stkOffs);
+
+ inst_ST_RV(ins_Store(TYP_I_IMPL), tempThis, 0, genGetZeroReg(initReg, pInitRegZeroed), TYP_I_IMPL);
+ }
+ }
+ }
+}
+
+/*-----------------------------------------------------------------------------
+ *
+ * Save the generic context argument.
+ *
+ * We need to do this within the "prolog" in case anyone tries to inspect
+ * the param-type-arg/this (which can be done after the prolog) using
+ * ICodeManager::GetParamTypeArg().
+ */
+
+void CodeGen::genReportGenericContextArg(regNumber initReg, bool* pInitRegZeroed)
+{
+ assert(compiler->compGeneratingProlog);
+
+ bool reportArg = compiler->lvaReportParamTypeArg();
+
+    // We should report either the generic context arg or "this" when it is used as such.
+ if (!reportArg)
+ {
+#ifndef JIT32_GCENCODER
+ if (!compiler->lvaKeepAliveAndReportThis())
+#endif
+ {
+ return;
+ }
+ }
+
+ // For JIT32_GCENCODER, we won't be here if reportArg is false.
+ unsigned contextArg = reportArg ? compiler->info.compTypeCtxtArg : compiler->info.compThisArg;
+
+ noway_assert(contextArg != BAD_VAR_NUM);
+ LclVarDsc* varDsc = &compiler->lvaTable[contextArg];
+
+ // We are still in the prolog and compiler->info.compTypeCtxtArg has not been
+ // moved to its final home location. So we need to use it from the
+ // incoming location.
+
+ regNumber reg;
+
+ bool isPrespilledForProfiling = false;
+#if defined(_TARGET_ARM_) && defined(PROFILING_SUPPORTED)
+ isPrespilledForProfiling =
+ compiler->compIsProfilerHookNeeded() && compiler->lvaIsPreSpilled(contextArg, regSet.rsMaskPreSpillRegs(false));
+#endif
+
+ // Load from the argument register only if it is not prespilled.
+ if (compiler->lvaIsRegArgument(contextArg) && !isPrespilledForProfiling)
+ {
+ reg = varDsc->lvArgReg;
+ }
+ else
+ {
+ if (isFramePointerUsed())
+ {
+#if defined(_TARGET_ARM_)
+ // lvStkOffs is always valid for incoming stack-arguments, even if the argument
+ // will become enregistered.
+ // On Arm compiler->compArgSize doesn't include r11 and lr sizes and hence we need to add 2*REGSIZE_BYTES
+ noway_assert((2 * REGSIZE_BYTES <= varDsc->lvStkOffs) &&
+ (size_t(varDsc->lvStkOffs) < compiler->compArgSize + 2 * REGSIZE_BYTES));
+#else
+ // lvStkOffs is always valid for incoming stack-arguments, even if the argument
+ // will become enregistered.
+ noway_assert((0 < varDsc->lvStkOffs) && (size_t(varDsc->lvStkOffs) < compiler->compArgSize));
+#endif
+ }
+
+ // We will just use the initReg since it is an available register
+ // and we are probably done using it anyway...
+ reg = initReg;
+ *pInitRegZeroed = false;
+
+ // mov reg, [compiler->info.compTypeCtxtArg]
+ getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), EA_PTRSIZE, reg, genFramePointerReg(), varDsc->lvStkOffs);
+ regTracker.rsTrackRegTrash(reg);
+ }
+
+#if CPU_LOAD_STORE_ARCH
+ getEmitter()->emitIns_R_R_I(ins_Store(TYP_I_IMPL), EA_PTRSIZE, reg, genFramePointerReg(),
+ compiler->lvaCachedGenericContextArgOffset());
+#else // CPU_LOAD_STORE_ARCH
+ // mov [ebp-lvaCachedGenericContextArgOffset()], reg
+ getEmitter()->emitIns_AR_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, reg, genFramePointerReg(),
+ compiler->lvaCachedGenericContextArgOffset());
+#endif // !CPU_LOAD_STORE_ARCH
+}
+
+/*-----------------------------------------------------------------------------
+ *
+ * Set the "GS" security cookie in the prolog.
+ */
+
+void CodeGen::genSetGSSecurityCookie(regNumber initReg, bool* pInitRegZeroed)
+{
+ assert(compiler->compGeneratingProlog);
+
+ if (!compiler->getNeedsGSSecurityCookie())
+ {
+ return;
+ }
+
+ noway_assert(compiler->gsGlobalSecurityCookieAddr || compiler->gsGlobalSecurityCookieVal);
+
+ if (compiler->gsGlobalSecurityCookieAddr == nullptr)
+ {
+#ifdef _TARGET_AMD64_
+ // eax = #GlobalSecurityCookieVal64; [frame.GSSecurityCookie] = eax
+ getEmitter()->emitIns_R_I(INS_mov, EA_PTRSIZE, REG_RAX, compiler->gsGlobalSecurityCookieVal);
+ getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_RAX, compiler->lvaGSSecurityCookie, 0);
+#else
+ // mov dword ptr [frame.GSSecurityCookie], #GlobalSecurityCookieVal
+ instGen_Store_Imm_Into_Lcl(TYP_I_IMPL, EA_PTRSIZE, compiler->gsGlobalSecurityCookieVal,
+ compiler->lvaGSSecurityCookie, 0, initReg);
+#endif
+ }
+ else
+ {
+ regNumber reg;
+#ifdef _TARGET_XARCH_
+ // Always use EAX on x86 and x64
+ // On x64, if we're not moving into RAX, and the address isn't RIP relative, we can't encode it.
+ reg = REG_EAX;
+#else
+ // We will just use the initReg since it is an available register
+ reg = initReg;
+#endif
+
+ *pInitRegZeroed = false;
+
+#if CPU_LOAD_STORE_ARCH
+ instGen_Set_Reg_To_Imm(EA_PTR_DSP_RELOC, reg, (ssize_t)compiler->gsGlobalSecurityCookieAddr);
+ getEmitter()->emitIns_R_R_I(ins_Load(TYP_I_IMPL), EA_PTRSIZE, reg, reg, 0);
+ regTracker.rsTrackRegTrash(reg);
+#else
+ // mov reg, dword ptr [compiler->gsGlobalSecurityCookieAddr]
+ // mov dword ptr [frame.GSSecurityCookie], reg
+ getEmitter()->emitIns_R_AI(INS_mov, EA_PTR_DSP_RELOC, reg, (ssize_t)compiler->gsGlobalSecurityCookieAddr);
+ regTracker.rsTrackRegTrash(reg);
+#endif
+ getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, reg, compiler->lvaGSSecurityCookie, 0);
+ }
+}
+
+#ifdef PROFILING_SUPPORTED
+
+/*-----------------------------------------------------------------------------
+ *
+ * Generate the profiling function enter callback.
+ */
+
+void CodeGen::genProfilingEnterCallback(regNumber initReg, bool* pInitRegZeroed)
+{
+ assert(compiler->compGeneratingProlog);
+
+ // Give profiler a chance to back out of hooking this method
+ if (!compiler->compIsProfilerHookNeeded())
+ {
+ return;
+ }
+
+#ifndef LEGACY_BACKEND
+#if defined(_TARGET_AMD64_) && !defined(UNIX_AMD64_ABI) // No profiling for System V systems yet.
+ unsigned varNum;
+ LclVarDsc* varDsc;
+
+ // Since the method needs to make a profiler callback, it should have out-going arg space allocated.
+ noway_assert(compiler->lvaOutgoingArgSpaceVar != BAD_VAR_NUM);
+ noway_assert(compiler->lvaOutgoingArgSpaceSize >= (4 * REGSIZE_BYTES));
+
+ // Home all arguments passed in arg registers (RCX, RDX, R8 and R9).
+ // In case of vararg methods, arg regs are already homed.
+ //
+    // Note: Here we don't need to worry about updating gc'info since the enter
+    // callback is generated as part of the prolog, which is non-gc interruptible.
+    // Moreover, the GC cannot kick in while executing inside a profiler callback, which is a
+    // profiler requirement, so the callback can examine arguments that could be obj refs.
+ if (!compiler->info.compIsVarArgs)
+ {
+ for (varNum = 0, varDsc = compiler->lvaTable; varNum < compiler->info.compArgsCount; varNum++, varDsc++)
+ {
+ noway_assert(varDsc->lvIsParam);
+
+ if (!varDsc->lvIsRegArg)
+ {
+ continue;
+ }
+
+ var_types storeType = varDsc->lvaArgType();
+ regNumber argReg = varDsc->lvArgReg;
+ getEmitter()->emitIns_S_R(ins_Store(storeType), emitTypeSize(storeType), argReg, varNum, 0);
+ }
+ }
+
+ // Emit profiler EnterCallback(ProfilerMethHnd, caller's SP)
+ // RCX = ProfilerMethHnd
+ if (compiler->compProfilerMethHndIndirected)
+ {
+ // Profiler hooks enabled during Ngen time.
+ // Profiler handle needs to be accessed through an indirection of a pointer.
+ getEmitter()->emitIns_R_AI(INS_mov, EA_PTR_DSP_RELOC, REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd);
+ }
+ else
+ {
+ // No need to record relocations, if we are generating ELT hooks under the influence
+ // of complus_JitELtHookEnabled=1
+ if (compiler->opts.compJitELTHookEnabled)
+ {
+ genSetRegToIcon(REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd, TYP_I_IMPL);
+ }
+ else
+ {
+ instGen_Set_Reg_To_Imm(EA_8BYTE, REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd);
+ }
+ }
+
+ // RDX = caller's SP
+ // Notes
+ // 1) Here we can query caller's SP offset since prolog will be generated after final frame layout.
+ // 2) caller's SP relative offset to FramePointer will be negative. We need to add absolute value
+ // of that offset to FramePointer to obtain caller's SP value.
+ assert(compiler->lvaOutgoingArgSpaceVar != BAD_VAR_NUM);
+ int callerSPOffset = compiler->lvaToCallerSPRelativeOffset(0, isFramePointerUsed());
+ getEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, REG_ARG_1, genFramePointerReg(), -callerSPOffset);
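+
+    // E.g. (illustrative): if the caller's SP is 0x28 bytes above the frame pointer, the offset query
+    // returns -0x28 and the lea above becomes "lea rdx, [rbp + 0x28]".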
+
+ // Can't have a call until we have enough padding for rejit
+ genPrologPadForReJit();
+
+ // This will emit either
+ // "call ip-relative 32-bit offset" or
+ // "mov rax, helper addr; call rax"
+ genEmitHelperCall(CORINFO_HELP_PROF_FCN_ENTER, 0, EA_UNKNOWN);
+
+ // TODO-AMD64-CQ: Rather than reloading, see if this could be optimized by combining with prolog
+ // generation logic that moves args around as required by first BB entry point conditions
+ // computed by LSRA. Code pointers for investigating this further: genFnPrologCalleeRegArgs()
+ // and genEnregisterIncomingStackArgs().
+ //
+ // Now reload arg registers from home locations.
+ // Vararg methods:
+ // - we need to reload only known (i.e. fixed) reg args.
+ // - if floating point type, also reload it into corresponding integer reg
+ for (varNum = 0, varDsc = compiler->lvaTable; varNum < compiler->info.compArgsCount; varNum++, varDsc++)
+ {
+ noway_assert(varDsc->lvIsParam);
+
+ if (!varDsc->lvIsRegArg)
+ {
+ continue;
+ }
+
+ var_types loadType = varDsc->lvaArgType();
+ regNumber argReg = varDsc->lvArgReg;
+ getEmitter()->emitIns_R_S(ins_Load(loadType), emitTypeSize(loadType), argReg, varNum, 0);
+
+#if FEATURE_VARARG
+ if (compiler->info.compIsVarArgs && varTypeIsFloating(loadType))
+ {
+ regNumber intArgReg = compiler->getCallArgIntRegister(argReg);
+ instruction ins = ins_CopyFloatToInt(loadType, TYP_LONG);
+ inst_RV_RV(ins, argReg, intArgReg, loadType);
+ }
+#endif // FEATURE_VARARG
+ }
+
+ // If initReg is one of RBM_CALLEE_TRASH, then it needs to be zero'ed before using.
+ if ((RBM_CALLEE_TRASH & genRegMask(initReg)) != 0)
+ {
+ *pInitRegZeroed = false;
+ }
+
+#else //!_TARGET_AMD64_
+ NYI("RyuJIT: Emit Profiler Enter callback");
+#endif
+
+#else // LEGACY_BACKEND
+
+ unsigned saveStackLvl2 = genStackLevel;
+
+#if defined(_TARGET_X86_)
+ // Important note: when you change enter probe layout, you must also update SKIP_ENTER_PROF_CALLBACK()
+ // for x86 stack unwinding
+
+ // Push the profilerHandle
+ if (compiler->compProfilerMethHndIndirected)
+ {
+ getEmitter()->emitIns_AR_R(INS_push, EA_PTR_DSP_RELOC, REG_NA, REG_NA, (ssize_t)compiler->compProfilerMethHnd);
+ }
+ else
+ {
+ inst_IV(INS_push, (size_t)compiler->compProfilerMethHnd);
+ }
+#elif defined(_TARGET_ARM_)
+ // On Arm arguments are prespilled on stack, which frees r0-r3.
+ // For generating Enter callout we would need two registers and one of them has to be r0 to pass profiler handle.
+ // The call target register could be any free register.
+ regNumber argReg = regSet.rsGrabReg(RBM_PROFILER_ENTER_ARG);
+ noway_assert(argReg == REG_PROFILER_ENTER_ARG);
+ regSet.rsLockReg(RBM_PROFILER_ENTER_ARG);
+
+ if (compiler->compProfilerMethHndIndirected)
+ {
+ getEmitter()->emitIns_R_AI(INS_ldr, EA_PTR_DSP_RELOC, argReg, (ssize_t)compiler->compProfilerMethHnd);
+ regTracker.rsTrackRegTrash(argReg);
+ }
+ else
+ {
+ instGen_Set_Reg_To_Imm(EA_4BYTE, argReg, (ssize_t)compiler->compProfilerMethHnd);
+ }
+#else // _TARGET_*
+ NYI("Pushing the profilerHandle & caller's sp for the profiler callout and locking registers");
+#endif // _TARGET_*
+
+ //
+ // Can't have a call until we have enough padding for rejit
+ //
+ genPrologPadForReJit();
+
+ // This will emit either
+ // "call ip-relative 32-bit offset" or
+ // "mov rax, helper addr; call rax"
+ genEmitHelperCall(CORINFO_HELP_PROF_FCN_ENTER,
+ 0, // argSize. Again, we have to lie about it
+ EA_UNKNOWN); // retSize
+
+#if defined(_TARGET_X86_)
+ //
+ // Adjust the number of stack slots used by this managed method if necessary.
+ //
+ if (compiler->fgPtrArgCntMax < 1)
+ {
+ compiler->fgPtrArgCntMax = 1;
+ }
+#elif defined(_TARGET_ARM_)
+ // Unlock registers
+ regSet.rsUnlockReg(RBM_PROFILER_ENTER_ARG);
+
+ if (initReg == argReg)
+ {
+ *pInitRegZeroed = false;
+ }
+#else // _TARGET_*
+ NYI("Pushing the profilerHandle & caller's sp for the profiler callout and locking registers");
+#endif // _TARGET_*
+
+ /* Restore the stack level */
+
+ genStackLevel = saveStackLvl2;
+#endif // LEGACY_BACKEND
+}
+
+/*****************************************************************************
+ *
+ * Generates Leave profiler hook.
+ * Technically, this is not part of the epilog; it is called when we are generating code for a GT_RETURN node.
+ */
+
+void CodeGen::genProfilingLeaveCallback(unsigned helper /*= CORINFO_HELP_PROF_FCN_LEAVE*/)
+{
+ // Only hook if profiler says it's okay.
+ if (!compiler->compIsProfilerHookNeeded())
+ {
+ return;
+ }
+
+ compiler->info.compProfilerCallback = true;
+
+ // Need to save on to the stack level, since the callee will pop the argument
+ unsigned saveStackLvl2 = genStackLevel;
+
+#ifndef LEGACY_BACKEND
+
+#if defined(_TARGET_AMD64_) && !defined(UNIX_AMD64_ABI) // No profiling for System V systems yet.
+ // Since the method needs to make a profiler callback, it should have out-going arg space allocated.
+ noway_assert(compiler->lvaOutgoingArgSpaceVar != BAD_VAR_NUM);
+ noway_assert(compiler->lvaOutgoingArgSpaceSize >= (4 * REGSIZE_BYTES));
+
+ // If thisPtr needs to be kept alive and reported, it cannot be one of the callee trash
+ // registers that profiler callback kills.
+ if (compiler->lvaKeepAliveAndReportThis() && compiler->lvaTable[compiler->info.compThisArg].lvIsInReg())
+ {
+ regMaskTP thisPtrMask = genRegMask(compiler->lvaTable[compiler->info.compThisArg].lvRegNum);
+ noway_assert((RBM_PROFILER_LEAVE_TRASH & thisPtrMask) == 0);
+ }
+
+    // At this point the return value is computed and stored in RAX or XMM0.
+    // On Amd64, the Leave callback preserves the return register. We keep
+    // RAX alive by not reporting it as trashed by the helper call. Also note
+    // that the GC cannot kick in while executing inside a profiler callback,
+    // which is a requirement of the profiler as well, since it needs to examine
+    // the return value, which could be an obj ref.
+
+ // RCX = ProfilerMethHnd
+ if (compiler->compProfilerMethHndIndirected)
+ {
+ // Profiler hooks enabled during Ngen time.
+ // Profiler handle needs to be accessed through an indirection of an address.
+ getEmitter()->emitIns_R_AI(INS_mov, EA_PTR_DSP_RELOC, REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd);
+ }
+ else
+ {
+ // Don't record relocations, if we are generating ELT hooks under the influence
+ // of complus_JitELtHookEnabled=1
+ if (compiler->opts.compJitELTHookEnabled)
+ {
+ genSetRegToIcon(REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd, TYP_I_IMPL);
+ }
+ else
+ {
+ instGen_Set_Reg_To_Imm(EA_8BYTE, REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd);
+ }
+ }
+
+ // RDX = caller's SP
+    // TODO-AMD64-Cleanup: Once we start doing codegen after final frame layout, retain the "if" portion
+    // of the statements to execute unconditionally and clean up the rest.
+ if (compiler->lvaDoneFrameLayout == Compiler::FINAL_FRAME_LAYOUT)
+ {
+ // Caller's SP relative offset to FramePointer will be negative. We need to add absolute
+ // value of that offset to FramePointer to obtain caller's SP value.
+ int callerSPOffset = compiler->lvaToCallerSPRelativeOffset(0, isFramePointerUsed());
+ getEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, REG_ARG_1, genFramePointerReg(), -callerSPOffset);
+ }
+ else
+ {
+        // If we are here, it means this is a tentative frame layout, during which we cannot use the
+        // caller's SP offset since it is only an estimate. For now we require the method to have at
+        // least a single arg so that we can use it to obtain the caller's SP.
+ LclVarDsc* varDsc = compiler->lvaTable;
+ NYI_IF((varDsc == nullptr) || !varDsc->lvIsParam, "Profiler ELT callback for a method without any params");
+
+ // lea rdx, [FramePointer + Arg0's offset]
+ getEmitter()->emitIns_R_S(INS_lea, EA_PTRSIZE, REG_ARG_1, 0, 0);
+ }
+
+ // We can use any callee trash register (other than RAX, RCX, RDX) for call target.
+ // We use R8 here. This will emit either
+ // "call ip-relative 32-bit offset" or
+ // "mov r8, helper addr; call r8"
+ genEmitHelperCall(helper, 0, EA_UNKNOWN, REG_ARG_2);
+
+#else //!_TARGET_AMD64_
+ NYI("RyuJIT: Emit Profiler Leave callback");
+#endif // _TARGET_*
+
+#else // LEGACY_BACKEND
+
+#if defined(_TARGET_X86_)
+ //
+ // Push the profilerHandle
+ //
+
+ if (compiler->compProfilerMethHndIndirected)
+ {
+ getEmitter()->emitIns_AR_R(INS_push, EA_PTR_DSP_RELOC, REG_NA, REG_NA, (ssize_t)compiler->compProfilerMethHnd);
+ }
+ else
+ {
+ inst_IV(INS_push, (size_t)compiler->compProfilerMethHnd);
+ }
+ genSinglePush();
+
+ genEmitHelperCall(CORINFO_HELP_PROF_FCN_LEAVE,
+ sizeof(int) * 1, // argSize
+ EA_UNKNOWN); // retSize
+
+ //
+ // Adjust the number of stack slots used by this managed method if necessary.
+ //
+ if (compiler->fgPtrArgCntMax < 1)
+ {
+ compiler->fgPtrArgCntMax = 1;
+ }
+#elif defined(_TARGET_ARM_)
+ //
+ // Push the profilerHandle
+ //
+
+    // We could optimize register usage based on whether the return value is int/long/void. But to keep
+    // it simple we will always lock RBM_PROFILER_RET_USED.
+ regNumber scratchReg = regSet.rsGrabReg(RBM_PROFILER_RET_SCRATCH);
+ noway_assert(scratchReg == REG_PROFILER_RET_SCRATCH);
+ regSet.rsLockReg(RBM_PROFILER_RET_USED);
+
+ // Contract between JIT and Profiler Leave callout on arm:
+ // Return size <= 4 bytes: REG_PROFILER_RET_SCRATCH will contain return value
+ // Return size > 4 and <= 8: <REG_PROFILER_RET_SCRATCH,r1> will contain return value.
+ // Floating point or double or HFA return values will be in s0-s15 in case of non-vararg methods.
+ // It is assumed that the profiler Leave callback doesn't trash registers r1, REG_PROFILER_RET_SCRATCH and s0-s15.
+ //
+ // In the following cases r0 doesn't contain a return value and hence need not be preserved before emitting the
+ // Leave callback.
+ bool r0Trashed;
+ emitAttr attr = EA_UNKNOWN;
+
+ if (compiler->info.compRetType == TYP_VOID ||
+ (!compiler->info.compIsVarArgs && !compiler->opts.compUseSoftFP && (varTypeIsFloating(compiler->info.compRetType) ||
+ compiler->IsHfa(compiler->info.compMethodInfo->args.retTypeClass))))
+ {
+ r0Trashed = false;
+ }
+ else
+ {
+ // Has a return value and r0 is in use. For emitting the Leave profiler callout we need r0 for passing
+ // the profiler handle. Therefore, r0 is moved to REG_PROFILER_RET_SCRATCH as per the contract.
+ if (RBM_ARG_0 & gcInfo.gcRegGCrefSetCur)
+ {
+ attr = EA_GCREF;
+ gcInfo.gcMarkRegSetGCref(RBM_PROFILER_RET_SCRATCH);
+ }
+ else if (RBM_ARG_0 & gcInfo.gcRegByrefSetCur)
+ {
+ attr = EA_BYREF;
+ gcInfo.gcMarkRegSetByref(RBM_PROFILER_RET_SCRATCH);
+ }
+ else
+ {
+ attr = EA_4BYTE;
+ }
+
+ getEmitter()->emitIns_R_R(INS_mov, attr, REG_PROFILER_RET_SCRATCH, REG_ARG_0);
+ regTracker.rsTrackRegTrash(REG_PROFILER_RET_SCRATCH);
+ gcInfo.gcMarkRegSetNpt(RBM_ARG_0);
+ r0Trashed = true;
+ }
+
+ if (compiler->compProfilerMethHndIndirected)
+ {
+ getEmitter()->emitIns_R_AI(INS_ldr, EA_PTR_DSP_RELOC, REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd);
+ regTracker.rsTrackRegTrash(REG_ARG_0);
+ }
+ else
+ {
+ instGen_Set_Reg_To_Imm(EA_4BYTE, REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd);
+ }
+
+ genEmitHelperCall(CORINFO_HELP_PROF_FCN_LEAVE,
+ 0, // argSize
+ EA_UNKNOWN); // retSize
+
+ // Restore state that existed before profiler callback
+ if (r0Trashed)
+ {
+ getEmitter()->emitIns_R_R(INS_mov, attr, REG_ARG_0, REG_PROFILER_RET_SCRATCH);
+ regTracker.rsTrackRegTrash(REG_ARG_0);
+ gcInfo.gcMarkRegSetNpt(RBM_PROFILER_RET_SCRATCH);
+ }
+
+ regSet.rsUnlockReg(RBM_PROFILER_RET_USED);
+#else // _TARGET_*
+ NYI("Pushing the profilerHandle & caller's sp for the profiler callout and locking them");
+#endif // _TARGET_*
+
+#endif // LEGACY_BACKEND
+
+ /* Restore the stack level */
+ genStackLevel = saveStackLvl2;
+}
+
+#endif // PROFILING_SUPPORTED
+
+/*****************************************************************************
+
+Esp frames :
+----------
+
+These instructions are just a reordering of the instructions used today.
+
+push ebp
+push esi
+push edi
+push ebx
+sub esp, LOCALS_SIZE / push dummyReg if LOCALS_SIZE=sizeof(void*)
+...
+add esp, LOCALS_SIZE / pop dummyReg
+pop ebx
+pop edi
+pop esi
+pop ebp
+ret
+
+Ebp frames :
+----------
+
+The epilog does "add esp, LOCALS_SIZE" instead of "mov esp, ebp".
+Everything else is similar, though in a different order.
+
+The security object will no longer be at a fixed offset. However, the
+offset can still be determined by looking up the GC-info and determining
+how many callee-saved registers are pushed.
+
+push ebp
+mov ebp, esp
+push esi
+push edi
+push ebx
+sub esp, LOCALS_SIZE / push dummyReg if LOCALS_SIZE=sizeof(void*)
+...
+add esp, LOCALS_SIZE / pop dummyReg
+pop ebx
+pop edi
+pop esi
+(mov esp, ebp if there are no callee-saved registers)
+pop ebp
+ret
+
+Double-aligned frame :
+--------------------
+
+LOCALS_SIZE_ADJUSTED needs to include an unused DWORD if an odd number
+of callee-saved registers are pushed on the stack so that the locals
+themselves are qword-aligned. The instructions are the same as today,
+just in a different order.
+
+push ebp
+mov ebp, esp
+and esp, 0xFFFFFFF8
+push esi
+push edi
+push ebx
+sub esp, LOCALS_SIZE_ADJUSTED / push dummyReg if LOCALS_SIZE=sizeof(void*)
+...
+add esp, LOCALS_SIZE_ADJUSTED / pop dummyReg
+pop ebx
+pop edi
+pop esi
+mov esp, ebp
+pop ebp
+ret
+
+localloc (with ebp) frames :
+--------------------------
+
+The instructions are the same as today, just in a different order.
+Also, today the epilog does "lea esp, [ebp-LOCALS_SIZE-calleeSavedRegsPushedSize]"
+which will change to "lea esp, [ebp-calleeSavedRegsPushedSize]".
+
+push ebp
+mov ebp, esp
+push esi
+push edi
+push ebx
+sub esp, LOCALS_SIZE / push dummyReg if LOCALS_SIZE=sizeof(void*)
+...
+lea esp, [ebp-calleeSavedRegsPushedSize]
+pop ebx
+pop edi
+pop esi
+(mov esp, ebp if there are no callee-saved registers)
+pop ebp
+ret
+
+*****************************************************************************/
+
+/*****************************************************************************
+ *
+ * Generates appropriate NOP padding for a function prolog to support ReJIT.
+ */
+
+void CodeGen::genPrologPadForReJit()
+{
+ assert(compiler->compGeneratingProlog);
+
+#ifdef _TARGET_XARCH_
+ if (!(compiler->opts.eeFlags & CORJIT_FLG_PROF_REJIT_NOPS))
+ {
+ return;
+ }
+
+#if FEATURE_EH_FUNCLETS
+
+ // No need to generate padding (NOPs) for funclets.
+ // When compiling the main function (and not a funclet),
+ // funCurrentFunc()->funKind is equal to FUNC_ROOT.
+ if (compiler->funCurrentFunc()->funKind != FUNC_ROOT)
+ {
+ return;
+ }
+
+#endif // FEATURE_EH_FUNCLETS
+
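+ // ReJIT lets the runtime patch the start of the method with a 5-byte
+ // "jmp rel32" to the re-instrumented code, so make sure the prolog is at
+ // least 5 bytes long by padding with NOPs if necessary.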
+ unsigned size = getEmitter()->emitGetPrologOffsetEstimate();
+ if (size < 5)
+ {
+ instNop(5 - size);
+ }
+#endif
+}
+
+/*****************************************************************************
+ *
+ * Reserve space for a function prolog.
+ */
+
+void CodeGen::genReserveProlog(BasicBlock* block)
+{
+ assert(block != nullptr);
+
+ JITDUMP("Reserving prolog IG for block BB%02u\n", block->bbNum);
+
+ /* Nothing is live on entry to the prolog */
+
+ getEmitter()->emitCreatePlaceholderIG(IGPT_PROLOG, block, VarSetOps::MakeEmpty(compiler), 0, 0, false);
+}
+
+/*****************************************************************************
+ *
+ * Reserve space for a function epilog.
+ */
+
+void CodeGen::genReserveEpilog(BasicBlock* block)
+{
+ VARSET_TP VARSET_INIT(compiler, gcrefVarsArg, getEmitter()->emitThisGCrefVars);
+ regMaskTP gcrefRegsArg = gcInfo.gcRegGCrefSetCur;
+ regMaskTP byrefRegsArg = gcInfo.gcRegByrefSetCur;
+
+ /* The return value is special-cased: make sure it goes live for the epilog */
+
+ bool jmpEpilog = ((block->bbFlags & BBF_HAS_JMP) != 0);
+
+ if (genFullPtrRegMap && !jmpEpilog)
+ {
+ if (varTypeIsGC(compiler->info.compRetNativeType))
+ {
+ noway_assert(genTypeStSz(compiler->info.compRetNativeType) == genTypeStSz(TYP_I_IMPL));
+
+ gcInfo.gcMarkRegPtrVal(REG_INTRET, compiler->info.compRetNativeType);
+
+ switch (compiler->info.compRetNativeType)
+ {
+ case TYP_REF:
+ gcrefRegsArg |= RBM_INTRET;
+ break;
+ case TYP_BYREF:
+ byrefRegsArg |= RBM_INTRET;
+ break;
+ default:
+ break;
+ }
+ }
+ }
+
+ JITDUMP("Reserving epilog IG for block BB%02u\n", block->bbNum);
+
+ assert(block != nullptr);
+ bool last = (block->bbNext == nullptr);
+ getEmitter()->emitCreatePlaceholderIG(IGPT_EPILOG, block, gcrefVarsArg, gcrefRegsArg, byrefRegsArg, last);
+}
+
+#if FEATURE_EH_FUNCLETS
+
+/*****************************************************************************
+ *
+ * Reserve space for a funclet prolog.
+ */
+
+void CodeGen::genReserveFuncletProlog(BasicBlock* block)
+{
+ assert(block != nullptr);
+
+ /* Currently, no registers are live on entry to the prolog, except maybe
+ the exception object. There might be some live stack vars, but they
+ cannot be accessed until after the frame pointer is re-established.
+ In order to potentially prevent emitting a death before the prolog
+ and a birth right after it, we just report it as live during the
+ prolog, and rely on the prolog being non-interruptible. Trust
+ genCodeForBBlist to correctly initialize all the sets.
+
+ We might need to relax these asserts if the VM ever starts
+ restoring any registers; then we could have live-in reg vars...
+ */
+
+ noway_assert((gcInfo.gcRegGCrefSetCur & RBM_EXCEPTION_OBJECT) == gcInfo.gcRegGCrefSetCur);
+ noway_assert(gcInfo.gcRegByrefSetCur == 0);
+
+ JITDUMP("Reserving funclet prolog IG for block BB%02u\n", block->bbNum);
+
+ getEmitter()->emitCreatePlaceholderIG(IGPT_FUNCLET_PROLOG, block, gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur,
+ gcInfo.gcRegByrefSetCur, false);
+}
+
+/*****************************************************************************
+ *
+ * Reserve space for a funclet epilog.
+ */
+
+void CodeGen::genReserveFuncletEpilog(BasicBlock* block)
+{
+ assert(block != nullptr);
+
+ JITDUMP("Reserving funclet epilog IG for block BB%02u\n", block->bbNum);
+
+ bool last = (block->bbNext == nullptr);
+ getEmitter()->emitCreatePlaceholderIG(IGPT_FUNCLET_EPILOG, block, gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur,
+ gcInfo.gcRegByrefSetCur, last);
+}
+
+#endif // FEATURE_EH_FUNCLETS
+
+/*****************************************************************************
+ * Finalize the frame size and offset assignments.
+ *
+ * No changes can be made to the modified register set after this, since that can affect how many
+ * callee-saved registers get saved.
+ */
+void CodeGen::genFinalizeFrame()
+{
+ JITDUMP("Finalizing stack frame\n");
+
+#ifndef LEGACY_BACKEND
+ // Initializations need to happen based on the var locations at the start
+ // of the first basic block, so load those up. In particular, the determination
+ // of whether or not to use block init in the prolog is dependent on the variable
+ // locations on entry to the function.
+ compiler->m_pLinearScan->recordVarLocationsAtStartOfBB(compiler->fgFirstBB);
+#endif // !LEGACY_BACKEND
+
+ genCheckUseBlockInit();
+
+ // Set various registers as "modified" for special code generation scenarios: Edit & Continue, P/Invoke calls, etc.
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#if defined(_TARGET_X86_)
+
+ if (compiler->compTailCallUsed)
+ {
+ // If we are generating a helper-based tailcall, we've set the tailcall helper "flags"
+ // argument to "1", indicating to the tailcall helper that we've saved the callee-saved
+ // registers (ebx, esi, edi). So, we need to make sure all the callee-saved registers
+ // actually get saved.
+
+ regSet.rsSetRegsModified(RBM_INT_CALLEE_SAVED);
+ }
+#endif // _TARGET_X86_
+
+#if defined(_TARGET_ARMARCH_)
+ // We need to determine whether we will change SP by more than a specific amount, to determine if we want to use
+ // a loop to touch stack pages; that loop will require multiple registers. See genAllocLclFrame() for details.
+ if (compiler->compLclFrameSize >= compiler->getVeryLargeFrameSize())
+ {
+ regSet.rsSetRegsModified(VERY_LARGE_FRAME_SIZE_REG_MASK);
+ }
+#endif // defined(_TARGET_ARMARCH_)
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Modified regs: ");
+ dspRegMask(regSet.rsGetModifiedRegsMask());
+ printf("\n");
+ }
+#endif // DEBUG
+
+ if (compiler->opts.compDbgEnC)
+ {
+ // We always save FP.
+ noway_assert(isFramePointerUsed());
+#ifdef _TARGET_AMD64_
+ // On x64 we always save exactly RBP, RSI and RDI for EnC.
+ regMaskTP okRegs = (RBM_CALLEE_TRASH | RBM_FPBASE | RBM_RSI | RBM_RDI);
+ regSet.rsSetRegsModified(RBM_RSI | RBM_RDI);
+ noway_assert((regSet.rsGetModifiedRegsMask() & ~okRegs) == 0);
+#else // !_TARGET_AMD64_
+ // On x86 we save all callee saved regs so the saved reg area size is consistent
+ regSet.rsSetRegsModified(RBM_INT_CALLEE_SAVED & ~RBM_FPBASE);
+#endif // !_TARGET_AMD64_
+ }
+
+ /* If we have any pinvoke calls, we might potentially trash everything */
+ if (compiler->info.compCallUnmanaged)
+ {
+ noway_assert(isFramePointerUsed()); // Setup of Pinvoke frame currently requires an EBP style frame
+ regSet.rsSetRegsModified(RBM_INT_CALLEE_SAVED & ~RBM_FPBASE);
+ }
+
+ /* Count how many callee-saved registers will actually be saved (pushed) */
+
+ // EBP cannot be (directly) modified for EBP frame and double-aligned frames
+ noway_assert(!doubleAlignOrFramePointerUsed() || !regSet.rsRegsModified(RBM_FPBASE));
+
+#if ETW_EBP_FRAMED
+ // EBP cannot be (directly) modified
+ noway_assert(!regSet.rsRegsModified(RBM_FPBASE));
+#endif
+
+ regMaskTP maskCalleeRegsPushed = regSet.rsGetModifiedRegsMask() & RBM_CALLEE_SAVED;
+
+#ifdef _TARGET_ARMARCH_
+ if (isFramePointerUsed())
+ {
+ // For a FP based frame we have to push/pop the FP register
+ //
+ maskCalleeRegsPushed |= RBM_FPBASE;
+
+ // This assert checks that we are not using REG_FP
+ // as both the frame pointer and as a codegen register
+ //
+ assert(!regSet.rsRegsModified(RBM_FPBASE));
+ }
+
+ // We always push LR; see genPushCalleeSavedRegisters()
+ //
+ maskCalleeRegsPushed |= RBM_LR;
+
+#if defined(_TARGET_ARM_)
+ // TODO-ARM64-Bug?: enable some variant of this for FP on ARM64?
+ regMaskTP maskPushRegsFloat = maskCalleeRegsPushed & RBM_ALLFLOAT;
+ regMaskTP maskPushRegsInt = maskCalleeRegsPushed & ~maskPushRegsFloat;
+
+ if ((maskPushRegsFloat != RBM_NONE) ||
+ (compiler->opts.MinOpts() && (regSet.rsMaskResvd & maskCalleeRegsPushed & RBM_OPT_RSVD)))
+ {
+ // Here we try to keep stack double-aligned before the vpush
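+ // For example, if {r4,r5,r6,r11,lr} (five registers, an odd count) were
+ // being pushed together with d8, we would add r7 to the push list so the
+ // stack stays 8-byte aligned at the vpush.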
+ if ((genCountBits(regSet.rsMaskPreSpillRegs(true) | maskPushRegsInt) % 2) != 0)
+ {
+ regNumber extraPushedReg = REG_R4;
+ while (maskPushRegsInt & genRegMask(extraPushedReg))
+ {
+ extraPushedReg = REG_NEXT(extraPushedReg);
+ }
+ if (extraPushedReg < REG_R11)
+ {
+ maskPushRegsInt |= genRegMask(extraPushedReg);
+ regSet.rsSetRegsModified(genRegMask(extraPushedReg));
+ }
+ }
+ maskCalleeRegsPushed = maskPushRegsInt | maskPushRegsFloat;
+ }
+
+ // We currently only expect to push/pop consecutive FP registers,
+ // and these have to be double-sized registers as well.
+ // Here we will ensure that maskPushRegsFloat obeys these requirements.
+ //
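+ // For example, if only d8 and d10 were modified, the loop below grows the
+ // mask to the contiguous run {d8,d9,d10}, so that a single "vpush {d8-d10}" /
+ // "vpop {d8-d10}" pair can be used.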
+ if (maskPushRegsFloat != RBM_NONE)
+ {
+ regMaskTP contiguousMask = genRegMaskFloat(REG_F16, TYP_DOUBLE);
+ while (maskPushRegsFloat > contiguousMask)
+ {
+ contiguousMask <<= 2;
+ contiguousMask |= genRegMaskFloat(REG_F16, TYP_DOUBLE);
+ }
+ if (maskPushRegsFloat != contiguousMask)
+ {
+ regMaskTP maskExtraRegs = contiguousMask - maskPushRegsFloat;
+ maskPushRegsFloat |= maskExtraRegs;
+ regSet.rsSetRegsModified(maskExtraRegs);
+ maskCalleeRegsPushed |= maskExtraRegs;
+ }
+ }
+#endif // _TARGET_ARM_
+#endif // _TARGET_ARMARCH_
+
+#if defined(_TARGET_XARCH_) && !FEATURE_STACK_FP_X87
+ // Compute the count of callee saved float regs saved on stack.
+ // On Amd64 we push only integer regs. Callee saved float (xmm6-xmm15)
+ // regs are stack allocated and preserved in their stack locations.
+ compiler->compCalleeFPRegsSavedMask = maskCalleeRegsPushed & RBM_FLT_CALLEE_SAVED;
+ maskCalleeRegsPushed &= ~RBM_FLT_CALLEE_SAVED;
+#endif // defined(_TARGET_XARCH_) && !FEATURE_STACK_FP_X87
+
+ compiler->compCalleeRegsPushed = genCountBits(maskCalleeRegsPushed);
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Callee-saved registers pushed: %d ", compiler->compCalleeRegsPushed);
+ dspRegMask(maskCalleeRegsPushed);
+ printf("\n");
+ }
+#endif // DEBUG
+
+ /* Assign the final offsets to things living on the stack frame */
+
+ compiler->lvaAssignFrameOffsets(Compiler::FINAL_FRAME_LAYOUT);
+
+ /* We want to make sure that the prolog size calculated here is accurate
+ (that is, instructions will not shrink because of conservative stack
+ frame approximations). We do this by filling in the correct size here,
+ where we have committed to the final numbers for the frame offsets.
+ This ensures that the prolog size is always correct.
+ */
+ getEmitter()->emitMaxTmpSize = compiler->tmpSize;
+
+#ifdef DEBUG
+ if (compiler->opts.dspCode || compiler->opts.disAsm || compiler->opts.disAsm2 || verbose)
+ {
+ compiler->lvaTableDump();
+ }
+#endif
+}
+
+//------------------------------------------------------------------------
+// genEstablishFramePointer: Set up the frame pointer by adding an offset to the stack pointer.
+//
+// Arguments:
+// delta - the offset to add to the current stack pointer to establish the frame pointer
+// reportUnwindData - true if establishing the frame pointer should be reported in the OS unwind data.
+
+void CodeGen::genEstablishFramePointer(int delta, bool reportUnwindData)
+{
+ assert(compiler->compGeneratingProlog);
+
+#if defined(_TARGET_XARCH_)
+
+ if (delta == 0)
+ {
+ getEmitter()->emitIns_R_R(INS_mov, EA_PTRSIZE, REG_FPBASE, REG_SPBASE);
+ psiMoveESPtoEBP();
+ }
+ else
+ {
+ getEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, REG_FPBASE, REG_SPBASE, delta);
+ // We don't update prolog scope info (there is no function to handle lea), but that is currently dead code
+ // anyway.
+ }
+
+ if (reportUnwindData)
+ {
+ compiler->unwindSetFrameReg(REG_FPBASE, delta);
+ }
+
+#elif defined(_TARGET_ARM_)
+
+ assert(arm_Valid_Imm_For_Add_SP(delta));
+ getEmitter()->emitIns_R_R_I(INS_add, EA_PTRSIZE, REG_FPBASE, REG_SPBASE, delta);
+
+ if (reportUnwindData)
+ {
+ compiler->unwindPadding();
+ }
+
+#else
+ NYI("establish frame pointer");
+#endif
+}
+
+/*****************************************************************************
+ *
+ * Generates code for a function prolog.
+ *
+ * NOTE REGARDING CHANGES THAT IMPACT THE DEBUGGER:
+ *
+ * The debugger relies on decoding ARM instructions to be able to successfully step through code. It does not
+ * implement decoding all ARM instructions. It only implements decoding the instructions which the JIT emits, and
+ * only instructions which result in control not going to the next instruction. Basically, any time execution would
+ * not continue at the next instruction (such as B, BL, BX, BLX, POP{pc}, etc.), the debugger has to be able to
+ * decode that instruction. If any of this is changed on ARM, the debugger team needs to be notified so that it
+ * can ensure stepping isn't broken. This is also a requirement for x86 and amd64.
+ *
+ * If any changes are made in the prolog, epilog, calls, returns, and branches, it is a good idea to notify the
+ * debugger team to ensure that stepping still works.
+ *
+ * ARM stepping code is here: debug\ee\arm\armwalker.cpp, vm\arm\armsinglestepper.cpp.
+ */
+
+#ifdef _PREFAST_
+#pragma warning(push)
+#pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
+#endif
+void CodeGen::genFnProlog()
+{
+ ScopedSetVariable<bool> _setGeneratingProlog(&compiler->compGeneratingProlog, true);
+
+ compiler->funSetCurrentFunc(0);
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("*************** In genFnProlog()\n");
+ }
+#endif
+
+#ifdef DEBUG
+ genInterruptibleUsed = true;
+#endif
+
+#ifdef LEGACY_BACKEND
+ genFinalizeFrame();
+#endif // LEGACY_BACKEND
+
+ assert(compiler->lvaDoneFrameLayout == Compiler::FINAL_FRAME_LAYOUT);
+
+ /* Ready to start on the prolog proper */
+
+ getEmitter()->emitBegProlog();
+ compiler->unwindBegProlog();
+
+#ifdef DEBUGGING_SUPPORT
+ // Do this so we can put the prolog instruction group ahead of
+ // other instruction groups
+ genIPmappingAddToFront((IL_OFFSETX)ICorDebugInfo::PROLOG);
+#endif // DEBUGGING_SUPPORT
+
+#ifdef DEBUG
+ if (compiler->opts.dspCode)
+ {
+ printf("\n__prolog:\n");
+ }
+#endif
+
+#ifdef DEBUGGING_SUPPORT
+ if (compiler->opts.compScopeInfo && (compiler->info.compVarScopesCount > 0))
+ {
+ // Create new scopes for the method-parameters for the prolog-block.
+ psiBegProlog();
+ }
+#endif
+
+#ifdef DEBUG
+
+ if (compiler->compJitHaltMethod())
+ {
+ /* put a nop first because the debugger and other tools are likely to
+ put an int3 at the beginning and we don't want to confuse them */
+
+ instGen(INS_nop);
+ instGen(INS_BREAKPOINT);
+
+#ifdef _TARGET_ARMARCH_
+ // Avoid asserts in the unwind info because these instructions aren't accounted for.
+ compiler->unwindPadding();
+#endif // _TARGET_ARMARCH_
+ }
+#endif // DEBUG
+
+#if FEATURE_EH_FUNCLETS && defined(DEBUG)
+
+ // We cannot force 0-initialization of the PSPSym
+ // as it will overwrite the real value
+ if (compiler->lvaPSPSym != BAD_VAR_NUM)
+ {
+ LclVarDsc* varDsc = &compiler->lvaTable[compiler->lvaPSPSym];
+ assert(!varDsc->lvMustInit);
+ }
+
+#endif // FEATURE_EH_FUNCLETS && DEBUG
+
+ /*-------------------------------------------------------------------------
+ *
+ * Record the stack frame ranges that will cover all of the tracked
+ * and untracked pointer variables.
+ * Also find which registers will need to be zero-initialized.
+ *
+ * 'initRegs': - Generally, enregistered variables should not need to be
+ * zero-inited. They only need to be zero-inited when they
+ * have a possibly uninitialized read on some control
+ * flow path. Apparently some of the IL_STUBs that we
+ * generate have this property.
+ */
+
+ int untrLclLo = +INT_MAX;
+ int untrLclHi = -INT_MAX;
+ // 'hasUntrLcl' is true if there are any stack locals which must be init'ed.
+ // Note that they may be tracked, but simply not allocated to a register.
+ bool hasUntrLcl = false;
+
+ int GCrefLo = +INT_MAX;
+ int GCrefHi = -INT_MAX;
+ bool hasGCRef = false;
+
+ regMaskTP initRegs = RBM_NONE; // Registers which must be init'ed.
+ regMaskTP initFltRegs = RBM_NONE; // FP registers which must be init'ed.
+ regMaskTP initDblRegs = RBM_NONE;
+
+ unsigned varNum;
+ LclVarDsc* varDsc;
+
+ for (varNum = 0, varDsc = compiler->lvaTable; varNum < compiler->lvaCount; varNum++, varDsc++)
+ {
+ if (varDsc->lvIsParam && !varDsc->lvIsRegArg)
+ {
+ continue;
+ }
+
+ if (!varDsc->lvIsInReg() && !varDsc->lvOnFrame)
+ {
+ noway_assert(varDsc->lvRefCnt == 0);
+ continue;
+ }
+
+ signed int loOffs = varDsc->lvStkOffs;
+ signed int hiOffs = varDsc->lvStkOffs + compiler->lvaLclSize(varNum);
+
+ /* We need to know the offset range of tracked stack GC refs */
+ /* We assume that the GC reference can be anywhere in the TYP_STRUCT */
+
+ if (compiler->lvaTypeIsGC(varNum) && varDsc->lvTrackedNonStruct() && varDsc->lvOnFrame)
+ {
+ // Fields of dependently promoted structs (PROMOTION_TYPE_DEPENDENT) should have been
+ // taken care of by the parent struct.
+ if (!compiler->lvaIsFieldOfDependentlyPromotedStruct(varDsc))
+ {
+ hasGCRef = true;
+
+ if (loOffs < GCrefLo)
+ {
+ GCrefLo = loOffs;
+ }
+ if (hiOffs > GCrefHi)
+ {
+ GCrefHi = hiOffs;
+ }
+ }
+ }
+
+ /* For lvMustInit vars, gather pertinent info */
+
+ if (!varDsc->lvMustInit)
+ {
+ continue;
+ }
+
+ if (varDsc->lvIsInReg())
+ {
+ regMaskTP regMask = genRegMask(varDsc->lvRegNum);
+ if (!varDsc->IsFloatRegType())
+ {
+ initRegs |= regMask;
+
+ if (varTypeIsMultiReg(varDsc))
+ {
+ if (varDsc->lvOtherReg != REG_STK)
+ {
+ initRegs |= genRegMask(varDsc->lvOtherReg);
+ }
+ else
+ {
+ /* Upper DWORD is on the stack, and needs to be inited */
+
+ loOffs += sizeof(int);
+ goto INIT_STK;
+ }
+ }
+ }
+#if !FEATURE_STACK_FP_X87
+ else if (varDsc->TypeGet() == TYP_DOUBLE)
+ {
+ initDblRegs |= regMask;
+ }
+ else
+ {
+ initFltRegs |= regMask;
+ }
+#endif // !FEATURE_STACK_FP_X87
+ }
+ else
+ {
+ INIT_STK:
+
+ hasUntrLcl = true;
+
+ if (loOffs < untrLclLo)
+ {
+ untrLclLo = loOffs;
+ }
+ if (hiOffs > untrLclHi)
+ {
+ untrLclHi = hiOffs;
+ }
+ }
+ }
+
+ /* Don't forget about spill temps that hold pointers */
+
+ if (!TRACK_GC_TEMP_LIFETIMES)
+ {
+ assert(compiler->tmpAllFree());
+ for (TempDsc* tempThis = compiler->tmpListBeg(); tempThis != nullptr; tempThis = compiler->tmpListNxt(tempThis))
+ {
+ if (!varTypeIsGC(tempThis->tdTempType()))
+ {
+ continue;
+ }
+
+ signed int loOffs = tempThis->tdTempOffs();
+ signed int hiOffs = loOffs + TARGET_POINTER_SIZE;
+
+ // If a frame pointer is used, then due to frame pointer chaining it will point to the stored value of the
+ // previous frame pointer. Thus, loOffs can't be zero.
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#if !defined(_TARGET_AMD64_)
+ // However, on amd64 there is no requirement to chain frame pointers.
+
+ noway_assert(!isFramePointerUsed() || loOffs != 0);
+#endif // !defined(_TARGET_AMD64_)
+ // printf(" Untracked tmp at [EBP-%04X]\n", -loOffs);
+
+ hasUntrLcl = true;
+
+ if (loOffs < untrLclLo)
+ {
+ untrLclLo = loOffs;
+ }
+ if (hiOffs > untrLclHi)
+ {
+ untrLclHi = hiOffs;
+ }
+ }
+ }
+
+ assert((genInitStkLclCnt > 0) == hasUntrLcl);
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ if (genInitStkLclCnt > 0)
+ {
+ printf("Found %u lvMustInit stk vars, frame offsets %d through %d\n", genInitStkLclCnt, -untrLclLo,
+ -untrLclHi);
+ }
+ }
+#endif
+
+#ifdef _TARGET_ARM_
+ // On the ARM we will spill any incoming struct args in the first instruction in the prolog
+ // Ditto for all enregistered user arguments in a varargs method.
+ // These registers will be available to use for the initReg. We just remove
+ // all of these registers from the rsCalleeRegArgMaskLiveIn.
+ //
+ intRegState.rsCalleeRegArgMaskLiveIn &= ~regSet.rsMaskPreSpillRegs(false);
+#endif
+
+ /* Choose the register to use for zero initialization */
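+ // E.g. on x86, if both EBX and ESI appear in initRegs and neither is excluded,
+ // we would pick EBX (the lower register number); the zero written into it while
+ // initializing the frame then doubles as that variable's must-init zeroing.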
+
+ regNumber initReg = REG_SCRATCH; // Unless we find a better register below
+ bool initRegZeroed = false;
+ regMaskTP excludeMask = intRegState.rsCalleeRegArgMaskLiveIn;
+ regMaskTP tempMask;
+
+ // We should not use the special PINVOKE registers as the initReg
+ // since they are trashed by the jithelper call to setup the PINVOKE frame
+ if (compiler->info.compCallUnmanaged)
+ {
+ excludeMask |= RBM_PINVOKE_FRAME;
+
+ assert((!compiler->opts.ShouldUsePInvokeHelpers()) || (compiler->info.compLvFrameListRoot == BAD_VAR_NUM));
+ if (!compiler->opts.ShouldUsePInvokeHelpers())
+ {
+ noway_assert(compiler->info.compLvFrameListRoot < compiler->lvaCount);
+
+ excludeMask |= (RBM_PINVOKE_TCB | RBM_PINVOKE_SCRATCH);
+
+ // We also must exclude the register used by compLvFrameListRoot when it is enregistered
+ //
+ LclVarDsc* varDsc = &compiler->lvaTable[compiler->info.compLvFrameListRoot];
+ if (varDsc->lvRegister)
+ {
+ excludeMask |= genRegMask(varDsc->lvRegNum);
+ }
+ }
+ }
+
+#ifdef _TARGET_ARM_
+ // If we have a variable sized frame (compLocallocUsed is true)
+ // then using REG_SAVED_LOCALLOC_SP in the prolog is not allowed
+ if (compiler->compLocallocUsed)
+ {
+ excludeMask |= RBM_SAVED_LOCALLOC_SP;
+ }
+#endif // _TARGET_ARM_
+
+#if defined(_TARGET_XARCH_)
+ if (compiler->compLclFrameSize >= compiler->getVeryLargeFrameSize())
+ {
+ // We currently must use REG_EAX on x86 here
+ // because the loop's backwards branch depends upon the size of EAX encodings
+ assert(initReg == REG_EAX);
+ }
+ else
+#endif // _TARGET_XARCH_
+ {
+ tempMask = initRegs & ~excludeMask & ~regSet.rsMaskResvd;
+
+ if (tempMask != RBM_NONE)
+ {
+ // We will use one of the registers that we were planning to zero init anyway.
+ // We pick the lowest register number.
+ tempMask = genFindLowestBit(tempMask);
+ initReg = genRegNumFromMask(tempMask);
+ }
+ // Next we prefer to use one of the unused argument registers.
+ // If they aren't available we use one of the caller-saved integer registers.
+ else
+ {
+ tempMask = regSet.rsGetModifiedRegsMask() & RBM_ALLINT & ~excludeMask & ~regSet.rsMaskResvd;
+ if (tempMask != RBM_NONE)
+ {
+ // We pick the lowest register number
+ tempMask = genFindLowestBit(tempMask);
+ initReg = genRegNumFromMask(tempMask);
+ }
+ }
+ }
+
+ noway_assert(!compiler->info.compCallUnmanaged || (initReg != REG_PINVOKE_FRAME));
+
+#if defined(_TARGET_AMD64_)
+ // If we are a varargs call, in order to set up the arguments correctly this
+ // must be done in a 2 step process. As per the x64 ABI:
+ // a) The caller sets up the argument shadow space (just above the return
+ // address, 4 pointer-sized slots).
+ // b) The callee is responsible for homing the arguments in the shadow space
+ // provided by the caller.
+ // This way, the varargs iterator will be able to retrieve the
+ // call arguments properly since both the arg regs and the stack allocated
+ // args will be contiguous.
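+ // A sketch of what spillIntArgRegsToShadowSlots() emits here (SP still holds
+ // its value at method entry):
+ //     mov qword ptr [rsp+08H], rcx
+ //     mov qword ptr [rsp+10H], rdx
+ //     mov qword ptr [rsp+18H], r8
+ //     mov qword ptr [rsp+20H], r9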
+ if (compiler->info.compIsVarArgs)
+ {
+ getEmitter()->spillIntArgRegsToShadowSlots();
+ }
+
+#endif // _TARGET_AMD64_
+
+#ifdef _TARGET_ARM_
+ /*-------------------------------------------------------------------------
+ *
+ * Now start emitting the part of the prolog which sets up the frame
+ */
+
+ if (regSet.rsMaskPreSpillRegs(true) != RBM_NONE)
+ {
+ inst_IV(INS_push, (int)regSet.rsMaskPreSpillRegs(true));
+ compiler->unwindPushMaskInt(regSet.rsMaskPreSpillRegs(true));
+ }
+#endif // _TARGET_ARM_
+
+#ifdef _TARGET_XARCH_
+ if (doubleAlignOrFramePointerUsed())
+ {
+ inst_RV(INS_push, REG_FPBASE, TYP_REF);
+ compiler->unwindPush(REG_FPBASE);
+ psiAdjustStackLevel(REGSIZE_BYTES);
+
+#ifndef _TARGET_AMD64_ // On AMD64, establish the frame pointer after the "sub rsp"
+ genEstablishFramePointer(0, /*reportUnwindData*/ true);
+#endif // !_TARGET_AMD64_
+
+#if DOUBLE_ALIGN
+ if (compiler->genDoubleAlign())
+ {
+ noway_assert(isFramePointerUsed() == false);
+ noway_assert(!regSet.rsRegsModified(RBM_FPBASE)); /* Trashing EBP is out. */
+
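+ // and esp, -8 (i.e. 0xFFFFFFF8): round ESP down to an 8-byte boundary
+ // so the locals allocated below it can be kept qword-aligned.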
+ inst_RV_IV(INS_AND, REG_SPBASE, -8, EA_PTRSIZE);
+ }
+#endif // DOUBLE_ALIGN
+ }
+#endif // _TARGET_XARCH_
+
+#ifdef _TARGET_ARM64_
+ // Probe large frames now, if necessary, since genPushCalleeSavedRegisters() will allocate the frame.
+ genAllocLclFrame(compiler->compLclFrameSize, initReg, &initRegZeroed, intRegState.rsCalleeRegArgMaskLiveIn);
+ genPushCalleeSavedRegisters(initReg, &initRegZeroed);
+#else // !_TARGET_ARM64_
+ genPushCalleeSavedRegisters();
+#endif // !_TARGET_ARM64_
+
+#ifdef _TARGET_ARM_
+ bool needToEstablishFP = false;
+ int afterLclFrameSPtoFPdelta = 0;
+ if (doubleAlignOrFramePointerUsed())
+ {
+ needToEstablishFP = true;
+
+ // If the local frame is small enough, we establish the frame pointer after the OS-reported prolog.
+ // This makes the prolog and epilog match, giving us smaller unwind data. If the frame size is
+ // too big, we go ahead and do it here.
+
+ int SPtoFPdelta = (compiler->compCalleeRegsPushed - 2) * REGSIZE_BYTES;
+ afterLclFrameSPtoFPdelta = SPtoFPdelta + compiler->compLclFrameSize;
+ if (!arm_Valid_Imm_For_Add_SP(afterLclFrameSPtoFPdelta))
+ {
+ // Oh well, it looks too big. Go ahead and establish the frame pointer here.
+ genEstablishFramePointer(SPtoFPdelta, /*reportUnwindData*/ true);
+ needToEstablishFP = false;
+ }
+ }
+#endif // _TARGET_ARM_
+
+ //-------------------------------------------------------------------------
+ //
+ // Subtract the local frame size from SP.
+ //
+ //-------------------------------------------------------------------------
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifndef _TARGET_ARM64_
+ regMaskTP maskStackAlloc = RBM_NONE;
+
+#ifdef _TARGET_ARM_
+ maskStackAlloc =
+ genStackAllocRegisterMask(compiler->compLclFrameSize, regSet.rsGetModifiedRegsMask() & RBM_FLT_CALLEE_SAVED);
+#endif // _TARGET_ARM_
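+ // On ARM, genStackAllocRegisterMask() may return a set of additional (dead)
+ // registers to push, allocating a small local frame as part of the register
+ // push itself (the "push dummyReg" case in the frame sketches above); when it
+ // does, the explicit frame allocation below is skipped.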
+
+ if (maskStackAlloc == RBM_NONE)
+ {
+ genAllocLclFrame(compiler->compLclFrameSize, initReg, &initRegZeroed, intRegState.rsCalleeRegArgMaskLiveIn);
+ }
+#endif // !_TARGET_ARM64_
+
+//-------------------------------------------------------------------------
+
+#ifdef _TARGET_ARM_
+ if (compiler->compLocallocUsed)
+ {
+ getEmitter()->emitIns_R_R(INS_mov, EA_4BYTE, REG_SAVED_LOCALLOC_SP, REG_SPBASE);
+ regTracker.rsTrackRegTrash(REG_SAVED_LOCALLOC_SP);
+ compiler->unwindSetFrameReg(REG_SAVED_LOCALLOC_SP, 0);
+ }
+#endif // _TARGET_ARM_
+
+#if defined(_TARGET_XARCH_) && !FEATURE_STACK_FP_X87
+ // Preserve callee saved float regs to stack.
+ genPreserveCalleeSavedFltRegs(compiler->compLclFrameSize);
+#endif // defined(_TARGET_XARCH_) && !FEATURE_STACK_FP_X87
+
+#ifdef _TARGET_AMD64_
+ // Establish the AMD64 frame pointer after the OS-reported prolog.
+ if (doubleAlignOrFramePointerUsed())
+ {
+ bool reportUnwindData = compiler->compLocallocUsed || compiler->opts.compDbgEnC;
+ genEstablishFramePointer(compiler->codeGen->genSPtoFPdelta(), reportUnwindData);
+ }
+#endif //_TARGET_AMD64_
+
+//-------------------------------------------------------------------------
+//
+// This is the end of the OS-reported prolog for purposes of unwinding
+//
+//-------------------------------------------------------------------------
+
+#ifdef _TARGET_ARM_
+ if (needToEstablishFP)
+ {
+ genEstablishFramePointer(afterLclFrameSPtoFPdelta, /*reportUnwindData*/ false);
+ needToEstablishFP = false; // nobody uses this later, but set it anyway, just to be explicit
+ }
+#endif // _TARGET_ARM_
+
+ if (compiler->info.compPublishStubParam)
+ {
+#if CPU_LOAD_STORE_ARCH
+ getEmitter()->emitIns_R_R_I(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_SECRET_STUB_PARAM, genFramePointerReg(),
+ compiler->lvaTable[compiler->lvaStubArgumentVar].lvStkOffs);
+#else
+ // mov [lvaStubArgumentVar], EAX
+ getEmitter()->emitIns_AR_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_SECRET_STUB_PARAM, genFramePointerReg(),
+ compiler->lvaTable[compiler->lvaStubArgumentVar].lvStkOffs);
+#endif
+ assert(intRegState.rsCalleeRegArgMaskLiveIn & RBM_SECRET_STUB_PARAM);
+
+ // It's no longer live; clear it out so it can be used after this in the prolog
+ intRegState.rsCalleeRegArgMaskLiveIn &= ~RBM_SECRET_STUB_PARAM;
+ }
+
+#if STACK_PROBES
+ // We could probably fold this into the loop for the FrameSize >= 0x3000 probing
+ // when creating the stack frame. Don't think it's worth it, though.
+ if (genNeedPrologStackProbe)
+ {
+ //
+ // Can't have a call until we have enough padding for rejit
+ //
+ genPrologPadForReJit();
+ noway_assert(compiler->opts.compNeedStackProbes);
+ genGenerateStackProbe();
+ compiler->compStackProbePrologDone = true;
+ }
+#endif // STACK_PROBES
+
+ //
+ // Zero out the frame as needed
+ //
+
+ genZeroInitFrame(untrLclHi, untrLclLo, initReg, &initRegZeroed);
+
+#if FEATURE_EH_FUNCLETS
+
+ genSetPSPSym(initReg, &initRegZeroed);
+
+#else // !FEATURE_EH_FUNCLETS
+
+ // When compInitMem is true, genZeroInitFrame will zero out the shadow SP slots
+ if (compiler->ehNeedsShadowSPslots() && !compiler->info.compInitMem)
+ {
+ /*
+ // size/speed option?
+ getEmitter()->emitIns_I_ARR(INS_mov, EA_PTRSIZE, 0,
+ REG_EBP, REG_NA, -compiler->lvaShadowSPfirstOffs);
+ */
+
+ // The last slot is reserved for ICodeManager::FixContext(ppEndRegion)
+ unsigned filterEndOffsetSlotOffs = compiler->lvaLclSize(compiler->lvaShadowSPslotsVar) - (sizeof(void*));
+
+ // Zero out the slot for nesting level 0
+ unsigned firstSlotOffs = filterEndOffsetSlotOffs - (sizeof(void*));
+
+ if (!initRegZeroed)
+ {
+ instGen_Set_Reg_To_Zero(EA_PTRSIZE, initReg);
+ initRegZeroed = true;
+ }
+
+ getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, initReg, compiler->lvaShadowSPslotsVar,
+ firstSlotOffs);
+ }
+
+#endif // !FEATURE_EH_FUNCLETS
+
+ genReportGenericContextArg(initReg, &initRegZeroed);
+
+#if defined(LEGACY_BACKEND) // in RyuJIT backend this has already been expanded into trees
+ if (compiler->info.compCallUnmanaged)
+ {
+ getEmitter()->emitDisableRandomNops();
+ initRegs = genPInvokeMethodProlog(initRegs);
+ getEmitter()->emitEnableRandomNops();
+ }
+#endif // defined(LEGACY_BACKEND)
+
+ // The local variable representing the security object must be on the stack frame
+ // and must be 0 initialized.
+ noway_assert((compiler->lvaSecurityObject == BAD_VAR_NUM) ||
+ (compiler->lvaTable[compiler->lvaSecurityObject].lvOnFrame &&
+ compiler->lvaTable[compiler->lvaSecurityObject].lvMustInit));
+
+ // Initialize any "hidden" slots/locals
+
+ if (compiler->compLocallocUsed)
+ {
+ noway_assert(compiler->lvaLocAllocSPvar != BAD_VAR_NUM);
+#ifdef _TARGET_ARM64_
+ getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_FPBASE, compiler->lvaLocAllocSPvar, 0);
+#else
+ getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_SPBASE, compiler->lvaLocAllocSPvar, 0);
+#endif
+ }
+
+ // Set up the GS security cookie
+
+ genSetGSSecurityCookie(initReg, &initRegZeroed);
+
+#ifdef PROFILING_SUPPORTED
+
+ // Insert a function entry callback for profiling, if requested.
+ genProfilingEnterCallback(initReg, &initRegZeroed);
+
+#endif // PROFILING_SUPPORTED
+
+ if (!genInterruptible)
+ {
+ /*-------------------------------------------------------------------------
+ *
+ * The 'real' prolog ends here for non-interruptible methods.
+ * For fully-interruptible methods, we extend the prolog so that
+ * we do not need to track GC information while shuffling the
+ * arguments.
+ *
+ * Make sure there's enough padding for ReJIT.
+ *
+ */
+ genPrologPadForReJit();
+ getEmitter()->emitMarkPrologEnd();
+ }
+
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) && defined(FEATURE_SIMD)
+ // The unused bits of Vector3 arguments must be cleared
+ // since the native compiler doesn't initialize the upper bits to zero.
+ //
+ // TODO-Cleanup: This logic can be implemented in
+ // genFnPrologCalleeRegArgs() for argument registers and
+ // genEnregisterIncomingStackArgs() for stack arguments.
+ genClearStackVec3ArgUpperBits();
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING && FEATURE_SIMD
+
+ /*-----------------------------------------------------------------------------
+ * Take care of register arguments first
+ */
+
+ RegState* regState;
+
+#ifndef LEGACY_BACKEND
+ // Update the arg initial register locations.
+ compiler->lvaUpdateArgsWithInitialReg();
+#endif // !LEGACY_BACKEND
+
+ FOREACH_REGISTER_FILE(regState)
+ {
+ if (regState->rsCalleeRegArgMaskLiveIn)
+ {
+ // If we need an extra register to shuffle around the incoming registers,
+ // we will use xtraReg (initReg) and set the xtraRegClobbered flag;
+ // if we don't need to use the xtraReg then this flag will stay false.
+ //
+ regNumber xtraReg;
+ bool xtraRegClobbered = false;
+
+ if (genRegMask(initReg) & RBM_ARG_REGS)
+ {
+ xtraReg = initReg;
+ }
+ else
+ {
+ xtraReg = REG_SCRATCH;
+ initRegZeroed = false;
+ }
+
+ genFnPrologCalleeRegArgs(xtraReg, &xtraRegClobbered, regState);
+
+ if (xtraRegClobbered)
+ {
+ initRegZeroed = false;
+ }
+ }
+ }
+
+ // Home the incoming arguments
+ genEnregisterIncomingStackArgs();
+
+ /* Initialize any must-init registers variables now */
+
+ if (initRegs)
+ {
+ regMaskTP regMask = 0x1;
+
+ for (regNumber reg = REG_INT_FIRST; reg <= REG_INT_LAST; reg = REG_NEXT(reg), regMask <<= 1)
+ {
+ if (regMask & initRegs)
+ {
+ // Check if we have already zeroed this register
+ if ((reg == initReg) && initRegZeroed)
+ {
+ continue;
+ }
+ else
+ {
+ instGen_Set_Reg_To_Zero(EA_PTRSIZE, reg);
+ if (reg == initReg)
+ {
+ initRegZeroed = true;
+ }
+ }
+ }
+ }
+ }
+
+#if !FEATURE_STACK_FP_X87
+ if (initFltRegs | initDblRegs)
+ {
+ // If initReg is not in initRegs then we will use REG_SCRATCH
+ if ((genRegMask(initReg) & initRegs) == 0)
+ {
+ initReg = REG_SCRATCH;
+ initRegZeroed = false;
+ }
+
+#ifdef _TARGET_ARM_
+ // This is needed only for ARM, since it can use a zero-initialized int register
+ // to initialize VFP registers.
+ if (!initRegZeroed)
+ {
+ instGen_Set_Reg_To_Zero(EA_PTRSIZE, initReg);
+ initRegZeroed = true;
+ }
+#endif // _TARGET_ARM_
+
+ genZeroInitFltRegs(initFltRegs, initDblRegs, initReg);
+ }
+#endif // !FEATURE_STACK_FP_X87
+
+#if FEATURE_STACK_FP_X87
+ //
+ // Here is where we load the enregistered floating point arguments
+ // and locals onto the x86-FPU.
+ //
+ genCodeForPrologStackFP();
+#endif
+
+ //-----------------------------------------------------------------------------
+
+ //
+ // Increase the prolog size here only if fully interruptible.
+ // And again make sure it's big enough for ReJIT
+ //
+
+ if (genInterruptible)
+ {
+ genPrologPadForReJit();
+ getEmitter()->emitMarkPrologEnd();
+ }
+
+#ifdef DEBUGGING_SUPPORT
+ if (compiler->opts.compScopeInfo && (compiler->info.compVarScopesCount > 0))
+ {
+ psiEndProlog();
+ }
+#endif
+
+ if (hasGCRef)
+ {
+ getEmitter()->emitSetFrameRangeGCRs(GCrefLo, GCrefHi);
+ }
+ else
+ {
+ noway_assert(GCrefLo == +INT_MAX);
+ noway_assert(GCrefHi == -INT_MAX);
+ }
+
+#ifdef DEBUG
+ if (compiler->opts.dspCode)
+ {
+ printf("\n");
+ }
+#endif
+
+#ifdef _TARGET_X86_
+ // On non-x86 the VARARG cookie does not need any special treatment.
+
+ // Load up the VARARG argument pointer register so it doesn't get clobbered.
+ // Only do this if we actually access any statically declared args
+ // (our argument pointer register has a refcount > 0).
+ unsigned argsStartVar = compiler->lvaVarargsBaseOfStkArgs;
+
+ if (compiler->info.compIsVarArgs && compiler->lvaTable[argsStartVar].lvRefCnt > 0)
+ {
+ varDsc = &compiler->lvaTable[argsStartVar];
+
+ noway_assert(compiler->info.compArgsCount > 0);
+
+ // MOV EAX, <VARARGS HANDLE>
+ getEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL), EA_PTRSIZE, REG_EAX, compiler->info.compArgsCount - 1, 0);
+ regTracker.rsTrackRegTrash(REG_EAX);
+
+ // MOV EAX, [EAX]
+ getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), EA_PTRSIZE, REG_EAX, REG_EAX, 0);
+
+ // EDX might actually be holding something here. So make sure to only use EAX for this code
+ // sequence.
+
+ LclVarDsc* lastArg = &compiler->lvaTable[compiler->info.compArgsCount - 1];
+ noway_assert(!lastArg->lvRegister);
+ signed offset = lastArg->lvStkOffs;
+ assert(offset != BAD_STK_OFFS);
+ noway_assert(lastArg->lvFramePointerBased);
+
+ // LEA EAX, &<VARARGS HANDLE> + EAX
+ getEmitter()->emitIns_R_ARR(INS_lea, EA_PTRSIZE, REG_EAX, genFramePointerReg(), REG_EAX, offset);
+
+ if (varDsc->lvRegister)
+ {
+ if (varDsc->lvRegNum != REG_EAX)
+ {
+ getEmitter()->emitIns_R_R(INS_mov, EA_PTRSIZE, varDsc->lvRegNum, REG_EAX);
+ regTracker.rsTrackRegTrash(varDsc->lvRegNum);
+ }
+ }
+ else
+ {
+ getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_EAX, argsStartVar, 0);
+ }
+ }
+
+#endif // _TARGET_X86_
+
+#ifdef DEBUG
+ if (compiler->opts.compStackCheckOnRet)
+ {
+ noway_assert(compiler->lvaReturnEspCheck != 0xCCCCCCCC &&
+ compiler->lvaTable[compiler->lvaReturnEspCheck].lvDoNotEnregister &&
+ compiler->lvaTable[compiler->lvaReturnEspCheck].lvOnFrame);
+ getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_SPBASE, compiler->lvaReturnEspCheck, 0);
+ }
+#endif
+
+ getEmitter()->emitEndProlog();
+ compiler->unwindEndProlog();
+
+ noway_assert(getEmitter()->emitMaxTmpSize == compiler->tmpSize);
+}
+#ifdef _PREFAST_
+#pragma warning(pop)
+#endif
+
+/*****************************************************************************
+ *
+ * Generates code for a function epilog.
+ *
+ * Please consult the "debugger team notification" comment in genFnProlog().
+ */
+
+#if defined(_TARGET_ARM_)
+
+void CodeGen::genFnEpilog(BasicBlock* block)
+{
+#ifdef DEBUG
+ if (verbose)
+ printf("*************** In genFnEpilog()\n");
+#endif
+
+ ScopedSetVariable<bool> _setGeneratingEpilog(&compiler->compGeneratingEpilog, true);
+
+ VarSetOps::Assign(compiler, gcInfo.gcVarPtrSetCur, getEmitter()->emitInitGCrefVars);
+ gcInfo.gcRegGCrefSetCur = getEmitter()->emitInitGCrefRegs;
+ gcInfo.gcRegByrefSetCur = getEmitter()->emitInitByrefRegs;
+
+#ifdef DEBUG
+ if (compiler->opts.dspCode)
+ printf("\n__epilog:\n");
+
+ if (verbose)
+ {
+ printf("gcVarPtrSetCur=%s ", VarSetOps::ToString(compiler, gcInfo.gcVarPtrSetCur));
+ dumpConvertedVarSet(compiler, gcInfo.gcVarPtrSetCur);
+ printf(", gcRegGCrefSetCur=");
+ printRegMaskInt(gcInfo.gcRegGCrefSetCur);
+ getEmitter()->emitDispRegSet(gcInfo.gcRegGCrefSetCur);
+ printf(", gcRegByrefSetCur=");
+ printRegMaskInt(gcInfo.gcRegByrefSetCur);
+ getEmitter()->emitDispRegSet(gcInfo.gcRegByrefSetCur);
+ printf("\n");
+ }
+#endif
+
+ bool jmpEpilog = ((block->bbFlags & BBF_HAS_JMP) != 0);
+
+ // We delay starting the unwind codes until we have an instruction which we know
+ // needs an unwind code. In particular, for large stack frames in methods without
+ // localloc, the sequence might look something like this:
+ // movw r3, 0x38e0
+ // add sp, r3
+ // pop {r4,r5,r6,r10,r11,pc}
+ // In this case, the "movw" should not be part of the unwind codes, since it will
+ // be a NOP, and it is a waste to start with a NOP. Note that calling unwindBegEpilog()
+ // also sets the current location as the beginning offset of the epilog, so every
+ // instruction afterwards needs an unwind code. In the case above, if you call
+ // unwindBegEpilog() before the "movw", then you must generate a NOP for the "movw".
+
+ bool unwindStarted = false;
+
+ // Tear down the stack frame
+
+ if (compiler->compLocallocUsed)
+ {
+ if (!unwindStarted)
+ {
+ compiler->unwindBegEpilog();
+ unwindStarted = true;
+ }
+
+ // mov R9 into SP
+ inst_RV_RV(INS_mov, REG_SP, REG_SAVED_LOCALLOC_SP);
+ compiler->unwindSetFrameReg(REG_SAVED_LOCALLOC_SP, 0);
+ }
+
+ if (jmpEpilog ||
+ genStackAllocRegisterMask(compiler->compLclFrameSize, regSet.rsGetModifiedRegsMask() & RBM_FLT_CALLEE_SAVED) ==
+ RBM_NONE)
+ {
+ genFreeLclFrame(compiler->compLclFrameSize, &unwindStarted, jmpEpilog);
+ }
+
+ if (!unwindStarted)
+ {
+ // If we haven't generated anything yet, we're certainly going to generate a "pop" next.
+ compiler->unwindBegEpilog();
+ unwindStarted = true;
+ }
+
+ genPopCalleeSavedRegisters(jmpEpilog);
+
+ if (regSet.rsMaskPreSpillRegs(true) != RBM_NONE)
+ {
+ // We'd better not have used a pop PC to return; otherwise this will be unreachable code
+ noway_assert(!genUsedPopToReturn);
+
+ int preSpillRegArgSize = genCountBits(regSet.rsMaskPreSpillRegs(true)) * REGSIZE_BYTES;
+ inst_RV_IV(INS_add, REG_SPBASE, preSpillRegArgSize, EA_PTRSIZE);
+ compiler->unwindAllocStack(preSpillRegArgSize);
+ }
+
+ if (jmpEpilog)
+ {
+ noway_assert(block->bbJumpKind == BBJ_RETURN);
+ noway_assert(block->bbTreeList);
+
+ // We'd better not have used a pop PC to return; otherwise this will be unreachable code
+ noway_assert(!genUsedPopToReturn);
+
+ /* figure out what jump we have */
+
+ GenTree* jmpNode = block->lastNode();
+ noway_assert(jmpNode->gtOper == GT_JMP);
+
+ CORINFO_METHOD_HANDLE methHnd = (CORINFO_METHOD_HANDLE)jmpNode->gtVal.gtVal1;
+
+ CORINFO_CONST_LOOKUP addrInfo;
+ void* addr;
+ regNumber indCallReg;
+ emitter::EmitCallType callType;
+
+ compiler->info.compCompHnd->getFunctionEntryPoint(methHnd, &addrInfo);
+ switch (addrInfo.accessType)
+ {
+ case IAT_VALUE:
+ if (arm_Valid_Imm_For_BL((ssize_t)addrInfo.addr))
+ {
+ // Simple direct call
+ callType = emitter::EC_FUNC_TOKEN;
+ addr = addrInfo.addr;
+ indCallReg = REG_NA;
+ break;
+ }
+
+ // otherwise the target address doesn't fit in an immediate
+ // so we have to burn a register...
+ __fallthrough;
+
+ case IAT_PVALUE:
+ // Load the address into a register, load indirect and call through a register
+ // We have to use R12 since we assume the argument registers are in use
+ callType = emitter::EC_INDIR_R;
+ indCallReg = REG_R12;
+ addr = NULL;
+ instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, indCallReg, (ssize_t)addrInfo.addr);
+ if (addrInfo.accessType == IAT_PVALUE)
+ {
+ getEmitter()->emitIns_R_R_I(INS_ldr, EA_PTRSIZE, indCallReg, indCallReg, 0);
+ regTracker.rsTrackRegTrash(indCallReg);
+ }
+ break;
+
+ case IAT_PPVALUE:
+ default:
+ NO_WAY("Unsupported JMP indirection");
+ }
+
+ /* Simply emit a jump to the methodHnd. This is similar to a call so we can use
+ * the same descriptor with some minor adjustments.
+ */
+
+ getEmitter()->emitIns_Call(callType, methHnd, INDEBUG_LDISASM_COMMA(nullptr) addr,
+ 0, // argSize
+ EA_UNKNOWN, // retSize
+ gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur,
+ BAD_IL_OFFSET, // IL offset
+ indCallReg, // ireg
+ REG_NA, // xreg
+ 0, // xmul
+ 0, // disp
+ true); // isJump
+ }
+ else
+ {
+ if (!genUsedPopToReturn)
+ {
+ // If we did not use a pop to return, then we did a "pop {..., lr}" instead of "pop {..., pc}",
+ // so we need a "bx lr" instruction to return from the function.
+ inst_RV(INS_bx, REG_LR, TYP_I_IMPL);
+ compiler->unwindBranch16();
+ }
+ }
+
+ compiler->unwindEndEpilog();
+}
+
+#elif defined(_TARGET_ARM64_)
+
+void CodeGen::genFnEpilog(BasicBlock* block)
+{
+#ifdef DEBUG
+ if (verbose)
+ printf("*************** In genFnEpilog()\n");
+#endif
+
+ ScopedSetVariable<bool> _setGeneratingEpilog(&compiler->compGeneratingEpilog, true);
+
+ VarSetOps::Assign(compiler, gcInfo.gcVarPtrSetCur, getEmitter()->emitInitGCrefVars);
+ gcInfo.gcRegGCrefSetCur = getEmitter()->emitInitGCrefRegs;
+ gcInfo.gcRegByrefSetCur = getEmitter()->emitInitByrefRegs;
+
+#ifdef DEBUG
+ if (compiler->opts.dspCode)
+ printf("\n__epilog:\n");
+
+ if (verbose)
+ {
+ printf("gcVarPtrSetCur=%s ", VarSetOps::ToString(compiler, gcInfo.gcVarPtrSetCur));
+ dumpConvertedVarSet(compiler, gcInfo.gcVarPtrSetCur);
+ printf(", gcRegGCrefSetCur=");
+ printRegMaskInt(gcInfo.gcRegGCrefSetCur);
+ getEmitter()->emitDispRegSet(gcInfo.gcRegGCrefSetCur);
+ printf(", gcRegByrefSetCur=");
+ printRegMaskInt(gcInfo.gcRegByrefSetCur);
+ getEmitter()->emitDispRegSet(gcInfo.gcRegByrefSetCur);
+ printf("\n");
+ }
+#endif
+
+ bool jmpEpilog = ((block->bbFlags & BBF_HAS_JMP) != 0);
+
+ compiler->unwindBegEpilog();
+
+ genPopCalleeSavedRegistersAndFreeLclFrame(jmpEpilog);
+
+ if (jmpEpilog)
+ {
+ noway_assert(block->bbJumpKind == BBJ_RETURN);
+ noway_assert(block->bbTreeList != nullptr);
+
+ // figure out what jump we have
+ GenTree* jmpNode = block->lastNode();
+#if !FEATURE_FASTTAILCALL
+ noway_assert(jmpNode->gtOper == GT_JMP);
+#else
+ // arm64
+ // If jmpNode is GT_JMP then gtNext must be null.
+ // If jmpNode is a fast tail call, gtNext need not be null since it could have embedded stmts.
+ noway_assert((jmpNode->gtOper != GT_JMP) || (jmpNode->gtNext == nullptr));
+
+ // Could either be a "jmp method" or "fast tail call" implemented as epilog+jmp
+ noway_assert((jmpNode->gtOper == GT_JMP) ||
+ ((jmpNode->gtOper == GT_CALL) && jmpNode->AsCall()->IsFastTailCall()));
+
+ // The next block is associated with this "if" stmt
+ if (jmpNode->gtOper == GT_JMP)
+#endif
+ {
+ // Simply emit a jump to the methodHnd. This is similar to a call so we can use
+ // the same descriptor with some minor adjustments.
+ CORINFO_METHOD_HANDLE methHnd = (CORINFO_METHOD_HANDLE)jmpNode->gtVal.gtVal1;
+
+ CORINFO_CONST_LOOKUP addrInfo;
+ compiler->info.compCompHnd->getFunctionEntryPoint(methHnd, &addrInfo);
+ if (addrInfo.accessType != IAT_VALUE)
+ {
+ NYI_ARM64("Unsupported JMP indirection");
+ }
+
+ emitter::EmitCallType callType = emitter::EC_FUNC_TOKEN;
+
+ // Simply emit a jump to the methodHnd. This is similar to a call so we can use
+ // the same descriptor with some minor adjustments.
+ getEmitter()->emitIns_Call(callType, methHnd, INDEBUG_LDISASM_COMMA(nullptr) addrInfo.addr,
+ 0, // argSize
+ EA_UNKNOWN, // retSize
+ EA_UNKNOWN, // secondRetSize
+ gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur,
+ BAD_IL_OFFSET, REG_NA, REG_NA, 0, 0, /* iloffset, ireg, xreg, xmul, disp */
+ true); /* isJump */
+ }
+#if FEATURE_FASTTAILCALL
+ else
+ {
+ // Fast tail call.
+ // Call target = REG_IP0.
+ // https://github.com/dotnet/coreclr/issues/4827
+ // Do we need a special encoding for stack walker like rex.w prefix for x64?
+ getEmitter()->emitIns_R(INS_br, emitTypeSize(TYP_I_IMPL), REG_IP0);
+ }
+#endif // FEATURE_FASTTAILCALL
+ }
+ else
+ {
+ inst_RV(INS_ret, REG_LR, TYP_I_IMPL);
+ compiler->unwindReturn(REG_LR);
+ }
+
+ compiler->unwindEndEpilog();
+}
+
+#elif defined(_TARGET_XARCH_)
+
+void CodeGen::genFnEpilog(BasicBlock* block)
+{
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("*************** In genFnEpilog()\n");
+ }
+#endif
+
+ ScopedSetVariable<bool> _setGeneratingEpilog(&compiler->compGeneratingEpilog, true);
+
+ VarSetOps::Assign(compiler, gcInfo.gcVarPtrSetCur, getEmitter()->emitInitGCrefVars);
+ gcInfo.gcRegGCrefSetCur = getEmitter()->emitInitGCrefRegs;
+ gcInfo.gcRegByrefSetCur = getEmitter()->emitInitByrefRegs;
+
+ noway_assert(!compiler->opts.MinOpts() || isFramePointerUsed()); // FPO not allowed with minOpts
+
+#ifdef DEBUG
+ genInterruptibleUsed = true;
+#endif
+
+ bool jmpEpilog = ((block->bbFlags & BBF_HAS_JMP) != 0);
+
+#ifdef DEBUG
+ if (compiler->opts.dspCode)
+ {
+ printf("\n__epilog:\n");
+ }
+
+ if (verbose)
+ {
+ printf("gcVarPtrSetCur=%s ", VarSetOps::ToString(compiler, gcInfo.gcVarPtrSetCur));
+ dumpConvertedVarSet(compiler, gcInfo.gcVarPtrSetCur);
+ printf(", gcRegGCrefSetCur=");
+ printRegMaskInt(gcInfo.gcRegGCrefSetCur);
+ getEmitter()->emitDispRegSet(gcInfo.gcRegGCrefSetCur);
+ printf(", gcRegByrefSetCur=");
+ printRegMaskInt(gcInfo.gcRegByrefSetCur);
+ getEmitter()->emitDispRegSet(gcInfo.gcRegByrefSetCur);
+ printf("\n");
+ }
+#endif
+
+#if !FEATURE_STACK_FP_X87
+ // Restore float registers that were saved to stack before SP is modified.
+ genRestoreCalleeSavedFltRegs(compiler->compLclFrameSize);
+#endif // !FEATURE_STACK_FP_X87
+
+ /* Compute the size in bytes we've pushed/popped */
+
+ if (!doubleAlignOrFramePointerUsed())
+ {
+ // We have an ESP frame
+
+ noway_assert(compiler->compLocallocUsed == false); // Only used with frame-pointer
+
+ /* Get rid of our local variables */
+
+ if (compiler->compLclFrameSize)
+ {
+#ifdef _TARGET_X86_
+ /* Add 'compiler->compLclFrameSize' to ESP */
+ /* Use pop ECX to increment ESP by 4, unless compiler->compJmpOpUsed is true */
+
+ if ((compiler->compLclFrameSize == sizeof(void*)) && !compiler->compJmpOpUsed)
+ {
+ inst_RV(INS_pop, REG_ECX, TYP_I_IMPL);
+ regTracker.rsTrackRegTrash(REG_ECX);
+ }
+ else
+#endif // _TARGET_X86
+ {
+ /* Add 'compiler->compLclFrameSize' to ESP */
+ /* Generate "add esp, <stack-size>" */
+ inst_RV_IV(INS_add, REG_SPBASE, compiler->compLclFrameSize, EA_PTRSIZE);
+ }
+ }
+
+ genPopCalleeSavedRegisters();
+ }
+ else
+ {
+ noway_assert(doubleAlignOrFramePointerUsed());
+
+ /* Tear down the stack frame */
+
+ bool needMovEspEbp = false;
+
+#if DOUBLE_ALIGN
+ if (compiler->genDoubleAlign())
+ {
+ //
+ // add esp, compLclFrameSize
+ //
+ // We need not do anything (except the "mov esp, ebp") if
+ // compiler->compCalleeRegsPushed==0. However, this is unlikely, and it
+ // also complicates the code manager. Hence, we ignore that case.
+
+ noway_assert(compiler->compLclFrameSize != 0);
+ inst_RV_IV(INS_add, REG_SPBASE, compiler->compLclFrameSize, EA_PTRSIZE);
+
+ needMovEspEbp = true;
+ }
+ else
+#endif // DOUBLE_ALIGN
+ {
+ bool needLea = false;
+
+ if (compiler->compLocallocUsed)
+ {
+ // ESP may be variable if a localloc was actually executed. Reset it.
+ // lea esp, [ebp - compiler->compCalleeRegsPushed * REGSIZE_BYTES]
+
+ needLea = true;
+ }
+ else if (!regSet.rsRegsModified(RBM_CALLEE_SAVED))
+ {
+ if (compiler->compLclFrameSize != 0)
+ {
+#ifdef _TARGET_AMD64_
+ // AMD64 can't use "mov esp, ebp", according to the ABI specification describing epilogs. So,
+ // do an LEA to "pop off" the frame allocation.
+ needLea = true;
+#else // !_TARGET_AMD64_
+ // We will just generate "mov esp, ebp" and be done with it.
+ needMovEspEbp = true;
+#endif // !_TARGET_AMD64_
+ }
+ }
+ else if (compiler->compLclFrameSize == 0)
+ {
+ // do nothing before popping the callee-saved registers
+ }
+#ifdef _TARGET_X86_
+ else if (compiler->compLclFrameSize == REGSIZE_BYTES)
+ {
+ // "pop ecx" will make ESP point to the callee-saved registers
+ inst_RV(INS_pop, REG_ECX, TYP_I_IMPL);
+ regTracker.rsTrackRegTrash(REG_ECX);
+ }
+#endif // _TARGET_X86
+ else
+ {
+ // We need to make ESP point to the callee-saved registers
+ needLea = true;
+ }
+
+ if (needLea)
+ {
+ int offset;
+
+#ifdef _TARGET_AMD64_
+ // lea esp, [ebp + compiler->compLclFrameSize - genSPtoFPdelta]
+ //
+ // Case 1: localloc not used.
+ // genSPToFPDelta = compiler->compCalleeRegsPushed * REGSIZE_BYTES + compiler->compLclFrameSize
+ // offset = compiler->compCalleeRegsPushed * REGSIZE_BYTES;
+ // The amount to be subtracted from RBP to point at callee saved int regs.
+ //
+ // Case 2: localloc used
+ // genSPToFPDelta = Min(240, (int)compiler->lvaOutgoingArgSpaceSize)
+ // Offset = amount to be added to RBP to point at callee-saved int regs.
+ offset = genSPtoFPdelta() - compiler->compLclFrameSize;
+
+ // Offset should fit within a byte if localloc is not used.
+ if (!compiler->compLocallocUsed)
+ {
+ noway_assert(offset < UCHAR_MAX);
+ }
+#else
+ // lea esp, [ebp - compiler->compCalleeRegsPushed * REGSIZE_BYTES]
+ offset = compiler->compCalleeRegsPushed * REGSIZE_BYTES;
+ noway_assert(offset < UCHAR_MAX); // the offset fits in a byte
+#endif
+
+ getEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, REG_SPBASE, REG_FPBASE, -offset);
+ }
+ }
+
+ //
+ // Pop the callee-saved registers (if any)
+ //
+
+ genPopCalleeSavedRegisters();
+
+#ifdef _TARGET_AMD64_
+ assert(!needMovEspEbp); // "mov esp, ebp" is not allowed in AMD64 epilogs
+#else // !_TARGET_AMD64_
+ if (needMovEspEbp)
+ {
+ // mov esp, ebp
+ inst_RV_RV(INS_mov, REG_SPBASE, REG_FPBASE);
+ }
+#endif // !_TARGET_AMD64_
+
+ // pop ebp
+ inst_RV(INS_pop, REG_EBP, TYP_I_IMPL);
+ }
+
+ getEmitter()->emitStartExitSeq(); // Mark the start of the "return" sequence
+
+ /* Check if this is a special return block, i.e.
+ * a CEE_JMP instruction */
+
+ if (jmpEpilog)
+ {
+ noway_assert(block->bbJumpKind == BBJ_RETURN);
+ noway_assert(block->bbTreeList);
+
+ // figure out what jump we have
+ GenTree* jmpNode = block->lastNode();
+#if !FEATURE_FASTTAILCALL
+ // x86
+ noway_assert(jmpNode->gtOper == GT_JMP);
+#else
+ // amd64
+ // If jmpNode is GT_JMP then gtNext must be null.
+ // If jmpNode is a fast tail call, gtNext need not be null since it could have embedded stmts.
+ noway_assert((jmpNode->gtOper != GT_JMP) || (jmpNode->gtNext == nullptr));
+
+ // Could either be a "jmp method" or "fast tail call" implemented as epilog+jmp
+ noway_assert((jmpNode->gtOper == GT_JMP) ||
+ ((jmpNode->gtOper == GT_CALL) && jmpNode->AsCall()->IsFastTailCall()));
+
+ // The next block is associated with this "if" stmt
+ if (jmpNode->gtOper == GT_JMP)
+#endif
+ {
+ // Simply emit a jump to the methodHnd. This is similar to a call so we can use
+ // the same descriptor with some minor adjustments.
+ CORINFO_METHOD_HANDLE methHnd = (CORINFO_METHOD_HANDLE)jmpNode->gtVal.gtVal1;
+
+ CORINFO_CONST_LOOKUP addrInfo;
+ compiler->info.compCompHnd->getFunctionEntryPoint(methHnd, &addrInfo);
+ if (addrInfo.accessType != IAT_VALUE && addrInfo.accessType != IAT_PVALUE)
+ {
+ NO_WAY("Unsupported JMP indirection");
+ }
+
+ const emitter::EmitCallType callType =
+ (addrInfo.accessType == IAT_VALUE) ? emitter::EC_FUNC_TOKEN : emitter::EC_FUNC_TOKEN_INDIR;
+
+ getEmitter()->emitIns_Call(callType, methHnd, INDEBUG_LDISASM_COMMA(nullptr) addrInfo.addr,
+ 0, // argSize
+ EA_UNKNOWN // retSize
+ FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(EA_UNKNOWN), // secondRetSize
+ gcInfo.gcVarPtrSetCur,
+ gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur, BAD_IL_OFFSET, REG_NA, REG_NA,
+ 0, 0, /* iloffset, ireg, xreg, xmul, disp */
+ true); /* isJump */
+ }
+#if FEATURE_FASTTAILCALL
+ else
+ {
+#ifdef _TARGET_AMD64_
+ // Fast tail call.
+ // Call target = RAX.
+ // Stack walker requires that a register indirect tail call be rex.w prefixed.
+ getEmitter()->emitIns_R(INS_rex_jmp, emitTypeSize(TYP_I_IMPL), REG_RAX);
+#else
+ assert(!"Fast tail call as epilog+jmp");
+ unreached();
+#endif //_TARGET_AMD64_
+ }
+#endif // FEATURE_FASTTAILCALL
+ }
+ else
+ {
+ unsigned stkArgSize = 0; // Zero on all platforms except x86
+
+#if defined(_TARGET_X86_)
+
+ noway_assert(compiler->compArgSize >= intRegState.rsCalleeRegArgCount * sizeof(void*));
+ stkArgSize = compiler->compArgSize - intRegState.rsCalleeRegArgCount * sizeof(void*);
+
+ noway_assert(compiler->compArgSize < 0x10000); // "ret" only has 2 byte operand
+
+ // varargs has caller pop
+ if (compiler->info.compIsVarArgs)
+ stkArgSize = 0;
+
+#endif // defined(_TARGET_X86_)
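+
+        // Illustrative example with hypothetical values (x86, non-varargs): if compArgSize == 16
+        // (four 4-byte args) and 2 of them are passed in registers, then
+        // stkArgSize = 16 - 2 * sizeof(void*) = 8 and the method ends with:
+        //     ret 8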
+
+ /* Return, popping our arguments (if any) */
+ instGen_Return(stkArgSize);
+ }
+}
+
+#else // _TARGET_*
+#error Unsupported or unset target architecture
+#endif // _TARGET_*
+
+#if FEATURE_EH_FUNCLETS
+
+#ifdef _TARGET_ARM_
+
+/*****************************************************************************
+ *
+ * Generates code for an EH funclet prolog.
+ *
+ * Funclets have the following incoming arguments:
+ *
+ * catch: r0 = the exception object that was caught (see GT_CATCH_ARG)
+ * filter: r0 = the exception object to filter (see GT_CATCH_ARG), r1 = CallerSP of the containing function
+ * finally/fault: none
+ *
+ * Funclets set the following registers on exit:
+ *
+ * catch: r0 = the address at which execution should resume (see BBJ_EHCATCHRET)
+ * filter: r0 = non-zero if the handler should handle the exception, zero otherwise (see GT_RETFILT)
+ * finally/fault: none
+ *
+ * The ARM funclet prolog sequence is:
+ *
+ * push {regs,lr} ; We push the callee-saved regs and 'lr'.
+ * ; TODO-ARM-CQ: We probably only need to save lr, plus any callee-save registers that we
+ * ; actually use in the funclet. Currently, we save the same set of callee-saved regs
+ * ; calculated for the entire function.
+ * sub sp, XXX ; Establish the rest of the frame.
+ * ; XXX is determined by lvaOutgoingArgSpaceSize plus space for the PSP slot, aligned
+ * ; up to preserve stack alignment. If we push an odd number of registers, we also
+ * ; generate this, to keep the stack aligned.
+ *
+ * ; Fill the PSP slot, for use by the VM (it gets reported with the GC info), or by code generation of nested
+ * ; filters.
+ * ; This is not part of the "OS prolog"; it has no associated unwind data, and is not reversed in the funclet
+ * ; epilog.
+ *
+ * if (this is a filter funclet)
+ * {
+ * // r1 on entry to a filter funclet is CallerSP of the containing function:
+ * // either the main function, or the funclet for a handler that this filter is dynamically nested within.
+ * // Note that a filter can be dynamically nested within a funclet even if it is not statically within
+ * // a funclet. Consider:
+ * //
+ * // try {
+ * // try {
+ * // throw new Exception();
+ * // } catch(Exception) {
+ * // throw new Exception(); // The exception thrown here ...
+ * // }
+ * // } filter { // ... will be processed here, while the "catch" funclet frame is
+ * // // still on the stack
+ * // } filter-handler {
+ * // }
+ * //
+ * // Because of this, we need a PSP in the main function anytime a filter funclet doesn't know whether the
+ * // enclosing frame will be a funclet or main function. We won't know any time there is a filter protecting
+ * // nested EH. To simplify, we just always create a main function PSP for any function with a filter.
+ *
+ * ldr r1, [r1 - PSP_slot_CallerSP_offset] ; Load the CallerSP of the main function (stored in the PSP of
+ * ; the dynamically containing funclet or function)
+ * str r1, [sp + PSP_slot_SP_offset] ; store the PSP
+ * sub r11, r1, Function_CallerSP_to_FP_delta ; re-establish the frame pointer
+ * }
+ * else
+ * {
+ * // This is NOT a filter funclet. The VM re-establishes the frame pointer on entry.
+ * // TODO-ARM-CQ: if VM set r1 to CallerSP on entry, like for filters, we could save an instruction.
+ *
+ * add r3, r11, Function_CallerSP_to_FP_delta ; compute the CallerSP, given the frame pointer. r3 is scratch.
+ * str r3, [sp + PSP_slot_SP_offset] ; store the PSP
+ * }
+ *
+ * The epilog sequence is then:
+ *
+ * add sp, XXX ; if necessary
+ * pop {regs,pc}
+ *
+ * If it is worth it, we could push r0, r1, r2, r3 instead of using an additional add/sub instruction.
+ * Code size would be smaller, but we would be writing to / reading from the stack, which might be slow.
+ *
+ * The funclet frame is thus:
+ *
+ * | |
+ * |-----------------------|
+ * | incoming |
+ * | arguments |
+ * +=======================+ <---- Caller's SP
+ * |Callee saved registers |
+ * |-----------------------|
+ * |Pre-spill regs space | // This is only necessary to keep the PSP slot at the same offset
+ * | | // in function and funclet
+ * |-----------------------|
+ * | PSP slot |
+ * |-----------------------|
+ * ~ possible 4 byte pad ~
+ * ~ for alignment ~
+ * |-----------------------|
+ * | Outgoing arg space |
+ * |-----------------------| <---- Ambient SP
+ * | | |
+ * ~ | Stack grows ~
+ * | | downward |
+ * V
+ */
+
+void CodeGen::genFuncletProlog(BasicBlock* block)
+{
+#ifdef DEBUG
+ if (verbose)
+ printf("*************** In genFuncletProlog()\n");
+#endif
+
+    assert(block != nullptr);
+    assert(block->bbFlags & BBF_FUNCLET_BEG);
+
+ ScopedSetVariable<bool> _setGeneratingProlog(&compiler->compGeneratingProlog, true);
+
+ gcInfo.gcResetForBB();
+
+ compiler->unwindBegProlog();
+
+ regMaskTP maskPushRegsFloat = genFuncletInfo.fiSaveRegs & RBM_ALLFLOAT;
+ regMaskTP maskPushRegsInt = genFuncletInfo.fiSaveRegs & ~maskPushRegsFloat;
+
+ regMaskTP maskStackAlloc = genStackAllocRegisterMask(genFuncletInfo.fiSpDelta, maskPushRegsFloat);
+ maskPushRegsInt |= maskStackAlloc;
+
+ assert(FitsIn<int>(maskPushRegsInt));
+ inst_IV(INS_push, (int)maskPushRegsInt);
+ compiler->unwindPushMaskInt(maskPushRegsInt);
+
+ if (maskPushRegsFloat != RBM_NONE)
+ {
+ genPushFltRegs(maskPushRegsFloat);
+ compiler->unwindPushMaskFloat(maskPushRegsFloat);
+ }
+
+ bool isFilter = (block->bbCatchTyp == BBCT_FILTER);
+
+ regMaskTP maskArgRegsLiveIn;
+ if (isFilter)
+ {
+ maskArgRegsLiveIn = RBM_R0 | RBM_R1;
+ }
+ else if ((block->bbCatchTyp == BBCT_FINALLY) || (block->bbCatchTyp == BBCT_FAULT))
+ {
+ maskArgRegsLiveIn = RBM_NONE;
+ }
+ else
+ {
+ maskArgRegsLiveIn = RBM_R0;
+ }
+
+ regNumber initReg = REG_R3; // R3 is never live on entry to a funclet, so it can be trashed
+ bool initRegZeroed = false;
+
+ if (maskStackAlloc == RBM_NONE)
+ {
+ genAllocLclFrame(genFuncletInfo.fiSpDelta, initReg, &initRegZeroed, maskArgRegsLiveIn);
+ }
+
+ // This is the end of the OS-reported prolog for purposes of unwinding
+ compiler->unwindEndProlog();
+
+ if (isFilter)
+ {
+ // This is the first block of a filter
+
+ getEmitter()->emitIns_R_R_I(ins_Load(TYP_I_IMPL), EA_PTRSIZE, REG_R1, REG_R1,
+ genFuncletInfo.fiPSP_slot_CallerSP_offset);
+ regTracker.rsTrackRegTrash(REG_R1);
+ getEmitter()->emitIns_R_R_I(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_R1, REG_SPBASE,
+ genFuncletInfo.fiPSP_slot_SP_offset);
+ getEmitter()->emitIns_R_R_I(INS_sub, EA_PTRSIZE, REG_FPBASE, REG_R1,
+ genFuncletInfo.fiFunctionCallerSPtoFPdelta);
+ }
+ else
+ {
+ // This is a non-filter funclet
+ getEmitter()->emitIns_R_R_I(INS_add, EA_PTRSIZE, REG_R3, REG_FPBASE,
+ genFuncletInfo.fiFunctionCallerSPtoFPdelta);
+ regTracker.rsTrackRegTrash(REG_R3);
+ getEmitter()->emitIns_R_R_I(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_R3, REG_SPBASE,
+ genFuncletInfo.fiPSP_slot_SP_offset);
+ }
+}
+
+/*****************************************************************************
+ *
+ * Generates code for an EH funclet epilog.
+ */
+
+void CodeGen::genFuncletEpilog()
+{
+#ifdef DEBUG
+ if (verbose)
+ printf("*************** In genFuncletEpilog()\n");
+#endif
+
+ ScopedSetVariable<bool> _setGeneratingEpilog(&compiler->compGeneratingEpilog, true);
+
+ // Just as for the main function, we delay starting the unwind codes until we have
+ // an instruction which we know needs an unwind code. This is to support code like
+ // this:
+ // movw r3, 0x38e0
+ // add sp, r3
+ // pop {r4,r5,r6,r10,r11,pc}
+ // where the "movw" shouldn't be part of the unwind codes. See genFnEpilog() for more details.
+
+ bool unwindStarted = false;
+
+    /* The set of saved regs includes the LR register; to return, we pop PC in its place */
+ assert(genFuncletInfo.fiSaveRegs & RBM_LR);
+
+ regMaskTP maskPopRegsFloat = genFuncletInfo.fiSaveRegs & RBM_ALLFLOAT;
+ regMaskTP maskPopRegsInt = genFuncletInfo.fiSaveRegs & ~maskPopRegsFloat;
+
+ regMaskTP maskStackAlloc = genStackAllocRegisterMask(genFuncletInfo.fiSpDelta, maskPopRegsFloat);
+ maskPopRegsInt |= maskStackAlloc;
+
+ if (maskStackAlloc == RBM_NONE)
+ {
+ genFreeLclFrame(genFuncletInfo.fiSpDelta, &unwindStarted, false);
+ }
+
+ if (!unwindStarted)
+ {
+ // We'll definitely generate an unwindable instruction next
+ compiler->unwindBegEpilog();
+ unwindStarted = true;
+ }
+
+ maskPopRegsInt &= ~RBM_LR;
+ maskPopRegsInt |= RBM_PC;
+
+ if (maskPopRegsFloat != RBM_NONE)
+ {
+ genPopFltRegs(maskPopRegsFloat);
+ compiler->unwindPopMaskFloat(maskPopRegsFloat);
+ }
+
+ assert(FitsIn<int>(maskPopRegsInt));
+ inst_IV(INS_pop, (int)maskPopRegsInt);
+ compiler->unwindPopMaskInt(maskPopRegsInt);
+
+ compiler->unwindEndEpilog();
+}
+
+/*****************************************************************************
+ *
+ * Capture the information used to generate the funclet prologs and epilogs.
+ * Note that all funclet prologs are identical, and all funclet epilogs are
+ * identical (per type: filters are identical, and non-filters are identical).
+ * Thus, we compute the data used for these just once.
+ *
+ * See genFuncletProlog() for more information about the prolog/epilog sequences.
+ */
+
+void CodeGen::genCaptureFuncletPrologEpilogInfo()
+{
+ if (compiler->ehAnyFunclets())
+ {
+ assert(isFramePointerUsed());
+ assert(compiler->lvaDoneFrameLayout ==
+ Compiler::FINAL_FRAME_LAYOUT); // The frame size and offsets must be finalized
+
+ // Frame pointer doesn't point at the end, it points at the pushed r11. So, instead
+ // of adding the number of callee-saved regs to CallerSP, we add 1 for lr and 1 for r11
+ // (plus the "pre spill regs"). Note that we assume r12 and r13 aren't saved
+ // (also assumed in genFnProlog()).
+ assert((regSet.rsMaskCalleeSaved & (RBM_R12 | RBM_R13)) == 0);
+ unsigned preSpillRegArgSize = genCountBits(regSet.rsMaskPreSpillRegs(true)) * REGSIZE_BYTES;
+ genFuncletInfo.fiFunctionCallerSPtoFPdelta = preSpillRegArgSize + 2 * REGSIZE_BYTES;
+
+ regMaskTP rsMaskSaveRegs = regSet.rsMaskCalleeSaved;
+ unsigned saveRegsCount = genCountBits(rsMaskSaveRegs);
+ unsigned saveRegsSize = saveRegsCount * REGSIZE_BYTES; // bytes of regs we're saving
+ assert(compiler->lvaOutgoingArgSpaceSize % REGSIZE_BYTES == 0);
+ unsigned funcletFrameSize =
+ preSpillRegArgSize + saveRegsSize + REGSIZE_BYTES /* PSP slot */ + compiler->lvaOutgoingArgSpaceSize;
+
+ unsigned funcletFrameSizeAligned = roundUp(funcletFrameSize, STACK_ALIGN);
+ unsigned funcletFrameAlignmentPad = funcletFrameSizeAligned - funcletFrameSize;
+ unsigned spDelta = funcletFrameSizeAligned - saveRegsSize;
+
+ unsigned PSP_slot_SP_offset = compiler->lvaOutgoingArgSpaceSize + funcletFrameAlignmentPad;
+ int PSP_slot_CallerSP_offset =
+ -(int)(funcletFrameSize - compiler->lvaOutgoingArgSpaceSize); // NOTE: it's negative!
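+
+        // Worked example with hypothetical values (REGSIZE_BYTES == 4, STACK_ALIGN == 8):
+        // pre-spill {r0-r3} gives preSpillRegArgSize = 16; saved regs {r4,r5,r11,lr} give
+        // saveRegsSize = 16; lvaOutgoingArgSpaceSize = 8. Then funcletFrameSize = 16 + 16 + 4 + 8 = 44,
+        // funcletFrameSizeAligned = 48, funcletFrameAlignmentPad = 4, spDelta = 48 - 16 = 32,
+        // PSP_slot_SP_offset = 8 + 4 = 12, and PSP_slot_CallerSP_offset = -(44 - 8) = -36.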
+
+ /* Now save it for future use */
+
+ genFuncletInfo.fiSaveRegs = rsMaskSaveRegs;
+ genFuncletInfo.fiSpDelta = spDelta;
+ genFuncletInfo.fiPSP_slot_SP_offset = PSP_slot_SP_offset;
+ genFuncletInfo.fiPSP_slot_CallerSP_offset = PSP_slot_CallerSP_offset;
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\n");
+ printf("Funclet prolog / epilog info\n");
+ printf(" Function CallerSP-to-FP delta: %d\n", genFuncletInfo.fiFunctionCallerSPtoFPdelta);
+ printf(" Save regs: ");
+ dspRegMask(rsMaskSaveRegs);
+ printf("\n");
+ printf(" SP delta: %d\n", genFuncletInfo.fiSpDelta);
+ printf(" PSP slot SP offset: %d\n", genFuncletInfo.fiPSP_slot_SP_offset);
+ printf(" PSP slot Caller SP offset: %d\n", genFuncletInfo.fiPSP_slot_CallerSP_offset);
+
+ if (PSP_slot_CallerSP_offset !=
+ compiler->lvaGetCallerSPRelativeOffset(compiler->lvaPSPSym)) // for debugging
+ printf("lvaGetCallerSPRelativeOffset(lvaPSPSym): %d\n",
+ compiler->lvaGetCallerSPRelativeOffset(compiler->lvaPSPSym));
+ }
+#endif // DEBUG
+
+ assert(PSP_slot_CallerSP_offset < 0);
+ assert(compiler->lvaPSPSym != BAD_VAR_NUM);
+ assert(PSP_slot_CallerSP_offset == compiler->lvaGetCallerSPRelativeOffset(compiler->lvaPSPSym)); // same offset
+ // used in main
+ // function and
+ // funclet!
+ }
+}
+
+#elif defined(_TARGET_AMD64_)
+
+/*****************************************************************************
+ *
+ * Generates code for an EH funclet prolog.
+ *
+ * Funclets have the following incoming arguments:
+ *
+ * catch/filter-handler: rcx = InitialSP, rdx = the exception object that was caught (see GT_CATCH_ARG)
+ * filter: rcx = InitialSP, rdx = the exception object to filter (see GT_CATCH_ARG)
+ * finally/fault: rcx = InitialSP
+ *
+ * Funclets set the following registers on exit:
+ *
+ * catch/filter-handler: rax = the address at which execution should resume (see BBJ_EHCATCHRET)
+ * filter: rax = non-zero if the handler should handle the exception, zero otherwise (see GT_RETFILT)
+ * finally/fault: none
+ *
+ * The AMD64 funclet prolog sequence is:
+ *
+ * push ebp
+ * push callee-saved regs
+ * ; TODO-AMD64-CQ: We probably only need to save any callee-save registers that we actually use
+ * ; in the funclet. Currently, we save the same set of callee-saved regs calculated for
+ * ; the entire function.
+ * sub sp, XXX ; Establish the rest of the frame.
+ * ; XXX is determined by lvaOutgoingArgSpaceSize plus space for the PSP slot, aligned
+ * ; up to preserve stack alignment. If we push an odd number of registers, we also
+ * ; generate this, to keep the stack aligned.
+ *
+ * ; Fill the PSP slot, for use by the VM (it gets reported with the GC info), or by code generation of nested
+ * ; filters.
+ * ; This is not part of the "OS prolog"; it has no associated unwind data, and is not reversed in the funclet
+ * ; epilog.
+ * ; Also, re-establish the frame pointer from the PSP.
+ *
+ * mov rbp, [rcx + PSP_slot_InitialSP_offset] ; Load the PSP (InitialSP of the main function stored in the
+ * ; PSP of the dynamically containing funclet or function)
+ * mov [rsp + PSP_slot_InitialSP_offset], rbp ; store the PSP in our frame
+ * lea ebp, [rbp + Function_InitialSP_to_FP_delta] ; re-establish the frame pointer of the parent frame. If
+ * ; Function_InitialSP_to_FP_delta==0, we don't need this
+ * ; instruction.
+ *
+ * The epilog sequence is then:
+ *
+ * add rsp, XXX
+ * pop callee-saved regs ; if necessary
+ * pop rbp
+ * ret
+ *
+ * The funclet frame is thus:
+ *
+ * | |
+ * |-----------------------|
+ * | incoming |
+ * | arguments |
+ * +=======================+ <---- Caller's SP
+ * | Return address |
+ * |-----------------------|
+ * | Saved EBP |
+ * |-----------------------|
+ * |Callee saved registers |
+ * |-----------------------|
+ * ~ possible 8 byte pad ~
+ * ~ for alignment ~
+ * |-----------------------|
+ * | PSP slot |
+ * |-----------------------|
+ * | Outgoing arg space | // this only exists if the function makes a call
+ * |-----------------------| <---- Initial SP
+ * | | |
+ * ~ | Stack grows ~
+ * | | downward |
+ * V
+ *
+ * TODO-AMD64-Bug?: the frame pointer should really point to the PSP slot (the debugger seems to assume this
+ * in DacDbiInterfaceImpl::InitParentFrameInfo()), or someplace above Initial-SP. There is an AMD64
+ * UNWIND_INFO restriction that it must be within 240 bytes of Initial-SP. See jit64\amd64\inc\md.h
+ * "FRAMEPTR OFFSETS" for details.
+ */
+
+void CodeGen::genFuncletProlog(BasicBlock* block)
+{
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("*************** In genFuncletProlog()\n");
+ }
+#endif
+
+ assert(!regSet.rsRegsModified(RBM_FPBASE));
+ assert(block != nullptr);
+ assert(block->bbFlags & BBF_FUNCLET_BEG);
+ assert(isFramePointerUsed());
+
+ ScopedSetVariable<bool> _setGeneratingProlog(&compiler->compGeneratingProlog, true);
+
+ gcInfo.gcResetForBB();
+
+ compiler->unwindBegProlog();
+
+ // We need to push ebp, since it's callee-saved.
+ // We need to push the callee-saved registers. We only need to push the ones that we need, but we don't
+ // keep track of that on a per-funclet basis, so we push the same set as in the main function.
+ // The only fixed-size frame we need to allocate is whatever is big enough for the PSPSym, since nothing else
+ // is stored here (all temps are allocated in the parent frame).
+ // We do need to allocate the outgoing argument space, in case there are calls here. This must be the same
+ // size as the parent frame's outgoing argument space, to keep the PSPSym offset the same.
+
+ inst_RV(INS_push, REG_FPBASE, TYP_REF);
+ compiler->unwindPush(REG_FPBASE);
+
+ // Callee saved int registers are pushed to stack.
+ genPushCalleeSavedRegisters();
+
+ regMaskTP maskArgRegsLiveIn;
+ if ((block->bbCatchTyp == BBCT_FINALLY) || (block->bbCatchTyp == BBCT_FAULT))
+ {
+ maskArgRegsLiveIn = RBM_ARG_0;
+ }
+ else
+ {
+ maskArgRegsLiveIn = RBM_ARG_0 | RBM_ARG_2;
+ }
+
+ regNumber initReg = REG_EBP; // We already saved EBP, so it can be trashed
+ bool initRegZeroed = false;
+
+ genAllocLclFrame(genFuncletInfo.fiSpDelta, initReg, &initRegZeroed, maskArgRegsLiveIn);
+
+    // Callee-saved float registers are copied to their assigned stack slots
+    // after space for them has been allocated as part of the funclet frame.
+ genPreserveCalleeSavedFltRegs(genFuncletInfo.fiSpDelta);
+
+ // This is the end of the OS-reported prolog for purposes of unwinding
+ compiler->unwindEndProlog();
+
+ getEmitter()->emitIns_R_AR(INS_mov, EA_PTRSIZE, REG_FPBASE, REG_ARG_0, genFuncletInfo.fiPSP_slot_InitialSP_offset);
+
+ regTracker.rsTrackRegTrash(REG_FPBASE);
+
+ getEmitter()->emitIns_AR_R(INS_mov, EA_PTRSIZE, REG_FPBASE, REG_SPBASE, genFuncletInfo.fiPSP_slot_InitialSP_offset);
+
+ if (genFuncletInfo.fiFunction_InitialSP_to_FP_delta != 0)
+ {
+ getEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, REG_FPBASE, REG_FPBASE,
+ genFuncletInfo.fiFunction_InitialSP_to_FP_delta);
+ }
+
+ // We've modified EBP, but not really. Say that we haven't...
+ regSet.rsRemoveRegsModified(RBM_FPBASE);
+}
+
+/*****************************************************************************
+ *
+ * Generates code for an EH funclet epilog.
+ *
+ * Note that we don't do anything with unwind codes, because AMD64 only cares about unwind codes for the prolog.
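+ *
+ * For illustration (hypothetical values): with fiSpDelta == 48 and RSI/RDI as the pushed
+ * callee-saved int regs, the generated epilog would be roughly:
+ *
+ *      add     rsp, 48
+ *      pop     rdi
+ *      pop     rsi
+ *      pop     rbp
+ *      ret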
+ */
+
+void CodeGen::genFuncletEpilog()
+{
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("*************** In genFuncletEpilog()\n");
+ }
+#endif
+
+ ScopedSetVariable<bool> _setGeneratingEpilog(&compiler->compGeneratingEpilog, true);
+
+ // Restore callee saved XMM regs from their stack slots before modifying SP
+ // to position at callee saved int regs.
+ genRestoreCalleeSavedFltRegs(genFuncletInfo.fiSpDelta);
+ inst_RV_IV(INS_add, REG_SPBASE, genFuncletInfo.fiSpDelta, EA_PTRSIZE);
+ genPopCalleeSavedRegisters();
+ inst_RV(INS_pop, REG_EBP, TYP_I_IMPL);
+ instGen_Return(0);
+}
+
+/*****************************************************************************
+ *
+ * Capture the information used to generate the funclet prologs and epilogs.
+ */
+
+void CodeGen::genCaptureFuncletPrologEpilogInfo()
+{
+ if (!compiler->ehAnyFunclets())
+ {
+ return;
+ }
+
+    // Note that compLclFrameSize can't be used (nor can we call functions that depend on it),
+ // because we're not going to allocate the same size frame as the parent.
+
+ assert(isFramePointerUsed());
+ assert(compiler->lvaDoneFrameLayout ==
+ Compiler::FINAL_FRAME_LAYOUT); // The frame size and offsets must be finalized
+    assert(compiler->compCalleeFPRegsSavedMask != (regMaskTP)-1); // The float registers to be preserved are finalized
+
+ // Even though lvaToInitialSPRelativeOffset() depends on compLclFrameSize,
+ // that's ok, because we're figuring out an offset in the parent frame.
+ genFuncletInfo.fiFunction_InitialSP_to_FP_delta =
+ compiler->lvaToInitialSPRelativeOffset(0, true); // trick to find the Initial-SP-relative offset of the frame
+ // pointer.
+
+ assert(compiler->lvaOutgoingArgSpaceSize % REGSIZE_BYTES == 0);
+#ifndef UNIX_AMD64_ABI
+    // System V targets don't reserve the 4 outgoing-argument slots on the stack, so this check
+    // applies only to Windows AMD64, where we always have 4 outgoing argument slots if there
+    // are any calls in the function.
+    assert((compiler->lvaOutgoingArgSpaceSize == 0) ||
+           (compiler->lvaOutgoingArgSpaceSize >= (4 * REGSIZE_BYTES)));
+#endif // !UNIX_AMD64_ABI
+ unsigned offset = compiler->lvaOutgoingArgSpaceSize;
+
+ genFuncletInfo.fiPSP_slot_InitialSP_offset = offset;
+
+ // How much stack do we allocate in the funclet?
+ // We need to 16-byte align the stack.
+
+ unsigned totalFrameSize =
+ REGSIZE_BYTES // return address
+ + REGSIZE_BYTES // pushed EBP
+ + (compiler->compCalleeRegsPushed * REGSIZE_BYTES); // pushed callee-saved int regs, not including EBP
+
+    // The entire 128 bits of each XMM register are saved to the stack due to ABI encoding requirements.
+    // Copying an entire XMM register to/from memory performs well when SP is aligned on an XMM_REGSIZE_BYTES boundary.
+ unsigned calleeFPRegsSavedSize = genCountBits(compiler->compCalleeFPRegsSavedMask) * XMM_REGSIZE_BYTES;
+ unsigned FPRegsPad = (calleeFPRegsSavedSize > 0) ? AlignmentPad(totalFrameSize, XMM_REGSIZE_BYTES) : 0;
+
+ totalFrameSize += FPRegsPad // Padding before pushing entire xmm regs
+ + calleeFPRegsSavedSize // pushed callee-saved float regs
+ // below calculated 'pad' will go here
+ + REGSIZE_BYTES // PSPSym
+ + compiler->lvaOutgoingArgSpaceSize // outgoing arg space
+ ;
+
+ unsigned pad = AlignmentPad(totalFrameSize, 16);
+
+ genFuncletInfo.fiSpDelta = FPRegsPad // Padding to align SP on XMM_REGSIZE_BYTES boundary
+ + calleeFPRegsSavedSize // Callee saved xmm regs
+ + pad + REGSIZE_BYTES // PSPSym
+ + compiler->lvaOutgoingArgSpaceSize // outgoing arg space
+ ;
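+
+    // Worked example with hypothetical values: 2 pushed callee-saved int regs (16 bytes), no
+    // callee-saved float regs, and lvaOutgoingArgSpaceSize == 32. Then totalFrameSize =
+    // 8 (return address) + 8 (pushed EBP) + 16 + 8 (PSPSym) + 32 = 72, pad = 8 to reach the
+    // next 16-byte boundary, and fiSpDelta = 0 + 0 + 8 + 8 + 32 = 48.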
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\n");
+ printf("Funclet prolog / epilog info\n");
+ printf(" Function InitialSP-to-FP delta: %d\n", genFuncletInfo.fiFunction_InitialSP_to_FP_delta);
+ printf(" SP delta: %d\n", genFuncletInfo.fiSpDelta);
+ printf(" PSP slot Initial SP offset: %d\n", genFuncletInfo.fiPSP_slot_InitialSP_offset);
+ }
+#endif // DEBUG
+
+ assert(compiler->lvaPSPSym != BAD_VAR_NUM);
+ assert(genFuncletInfo.fiPSP_slot_InitialSP_offset ==
+ compiler->lvaGetInitialSPRelativeOffset(compiler->lvaPSPSym)); // same offset used in main function and
+ // funclet!
+}
+
+#elif defined(_TARGET_ARM64_)
+
+// Look in CodeGenArm64.cpp
+
+#else // _TARGET_*
+
+/*****************************************************************************
+ *
+ * Generates code for an EH funclet prolog.
+ */
+
+void CodeGen::genFuncletProlog(BasicBlock* block)
+{
+ NYI("Funclet prolog");
+}
+
+/*****************************************************************************
+ *
+ * Generates code for an EH funclet epilog.
+ */
+
+void CodeGen::genFuncletEpilog()
+{
+ NYI("Funclet epilog");
+}
+
+/*****************************************************************************
+ *
+ * Capture the information used to generate the funclet prologs and epilogs.
+ */
+
+void CodeGen::genCaptureFuncletPrologEpilogInfo()
+{
+ if (compiler->ehAnyFunclets())
+ {
+ NYI("genCaptureFuncletPrologEpilogInfo()");
+ }
+}
+
+#endif // _TARGET_*
+
+/*-----------------------------------------------------------------------------
+ *
+ * Set the main function PSPSym value in the frame.
+ * Funclets use different code to load the PSP sym and save it in their frame.
+ * See the document "X64 and ARM ABIs.docx" for a full description of the PSPSym.
+ * The PSPSym section of that document is copied here.
+ *
+ ***********************************
+ * The name PSPSym stands for Previous Stack Pointer Symbol. It is how a funclet
+ * accesses locals from the main function body.
+ *
+ * First, two definitions.
+ *
+ * Caller-SP is the value of the stack pointer in a function's caller before the call
+ * instruction is executed. That is, when function A calls function B, Caller-SP for B
+ * is the value of the stack pointer immediately before the call instruction in A
+ * (calling B) was executed. Note that this definition holds for both AMD64, which
+ * pushes the return address when a call instruction is executed, and for ARM, which
+ * doesn't. For AMD64, Caller-SP is the address above the call return address.
+ *
+ * Initial-SP is the initial value of the stack pointer after the fixed-size portion of
+ * the frame has been allocated. That is, before any "alloca"-type allocations.
+ *
+ * The PSPSym is a pointer-sized local variable in the frame of the main function and
+ * of each funclet. The value stored in PSPSym is the value of Initial-SP/Caller-SP
+ * for the main function. The stack offset of the PSPSym is reported to the VM in the
+ * GC information header. The value reported in the GC information is the offset of the
+ * PSPSym from Initial-SP/Caller-SP. (Note that both the value stored, and the way the
+ * value is reported to the VM, differs between architectures. In particular, note that
+ * most things in the GC information header are reported as offsets relative to Caller-SP,
+ * but PSPSym on AMD64 is one (maybe the only) exception.)
+ *
+ * The VM uses the PSPSym to find other locals it cares about (such as the generics context
+ * in a funclet frame). The JIT uses it to re-establish the frame pointer register, so that
+ * the frame pointer is the same value in a funclet as it is in the main function body.
+ *
+ * When a funclet is called, it is passed the Establisher Frame Pointer. For AMD64 this is
+ * true for all funclets and it is passed as the first argument in RCX, but for ARM this is
+ * only true for first pass funclets (currently just filters) and it is passed as the second
+ * argument in R1. The Establisher Frame Pointer is a stack pointer of an interesting "parent"
+ * frame in the exception processing system. For the CLR, it points either to the main function
+ * frame or a dynamically enclosing funclet frame from the same function, for the funclet being
+ * invoked. The value of the Establisher Frame Pointer is Initial-SP on AMD64, Caller-SP on ARM.
+ *
+ * Using the establisher frame, the funclet wants to load the value of the PSPSym. Since we
+ * don't know if the Establisher Frame is from the main function or a funclet, we design the
+ * main function and funclet frame layouts to place the PSPSym at an identical, small, constant
+ * offset from the Establisher Frame in each case. (This is also required because we only report
+ * a single offset to the PSPSym in the GC information, and that offset must be valid for the main
+ * function and all of its funclets). Then, the funclet uses this known offset to compute the
+ * PSPSym address and read its value. From this, it can compute the value of the frame pointer
+ * (which is a constant offset from the PSPSym value) and set the frame register to be the same
+ * as the parent function. Also, the funclet writes the value of the PSPSym to its own frame's
+ * PSPSym. This "copying" of the PSPSym happens for every funclet invocation, in particular,
+ * for every nested funclet invocation.
+ *
+ * On ARM, for all second pass funclets (finally, fault, catch, and filter-handler) the VM
+ * restores all non-volatile registers to their values within the parent frame. This includes
+ * the frame register (R11). Thus, the PSPSym is not used to recompute the frame pointer register
+ * in this case, though the PSPSym is copied to the funclet's frame, as for all funclets.
+ *
+ * Catch, Filter, and Filter-handlers also get an Exception object (GC ref) as an argument
+ * (REG_EXCEPTION_OBJECT). On AMD64 it is the second argument and thus passed in RDX. On
+ * ARM this is the first argument and passed in R0.
+ *
+ * (Note that the JIT64 source code contains a comment that says, "The current CLR doesn't always
+ * pass the correct establisher frame to the funclet. Funclet may receive establisher frame of
+ * funclet when expecting that of original routine." It indicates this is the reason that a PSPSym
+ * is required in all funclets as well as the main function, whereas if the establisher frame was
+ * correctly reported, the PSPSym could be omitted in some cases.)
+ ***********************************
+ */
+void CodeGen::genSetPSPSym(regNumber initReg, bool* pInitRegZeroed)
+{
+ assert(compiler->compGeneratingProlog);
+
+ if (!compiler->ehNeedsPSPSym())
+ {
+ return;
+ }
+
+ noway_assert(isFramePointerUsed()); // We need an explicit frame pointer
+ assert(compiler->lvaPSPSym != BAD_VAR_NUM); // We should have created the PSPSym variable
+
+#if defined(_TARGET_ARM_)
+
+ // We either generate:
+ // add r1, r11, 8
+ // str r1, [reg + PSPSymOffset]
+ // or:
+ // add r1, sp, 76
+ // str r1, [reg + PSPSymOffset]
+ // depending on the smallest encoding
+
+ int SPtoCallerSPdelta = -genCallerSPtoInitialSPdelta();
+
+ int callerSPOffs;
+ regNumber regBase;
+
+ if (arm_Valid_Imm_For_Add_SP(SPtoCallerSPdelta))
+ {
+ // use the "add <reg>, sp, imm" form
+
+ callerSPOffs = SPtoCallerSPdelta;
+ regBase = REG_SPBASE;
+ }
+ else
+ {
+ // use the "add <reg>, r11, imm" form
+
+ int FPtoCallerSPdelta = -genCallerSPtoFPdelta();
+ noway_assert(arm_Valid_Imm_For_Add(FPtoCallerSPdelta, INS_FLAGS_DONT_CARE));
+
+ callerSPOffs = FPtoCallerSPdelta;
+ regBase = REG_FPBASE;
+ }
+
+ // We will just use the initReg since it is an available register
+ // and we are probably done using it anyway...
+ regNumber regTmp = initReg;
+ *pInitRegZeroed = false;
+
+ getEmitter()->emitIns_R_R_I(INS_add, EA_PTRSIZE, regTmp, regBase, callerSPOffs);
+ getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, regTmp, compiler->lvaPSPSym, 0);
+
+#elif defined(_TARGET_ARM64_)
+
+ int SPtoCallerSPdelta = -genCallerSPtoInitialSPdelta();
+
+ // We will just use the initReg since it is an available register
+ // and we are probably done using it anyway...
+ regNumber regTmp = initReg;
+ *pInitRegZeroed = false;
+
+ getEmitter()->emitIns_R_R_Imm(INS_add, EA_PTRSIZE, regTmp, REG_SPBASE, SPtoCallerSPdelta);
+ getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, regTmp, compiler->lvaPSPSym, 0);
+
+#elif defined(_TARGET_AMD64_)
+
+ // The PSP sym value is Initial-SP, not Caller-SP!
+ // We assume that RSP is Initial-SP when this function is called. That is, the stack frame
+ // has been established.
+ //
+ // We generate:
+ // mov [rbp-20h], rsp // store the Initial-SP (our current rsp) in the PSPsym
+
+ getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_SPBASE, compiler->lvaPSPSym, 0);
+
+#else // _TARGET_*
+
+ NYI("Set function PSP sym");
+
+#endif // _TARGET_*
+}
+
+#endif // FEATURE_EH_FUNCLETS
+
+/*****************************************************************************
+ *
+ * Generates code for all the function and funclet prologs and epilogs.
+ */
+
+void CodeGen::genGeneratePrologsAndEpilogs()
+{
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("*************** Before prolog / epilog generation\n");
+ getEmitter()->emitDispIGlist(false);
+ }
+#endif
+
+#ifndef LEGACY_BACKEND
+ // Before generating the prolog, we need to reset the variable locations to what they will be on entry.
+ // This affects our code that determines which untracked locals need to be zero initialized.
+ compiler->m_pLinearScan->recordVarLocationsAtStartOfBB(compiler->fgFirstBB);
+#endif // !LEGACY_BACKEND
+
+ // Tell the emitter we're done with main code generation, and are going to start prolog and epilog generation.
+
+ getEmitter()->emitStartPrologEpilogGeneration();
+
+ gcInfo.gcResetForBB();
+ genFnProlog();
+
+ // Generate all the prologs and epilogs.
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#if FEATURE_EH_FUNCLETS
+
+ // Capture the data we're going to use in the funclet prolog and epilog generation. This is
+ // information computed during codegen, or during function prolog generation, like
+ // frame offsets. It must run after main function prolog generation.
+
+ genCaptureFuncletPrologEpilogInfo();
+
+#endif // FEATURE_EH_FUNCLETS
+
+ // Walk the list of prologs and epilogs and generate them.
+ // We maintain a list of prolog and epilog basic blocks in
+ // the insGroup structure in the emitter. This list was created
+ // during code generation by the genReserve*() functions.
+ //
+ // TODO: it seems like better design would be to create a list of prologs/epilogs
+ // in the code generator (not the emitter), and then walk that list. But we already
+ // have the insGroup list, which serves well, so we don't need the extra allocations
+ // for a prolog/epilog list in the code generator.
+
+ getEmitter()->emitGeneratePrologEpilog();
+
+ // Tell the emitter we're done with all prolog and epilog generation.
+
+ getEmitter()->emitFinishPrologEpilogGeneration();
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("*************** After prolog / epilog generation\n");
+ getEmitter()->emitDispIGlist(false);
+ }
+#endif
+}
+
+/*
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX End Prolog / Epilog XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+#if STACK_PROBES
+void CodeGen::genGenerateStackProbe()
+{
+ noway_assert(compiler->opts.compNeedStackProbes);
+
+ // If this assert fires, it means somebody has changed the value
+ // CORINFO_STACKPROBE_DEPTH.
+ // Why does the EE need such a deep probe? It should just need a couple
+    // of bytes, to set up a frame in the unmanaged code.
+
+    noway_assert(CORINFO_STACKPROBE_DEPTH + JIT_RESERVED_STACK < compiler->eeGetPageSize());
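+
+    // The probe is a single "test" of a location CORINFO_STACKPROBE_DEPTH + JIT_RESERVED_STACK
+    // bytes below SP; e.g. (hypothetically, if that sum were 0x1000 on x86):
+    //     test [esp - 0x1000], eax
+    // Touching that location commits the guard page if it isn't already committed.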
+
+ JITDUMP("Emitting stack probe:\n");
+ getEmitter()->emitIns_AR_R(INS_TEST, EA_PTRSIZE, REG_EAX, REG_SPBASE,
+ -(CORINFO_STACKPROBE_DEPTH + JIT_RESERVED_STACK));
+}
+#endif // STACK_PROBES
+
+/*****************************************************************************
+ *
+ * Record the constant and return a tree node that yields its address.
+ */
+
+GenTreePtr CodeGen::genMakeConst(const void* cnsAddr, var_types cnsType, GenTreePtr cnsTree, bool dblAlign)
+{
+ // Assign the constant an offset in the data section
+ UNATIVE_OFFSET cnsSize = genTypeSize(cnsType);
+ UNATIVE_OFFSET cnum = getEmitter()->emitDataConst(cnsAddr, cnsSize, dblAlign);
+
+#ifdef DEBUG
+ if (compiler->opts.dspCode)
+ {
+ printf(" @%s%02u ", "CNS", cnum);
+
+ switch (cnsType)
+ {
+ case TYP_INT:
+ printf("DD %d \n", *(int*)cnsAddr);
+ break;
+ case TYP_LONG:
+ printf("DQ %lld\n", *(__int64*)cnsAddr);
+ break;
+ case TYP_FLOAT:
+ printf("DF %f \n", *(float*)cnsAddr);
+ break;
+ case TYP_DOUBLE:
+ printf("DQ %lf\n", *(double*)cnsAddr);
+ break;
+
+ default:
+ noway_assert(!"unexpected constant type");
+ }
+ }
+#endif
+
+ // Access to inline data is 'abstracted' by a special type of static member
+ // (produced by eeFindJitDataOffs) which the emitter recognizes as being a reference
+ // to constant data, not a real static field.
+
+ return new (compiler, GT_CLS_VAR) GenTreeClsVar(cnsType, compiler->eeFindJitDataOffs(cnum), nullptr);
+}
+
+#if defined(_TARGET_XARCH_) && !FEATURE_STACK_FP_X87
+// Save the callee-saved floating-point registers (compCalleeFPRegsSavedMask), with the smallest
+// register number saved at [RSP+offset], working down the stack to the largest register number
+// stored at [RSP+offset-(genCountBits(regMask)-1)*XMM_REGSIZE_BYTES].
+// Here, offset is the 16-byte aligned offset after pushing the integer registers.
+//
+// Params
+// lclFrameSize - Fixed frame size excluding callee pushed int regs.
+// non-funclet: this will be compLclFrameSize.
+// funclet frames: this will be FuncletInfo.fiSpDelta.
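+//
+// For illustration (hypothetical values, AMD64): with regMask = {xmm6, xmm7}, an even count of
+// pushed callee-saved int regs (so firstFPRegPadding == 8), and lclFrameSize == 0x38, the starting
+// offset is 0x38 - 8 - 0x10 = 0x20 and the emitted sequence is roughly:
+//     movaps [rsp+0x20], xmm6
+//     movaps [rsp+0x10], xmm7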
+void CodeGen::genPreserveCalleeSavedFltRegs(unsigned lclFrameSize)
+{
+ regMaskTP regMask = compiler->compCalleeFPRegsSavedMask;
+
+ // Only callee saved floating point registers should be in regMask
+ assert((regMask & RBM_FLT_CALLEE_SAVED) == regMask);
+
+ // fast path return
+ if (regMask == RBM_NONE)
+ {
+ return;
+ }
+
+#ifdef _TARGET_AMD64_
+ unsigned firstFPRegPadding = compiler->lvaIsCalleeSavedIntRegCountEven() ? REGSIZE_BYTES : 0;
+ unsigned offset = lclFrameSize - firstFPRegPadding - XMM_REGSIZE_BYTES;
+
+ // Offset is 16-byte aligned since we use movaps for preserving xmm regs.
+ assert((offset % 16) == 0);
+ instruction copyIns = ins_Copy(TYP_FLOAT);
+#else // !_TARGET_AMD64_
+ unsigned offset = lclFrameSize - XMM_REGSIZE_BYTES;
+ instruction copyIns = INS_movupd;
+#endif // !_TARGET_AMD64_
+
+ for (regNumber reg = REG_FLT_CALLEE_SAVED_FIRST; regMask != RBM_NONE; reg = REG_NEXT(reg))
+ {
+ regMaskTP regBit = genRegMask(reg);
+ if ((regBit & regMask) != 0)
+ {
+ // ABI requires us to preserve lower 128-bits of YMM register.
+ getEmitter()->emitIns_AR_R(copyIns,
+ EA_8BYTE, // TODO-XArch-Cleanup: size specified here doesn't matter but should be
+ // EA_16BYTE
+ reg, REG_SPBASE, offset);
+ compiler->unwindSaveReg(reg, offset);
+ regMask &= ~regBit;
+ offset -= XMM_REGSIZE_BYTES;
+ }
+ }
+
+#ifdef FEATURE_AVX_SUPPORT
+    // After saving the float registers, issue a vzeroupper to zero out the upper 128 bits of all YMM regs.
+    // This avoids the AVX-SSE transition penalty if the caller was using AVX-256 and this routine
+    // uses legacy SSE instructions.
+ if (compiler->getFloatingPointInstructionSet() == InstructionSet_AVX)
+ {
+ instGen(INS_vzeroupper);
+ }
+#endif
+}
+
+// Restore the callee-saved floating-point registers (compCalleeFPRegsSavedMask), with the smallest
+// register number restored from [RSP+offset], working down the stack to the largest register number
+// loaded from [RSP+offset-(genCountBits(regMask)-1)*XMM_REGSIZE_BYTES].
+// Here, offset is the 16-byte aligned offset after pushing the integer registers.
+//
+// Params
+// lclFrameSize - Fixed frame size excluding callee pushed int regs.
+// non-funclet: this will be compLclFrameSize.
+// funclet frames: this will be FuncletInfo.fiSpDelta.
+void CodeGen::genRestoreCalleeSavedFltRegs(unsigned lclFrameSize)
+{
+ regMaskTP regMask = compiler->compCalleeFPRegsSavedMask;
+
+ // Only callee saved floating point registers should be in regMask
+ assert((regMask & RBM_FLT_CALLEE_SAVED) == regMask);
+
+ // fast path return
+ if (regMask == RBM_NONE)
+ {
+ return;
+ }
+
+#ifdef _TARGET_AMD64_
+ unsigned firstFPRegPadding = compiler->lvaIsCalleeSavedIntRegCountEven() ? REGSIZE_BYTES : 0;
+ instruction copyIns = ins_Copy(TYP_FLOAT);
+#else // !_TARGET_AMD64_
+ unsigned firstFPRegPadding = 0;
+ instruction copyIns = INS_movupd;
+#endif // !_TARGET_AMD64_
+
+ unsigned offset;
+ regNumber regBase;
+ if (compiler->compLocallocUsed)
+ {
+ // localloc frame: use frame pointer relative offset
+ assert(isFramePointerUsed());
+ regBase = REG_FPBASE;
+ offset = lclFrameSize - genSPtoFPdelta() - firstFPRegPadding - XMM_REGSIZE_BYTES;
+ }
+ else
+ {
+ regBase = REG_SPBASE;
+ offset = lclFrameSize - firstFPRegPadding - XMM_REGSIZE_BYTES;
+ }
+
+#ifdef _TARGET_AMD64_
+ // Offset is 16-byte aligned since we use movaps for restoring xmm regs
+ assert((offset % 16) == 0);
+#endif // _TARGET_AMD64_
+
+#ifdef FEATURE_AVX_SUPPORT
+ // Just before restoring float registers issue a Vzeroupper to zero out upper 128-bits of all YMM regs.
+ // This is to avoid penalty if this routine is using AVX-256 and now returning to a routine that is
+ // using SSE2.
+ if (compiler->getFloatingPointInstructionSet() == InstructionSet_AVX)
+ {
+ instGen(INS_vzeroupper);
+ }
+#endif
+
+ for (regNumber reg = REG_FLT_CALLEE_SAVED_FIRST; regMask != RBM_NONE; reg = REG_NEXT(reg))
+ {
+ regMaskTP regBit = genRegMask(reg);
+ if ((regBit & regMask) != 0)
+ {
+ // ABI requires us to restore lower 128-bits of YMM register.
+ getEmitter()->emitIns_R_AR(copyIns,
+ EA_8BYTE, // TODO-XArch-Cleanup: size specified here doesn't matter but should be
+ // EA_16BYTE
+ reg, regBase, offset);
+ regMask &= ~regBit;
+ offset -= XMM_REGSIZE_BYTES;
+ }
+ }
+}
+#endif // defined(_TARGET_XARCH_) && !FEATURE_STACK_FP_X87
+
+//-----------------------------------------------------------------------------------
+// IsMultiRegPassedType: Returns true if the type is passed in multiple registers
+//
+// Arguments:
+// hClass - type handle
+//
+// Return Value:
+// true if type is passed in multiple registers, false otherwise.
+//
+bool Compiler::IsMultiRegPassedType(CORINFO_CLASS_HANDLE hClass)
+{
+ if (hClass == NO_CLASS_HANDLE)
+ {
+ return false;
+ }
+
+ structPassingKind howToPassStruct;
+ var_types returnType = getArgTypeForStruct(hClass, &howToPassStruct);
+
+ return (returnType == TYP_STRUCT);
+}
+
+//-----------------------------------------------------------------------------------
+// IsMultiRegReturnedType: Returns true if the type is returned in multiple registers
+//
+// Arguments:
+// hClass - type handle
+//
+// Return Value:
+// true if type is returned in multiple registers, false otherwise.
+//
+bool Compiler::IsMultiRegReturnedType(CORINFO_CLASS_HANDLE hClass)
+{
+ if (hClass == NO_CLASS_HANDLE)
+ {
+ return false;
+ }
+
+ structPassingKind howToReturnStruct;
+ var_types returnType = getReturnTypeForStruct(hClass, &howToReturnStruct);
+
+ return (returnType == TYP_STRUCT);
+}
+
+//----------------------------------------------
+// Methods that support HFA's for ARM32/ARM64
+//----------------------------------------------
+
+bool Compiler::IsHfa(CORINFO_CLASS_HANDLE hClass)
+{
+#ifdef FEATURE_HFA
+ return varTypeIsFloating(GetHfaType(hClass));
+#else
+ return false;
+#endif
+}
+
+bool Compiler::IsHfa(GenTreePtr tree)
+{
+#ifdef FEATURE_HFA
+ return IsHfa(gtGetStructHandleIfPresent(tree));
+#else
+ return false;
+#endif
+}
+
+var_types Compiler::GetHfaType(GenTreePtr tree)
+{
+#ifdef FEATURE_HFA
+ if (tree->TypeGet() == TYP_STRUCT)
+ {
+ return GetHfaType(gtGetStructHandleIfPresent(tree));
+ }
+#endif
+ return TYP_UNDEF;
+}
+
+unsigned Compiler::GetHfaCount(GenTreePtr tree)
+{
+ return GetHfaCount(gtGetStructHandleIfPresent(tree));
+}
+
+var_types Compiler::GetHfaType(CORINFO_CLASS_HANDLE hClass)
+{
+ var_types result = TYP_UNDEF;
+ if (hClass != NO_CLASS_HANDLE)
+ {
+#ifdef FEATURE_HFA
+ CorInfoType corType = info.compCompHnd->getHFAType(hClass);
+ if (corType != CORINFO_TYPE_UNDEF)
+ {
+ result = JITtype2varType(corType);
+ }
+#endif // FEATURE_HFA
+ }
+ return result;
+}
+
+//------------------------------------------------------------------------
+// GetHfaCount: Given a class handle for an HFA struct
+// return the number of registers needed to hold the HFA
+//
+// Note that on ARM32 the single precision registers overlap with
+// the double precision registers and for that reason each
+// double register is considered to be two single registers.
+// Thus for ARM32 an HFA of 4 doubles this function will return 8.
+// On ARM64, given an HFA of 4 singles or 4 doubles, this function
+// will return 4 for both.
+// Arguments:
+// hClass: the class handle of a HFA struct
+//
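+// For example, using the rules above: on ARM32 an HFA of 4 doubles has class size 32 and
+// REGSIZE_BYTES == 4, so this returns 32 / 4 = 8; on ARM64 the same HFA has element size 8,
+// so this returns 32 / 8 = 4.
+//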
+unsigned Compiler::GetHfaCount(CORINFO_CLASS_HANDLE hClass)
+{
+ assert(IsHfa(hClass));
+#ifdef _TARGET_ARM_
+    // An HFA of doubles is twice as large as an HFA of singles for ARM32
+    // (i.e. it uses twice the number of single precision registers)
+ return info.compCompHnd->getClassSize(hClass) / REGSIZE_BYTES;
+#else // _TARGET_ARM64_
+ var_types hfaType = GetHfaType(hClass);
+ unsigned classSize = info.compCompHnd->getClassSize(hClass);
+    // Note that the retail build issues a warning about a potential division by zero without the Max function
+ unsigned elemSize = Max((unsigned)1, EA_SIZE_IN_BYTES(emitActualTypeSize(hfaType)));
+ return classSize / elemSize;
+#endif // _TARGET_ARM64_
+}
+
+#ifdef _TARGET_XARCH_
+
+//------------------------------------------------------------------------
+// genMapShiftInsToShiftByConstantIns: Given a general shift/rotate instruction,
+// map it to the specific x86/x64 shift opcode for a shift/rotate by a constant.
+// X86/x64 has a special encoding for shift/rotate-by-constant-1.
+//
+// Arguments:
+// ins: the base shift/rotate instruction
+// shiftByValue: the constant value by which we are shifting/rotating
+//
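+// For example (per the assertions below), genMapShiftInsToShiftByConstantIns(INS_shl, 1) yields
+// INS_shl_1, while genMapShiftInsToShiftByConstantIns(INS_shl, 5) yields INS_shl_N.
+//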
+instruction CodeGen::genMapShiftInsToShiftByConstantIns(instruction ins, int shiftByValue)
+{
+ assert(ins == INS_rcl || ins == INS_rcr || ins == INS_rol || ins == INS_ror || ins == INS_shl || ins == INS_shr ||
+ ins == INS_sar);
+
+ // Which format should we use?
+
+ instruction shiftByConstantIns;
+
+ if (shiftByValue == 1)
+ {
+ // Use the shift-by-one format.
+
+ assert(INS_rcl + 1 == INS_rcl_1);
+ assert(INS_rcr + 1 == INS_rcr_1);
+ assert(INS_rol + 1 == INS_rol_1);
+ assert(INS_ror + 1 == INS_ror_1);
+ assert(INS_shl + 1 == INS_shl_1);
+ assert(INS_shr + 1 == INS_shr_1);
+ assert(INS_sar + 1 == INS_sar_1);
+
+ shiftByConstantIns = (instruction)(ins + 1);
+ }
+ else
+ {
+ // Use the shift-by-NNN format.
+
+ assert(INS_rcl + 2 == INS_rcl_N);
+ assert(INS_rcr + 2 == INS_rcr_N);
+ assert(INS_rol + 2 == INS_rol_N);
+ assert(INS_ror + 2 == INS_ror_N);
+ assert(INS_shl + 2 == INS_shl_N);
+ assert(INS_shr + 2 == INS_shr_N);
+ assert(INS_sar + 2 == INS_sar_N);
+
+ shiftByConstantIns = (instruction)(ins + 2);
+ }
+
+ return shiftByConstantIns;
+}
+
+#endif // _TARGET_XARCH_
+
+#if !defined(LEGACY_BACKEND) && (defined(_TARGET_XARCH_) || defined(_TARGET_ARM64_))
+
+//------------------------------------------------------------------------------------------------ //
+// getFirstArgWithStackSlot - returns the first argument with a stack slot on the caller's frame.
+//
+// Return value:
+//    The number of the first argument with a stack slot on the caller's frame.
+//
+// Note:
+// On x64 Windows the caller always creates slots (homing space) in its frame for the
+//    first 4 arguments of a callee (register passed args). So, the variable number
+// (lclNum) for the first argument with a stack slot is always 0.
+// For System V systems or arm64, there is no such calling convention requirement, and the code needs to find
+// the first stack passed argument from the caller. This is done by iterating over
+//    all the lvIsParam variables and finding the first with lvArgReg equal to REG_STK.
+//
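+// For illustration (hypothetical): on System V AMD64, where the first 6 integer args are passed in
+// registers, a method taking 8 integer args would return the lclNum of its 7th argument, the first
+// one with lvArgReg == REG_STK.
+//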
+unsigned CodeGen::getFirstArgWithStackSlot()
+{
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) || defined(_TARGET_ARM64_)
+ unsigned baseVarNum = 0;
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+    if (compiler->lvaFirstStackIncomingArgNum != BAD_VAR_NUM)
+    {
+        baseVarNum = compiler->lvaFirstStackIncomingArgNum;
+    }
+    else
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+ {
+ // Iterate over all the local variables in the Lcl var table.
+ // They contain all the implicit arguments - thisPtr, retBuf,
+        // generic context, PInvoke cookie, vararg cookie, non-standard args, etc.
+ LclVarDsc* varDsc = nullptr;
+ for (unsigned i = 0; i < compiler->info.compArgsCount; i++)
+ {
+ varDsc = &(compiler->lvaTable[i]);
+
+ // We are iterating over the arguments only.
+ assert(varDsc->lvIsParam);
+
+ if (varDsc->lvArgReg == REG_STK)
+ {
+ baseVarNum = i;
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+                compiler->lvaFirstStackIncomingArgNum = baseVarNum;
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+ break;
+ }
+ }
+ assert(varDsc != nullptr);
+ }
+
+ return baseVarNum;
+#elif defined(_TARGET_AMD64_)
+ return 0;
+#else
+ // Not implemented for x86.
+ NYI_X86("getFirstArgWithStackSlot not yet implemented for x86.");
+ return BAD_VAR_NUM;
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING || _TARGET_ARM64_
+}
+
+#endif // !LEGACY_BACKEND && (_TARGET_XARCH_ || _TARGET_ARM64_)
+
+/*****************************************************************************/
+#ifdef DEBUGGING_SUPPORT
+
+/*****************************************************************************
+ * genSetScopeInfo
+ *
+ * This function should be called only after the sizes of the emitter blocks
+ * have been finalized.
+ */
+
+void CodeGen::genSetScopeInfo()
+{
+ if (!compiler->opts.compScopeInfo)
+ {
+ return;
+ }
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("*************** In genSetScopeInfo()\n");
+ }
+#endif
+
+ if (compiler->info.compVarScopesCount == 0)
+ {
+ compiler->eeSetLVcount(0);
+ compiler->eeSetLVdone();
+ return;
+ }
+
+ noway_assert(compiler->opts.compScopeInfo && (compiler->info.compVarScopesCount > 0));
+ noway_assert(psiOpenScopeList.scNext == nullptr);
+
+ unsigned i;
+ unsigned scopeCnt = siScopeCnt + psiScopeCnt;
+
+ compiler->eeSetLVcount(scopeCnt);
+
+#ifdef DEBUG
+ genTrnslLocalVarCount = scopeCnt;
+ if (scopeCnt)
+ {
+ genTrnslLocalVarInfo = new (compiler, CMK_DebugOnly) TrnslLocalVarInfo[scopeCnt];
+ }
+#endif
+
+ // Record the scopes found for the parameters over the prolog.
+ // The prolog needs to be treated differently as a variable may not
+ // have the same info in the prolog block as is given by compiler->lvaTable.
+ // eg. A register parameter is actually on the stack, before it is loaded to reg.
+
+ CodeGen::psiScope* scopeP;
+
+ for (i = 0, scopeP = psiScopeList.scNext; i < psiScopeCnt; i++, scopeP = scopeP->scNext)
+ {
+ noway_assert(scopeP != nullptr);
+ noway_assert(scopeP->scStartLoc.Valid());
+ noway_assert(scopeP->scEndLoc.Valid());
+
+ UNATIVE_OFFSET startOffs = scopeP->scStartLoc.CodeOffset(getEmitter());
+ UNATIVE_OFFSET endOffs = scopeP->scEndLoc.CodeOffset(getEmitter());
+
+ unsigned varNum = scopeP->scSlotNum;
+ noway_assert(startOffs <= endOffs);
+
+ // The range may be 0 if the prolog is empty. For such a case,
+ // report the liveness of arguments to span at least the first
+ // instruction in the method. This will be incorrect (except on
+ // entry to the method) if the very first instruction of the method
+ // is part of a loop. However, this should happen
+        // very rarely, and the inaccuracy is an acceptable price for being
+        // able to inspect the argument on entry to the method.
+ if (startOffs == endOffs)
+ {
+ noway_assert(startOffs == 0);
+ endOffs++;
+ }
+
+ Compiler::siVarLoc varLoc;
+
+ if (scopeP->scRegister)
+ {
+ varLoc.vlType = Compiler::VLT_REG;
+ varLoc.vlReg.vlrReg = (regNumber)scopeP->u1.scRegNum;
+ }
+ else
+ {
+ varLoc.vlType = Compiler::VLT_STK;
+ varLoc.vlStk.vlsBaseReg = (regNumber)scopeP->u2.scBaseReg;
+ varLoc.vlStk.vlsOffset = scopeP->u2.scOffset;
+ }
+
+ genSetScopeInfo(i, startOffs, endOffs - startOffs, varNum, scopeP->scLVnum, true, varLoc);
+ }
+
+ // Record the scopes for the rest of the method.
+ // Check that the LocalVarInfo scopes look OK
+ noway_assert(siOpenScopeList.scNext == nullptr);
+
+ CodeGen::siScope* scopeL;
+
+ for (i = 0, scopeL = siScopeList.scNext; i < siScopeCnt; i++, scopeL = scopeL->scNext)
+ {
+ noway_assert(scopeL != nullptr);
+ noway_assert(scopeL->scStartLoc.Valid());
+ noway_assert(scopeL->scEndLoc.Valid());
+
+ // Find the start and end IP
+
+ UNATIVE_OFFSET startOffs = scopeL->scStartLoc.CodeOffset(getEmitter());
+ UNATIVE_OFFSET endOffs = scopeL->scEndLoc.CodeOffset(getEmitter());
+
+ noway_assert(scopeL->scStartLoc != scopeL->scEndLoc);
+
+ // For stack vars, find the base register, and offset
+
+ regNumber baseReg;
+ signed offset = compiler->lvaTable[scopeL->scVarNum].lvStkOffs;
+
+ if (!compiler->lvaTable[scopeL->scVarNum].lvFramePointerBased)
+ {
+ baseReg = REG_SPBASE;
+ offset += scopeL->scStackLevel;
+ }
+ else
+ {
+ baseReg = REG_FPBASE;
+ }
+
+ // Now fill in the varLoc
+
+ Compiler::siVarLoc varLoc;
+
+ // TODO-Review: This only works for always-enregistered variables. With LSRA, a variable might be in a register
+ // for part of its lifetime, or in different registers for different parts of its lifetime.
+ // This should only matter for non-debug code, where we do variable enregistration.
+ // We should store the ranges of variable enregistration in the scope table.
+ if (compiler->lvaTable[scopeL->scVarNum].lvIsInReg())
+ {
+ var_types type = genActualType(compiler->lvaTable[scopeL->scVarNum].TypeGet());
+ switch (type)
+ {
+ case TYP_INT:
+ case TYP_REF:
+ case TYP_BYREF:
+#ifdef _TARGET_64BIT_
+ case TYP_LONG:
+#endif // _TARGET_64BIT_
+
+ varLoc.vlType = Compiler::VLT_REG;
+ varLoc.vlReg.vlrReg = compiler->lvaTable[scopeL->scVarNum].lvRegNum;
+ break;
+
+#ifndef _TARGET_64BIT_
+ case TYP_LONG:
+#if !CPU_HAS_FP_SUPPORT
+ case TYP_DOUBLE:
+#endif
+
+ if (compiler->lvaTable[scopeL->scVarNum].lvOtherReg != REG_STK)
+ {
+ varLoc.vlType = Compiler::VLT_REG_REG;
+ varLoc.vlRegReg.vlrrReg1 = compiler->lvaTable[scopeL->scVarNum].lvRegNum;
+ varLoc.vlRegReg.vlrrReg2 = compiler->lvaTable[scopeL->scVarNum].lvOtherReg;
+ }
+ else
+ {
+ varLoc.vlType = Compiler::VLT_REG_STK;
+ varLoc.vlRegStk.vlrsReg = compiler->lvaTable[scopeL->scVarNum].lvRegNum;
+ varLoc.vlRegStk.vlrsStk.vlrssBaseReg = baseReg;
+ if (!isFramePointerUsed() && varLoc.vlRegStk.vlrsStk.vlrssBaseReg == REG_SPBASE)
+ {
+ varLoc.vlRegStk.vlrsStk.vlrssBaseReg = (regNumber)ICorDebugInfo::REGNUM_AMBIENT_SP;
+ }
+ varLoc.vlRegStk.vlrsStk.vlrssOffset = offset + sizeof(int);
+ }
+ break;
+#endif // !_TARGET_64BIT_
+
+#ifdef _TARGET_64BIT_
+
+ case TYP_FLOAT:
+ case TYP_DOUBLE:
+ // TODO-AMD64-Bug: ndp\clr\src\inc\corinfo.h has a definition of RegNum that only goes up to R15,
+ // so no XMM registers can get debug information.
+ varLoc.vlType = Compiler::VLT_REG_FP;
+ varLoc.vlReg.vlrReg = compiler->lvaTable[scopeL->scVarNum].lvRegNum;
+ break;
+
+#else // !_TARGET_64BIT_
+
+#if CPU_HAS_FP_SUPPORT
+ case TYP_FLOAT:
+ case TYP_DOUBLE:
+ if (isFloatRegType(type))
+ {
+ varLoc.vlType = Compiler::VLT_FPSTK;
+ varLoc.vlFPstk.vlfReg = compiler->lvaTable[scopeL->scVarNum].lvRegNum;
+ }
+ break;
+#endif // CPU_HAS_FP_SUPPORT
+
+#endif // !_TARGET_64BIT_
+
+#ifdef FEATURE_SIMD
+ case TYP_SIMD8:
+ case TYP_SIMD12:
+ case TYP_SIMD16:
+ case TYP_SIMD32:
+ varLoc.vlType = Compiler::VLT_REG_FP;
+
+ // TODO-AMD64-Bug: ndp\clr\src\inc\corinfo.h has a definition of RegNum that only goes up to R15,
+ // so no XMM registers can get debug information.
+ //
+                    // Note: we need to initialize the vlrReg field; otherwise the jit dump hits an assert
+                    // in eeDispVar() --> getRegName() that the regNumber is valid.
+ varLoc.vlReg.vlrReg = compiler->lvaTable[scopeL->scVarNum].lvRegNum;
+ break;
+#endif // FEATURE_SIMD
+
+ default:
+ noway_assert(!"Invalid type");
+ }
+ }
+ else
+ {
+ assert(offset != BAD_STK_OFFS);
+ LclVarDsc* varDsc = compiler->lvaTable + scopeL->scVarNum;
+ switch (genActualType(varDsc->TypeGet()))
+ {
+ case TYP_INT:
+ case TYP_REF:
+ case TYP_BYREF:
+ case TYP_FLOAT:
+ case TYP_STRUCT:
+ case TYP_BLK: // Needed because of the TYP_BLK stress mode
+#ifdef FEATURE_SIMD
+ case TYP_SIMD8:
+ case TYP_SIMD12:
+ case TYP_SIMD16:
+ case TYP_SIMD32:
+#endif
+#ifdef _TARGET_64BIT_
+ case TYP_LONG:
+ case TYP_DOUBLE:
+#endif // _TARGET_64BIT_
+#if defined(_TARGET_AMD64_) || defined(_TARGET_ARM64_)
+ // In the AMD64 ABI we are supposed to pass a struct by reference when its
+                    // size is not 1, 2, 4 or 8 bytes. During fgMorph, the compiler modifies
+ // the IR to comply with the ABI and therefore changes the type of the lclVar
+ // that holds the struct from TYP_STRUCT to TYP_BYREF but it gives us a hint that
+ // this is still a struct by setting the lvIsTemp flag.
+ // The same is true for ARM64 and structs > 16 bytes.
+ // (See Compiler::fgMarkImplicitByRefArgs in Morph.cpp for further detail)
+ // Now, the VM expects a special enum for these type of local vars: VLT_STK_BYREF
+ // to accomodate for this situation.
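+                // For example (hypothetical C# source, for illustration only):
+                //     struct Big { long a, b, c; }   // 24 bytes: not 1/2/4/8, so passed by reference
+                //     static void M(Big x) { ... }   // 'x' becomes a TYP_BYREF param with lvIsTemp set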
+ if (varDsc->lvType == TYP_BYREF && varDsc->lvIsTemp)
+ {
+ assert(varDsc->lvIsParam);
+ varLoc.vlType = Compiler::VLT_STK_BYREF;
+ }
+ else
+#endif // defined(_TARGET_AMD64_) || defined(_TARGET_ARM64_)
+ {
+ varLoc.vlType = Compiler::VLT_STK;
+ }
+ varLoc.vlStk.vlsBaseReg = baseReg;
+ varLoc.vlStk.vlsOffset = offset;
+ if (!isFramePointerUsed() && varLoc.vlStk.vlsBaseReg == REG_SPBASE)
+ {
+ varLoc.vlStk.vlsBaseReg = (regNumber)ICorDebugInfo::REGNUM_AMBIENT_SP;
+ }
+ break;
+
+#ifndef _TARGET_64BIT_
+ case TYP_LONG:
+ case TYP_DOUBLE:
+ varLoc.vlType = Compiler::VLT_STK2;
+ varLoc.vlStk2.vls2BaseReg = baseReg;
+ varLoc.vlStk2.vls2Offset = offset;
+ if (!isFramePointerUsed() && varLoc.vlStk2.vls2BaseReg == REG_SPBASE)
+ {
+ varLoc.vlStk2.vls2BaseReg = (regNumber)ICorDebugInfo::REGNUM_AMBIENT_SP;
+ }
+ break;
+#endif // !_TARGET_64BIT_
+
+ default:
+ noway_assert(!"Invalid type");
+ }
+ }
+
+ genSetScopeInfo(psiScopeCnt + i, startOffs, endOffs - startOffs, scopeL->scVarNum, scopeL->scLVnum,
+ scopeL->scAvailable, varLoc);
+ }
+
+ compiler->eeSetLVdone();
+}
+
+/*****************************************************************************/
+#ifdef LATE_DISASM
+#if defined(DEBUG)
+/*****************************************************************************
+ * siRegVarName
+ *
+ * Can be called only after lviSetLocalVarInfo() has been called
+ */
+
+/* virtual */
+const char* CodeGen::siRegVarName(size_t offs, size_t size, unsigned reg)
+{
+ if (!compiler->opts.compScopeInfo)
+ return nullptr;
+
+ if (compiler->info.compVarScopesCount == 0)
+ return nullptr;
+
+ noway_assert(genTrnslLocalVarCount == 0 || genTrnslLocalVarInfo);
+
+ for (unsigned i = 0; i < genTrnslLocalVarCount; i++)
+ {
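+        // Report this variable if it lives in the given register and its live range
+        // [tlviStartPC, tlviStartPC + tlviLength) overlaps the code range being disassembled.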
+        if ((genTrnslLocalVarInfo[i].tlviVarLoc.vlIsInReg((regNumber)reg)) &&
+            genTrnslLocalVarInfo[i].tlviAvailable && (genTrnslLocalVarInfo[i].tlviStartPC <= offs + size) &&
+            (genTrnslLocalVarInfo[i].tlviStartPC + genTrnslLocalVarInfo[i].tlviLength > offs))
+        {
+            return genTrnslLocalVarInfo[i].tlviName ? compiler->VarNameToStr(genTrnslLocalVarInfo[i].tlviName)
+                                                    : nullptr;
+        }
+    }
+
+    return nullptr;
+}
+
+/*****************************************************************************
+ * siStackVarName
+ *
+ * Can be called only after lviSetLocalVarInfo() has been called
+ */
+
+/* virtual */
+const char* CodeGen::siStackVarName(size_t offs, size_t size, unsigned reg, unsigned stkOffs)
+{
+ if (!compiler->opts.compScopeInfo)
+ return nullptr;
+
+ if (compiler->info.compVarScopesCount == 0)
+ return nullptr;
+
+ noway_assert(genTrnslLocalVarCount == 0 || genTrnslLocalVarInfo);
+
+ for (unsigned i = 0; i < genTrnslLocalVarCount; i++)
+ {
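+        // Same live-range overlap test as siRegVarName above, but matching a stack
+        // location (base register + offset) rather than a register.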
+        if ((genTrnslLocalVarInfo[i].tlviVarLoc.vlIsOnStk((regNumber)reg, stkOffs)) &&
+            genTrnslLocalVarInfo[i].tlviAvailable && (genTrnslLocalVarInfo[i].tlviStartPC <= offs + size) &&
+            (genTrnslLocalVarInfo[i].tlviStartPC + genTrnslLocalVarInfo[i].tlviLength > offs))
+        {
+            return genTrnslLocalVarInfo[i].tlviName ? compiler->VarNameToStr(genTrnslLocalVarInfo[i].tlviName)
+                                                    : nullptr;
+        }
+    }
+
+    return nullptr;
+}
+
+/*****************************************************************************/
+#endif // defined(DEBUG)
+#endif // LATE_DISASM
+
+#ifdef DEBUG
+
+/*****************************************************************************
+ * Display an IPmappingDsc. Pass -1 as mappingNum to omit the mapping number.
+ */
+
+void CodeGen::genIPmappingDisp(unsigned mappingNum, Compiler::IPmappingDsc* ipMapping)
+{
+ if (mappingNum != unsigned(-1))
+ {
+        printf("%u: ", mappingNum);
+ }
+
+ IL_OFFSETX offsx = ipMapping->ipmdILoffsx;
+
+ if (offsx == BAD_IL_OFFSET)
+ {
+ printf("???");
+ }
+ else
+ {
+ Compiler::eeDispILOffs(jitGetILoffsAny(offsx));
+
+ if (jitIsStackEmpty(offsx))
+ {
+ printf(" STACK_EMPTY");
+ }
+
+ if (jitIsCallInstruction(offsx))
+ {
+ printf(" CALL_INSTRUCTION");
+ }
+ }
+
+ printf(" ");
+ ipMapping->ipmdNativeLoc.Print();
+ // We can only call this after code generation. Is there any way to tell when it's legal to call?
+ // printf(" [%x]", ipMapping->ipmdNativeLoc.CodeOffset(getEmitter()));
+
+ if (ipMapping->ipmdIsLabel)
+ {
+ printf(" label");
+ }
+
+ printf("\n");
+}
+
+void CodeGen::genIPmappingListDisp()
+{
+ unsigned mappingNum = 0;
+ Compiler::IPmappingDsc* ipMapping;
+
+ for (ipMapping = compiler->genIPmappingList; ipMapping != nullptr; ipMapping = ipMapping->ipmdNext)
+ {
+ genIPmappingDisp(mappingNum, ipMapping);
+ ++mappingNum;
+ }
+}
+
+#endif // DEBUG
+
+/*****************************************************************************
+ *
+ * Append an IPmappingDsc struct to the list that we're maintaining
+ * for the debugger.
+ * Record the instr offset as being at the current code gen position.
+ */
+
+void CodeGen::genIPmappingAdd(IL_OFFSETX offsx, bool isLabel)
+{
+ if (!compiler->opts.compDbgInfo)
+ {
+ return;
+ }
+
+ assert(offsx != BAD_IL_OFFSET);
+
+ switch ((int)offsx) // Need the cast since offs is unsigned and the case statements are comparing to signed.
+ {
+ case ICorDebugInfo::PROLOG:
+ case ICorDebugInfo::EPILOG:
+ break;
+
+ default:
+
+ if (offsx != ICorDebugInfo::NO_MAPPING)
+ {
+ noway_assert(jitGetILoffs(offsx) <= compiler->info.compILCodeSize);
+ }
+
+ // Ignore this one if it's the same IL offset as the last one we saw.
+ // Note that we'll let through two identical IL offsets if the flag bits
+ // differ, or two identical "special" mappings (e.g., PROLOG).
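+            // For example (illustrative): 0x10 and (0x10 | IL_OFFSETX_STKBIT) compare as
+            // different offsx values here, so both mappings are recorded.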
+ if ((compiler->genIPmappingLast != nullptr) && (offsx == compiler->genIPmappingLast->ipmdILoffsx))
+ {
+ JITDUMP("genIPmappingAdd: ignoring duplicate IL offset 0x%x\n", offsx);
+ return;
+ }
+ break;
+ }
+
+ /* Create a mapping entry and append it to the list */
+
+ Compiler::IPmappingDsc* addMapping =
+ (Compiler::IPmappingDsc*)compiler->compGetMem(sizeof(*addMapping), CMK_DebugInfo);
+
+ addMapping->ipmdNativeLoc.CaptureLocation(getEmitter());
+ addMapping->ipmdILoffsx = offsx;
+ addMapping->ipmdIsLabel = isLabel;
+ addMapping->ipmdNext = nullptr;
+
+ if (compiler->genIPmappingList != nullptr)
+ {
+ assert(compiler->genIPmappingLast != nullptr);
+ assert(compiler->genIPmappingLast->ipmdNext == nullptr);
+ compiler->genIPmappingLast->ipmdNext = addMapping;
+ }
+ else
+ {
+ assert(compiler->genIPmappingLast == nullptr);
+ compiler->genIPmappingList = addMapping;
+ }
+
+ compiler->genIPmappingLast = addMapping;
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Added IP mapping: ");
+ genIPmappingDisp(unsigned(-1), addMapping);
+ }
+#endif // DEBUG
+}
+
+/*****************************************************************************
+ *
+ * Prepend an IPmappingDsc struct to the list that we're maintaining
+ * for the debugger.
+ * Record the instr offset as being at the current code gen position.
+ */
+void CodeGen::genIPmappingAddToFront(IL_OFFSETX offsx)
+{
+ if (!compiler->opts.compDbgInfo)
+ {
+ return;
+ }
+
+ assert(offsx != BAD_IL_OFFSET);
+ assert(compiler->compGeneratingProlog); // We only ever do this during prolog generation.
+
+ switch ((int)offsx) // Need the cast since offs is unsigned and the case statements are comparing to signed.
+ {
+ case ICorDebugInfo::NO_MAPPING:
+ case ICorDebugInfo::PROLOG:
+ case ICorDebugInfo::EPILOG:
+ break;
+
+ default:
+ noway_assert(jitGetILoffs(offsx) <= compiler->info.compILCodeSize);
+ break;
+ }
+
+ /* Create a mapping entry and prepend it to the list */
+
+ Compiler::IPmappingDsc* addMapping =
+ (Compiler::IPmappingDsc*)compiler->compGetMem(sizeof(*addMapping), CMK_DebugInfo);
+
+ addMapping->ipmdNativeLoc.CaptureLocation(getEmitter());
+ addMapping->ipmdILoffsx = offsx;
+ addMapping->ipmdIsLabel = true;
+ addMapping->ipmdNext = nullptr;
+
+ addMapping->ipmdNext = compiler->genIPmappingList;
+ compiler->genIPmappingList = addMapping;
+
+ if (compiler->genIPmappingLast == nullptr)
+ {
+ compiler->genIPmappingLast = addMapping;
+ }
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Added IP mapping to front: ");
+ genIPmappingDisp(unsigned(-1), addMapping);
+ }
+#endif // DEBUG
+}
+
+/*****************************************************************************/
+
+C_ASSERT(IL_OFFSETX(ICorDebugInfo::NO_MAPPING) != IL_OFFSETX(BAD_IL_OFFSET));
+C_ASSERT(IL_OFFSETX(ICorDebugInfo::PROLOG) != IL_OFFSETX(BAD_IL_OFFSET));
+C_ASSERT(IL_OFFSETX(ICorDebugInfo::EPILOG) != IL_OFFSETX(BAD_IL_OFFSET));
+
+C_ASSERT(IL_OFFSETX(BAD_IL_OFFSET) > MAX_IL_OFFSET);
+C_ASSERT(IL_OFFSETX(ICorDebugInfo::NO_MAPPING) > MAX_IL_OFFSET);
+C_ASSERT(IL_OFFSETX(ICorDebugInfo::PROLOG) > MAX_IL_OFFSET);
+C_ASSERT(IL_OFFSETX(ICorDebugInfo::EPILOG) > MAX_IL_OFFSET);
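+
+// For illustration: an IL_OFFSETX packs the raw IL offset together with flag bits
+// (IL_OFFSETX_STKBIT, IL_OFFSETX_CALLINSTRUCTIONBIT), which the helpers below decode.
+// For example, given offsx = 0x10 | IL_OFFSETX_CALLINSTRUCTIONBIT:
+//     jitGetILoffs(offsx)         returns 0x10
+//     jitIsCallInstruction(offsx) returns true
+//     jitIsStackEmpty(offsx)      returns true (IL_OFFSETX_STKBIT is not set)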
+
+//------------------------------------------------------------------------
+// jitGetILoffs: Returns the IL offset portion of the IL_OFFSETX type.
+// Asserts if any ICorDebugInfo distinguished value (like ICorDebugInfo::NO_MAPPING)
+// is seen; these are unexpected here. Also asserts if passed BAD_IL_OFFSET.
+//
+// Arguments:
+// offsx - the IL_OFFSETX value with the IL offset to extract.
+//
+// Return Value:
+// The IL offset.
+
+IL_OFFSET jitGetILoffs(IL_OFFSETX offsx)
+{
+ assert(offsx != BAD_IL_OFFSET);
+
+ switch ((int)offsx) // Need the cast since offs is unsigned and the case statements are comparing to signed.
+ {
+ case ICorDebugInfo::NO_MAPPING:
+ case ICorDebugInfo::PROLOG:
+ case ICorDebugInfo::EPILOG:
+ unreached();
+
+ default:
+ return IL_OFFSET(offsx & ~IL_OFFSETX_BITS);
+ }
+}
+
+//------------------------------------------------------------------------
+// jitGetILoffsAny: Similar to jitGetILoffs(), but passes through ICorDebugInfo
+// distinguished values. Asserts if passed BAD_IL_OFFSET.
+//
+// Arguments:
+// offsx - the IL_OFFSETX value with the IL offset to extract.
+//
+// Return Value:
+// The IL offset.
+
+IL_OFFSET jitGetILoffsAny(IL_OFFSETX offsx)
+{
+ assert(offsx != BAD_IL_OFFSET);
+
+ switch ((int)offsx) // Need the cast since offs is unsigned and the case statements are comparing to signed.
+ {
+ case ICorDebugInfo::NO_MAPPING:
+ case ICorDebugInfo::PROLOG:
+ case ICorDebugInfo::EPILOG:
+ return IL_OFFSET(offsx);
+
+ default:
+ return IL_OFFSET(offsx & ~IL_OFFSETX_BITS);
+ }
+}
+
+//------------------------------------------------------------------------
+// jitIsStackEmpty: Does the IL offset have the stack empty bit set?
+// Asserts if passed BAD_IL_OFFSET.
+//
+// Arguments:
+// offsx - the IL_OFFSETX value to check
+//
+// Return Value:
+// 'true' if the stack empty bit is set; 'false' otherwise.
+
+bool jitIsStackEmpty(IL_OFFSETX offsx)
+{
+ assert(offsx != BAD_IL_OFFSET);
+
+ switch ((int)offsx) // Need the cast since offs is unsigned and the case statements are comparing to signed.
+ {
+ case ICorDebugInfo::NO_MAPPING:
+ case ICorDebugInfo::PROLOG:
+ case ICorDebugInfo::EPILOG:
+ return true;
+
+ default:
+ return (offsx & IL_OFFSETX_STKBIT) == 0;
+ }
+}
+
+//------------------------------------------------------------------------
+// jitIsCallInstruction: Does the IL offset have the call instruction bit set?
+// Asserts if passed BAD_IL_OFFSET.
+//
+// Arguments:
+// offsx - the IL_OFFSETX value to check
+//
+// Return Value:
+// 'true' if the call instruction bit is set; 'false' otherwise.
+
+bool jitIsCallInstruction(IL_OFFSETX offsx)
+{
+ assert(offsx != BAD_IL_OFFSET);
+
+ switch ((int)offsx) // Need the cast since offs is unsigned and the case statements are comparing to signed.
+ {
+ case ICorDebugInfo::NO_MAPPING:
+ case ICorDebugInfo::PROLOG:
+ case ICorDebugInfo::EPILOG:
+ return false;
+
+ default:
+ return (offsx & IL_OFFSETX_CALLINSTRUCTIONBIT) != 0;
+ }
+}
+
+/*****************************************************************************/
+
+void CodeGen::genEnsureCodeEmitted(IL_OFFSETX offsx)
+{
+ if (!compiler->opts.compDbgCode)
+ {
+ return;
+ }
+
+ if (offsx == BAD_IL_OFFSET)
+ {
+ return;
+ }
+
+    /* If other IL offsets were reported, skip */
+
+ if (compiler->genIPmappingLast == nullptr)
+ {
+ return;
+ }
+
+ if (compiler->genIPmappingLast->ipmdILoffsx != offsx)
+ {
+ return;
+ }
+
+ /* offsx was the last reported offset. Make sure that we generated native code */
+
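+    // If no instructions have been emitted since the mapping was captured, emit a nop so
+    // the reported IL offset maps to a real instruction (presumably so a breakpoint set
+    // at this offset has an instruction to land on).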
+ if (compiler->genIPmappingLast->ipmdNativeLoc.IsCurrentLocation(getEmitter()))
+ {
+ instGen(INS_nop);
+ }
+}
+
+/*****************************************************************************
+ *
+ * Shut down the IP-mapping logic, report the info to the EE.
+ */
+
+void CodeGen::genIPmappingGen()
+{
+ if (!compiler->opts.compDbgInfo)
+ {
+ return;
+ }
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("*************** In genIPmappingGen()\n");
+ }
+#endif
+
+ if (compiler->genIPmappingList == nullptr)
+ {
+ compiler->eeSetLIcount(0);
+ compiler->eeSetLIdone();
+ return;
+ }
+
+ Compiler::IPmappingDsc* tmpMapping;
+ Compiler::IPmappingDsc* prevMapping;
+ unsigned mappingCnt;
+ UNATIVE_OFFSET lastNativeOfs;
+
+ /* First count the number of distinct mapping records */
+
+ mappingCnt = 0;
+ lastNativeOfs = UNATIVE_OFFSET(~0);
+
+ for (prevMapping = nullptr, tmpMapping = compiler->genIPmappingList; tmpMapping != nullptr;
+ tmpMapping = tmpMapping->ipmdNext)
+ {
+ IL_OFFSETX srcIP = tmpMapping->ipmdILoffsx;
+
+ // Managed RetVal - since new sequence points are emitted to identify IL calls,
+ // make sure that those are not filtered and do not interfere with filtering of
+ // other sequence points.
+ if (jitIsCallInstruction(srcIP))
+ {
+ mappingCnt++;
+ continue;
+ }
+
+ UNATIVE_OFFSET nextNativeOfs = tmpMapping->ipmdNativeLoc.CodeOffset(getEmitter());
+
+ if (nextNativeOfs != lastNativeOfs)
+ {
+ mappingCnt++;
+ lastNativeOfs = nextNativeOfs;
+ prevMapping = tmpMapping;
+ continue;
+ }
+
+ /* If there are mappings with the same native offset, then:
+ o If one of them is NO_MAPPING, ignore it
+ o If one of them is a label, report that and ignore the other one
+ o Else report the higher IL offset
+ */
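+        // For example: if two consecutive mappings both resolve to native offset 0x40 and
+        // neither is NO_MAPPING or a label, the earlier one's native location is cleared
+        // below, and only the later (higher IL offset) mapping gets reported.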
+
+        PREFIX_ASSUME(prevMapping != nullptr); // Set on a prior iteration, together with lastNativeOfs
+ if (prevMapping->ipmdILoffsx == (IL_OFFSETX)ICorDebugInfo::NO_MAPPING)
+ {
+ // If the previous entry was NO_MAPPING, ignore it
+ prevMapping->ipmdNativeLoc.Init();
+ prevMapping = tmpMapping;
+ }
+ else if (srcIP == (IL_OFFSETX)ICorDebugInfo::NO_MAPPING)
+ {
+ // If the current entry is NO_MAPPING, ignore it
+ // Leave prevMapping unchanged as tmpMapping is no longer valid
+ tmpMapping->ipmdNativeLoc.Init();
+ }
+ else if (srcIP == (IL_OFFSETX)ICorDebugInfo::EPILOG || srcIP == 0)
+ {
+ // counting for special cases: see below
+ mappingCnt++;
+ prevMapping = tmpMapping;
+ }
+ else
+ {
+ noway_assert(prevMapping != nullptr);
+ noway_assert(!prevMapping->ipmdNativeLoc.Valid() ||
+ lastNativeOfs == prevMapping->ipmdNativeLoc.CodeOffset(getEmitter()));
+
+ /* The previous block had the same native offset. We have to
+ discard one of the mappings. Simply reinitialize ipmdNativeLoc
+ and prevMapping will be ignored later. */
+
+ if (prevMapping->ipmdIsLabel)
+ {
+ // Leave prevMapping unchanged as tmpMapping is no longer valid
+ tmpMapping->ipmdNativeLoc.Init();
+ }
+ else
+ {
+ prevMapping->ipmdNativeLoc.Init();
+ prevMapping = tmpMapping;
+ }
+ }
+ }
+
+ /* Tell them how many mapping records we've got */
+
+ compiler->eeSetLIcount(mappingCnt);
+
+ /* Now tell them about the mappings */
+
+ mappingCnt = 0;
+ lastNativeOfs = UNATIVE_OFFSET(~0);
+
+ for (tmpMapping = compiler->genIPmappingList; tmpMapping != nullptr; tmpMapping = tmpMapping->ipmdNext)
+ {
+ // Do we have to skip this record ?
+ if (!tmpMapping->ipmdNativeLoc.Valid())
+ {
+ continue;
+ }
+
+ UNATIVE_OFFSET nextNativeOfs = tmpMapping->ipmdNativeLoc.CodeOffset(getEmitter());
+ IL_OFFSETX srcIP = tmpMapping->ipmdILoffsx;
+
+ if (jitIsCallInstruction(srcIP))
+ {
+ compiler->eeSetLIinfo(mappingCnt++, nextNativeOfs, jitGetILoffs(srcIP), jitIsStackEmpty(srcIP), true);
+ }
+ else if (nextNativeOfs != lastNativeOfs)
+ {
+ compiler->eeSetLIinfo(mappingCnt++, nextNativeOfs, jitGetILoffsAny(srcIP), jitIsStackEmpty(srcIP), false);
+ lastNativeOfs = nextNativeOfs;
+ }
+ else if (srcIP == (IL_OFFSETX)ICorDebugInfo::EPILOG || srcIP == 0)
+ {
+ // For the special case of an IL instruction with no body
+ // followed by the epilog (say ret void immediately preceding
+ // the method end), we put two entries in, so that we'll stop
+ // at the (empty) ret statement if the user tries to put a
+ // breakpoint there, and then have the option of seeing the
+ // epilog or not based on SetUnmappedStopMask for the stepper.
+ compiler->eeSetLIinfo(mappingCnt++, nextNativeOfs, jitGetILoffsAny(srcIP), jitIsStackEmpty(srcIP), false);
+ }
+ }
+
+#if 0
+ // TODO-Review:
+    // This check is disabled. Whenever it asserts, the debugger would have a problem with
+    // IL source-level debugging; however, for a C# file, it only matters if things are on
+    // different source lines. As a result, we have all sorts of latent problems with how we
+    // emit debug info, but very few actual ones. Whenever someone wants to tackle that
+    // problem in general, turn this assert back on.
+    if (compiler->opts.compDbgCode)
+    {
+        // Assert that the first instruction of every basic block with more than one incoming
+        // edge has a different sequence point from each incoming block.
+        //
+        // It turns out that the only thing we really have to assert is that the first statement
+        // in each basic block has an IL offset and appears in eeBoundaries.
+        for (BasicBlock* block = compiler->fgFirstBB; block != nullptr; block = block->bbNext)
+ {
+ if ((block->bbRefs > 1) && (block->bbTreeList != nullptr))
+ {
+ noway_assert(block->bbTreeList->gtOper == GT_STMT);
+ bool found = false;
+ if (block->bbTreeList->gtStmt.gtStmtILoffsx != BAD_IL_OFFSET)
+ {
+ IL_OFFSET ilOffs = jitGetILoffs(block->bbTreeList->gtStmt.gtStmtILoffsx);
+ for (unsigned i = 0; i < eeBoundariesCount; ++i)
+ {
+ if (eeBoundaries[i].ilOffset == ilOffs)
+ {
+ found = true;
+ break;
+ }
+ }
+ }
+ noway_assert(found && "A basic block that is a jump target did not start a new sequence point.");
+ }
+ }
+ }
+#endif // 0
+
+ compiler->eeSetLIdone();
+}
+
+#endif // DEBUGGING_SUPPORT
+
+/*============================================================================
+ *
+ * These are empty stubs that let the late disassembler compile when
+ * DEBUGGING_SUPPORT is not enabled, or when the late disassembler is
+ * built into a non-DEBUG build.
+ *
+ *============================================================================
+ */
+
+#if defined(LATE_DISASM)
+#if !defined(DEBUGGING_SUPPORT) || !defined(DEBUG)
+
+/* virtual */
+const char* CodeGen::siRegVarName(size_t offs, size_t size, unsigned reg)
+{
+    return nullptr;
+}
+
+/* virtual */
+const char* CodeGen::siStackVarName(size_t offs, size_t size, unsigned reg, unsigned stkOffs)
+{
+    return nullptr;
+}
+
+/*****************************************************************************/
+#endif // !defined(DEBUGGING_SUPPORT) || !defined(DEBUG)
+#endif // defined(LATE_DISASM)
+/*****************************************************************************/