summaryrefslogtreecommitdiff
path: root/src/jit/codegenxarch.cpp
diff options
context:
space:
mode:
authorJiyoung Yun <jy910.yun@samsung.com>2016-12-27 16:46:08 +0900
committerJiyoung Yun <jy910.yun@samsung.com>2016-12-27 16:46:08 +0900
commitdb20f3f1bb8595633a7e16c8900fd401a453a6b5 (patch)
treee5435159cd1bf0519276363a6fe1663d1721bed3 /src/jit/codegenxarch.cpp
parent4b4aad7217d3292650e77eec2cf4c198ea9c3b4b (diff)
downloadcoreclr-db20f3f1bb8595633a7e16c8900fd401a453a6b5.tar.gz
coreclr-db20f3f1bb8595633a7e16c8900fd401a453a6b5.tar.bz2
coreclr-db20f3f1bb8595633a7e16c8900fd401a453a6b5.zip
Imported Upstream version 1.0.0.9127upstream/1.0.0.9127
Diffstat (limited to 'src/jit/codegenxarch.cpp')
-rw-r--r--src/jit/codegenxarch.cpp3899
1 files changed, 1577 insertions, 2322 deletions
diff --git a/src/jit/codegenxarch.cpp b/src/jit/codegenxarch.cpp
index a41c28695b..8e0af48799 100644
--- a/src/jit/codegenxarch.cpp
+++ b/src/jit/codegenxarch.cpp
@@ -24,114 +24,6 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
#include "gcinfo.h"
#include "gcinfoencoder.h"
-// Get the register assigned to the given node
-
-regNumber CodeGenInterface::genGetAssignedReg(GenTreePtr tree)
-{
- return tree->gtRegNum;
-}
-
-//------------------------------------------------------------------------
-// genSpillVar: Spill a local variable
-//
-// Arguments:
-// tree - the lclVar node for the variable being spilled
-//
-// Return Value:
-// None.
-//
-// Assumptions:
-// The lclVar must be a register candidate (lvRegCandidate)
-
-void CodeGen::genSpillVar(GenTreePtr tree)
-{
- unsigned varNum = tree->gtLclVarCommon.gtLclNum;
- LclVarDsc* varDsc = &(compiler->lvaTable[varNum]);
-
- assert(varDsc->lvIsRegCandidate());
-
- // We don't actually need to spill if it is already living in memory
- bool needsSpill = ((tree->gtFlags & GTF_VAR_DEF) == 0 && varDsc->lvIsInReg());
- if (needsSpill)
- {
- var_types lclTyp = varDsc->TypeGet();
- if (varDsc->lvNormalizeOnStore())
- {
- lclTyp = genActualType(lclTyp);
- }
- emitAttr size = emitTypeSize(lclTyp);
-
- bool restoreRegVar = false;
- if (tree->gtOper == GT_REG_VAR)
- {
- tree->SetOper(GT_LCL_VAR);
- restoreRegVar = true;
- }
-
- // mask off the flag to generate the right spill code, then bring it back
- tree->gtFlags &= ~GTF_REG_VAL;
-
- instruction storeIns = ins_Store(tree->TypeGet(), compiler->isSIMDTypeLocalAligned(varNum));
-#if CPU_LONG_USES_REGPAIR
- if (varTypeIsMultiReg(tree))
- {
- assert(varDsc->lvRegNum == genRegPairLo(tree->gtRegPair));
- assert(varDsc->lvOtherReg == genRegPairHi(tree->gtRegPair));
- regNumber regLo = genRegPairLo(tree->gtRegPair);
- regNumber regHi = genRegPairHi(tree->gtRegPair);
- inst_TT_RV(storeIns, tree, regLo);
- inst_TT_RV(storeIns, tree, regHi, 4);
- }
- else
-#endif
- {
- assert(varDsc->lvRegNum == tree->gtRegNum);
- inst_TT_RV(storeIns, tree, tree->gtRegNum, 0, size);
- }
- tree->gtFlags |= GTF_REG_VAL;
-
- if (restoreRegVar)
- {
- tree->SetOper(GT_REG_VAR);
- }
-
- genUpdateRegLife(varDsc, /*isBorn*/ false, /*isDying*/ true DEBUGARG(tree));
- gcInfo.gcMarkRegSetNpt(varDsc->lvRegMask());
-
- if (VarSetOps::IsMember(compiler, gcInfo.gcTrkStkPtrLcls, varDsc->lvVarIndex))
- {
-#ifdef DEBUG
- if (!VarSetOps::IsMember(compiler, gcInfo.gcVarPtrSetCur, varDsc->lvVarIndex))
- {
- JITDUMP("\t\t\t\t\t\t\tVar V%02u becoming live\n", varNum);
- }
- else
- {
- JITDUMP("\t\t\t\t\t\t\tVar V%02u continuing live\n", varNum);
- }
-#endif
- VarSetOps::AddElemD(compiler, gcInfo.gcVarPtrSetCur, varDsc->lvVarIndex);
- }
- }
-
- tree->gtFlags &= ~GTF_SPILL;
- varDsc->lvRegNum = REG_STK;
- if (varTypeIsMultiReg(tree))
- {
- varDsc->lvOtherReg = REG_STK;
- }
-}
-
-// inline
-void CodeGenInterface::genUpdateVarReg(LclVarDsc* varDsc, GenTreePtr tree)
-{
- assert(tree->OperIsScalarLocal() || (tree->gtOper == GT_COPY));
- varDsc->lvRegNum = tree->gtRegNum;
-}
-
-/*****************************************************************************/
-/*****************************************************************************/
-
/*****************************************************************************
*
* Generate code that will set the given register to the integer constant.
@@ -231,6 +123,8 @@ void CodeGen::genEmitGSCookieCheck(bool pushReg)
}
regNumber regGSCheck;
+ regMaskTP regMaskGSCheck = RBM_NONE;
+
if (!pushReg)
{
// Non-tail call: we can use any callee trash register that is not
@@ -251,8 +145,11 @@ void CodeGen::genEmitGSCookieCheck(bool pushReg)
else
{
#ifdef _TARGET_X86_
- NYI_X86("Tail calls from methods that need GS check");
- regGSCheck = REG_NA;
+ // It doesn't matter which register we pick, since we're going to save and restore it
+ // around the check.
+ // TODO-CQ: Can we optimize the choice of register to avoid doing the push/pop sometimes?
+ regGSCheck = REG_EAX;
+ regMaskGSCheck = RBM_EAX;
#else // !_TARGET_X86_
// Tail calls from methods that need GS check: We need to preserve registers while
// emitting GS cookie check for a tail prefixed call or a jmp. To emit GS cookie
@@ -287,8 +184,13 @@ void CodeGen::genEmitGSCookieCheck(bool pushReg)
#endif // !_TARGET_X86_
}
+ regMaskTP byrefPushedRegs = RBM_NONE;
+ regMaskTP norefPushedRegs = RBM_NONE;
+ regMaskTP pushedRegs = RBM_NONE;
+
if (compiler->gsGlobalSecurityCookieAddr == nullptr)
{
+#if defined(_TARGET_AMD64_)
// If GS cookie value fits within 32-bits we can use 'cmp mem64, imm32'.
// Otherwise, load the value into a reg and use 'cmp mem64, reg64'.
if ((int)compiler->gsGlobalSecurityCookieVal != (ssize_t)compiler->gsGlobalSecurityCookieVal)
@@ -297,7 +199,9 @@ void CodeGen::genEmitGSCookieCheck(bool pushReg)
getEmitter()->emitIns_S_R(INS_cmp, EA_PTRSIZE, regGSCheck, compiler->lvaGSSecurityCookie, 0);
}
else
+#endif // defined(_TARGET_AMD64_)
{
+ assert((int)compiler->gsGlobalSecurityCookieVal == (ssize_t)compiler->gsGlobalSecurityCookieVal);
getEmitter()->emitIns_S_I(INS_cmp, EA_PTRSIZE, compiler->lvaGSSecurityCookie, 0,
(int)compiler->gsGlobalSecurityCookieVal);
}
@@ -305,6 +209,9 @@ void CodeGen::genEmitGSCookieCheck(bool pushReg)
else
{
// Ngen case - GS cookie value needs to be accessed through an indirection.
+
+ pushedRegs = genPushRegs(regMaskGSCheck, &byrefPushedRegs, &norefPushedRegs);
+
instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, regGSCheck, (ssize_t)compiler->gsGlobalSecurityCookieAddr);
getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), EA_PTRSIZE, regGSCheck, regGSCheck, 0);
getEmitter()->emitIns_S_R(INS_cmp, EA_PTRSIZE, regGSCheck, compiler->lvaGSSecurityCookie, 0);
@@ -315,821 +222,180 @@ void CodeGen::genEmitGSCookieCheck(bool pushReg)
inst_JMP(jmpEqual, gsCheckBlk);
genEmitHelperCall(CORINFO_HELP_FAIL_FAST, 0, EA_UNKNOWN);
genDefineTempLabel(gsCheckBlk);
-}
-/*****************************************************************************
- *
- * Generate code for all the basic blocks in the function.
- */
+ genPopRegs(pushedRegs, byrefPushedRegs, norefPushedRegs);
+}
-void CodeGen::genCodeForBBlist()
+BasicBlock* CodeGen::genCallFinally(BasicBlock* block, BasicBlock* lblk)
{
- unsigned varNum;
- LclVarDsc* varDsc;
-
- unsigned savedStkLvl;
-
-#ifdef DEBUG
- genInterruptibleUsed = true;
-
- // You have to be careful if you create basic blocks from now on
- compiler->fgSafeBasicBlockCreation = false;
-
- // This stress mode is not comptible with fully interruptible GC
- if (genInterruptible && compiler->opts.compStackCheckOnCall)
- {
- compiler->opts.compStackCheckOnCall = false;
- }
-
- // This stress mode is not comptible with fully interruptible GC
- if (genInterruptible && compiler->opts.compStackCheckOnRet)
- {
- compiler->opts.compStackCheckOnRet = false;
- }
-#endif // DEBUG
-
- // Prepare the blocks for exception handling codegen: mark the blocks that needs labels.
- genPrepForEHCodegen();
-
- assert(!compiler->fgFirstBBScratch ||
- compiler->fgFirstBB == compiler->fgFirstBBScratch); // compiler->fgFirstBBScratch has to be first.
-
- /* Initialize the spill tracking logic */
-
- regSet.rsSpillBeg();
-
-#ifdef DEBUGGING_SUPPORT
- /* Initialize the line# tracking logic */
+#if FEATURE_EH_FUNCLETS
+ // Generate a call to the finally, like this:
+ // mov rcx,qword ptr [rbp + 20H] // Load rcx with PSPSym
+ // call finally-funclet
+ // jmp finally-return // Only for non-retless finally calls
+ // The jmp can be a NOP if we're going to the next block.
+ // If we're generating code for the main function (not a funclet), and there is no localloc,
+ // then RSP at this point is the same value as that stored in the PSPSym. So just copy RSP
+ // instead of loading the PSPSym in this case, or if PSPSym is not used (CoreRT ABI).
- if (compiler->opts.compScopeInfo)
+ if ((compiler->lvaPSPSym == BAD_VAR_NUM) ||
+ (!compiler->compLocallocUsed && (compiler->funCurrentFunc()->funKind == FUNC_ROOT)))
{
- siInit();
+ inst_RV_RV(INS_mov, REG_ARG_0, REG_SPBASE, TYP_I_IMPL);
}
-#endif
-
- // The current implementation of switch tables requires the first block to have a label so it
- // can generate offsets to the switch label targets.
- // TODO-XArch-CQ: remove this when switches have been re-implemented to not use this.
- if (compiler->fgHasSwitch)
+ else
{
- compiler->fgFirstBB->bbFlags |= BBF_JMP_TARGET;
+ getEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL), EA_PTRSIZE, REG_ARG_0, compiler->lvaPSPSym, 0);
}
+ getEmitter()->emitIns_J(INS_call, block->bbJumpDest);
- genPendingCallLabel = nullptr;
-
- /* Initialize the pointer tracking code */
-
- gcInfo.gcRegPtrSetInit();
- gcInfo.gcVarPtrSetInit();
-
- /* If any arguments live in registers, mark those regs as such */
-
- for (varNum = 0, varDsc = compiler->lvaTable; varNum < compiler->lvaCount; varNum++, varDsc++)
+ if (block->bbFlags & BBF_RETLESS_CALL)
{
- /* Is this variable a parameter assigned to a register? */
-
- if (!varDsc->lvIsParam || !varDsc->lvRegister)
- {
- continue;
- }
+ // We have a retless call, and the last instruction generated was a call.
+ // If the next block is in a different EH region (or is the end of the code
+ // block), then we need to generate a breakpoint here (since it will never
+ // get executed) to get proper unwind behavior.
- /* Is the argument live on entry to the method? */
-
- if (!VarSetOps::IsMember(compiler, compiler->fgFirstBB->bbLiveIn, varDsc->lvVarIndex))
+ if ((block->bbNext == nullptr) || !BasicBlock::sameEHRegion(block, block->bbNext))
{
- continue;
- }
-
- /* Is this a floating-point argument? */
-
- if (varDsc->IsFloatRegType())
- {
- continue;
+ instGen(INS_BREAKPOINT); // This should never get executed
}
-
- noway_assert(!varTypeIsFloating(varDsc->TypeGet()));
-
- /* Mark the register as holding the variable */
-
- regTracker.rsTrackRegLclVar(varDsc->lvRegNum, varNum);
}
-
- unsigned finallyNesting = 0;
-
- // Make sure a set is allocated for compiler->compCurLife (in the long case), so we can set it to empty without
- // allocation at the start of each basic block.
- VarSetOps::AssignNoCopy(compiler, compiler->compCurLife, VarSetOps::MakeEmpty(compiler));
-
- /*-------------------------------------------------------------------------
- *
- * Walk the basic blocks and generate code for each one
- *
- */
-
- BasicBlock* block;
- BasicBlock* lblk; /* previous block */
-
- for (lblk = nullptr, block = compiler->fgFirstBB; block != nullptr; lblk = block, block = block->bbNext)
+ else
{
-#ifdef DEBUG
- if (compiler->verbose)
- {
- printf("\n=============== Generating ");
- block->dspBlockHeader(compiler, true, true);
- compiler->fgDispBBLiveness(block);
- }
-#endif // DEBUG
-
- // Figure out which registers hold variables on entry to this block
-
- regSet.ClearMaskVars();
- gcInfo.gcRegGCrefSetCur = RBM_NONE;
- gcInfo.gcRegByrefSetCur = RBM_NONE;
-
- compiler->m_pLinearScan->recordVarLocationsAtStartOfBB(block);
-
- genUpdateLife(block->bbLiveIn);
-
- // Even if liveness didn't change, we need to update the registers containing GC references.
- // genUpdateLife will update the registers live due to liveness changes. But what about registers that didn't
- // change? We cleared them out above. Maybe we should just not clear them out, but update the ones that change
- // here. That would require handling the changes in recordVarLocationsAtStartOfBB().
-
- regMaskTP newLiveRegSet = RBM_NONE;
- regMaskTP newRegGCrefSet = RBM_NONE;
- regMaskTP newRegByrefSet = RBM_NONE;
-#ifdef DEBUG
- VARSET_TP VARSET_INIT_NOCOPY(removedGCVars, VarSetOps::MakeEmpty(compiler));
- VARSET_TP VARSET_INIT_NOCOPY(addedGCVars, VarSetOps::MakeEmpty(compiler));
-#endif
- VARSET_ITER_INIT(compiler, iter, block->bbLiveIn, varIndex);
- while (iter.NextElem(compiler, &varIndex))
- {
- unsigned varNum = compiler->lvaTrackedToVarNum[varIndex];
- LclVarDsc* varDsc = &(compiler->lvaTable[varNum]);
-
- if (varDsc->lvIsInReg())
- {
- newLiveRegSet |= varDsc->lvRegMask();
- if (varDsc->lvType == TYP_REF)
- {
- newRegGCrefSet |= varDsc->lvRegMask();
- }
- else if (varDsc->lvType == TYP_BYREF)
- {
- newRegByrefSet |= varDsc->lvRegMask();
- }
-#ifdef DEBUG
- if (verbose && VarSetOps::IsMember(compiler, gcInfo.gcVarPtrSetCur, varIndex))
- {
- VarSetOps::AddElemD(compiler, removedGCVars, varIndex);
- }
-#endif // DEBUG
- VarSetOps::RemoveElemD(compiler, gcInfo.gcVarPtrSetCur, varIndex);
- }
- else if (compiler->lvaIsGCTracked(varDsc))
- {
-#ifdef DEBUG
- if (verbose && !VarSetOps::IsMember(compiler, gcInfo.gcVarPtrSetCur, varIndex))
- {
- VarSetOps::AddElemD(compiler, addedGCVars, varIndex);
- }
-#endif // DEBUG
- VarSetOps::AddElemD(compiler, gcInfo.gcVarPtrSetCur, varIndex);
- }
- }
-
- regSet.rsMaskVars = newLiveRegSet;
-
-#ifdef DEBUG
- if (compiler->verbose)
- {
- if (!VarSetOps::IsEmpty(compiler, addedGCVars))
- {
- printf("\t\t\t\t\t\t\tAdded GCVars: ");
- dumpConvertedVarSet(compiler, addedGCVars);
- printf("\n");
- }
- if (!VarSetOps::IsEmpty(compiler, removedGCVars))
- {
- printf("\t\t\t\t\t\t\tRemoved GCVars: ");
- dumpConvertedVarSet(compiler, removedGCVars);
- printf("\n");
- }
- }
-#endif // DEBUG
-
- gcInfo.gcMarkRegSetGCref(newRegGCrefSet DEBUGARG(true));
- gcInfo.gcMarkRegSetByref(newRegByrefSet DEBUGARG(true));
-
- /* Blocks with handlerGetsXcptnObj()==true use GT_CATCH_ARG to
- represent the exception object (TYP_REF).
- We mark REG_EXCEPTION_OBJECT as holding a GC object on entry
- to the block, it will be the first thing evaluated
- (thanks to GTF_ORDER_SIDEEFF).
- */
-
- if (handlerGetsXcptnObj(block->bbCatchTyp))
- {
- for (GenTree* node : LIR::AsRange(block))
- {
- if (node->OperGet() == GT_CATCH_ARG)
- {
- gcInfo.gcMarkRegSetGCref(RBM_EXCEPTION_OBJECT);
- break;
- }
- }
- }
-
- /* Start a new code output block */
-
- genUpdateCurrentFunclet(block);
-
- if (genAlignLoops && block->bbFlags & BBF_LOOP_HEAD)
- {
- getEmitter()->emitLoopAlign();
- }
-
-#ifdef DEBUG
- if (compiler->opts.dspCode)
- {
- printf("\n L_M%03u_BB%02u:\n", Compiler::s_compMethodsCount, block->bbNum);
- }
-#endif
-
- block->bbEmitCookie = nullptr;
-
- if (block->bbFlags & (BBF_JMP_TARGET | BBF_HAS_LABEL))
- {
- /* Mark a label and update the current set of live GC refs */
-
- block->bbEmitCookie = getEmitter()->emitAddLabel(gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur,
- gcInfo.gcRegByrefSetCur, FALSE);
- }
-
- if (block == compiler->fgFirstColdBlock)
- {
-#ifdef DEBUG
- if (compiler->verbose)
- {
- printf("\nThis is the start of the cold region of the method\n");
- }
-#endif
- // We should never have a block that falls through into the Cold section
- noway_assert(!lblk->bbFallsThrough());
-
- // We require the block that starts the Cold section to have a label
- noway_assert(block->bbEmitCookie);
- getEmitter()->emitSetFirstColdIGCookie(block->bbEmitCookie);
- }
-
- /* Both stacks are always empty on entry to a basic block */
-
- genStackLevel = 0;
-
- savedStkLvl = genStackLevel;
-
- /* Tell everyone which basic block we're working on */
-
- compiler->compCurBB = block;
-
-#ifdef DEBUGGING_SUPPORT
- siBeginBlock(block);
-
- // BBF_INTERNAL blocks don't correspond to any single IL instruction.
- if (compiler->opts.compDbgInfo && (block->bbFlags & BBF_INTERNAL) &&
- !compiler->fgBBisScratch(block)) // If the block is the distinguished first scratch block, then no need to
- // emit a NO_MAPPING entry, immediately after the prolog.
- {
- genIPmappingAdd((IL_OFFSETX)ICorDebugInfo::NO_MAPPING, true);
- }
-
- bool firstMapping = true;
-#endif // DEBUGGING_SUPPORT
-
- /*---------------------------------------------------------------------
- *
- * Generate code for each statement-tree in the block
- *
- */
- CLANG_FORMAT_COMMENT_ANCHOR;
-
-#if FEATURE_EH_FUNCLETS
- if (block->bbFlags & BBF_FUNCLET_BEG)
- {
- genReserveFuncletProlog(block);
- }
-#endif // FEATURE_EH_FUNCLETS
-
- // Clear compCurStmt and compCurLifeTree.
- compiler->compCurStmt = nullptr;
- compiler->compCurLifeTree = nullptr;
-
- // Traverse the block in linear order, generating code for each node as we
- // as we encounter it.
- CLANG_FORMAT_COMMENT_ANCHOR;
-
-#ifdef DEBUGGING_SUPPORT
- IL_OFFSETX currentILOffset = BAD_IL_OFFSET;
-#endif
- for (GenTree* node : LIR::AsRange(block).NonPhiNodes())
- {
-#ifdef DEBUGGING_SUPPORT
- // Do we have a new IL offset?
- if (node->OperGet() == GT_IL_OFFSET)
- {
- genEnsureCodeEmitted(currentILOffset);
- currentILOffset = node->gtStmt.gtStmtILoffsx;
- genIPmappingAdd(currentILOffset, firstMapping);
- firstMapping = false;
- }
-#endif // DEBUGGING_SUPPORT
-
-#ifdef DEBUG
- if (node->OperGet() == GT_IL_OFFSET)
- {
- noway_assert(node->gtStmt.gtStmtLastILoffs <= compiler->info.compILCodeSize ||
- node->gtStmt.gtStmtLastILoffs == BAD_IL_OFFSET);
-
- if (compiler->opts.dspCode && compiler->opts.dspInstrs &&
- node->gtStmt.gtStmtLastILoffs != BAD_IL_OFFSET)
- {
- while (genCurDispOffset <= node->gtStmt.gtStmtLastILoffs)
- {
- genCurDispOffset += dumpSingleInstr(compiler->info.compCode, genCurDispOffset, "> ");
- }
- }
- }
-#endif // DEBUG
-
- genCodeForTreeNode(node);
- if (node->gtHasReg() && node->gtLsraInfo.isLocalDefUse)
- {
- genConsumeReg(node);
- }
- } // end for each node in block
-
-#ifdef DEBUG
- // The following set of register spill checks and GC pointer tracking checks used to be
- // performed at statement boundaries. Now, with LIR, there are no statements, so they are
- // performed at the end of each block.
- // TODO: could these checks be performed more frequently? E.g., at each location where
- // the register allocator says there are no live non-variable registers. Perhaps this could
- // be done by (a) keeping a running count of live non-variable registers by using
- // gtLsraInfo.srcCount and gtLsraInfo.dstCount to decrement and increment the count, respectively,
- // and running the checks when the count is zero. Or, (b) use the map maintained by LSRA
- // (operandToLocationInfoMap) to mark a node somehow when, after the execution of that node,
- // there will be no live non-variable registers.
-
- regSet.rsSpillChk();
-
- /* Make sure we didn't bungle pointer register tracking */
-
- regMaskTP ptrRegs = gcInfo.gcRegGCrefSetCur | gcInfo.gcRegByrefSetCur;
- regMaskTP nonVarPtrRegs = ptrRegs & ~regSet.rsMaskVars;
-
- // If return is a GC-type, clear it. Note that if a common
- // epilog is generated (genReturnBB) it has a void return
- // even though we might return a ref. We can't use the compRetType
- // as the determiner because something we are tracking as a byref
- // might be used as a return value of a int function (which is legal)
- GenTree* blockLastNode = block->lastNode();
- if ((blockLastNode != nullptr) && (blockLastNode->gtOper == GT_RETURN) &&
- (varTypeIsGC(compiler->info.compRetType) ||
- (blockLastNode->gtOp.gtOp1 != nullptr && varTypeIsGC(blockLastNode->gtOp.gtOp1->TypeGet()))))
- {
- nonVarPtrRegs &= ~RBM_INTRET;
- }
-
- if (nonVarPtrRegs)
- {
- printf("Regset after BB%02u gcr=", block->bbNum);
- printRegMaskInt(gcInfo.gcRegGCrefSetCur & ~regSet.rsMaskVars);
- compiler->getEmitter()->emitDispRegSet(gcInfo.gcRegGCrefSetCur & ~regSet.rsMaskVars);
- printf(", byr=");
- printRegMaskInt(gcInfo.gcRegByrefSetCur & ~regSet.rsMaskVars);
- compiler->getEmitter()->emitDispRegSet(gcInfo.gcRegByrefSetCur & ~regSet.rsMaskVars);
- printf(", regVars=");
- printRegMaskInt(regSet.rsMaskVars);
- compiler->getEmitter()->emitDispRegSet(regSet.rsMaskVars);
- printf("\n");
- }
-
- noway_assert(nonVarPtrRegs == RBM_NONE);
-#endif // DEBUG
-
-#if defined(DEBUG) && defined(LATE_DISASM) && defined(_TARGET_AMD64_)
- if (block->bbNext == nullptr)
- {
- // Unit testing of the AMD64 emitter: generate a bunch of instructions into the last block
- // (it's as good as any, but better than the prolog, which can only be a single instruction
- // group) then use COMPlus_JitLateDisasm=* to see if the late disassembler
- // thinks the instructions are the same as we do.
- genAmd64EmitterUnitTests();
- }
-#endif // defined(DEBUG) && defined(LATE_DISASM) && defined(_TARGET_ARM64_)
-
-#ifdef DEBUGGING_SUPPORT
- // It is possible to reach the end of the block without generating code for the current IL offset.
- // For example, if the following IR ends the current block, no code will have been generated for
- // offset 21:
- //
- // ( 0, 0) [000040] ------------ il_offset void IL offset: 21
- //
- // N001 ( 0, 0) [000039] ------------ nop void
- //
- // This can lead to problems when debugging the generated code. To prevent these issues, make sure
- // we've generated code for the last IL offset we saw in the block.
- genEnsureCodeEmitted(currentILOffset);
+ // Because of the way the flowgraph is connected, the liveness info for this one instruction
+ // after the call is not (can not be) correct in cases where a variable has a last use in the
+ // handler. So turn off GC reporting for this single instruction.
+ getEmitter()->emitDisableGC();
- if (compiler->opts.compScopeInfo && (compiler->info.compVarScopesCount > 0))
+ // Now go to where the finally funclet needs to return to.
+ if (block->bbNext->bbJumpDest == block->bbNext->bbNext)
{
- siEndBlock(block);
-
- /* Is this the last block, and are there any open scopes left ? */
-
- bool isLastBlockProcessed = (block->bbNext == nullptr);
- if (block->isBBCallAlwaysPair())
- {
- isLastBlockProcessed = (block->bbNext->bbNext == nullptr);
- }
-
- if (isLastBlockProcessed && siOpenScopeList.scNext)
- {
- /* This assert no longer holds, because we may insert a throw
- block to demarcate the end of a try or finally region when they
- are at the end of the method. It would be nice if we could fix
- our code so that this throw block will no longer be necessary. */
-
- // noway_assert(block->bbCodeOffsEnd != compiler->info.compILCodeSize);
-
- siCloseAllOpenScopes();
- }
+ // Fall-through.
+ // TODO-XArch-CQ: Can we get rid of this instruction, and just have the call return directly
+ // to the next instruction? This would depend on stack walking from within the finally
+ // handler working without this instruction being in this special EH region.
+ instGen(INS_nop);
}
-
-#endif // DEBUGGING_SUPPORT
-
- genStackLevel -= savedStkLvl;
-
-#ifdef DEBUG
- // compCurLife should be equal to the liveOut set, except that we don't keep
- // it up to date for vars that are not register candidates
- // (it would be nice to have a xor set function)
-
- VARSET_TP VARSET_INIT_NOCOPY(extraLiveVars, VarSetOps::Diff(compiler, block->bbLiveOut, compiler->compCurLife));
- VarSetOps::UnionD(compiler, extraLiveVars, VarSetOps::Diff(compiler, compiler->compCurLife, block->bbLiveOut));
- VARSET_ITER_INIT(compiler, extraLiveVarIter, extraLiveVars, extraLiveVarIndex);
- while (extraLiveVarIter.NextElem(compiler, &extraLiveVarIndex))
+ else
{
- unsigned varNum = compiler->lvaTrackedToVarNum[extraLiveVarIndex];
- LclVarDsc* varDsc = compiler->lvaTable + varNum;
- assert(!varDsc->lvIsRegCandidate());
+ inst_JMP(EJ_jmp, block->bbNext->bbJumpDest);
}
-#endif
-
- /* Both stacks should always be empty on exit from a basic block */
- noway_assert(genStackLevel == 0);
-
-#ifdef _TARGET_AMD64_
- // On AMD64, we need to generate a NOP after a call that is the last instruction of the block, in several
- // situations, to support proper exception handling semantics. This is mostly to ensure that when the stack
- // walker computes an instruction pointer for a frame, that instruction pointer is in the correct EH region.
- // The document "X64 and ARM ABIs.docx" has more details. The situations:
- // 1. If the call instruction is in a different EH region as the instruction that follows it.
- // 2. If the call immediately precedes an OS epilog. (Note that what the JIT or VM consider an epilog might
- // be slightly different from what the OS considers an epilog, and it is the OS-reported epilog that matters
- // here.)
- // We handle case #1 here, and case #2 in the emitter.
- if (getEmitter()->emitIsLastInsCall())
- {
- // Ok, the last instruction generated is a call instruction. Do any of the other conditions hold?
- // Note: we may be generating a few too many NOPs for the case of call preceding an epilog. Technically,
- // if the next block is a BBJ_RETURN, an epilog will be generated, but there may be some instructions
- // generated before the OS epilog starts, such as a GS cookie check.
- if ((block->bbNext == nullptr) || !BasicBlock::sameEHRegion(block, block->bbNext))
- {
- // We only need the NOP if we're not going to generate any more code as part of the block end.
-
- switch (block->bbJumpKind)
- {
- case BBJ_ALWAYS:
- case BBJ_THROW:
- case BBJ_CALLFINALLY:
- case BBJ_EHCATCHRET:
- // We're going to generate more code below anyway, so no need for the NOP.
-
- case BBJ_RETURN:
- case BBJ_EHFINALLYRET:
- case BBJ_EHFILTERRET:
- // These are the "epilog follows" case, handled in the emitter.
-
- break;
-
- case BBJ_NONE:
- if (block->bbNext == nullptr)
- {
- // Call immediately before the end of the code; we should never get here .
- instGen(INS_BREAKPOINT); // This should never get executed
- }
- else
- {
- // We need the NOP
- instGen(INS_nop);
- }
- break;
-
- case BBJ_COND:
- case BBJ_SWITCH:
- // These can't have a call as the last instruction!
-
- default:
- noway_assert(!"Unexpected bbJumpKind");
- break;
- }
- }
- }
-#endif // _TARGET_AMD64_
-
- /* Do we need to generate a jump or return? */
-
- switch (block->bbJumpKind)
- {
- case BBJ_ALWAYS:
- inst_JMP(EJ_jmp, block->bbJumpDest);
- break;
-
- case BBJ_RETURN:
- genExitCode(block);
- break;
-
- case BBJ_THROW:
- // If we have a throw at the end of a function or funclet, we need to emit another instruction
- // afterwards to help the OS unwinder determine the correct context during unwind.
- // We insert an unexecuted breakpoint instruction in several situations
- // following a throw instruction:
- // 1. If the throw is the last instruction of the function or funclet. This helps
- // the OS unwinder determine the correct context during an unwind from the
- // thrown exception.
- // 2. If this is this is the last block of the hot section.
- // 3. If the subsequent block is a special throw block.
- // 4. On AMD64, if the next block is in a different EH region.
- if ((block->bbNext == nullptr) || (block->bbNext->bbFlags & BBF_FUNCLET_BEG) ||
- !BasicBlock::sameEHRegion(block, block->bbNext) ||
- (!isFramePointerUsed() && compiler->fgIsThrowHlpBlk(block->bbNext)) ||
- block->bbNext == compiler->fgFirstColdBlock)
- {
- instGen(INS_BREAKPOINT); // This should never get executed
- }
-
- break;
-
- case BBJ_CALLFINALLY:
-
-#if FEATURE_EH_FUNCLETS
-
- // Generate a call to the finally, like this:
- // mov rcx,qword ptr [rbp + 20H] // Load rcx with PSPSym
- // call finally-funclet
- // jmp finally-return // Only for non-retless finally calls
- // The jmp can be a NOP if we're going to the next block.
- // If we're generating code for the main function (not a funclet), and there is no localloc,
- // then RSP at this point is the same value as that stored in the PSPsym. So just copy RSP
- // instead of loading the PSPSym in this case.
- if (!compiler->compLocallocUsed && (compiler->funCurrentFunc()->funKind == FUNC_ROOT))
- {
- inst_RV_RV(INS_mov, REG_ARG_0, REG_SPBASE, TYP_I_IMPL);
- }
- else
- {
- getEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL), EA_PTRSIZE, REG_ARG_0, compiler->lvaPSPSym, 0);
- }
- getEmitter()->emitIns_J(INS_call, block->bbJumpDest);
+ getEmitter()->emitEnableGC();
+ }
- if (block->bbFlags & BBF_RETLESS_CALL)
- {
- // We have a retless call, and the last instruction generated was a call.
- // If the next block is in a different EH region (or is the end of the code
- // block), then we need to generate a breakpoint here (since it will never
- // get executed) to get proper unwind behavior.
+#else // !FEATURE_EH_FUNCLETS
- if ((block->bbNext == nullptr) || !BasicBlock::sameEHRegion(block, block->bbNext))
- {
- instGen(INS_BREAKPOINT); // This should never get executed
- }
- }
- else
- {
- // Because of the way the flowgraph is connected, the liveness info for this one instruction
- // after the call is not (can not be) correct in cases where a variable has a last use in the
- // handler. So turn off GC reporting for this single instruction.
- getEmitter()->emitDisableGC();
+ // If we are about to invoke a finally locally from a try block, we have to set the ShadowSP slot
+ // corresponding to the finally's nesting level. When invoked in response to an exception, the
+ // EE does this.
+ //
+ // We have a BBJ_CALLFINALLY followed by a BBJ_ALWAYS.
+ //
+ // We will emit :
+ // mov [ebp - (n + 1)], 0
+ // mov [ebp - n ], 0xFC
+ // push &step
+ // jmp finallyBlock
+ // ...
+ // step:
+ // mov [ebp - n ], 0
+ // jmp leaveTarget
+ // ...
+ // leaveTarget:
+
+ noway_assert(isFramePointerUsed());
+
+ // Get the nesting level which contains the finally
+ unsigned finallyNesting = 0;
+ compiler->fgGetNestingLevel(block, &finallyNesting);
- // Now go to where the finally funclet needs to return to.
- if (block->bbNext->bbJumpDest == block->bbNext->bbNext)
- {
- // Fall-through.
- // TODO-XArch-CQ: Can we get rid of this instruction, and just have the call return directly
- // to the next instruction? This would depend on stack walking from within the finally
- // handler working without this instruction being in this special EH region.
- instGen(INS_nop);
- }
- else
- {
- inst_JMP(EJ_jmp, block->bbNext->bbJumpDest);
- }
+ // The last slot is reserved for ICodeManager::FixContext(ppEndRegion)
+ unsigned filterEndOffsetSlotOffs;
+ filterEndOffsetSlotOffs = (unsigned)(compiler->lvaLclSize(compiler->lvaShadowSPslotsVar) - TARGET_POINTER_SIZE);
- getEmitter()->emitEnableGC();
- }
+ unsigned curNestingSlotOffs;
+ curNestingSlotOffs = (unsigned)(filterEndOffsetSlotOffs - ((finallyNesting + 1) * TARGET_POINTER_SIZE));
-#else // !FEATURE_EH_FUNCLETS
+ // Zero out the slot for the next nesting level
+ instGen_Store_Imm_Into_Lcl(TYP_I_IMPL, EA_PTRSIZE, 0, compiler->lvaShadowSPslotsVar,
+ curNestingSlotOffs - TARGET_POINTER_SIZE);
+ instGen_Store_Imm_Into_Lcl(TYP_I_IMPL, EA_PTRSIZE, LCL_FINALLY_MARK, compiler->lvaShadowSPslotsVar,
+ curNestingSlotOffs);
- // If we are about to invoke a finally locally from a try block, we have to set the ShadowSP slot
- // corresponding to the finally's nesting level. When invoked in response to an exception, the
- // EE does this.
- //
- // We have a BBJ_CALLFINALLY followed by a BBJ_ALWAYS.
- //
- // We will emit :
- // mov [ebp - (n + 1)], 0
- // mov [ebp - n ], 0xFC
- // push &step
- // jmp finallyBlock
- // ...
- // step:
- // mov [ebp - n ], 0
- // jmp leaveTarget
- // ...
- // leaveTarget:
-
- noway_assert(isFramePointerUsed());
-
- // Get the nesting level which contains the finally
- compiler->fgGetNestingLevel(block, &finallyNesting);
-
- // The last slot is reserved for ICodeManager::FixContext(ppEndRegion)
- unsigned filterEndOffsetSlotOffs;
- filterEndOffsetSlotOffs =
- (unsigned)(compiler->lvaLclSize(compiler->lvaShadowSPslotsVar) - TARGET_POINTER_SIZE);
-
- unsigned curNestingSlotOffs;
- curNestingSlotOffs = (unsigned)(filterEndOffsetSlotOffs - ((finallyNesting + 1) * TARGET_POINTER_SIZE));
-
- // Zero out the slot for the next nesting level
- instGen_Store_Imm_Into_Lcl(TYP_I_IMPL, EA_PTRSIZE, 0, compiler->lvaShadowSPslotsVar,
- curNestingSlotOffs - TARGET_POINTER_SIZE);
- instGen_Store_Imm_Into_Lcl(TYP_I_IMPL, EA_PTRSIZE, LCL_FINALLY_MARK, compiler->lvaShadowSPslotsVar,
- curNestingSlotOffs);
-
- // Now push the address where the finally funclet should return to directly.
- if (!(block->bbFlags & BBF_RETLESS_CALL))
- {
- assert(block->isBBCallAlwaysPair());
- getEmitter()->emitIns_J(INS_push_hide, block->bbNext->bbJumpDest);
- }
- else
- {
- // EE expects a DWORD, so we give him 0
- inst_IV(INS_push_hide, 0);
- }
+ // Now push the address where the finally funclet should return to directly.
+ if (!(block->bbFlags & BBF_RETLESS_CALL))
+ {
+ assert(block->isBBCallAlwaysPair());
+ getEmitter()->emitIns_J(INS_push_hide, block->bbNext->bbJumpDest);
+ }
+ else
+ {
+ // EE expects a DWORD, so we give him 0
+ inst_IV(INS_push_hide, 0);
+ }
- // Jump to the finally BB
- inst_JMP(EJ_jmp, block->bbJumpDest);
+ // Jump to the finally BB
+ inst_JMP(EJ_jmp, block->bbJumpDest);
#endif // !FEATURE_EH_FUNCLETS
- // The BBJ_ALWAYS is used because the BBJ_CALLFINALLY can't point to the
- // jump target using bbJumpDest - that is already used to point
- // to the finally block. So just skip past the BBJ_ALWAYS unless the
- // block is RETLESS.
- if (!(block->bbFlags & BBF_RETLESS_CALL))
- {
- assert(block->isBBCallAlwaysPair());
-
- lblk = block;
- block = block->bbNext;
- }
+ // The BBJ_ALWAYS is used because the BBJ_CALLFINALLY can't point to the
+ // jump target using bbJumpDest - that is already used to point
+ // to the finally block. So just skip past the BBJ_ALWAYS unless the
+ // block is RETLESS.
+ if (!(block->bbFlags & BBF_RETLESS_CALL))
+ {
+ assert(block->isBBCallAlwaysPair());
- break;
+ lblk = block;
+ block = block->bbNext;
+ }
+ return block;
+}
#if FEATURE_EH_FUNCLETS
-
- case BBJ_EHCATCHRET:
- // Set RAX to the address the VM should return to after the catch.
- // Generate a RIP-relative
- // lea reg, [rip + disp32] ; the RIP is implicit
- // which will be position-indepenent.
- getEmitter()->emitIns_R_L(INS_lea, EA_PTR_DSP_RELOC, block->bbJumpDest, REG_INTRET);
- __fallthrough;
-
- case BBJ_EHFINALLYRET:
- case BBJ_EHFILTERRET:
- genReserveFuncletEpilog(block);
- break;
+void CodeGen::genEHCatchRet(BasicBlock* block)
+{
+ // Set RAX to the address the VM should return to after the catch.
+ // Generate a RIP-relative
+ // lea reg, [rip + disp32] ; the RIP is implicit
+ // which will be position-indepenent.
+ getEmitter()->emitIns_R_L(INS_lea, EA_PTR_DSP_RELOC, block->bbJumpDest, REG_INTRET);
+}
#else // !FEATURE_EH_FUNCLETS
- case BBJ_EHCATCHRET:
- noway_assert(!"Unexpected BBJ_EHCATCHRET"); // not used on x86
-
- case BBJ_EHFINALLYRET:
- case BBJ_EHFILTERRET:
- {
- // The last statement of the block must be a GT_RETFILT, which has already been generated.
- assert(block->lastNode() != nullptr);
- assert(block->lastNode()->OperGet() == GT_RETFILT);
-
- if (block->bbJumpKind == BBJ_EHFINALLYRET)
- {
- assert(block->lastNode()->gtOp.gtOp1 == nullptr); // op1 == nullptr means endfinally
-
- // Return using a pop-jmp sequence. As the "try" block calls
- // the finally with a jmp, this leaves the x86 call-ret stack
- // balanced in the normal flow of path.
-
- noway_assert(isFramePointerRequired());
- inst_RV(INS_pop_hide, REG_EAX, TYP_I_IMPL);
- inst_RV(INS_i_jmp, REG_EAX, TYP_I_IMPL);
- }
- else
- {
- assert(block->bbJumpKind == BBJ_EHFILTERRET);
-
- // The return value has already been computed.
- instGen_Return(0);
- }
- }
- break;
-
-#endif // !FEATURE_EH_FUNCLETS
-
- case BBJ_NONE:
- case BBJ_COND:
- case BBJ_SWITCH:
- break;
-
- default:
- noway_assert(!"Unexpected bbJumpKind");
- break;
- }
-
-#ifdef DEBUG
- compiler->compCurBB = nullptr;
-#endif
-
- } //------------------ END-FOR each block of the method -------------------
-
- /* Nothing is live at this point */
- genUpdateLife(VarSetOps::MakeEmpty(compiler));
-
- /* Finalize the spill tracking logic */
-
- regSet.rsSpillEnd();
-
- /* Finalize the temp tracking logic */
-
- compiler->tmpEnd();
+void CodeGen::genEHFinallyOrFilterRet(BasicBlock* block)
+{
+ // The last statement of the block must be a GT_RETFILT, which has already been generated.
+ assert(block->lastNode() != nullptr);
+ assert(block->lastNode()->OperGet() == GT_RETFILT);
-#ifdef DEBUG
- if (compiler->verbose)
+ if (block->bbJumpKind == BBJ_EHFINALLYRET)
{
- printf("\n# ");
- printf("compCycleEstimate = %6d, compSizeEstimate = %5d ", compiler->compCycleEstimate,
- compiler->compSizeEstimate);
- printf("%s\n", compiler->info.compFullName);
- }
-#endif
-}
+ assert(block->lastNode()->gtOp.gtOp1 == nullptr); // op1 == nullptr means endfinally
-// return the child that has the same reg as the dst (if any)
-// other child returned (out param) in 'other'
-GenTree* sameRegAsDst(GenTree* tree, GenTree*& other /*out*/)
-{
- if (tree->gtRegNum == REG_NA)
- {
- other = nullptr;
- return nullptr;
- }
+ // Return using a pop-jmp sequence. As the "try" block calls
+ // the finally with a jmp, this leaves the x86 call-ret stack
+ // balanced in the normal flow of path.
- GenTreePtr op1 = tree->gtOp.gtOp1;
- GenTreePtr op2 = tree->gtOp.gtOp2;
- if (op1->gtRegNum == tree->gtRegNum)
- {
- other = op2;
- return op1;
- }
- if (op2->gtRegNum == tree->gtRegNum)
- {
- other = op1;
- return op2;
+ noway_assert(isFramePointerRequired());
+ inst_RV(INS_pop_hide, REG_EAX, TYP_I_IMPL);
+ inst_RV(INS_i_jmp, REG_EAX, TYP_I_IMPL);
}
else
{
- other = nullptr;
- return nullptr;
+ assert(block->bbJumpKind == BBJ_EHFILTERRET);
+
+ // The return value has already been computed.
+ instGen_Return(0);
}
}
+#endif // !FEATURE_EH_FUNCLETS
+
// Move an immediate value into an integer register
void CodeGen::instGen_Set_Reg_To_Imm(emitAttr size, regNumber reg, ssize_t imm, insFlags flags)
@@ -1227,7 +493,10 @@ void CodeGen::genSetRegToConst(regNumber targetReg, var_types targetType, GenTre
// Generate code to get the high N bits of a N*N=2N bit multiplication result
void CodeGen::genCodeForMulHi(GenTreeOp* treeNode)
{
- assert(!(treeNode->gtFlags & GTF_UNSIGNED));
+ if (treeNode->OperGet() == GT_MULHI)
+ {
+ assert(!(treeNode->gtFlags & GTF_UNSIGNED));
+ }
assert(!treeNode->gtOverflowEx());
regNumber targetReg = treeNode->gtRegNum;
@@ -1247,8 +516,7 @@ void CodeGen::genCodeForMulHi(GenTreeOp* treeNode)
GenTree* rmOp = op2;
// Set rmOp to the contained memory operand (if any)
- //
- if (op1->isContained() || (!op2->isContained() && (op2->gtRegNum == targetReg)))
+ if (op1->isContained() || (!op2->isContained() && (op2->gtRegNum == REG_RAX)))
{
regOp = op2;
rmOp = op1;
@@ -1256,25 +524,131 @@ void CodeGen::genCodeForMulHi(GenTreeOp* treeNode)
assert(!regOp->isContained());
// Setup targetReg when neither of the source operands was a matching register
- if (regOp->gtRegNum != targetReg)
+ if (regOp->gtRegNum != REG_RAX)
{
- inst_RV_RV(ins_Copy(targetType), targetReg, regOp->gtRegNum, targetType);
+ inst_RV_RV(ins_Copy(targetType), REG_RAX, regOp->gtRegNum, targetType);
}
- emit->emitInsBinary(INS_imulEAX, size, treeNode, rmOp);
+ instruction ins;
+ if ((treeNode->gtFlags & GTF_UNSIGNED) == 0)
+ {
+ ins = INS_imulEAX;
+ }
+ else
+ {
+ ins = INS_mulEAX;
+ }
+ emit->emitInsBinary(ins, size, treeNode, rmOp);
// Move the result to the desired register, if necessary
- if (targetReg != REG_RDX)
+ if (treeNode->OperGet() == GT_MULHI && targetReg != REG_RDX)
{
inst_RV_RV(INS_mov, targetReg, REG_RDX, targetType);
}
}
-// generate code for a DIV or MOD operation
+#ifdef _TARGET_X86_
+//------------------------------------------------------------------------
+// genCodeForLongUMod: Generate code for a tree of the form
+// `(umod (gt_long x y) (const int))`
+//
+// Arguments:
+// node - the node for which to generate code
+//
+void CodeGen::genCodeForLongUMod(GenTreeOp* node)
+{
+ assert(node != nullptr);
+ assert(node->OperGet() == GT_UMOD);
+ assert(node->TypeGet() == TYP_INT);
+
+ GenTreeOp* const dividend = node->gtOp1->AsOp();
+ assert(dividend->OperGet() == GT_LONG);
+ assert(varTypeIsLong(dividend));
+
+ genConsumeOperands(node);
+
+ GenTree* const dividendLo = dividend->gtOp1;
+ GenTree* const dividendHi = dividend->gtOp2;
+ assert(!dividendLo->isContained());
+ assert(!dividendHi->isContained());
+
+ GenTree* const divisor = node->gtOp2;
+ assert(divisor->gtSkipReloadOrCopy()->OperGet() == GT_CNS_INT);
+ assert(!divisor->gtSkipReloadOrCopy()->isContained());
+ assert(divisor->gtSkipReloadOrCopy()->AsIntCon()->gtIconVal >= 2);
+ assert(divisor->gtSkipReloadOrCopy()->AsIntCon()->gtIconVal <= 0x3fffffff);
+
+ // dividendLo must be in RAX; dividendHi must be in RDX
+ genCopyRegIfNeeded(dividendLo, REG_EAX);
+ genCopyRegIfNeeded(dividendHi, REG_EDX);
+
+ // At this point, EAX:EDX contains the 64bit dividend and op2->gtRegNum
+ // contains the 32bit divisor. We want to generate the following code:
+ //
+ // cmp edx, divisor->gtRegNum
+ // jb noOverflow
+ //
+ // mov temp, eax
+ // mov eax, edx
+ // xor edx, edx
+ // div divisor->gtRegNum
+ // mov eax, temp
+ //
+ // noOverflow:
+ // div divisor->gtRegNum
+ //
+ // This works because (a * 2^32 + b) % c = ((a % c) * 2^32 + b) % c.
+
+ BasicBlock* const noOverflow = genCreateTempLabel();
+
+ // cmp edx, divisor->gtRegNum
+ // jb noOverflow
+ inst_RV_RV(INS_cmp, REG_EDX, divisor->gtRegNum);
+ inst_JMP(EJ_jb, noOverflow);
+
+ // mov temp, eax
+ // mov eax, edx
+ // xor edx, edx
+ // div divisor->gtRegNum
+ // mov eax, temp
+ const regNumber tempReg = genRegNumFromMask(node->gtRsvdRegs);
+ inst_RV_RV(INS_mov, tempReg, REG_EAX, TYP_INT);
+ inst_RV_RV(INS_mov, REG_EAX, REG_EDX, TYP_INT);
+ instGen_Set_Reg_To_Zero(EA_PTRSIZE, REG_EDX);
+ inst_RV(INS_div, divisor->gtRegNum, TYP_INT);
+ inst_RV_RV(INS_mov, REG_EAX, tempReg, TYP_INT);
+
+ // noOverflow:
+ // div divisor->gtRegNum
+ genDefineTempLabel(noOverflow);
+ inst_RV(INS_div, divisor->gtRegNum, TYP_INT);
+
+ const regNumber targetReg = node->gtRegNum;
+ if (targetReg != REG_EDX)
+ {
+ inst_RV_RV(INS_mov, targetReg, REG_RDX, TYP_INT);
+ }
+ genProduceReg(node);
+}
+#endif // _TARGET_X86_
+
+//------------------------------------------------------------------------
+// genCodeForDivMod: Generate code for a DIV or MOD operation.
+//
+// Arguments:
+// treeNode - the node to generate the code for
//
void CodeGen::genCodeForDivMod(GenTreeOp* treeNode)
{
- GenTree* dividend = treeNode->gtOp1;
+ GenTree* dividend = treeNode->gtOp1;
+#ifdef _TARGET_X86_
+ if (varTypeIsLong(dividend->TypeGet()))
+ {
+ genCodeForLongUMod(treeNode);
+ return;
+ }
+#endif // _TARGET_X86_
+
GenTree* divisor = treeNode->gtOp2;
genTreeOps oper = treeNode->OperGet();
emitAttr size = emitTypeSize(treeNode);
@@ -1319,10 +693,7 @@ void CodeGen::genCodeForDivMod(GenTreeOp* treeNode)
else
{
// dividend must be in RAX
- if (dividend->gtRegNum != REG_RAX)
- {
- inst_RV_RV(INS_mov, REG_RAX, dividend->gtRegNum, targetType);
- }
+ genCopyRegIfNeeded(dividend, REG_RAX);
// zero or sign extend rax to rdx
if (oper == GT_UMOD || oper == GT_UDIV)
@@ -1395,7 +766,7 @@ void CodeGen::genCodeForBinary(GenTree* treeNode)
assert(oper == GT_OR || oper == GT_XOR || oper == GT_AND || oper == GT_ADD || oper == GT_SUB);
#else // !defined(_TARGET_64BIT_)
assert(oper == GT_OR || oper == GT_XOR || oper == GT_AND || oper == GT_ADD_LO || oper == GT_ADD_HI ||
- oper == GT_SUB_LO || oper == GT_SUB_HI || oper == GT_MUL_HI || oper == GT_DIV_HI || oper == GT_MOD_HI ||
+ oper == GT_SUB_LO || oper == GT_SUB_HI || oper == GT_MUL_LONG || oper == GT_DIV_HI || oper == GT_MOD_HI ||
oper == GT_ADD || oper == GT_SUB);
#endif // !defined(_TARGET_64BIT_)
@@ -1443,7 +814,7 @@ void CodeGen::genCodeForBinary(GenTree* treeNode)
}
// now we know there are 3 different operands so attempt to use LEA
else if (oper == GT_ADD && !varTypeIsFloating(treeNode) && !treeNode->gtOverflowEx() // LEA does not set flags
- && (op2->isContainedIntOrIImmed() || !op2->isContained()))
+ && (op2->isContainedIntOrIImmed() || !op2->isContained()) && !treeNode->gtSetFlags())
{
if (op2->isContainedIntOrIImmed())
{
@@ -1833,7 +1204,7 @@ void CodeGen::genReturn(GenTreePtr treeNode)
//
// Reason for not materializing Leave callback as a GT_PROF_HOOK node after GT_RETURN:
// In flowgraph and other places assert that the last node of a block marked as
- // GT_RETURN is either a GT_RETURN or GT_JMP or a tail call. It would be nice to
+ // BBJ_RETURN is either a GT_RETURN or GT_JMP or a tail call. It would be nice to
// maintain such an invariant irrespective of whether profiler hook needed or not.
// Also, there is not much to be gained by materializing it as an explicit node.
if (compiler->compCurBB == compiler->genReturnBB)
@@ -1913,9 +1284,11 @@ void CodeGen::genCodeForTreeNode(GenTreePtr treeNode)
switch (treeNode->gtOper)
{
+#ifndef JIT32_GCENCODER
case GT_START_NONGC:
getEmitter()->emitDisableGC();
break;
+#endif // !defined(JIT32_GCENCODER)
case GT_PROF_HOOK:
#ifdef PROFILING_SUPPORTED
@@ -1996,14 +1369,18 @@ void CodeGen::genCodeForTreeNode(GenTreePtr treeNode)
// genCodeForShift() calls genProduceReg()
break;
- case GT_CAST:
#if !defined(_TARGET_64BIT_)
- // We will NYI in DecomposeNode() if we are cast TO a long type, but we do not
- // yet support casting FROM a long type either, and that's simpler to catch
- // here.
- NYI_IF(varTypeIsLong(treeNode->gtOp.gtOp1), "Casts from TYP_LONG");
-#endif // !defined(_TARGET_64BIT_)
+ case GT_LSH_HI:
+ case GT_RSH_LO:
+ // TODO-X86-CQ: This only handles the case where the operand being shifted is in a register. We don't
+ // need sourceHi to be always in reg in case of GT_LSH_HI (because it could be moved from memory to
+ // targetReg if sourceHi is a contained mem-op). Similarly for GT_RSH_LO, sourceLo could be marked as
+ // contained memory-op. Even if not a memory-op, we could mark it as reg-optional.
+ genCodeForShiftLong(treeNode);
+ break;
+#endif
+ case GT_CAST:
if (varTypeIsFloating(targetType) && varTypeIsFloating(treeNode->gtOp.gtOp1))
{
// Casts float/double <--> double/float
@@ -2037,7 +1414,7 @@ void CodeGen::genCodeForTreeNode(GenTreePtr treeNode)
if (isRegCandidate && !(treeNode->gtFlags & GTF_VAR_DEATH))
{
- assert((treeNode->InReg()) || (treeNode->gtFlags & GTF_SPILLED));
+ assert(treeNode->InReg() || (treeNode->gtFlags & GTF_SPILLED));
}
// If this is a register candidate that has been spilled, genConsumeReg() will
@@ -2047,6 +1424,15 @@ void CodeGen::genCodeForTreeNode(GenTreePtr treeNode)
{
assert(!isRegCandidate);
+#if defined(FEATURE_SIMD) && defined(_TARGET_X86_)
+ // Loading of TYP_SIMD12 (i.e. Vector3) variable
+ if (treeNode->TypeGet() == TYP_SIMD12)
+ {
+ genLoadLclTypeSIMD12(treeNode);
+ break;
+ }
+#endif // defined(FEATURE_SIMD) && defined(_TARGET_X86_)
+
emit->emitIns_R_S(ins_Load(treeNode->TypeGet(), compiler->isSIMDTypeLocalAligned(lcl->gtLclNum)),
emitTypeSize(treeNode), treeNode->gtRegNum, lcl->gtLclNum, 0);
genProduceReg(treeNode);
@@ -2075,7 +1461,7 @@ void CodeGen::genCodeForTreeNode(GenTreePtr treeNode)
// Loading of TYP_SIMD12 (i.e. Vector3) field
if (treeNode->TypeGet() == TYP_SIMD12)
{
- genLoadLclFldTypeSIMD12(treeNode);
+ genLoadLclTypeSIMD12(treeNode);
break;
}
#endif
@@ -2243,6 +1629,9 @@ void CodeGen::genCodeForTreeNode(GenTreePtr treeNode)
break;
case GT_MULHI:
+#ifdef _TARGET_X86_
+ case GT_MUL_LONG:
+#endif
genCodeForMulHi(treeNode->AsOp());
genProduceReg(treeNode);
break;
@@ -2408,18 +1797,18 @@ void CodeGen::genCodeForTreeNode(GenTreePtr treeNode)
// X86 Long comparison
else if (varTypeIsLong(op1Type))
{
- // When not materializing the result in a register, the compare logic is generated
- // when we generate the GT_JTRUE.
- if (treeNode->gtRegNum != REG_NA)
- {
- genCompareLong(treeNode);
- }
- else
- {
- // We generate the compare when we generate the GT_JTRUE, but we need to consume
- // the operands now.
- genConsumeOperands(treeNode->AsOp());
- }
+#ifdef DEBUG
+ // The result of an unlowered long compare on a 32-bit target must either be
+ // a) materialized into a register, or
+ // b) unused.
+ //
+ // A long compare that has a result that is used but not materialized into a register should
+ // have been handled by Lowering::LowerCompare.
+
+ LIR::Use use;
+ assert((treeNode->gtRegNum != REG_NA) || !LIR::AsRange(compiler->compCurBB).TryGetUse(treeNode, &use));
+#endif
+ genCompareLong(treeNode);
}
#endif // !defined(_TARGET_64BIT_)
else
@@ -2437,52 +1826,60 @@ void CodeGen::genCodeForTreeNode(GenTreePtr treeNode)
assert(compiler->compCurBB->bbJumpKind == BBJ_COND);
#if !defined(_TARGET_64BIT_)
- // For long compares, we emit special logic
- if (varTypeIsLong(cmp->gtGetOp1()))
- {
- genJTrueLong(cmp);
- }
- else
+ // Long-typed compares should have been handled by Lowering::LowerCompare.
+ assert(!varTypeIsLong(cmp->gtGetOp1()));
#endif
- {
- // Get the "kind" and type of the comparison. Note that whether it is an unsigned cmp
- // is governed by a flag NOT by the inherent type of the node
- // TODO-XArch-CQ: Check if we can use the currently set flags.
- emitJumpKind jumpKind[2];
- bool branchToTrueLabel[2];
- genJumpKindsForTree(cmp, jumpKind, branchToTrueLabel);
- BasicBlock* skipLabel = nullptr;
- if (jumpKind[0] != EJ_NONE)
- {
- BasicBlock* jmpTarget;
- if (branchToTrueLabel[0])
- {
- jmpTarget = compiler->compCurBB->bbJumpDest;
- }
- else
- {
- // This case arises only for ordered GT_EQ right now
- assert((cmp->gtOper == GT_EQ) && ((cmp->gtFlags & GTF_RELOP_NAN_UN) == 0));
- skipLabel = genCreateTempLabel();
- jmpTarget = skipLabel;
- }
-
- inst_JMP(jumpKind[0], jmpTarget);
- }
+ // Get the "kind" and type of the comparison. Note that whether it is an unsigned cmp
+ // is governed by a flag NOT by the inherent type of the node
+ // TODO-XArch-CQ: Check if we can use the currently set flags.
+ emitJumpKind jumpKind[2];
+ bool branchToTrueLabel[2];
+ genJumpKindsForTree(cmp, jumpKind, branchToTrueLabel);
- if (jumpKind[1] != EJ_NONE)
+ BasicBlock* skipLabel = nullptr;
+ if (jumpKind[0] != EJ_NONE)
+ {
+ BasicBlock* jmpTarget;
+ if (branchToTrueLabel[0])
{
- // the second conditional branch always has to be to the true label
- assert(branchToTrueLabel[1]);
- inst_JMP(jumpKind[1], compiler->compCurBB->bbJumpDest);
+ jmpTarget = compiler->compCurBB->bbJumpDest;
}
-
- if (skipLabel != nullptr)
+ else
{
- genDefineTempLabel(skipLabel);
+ // This case arises only for ordered GT_EQ right now
+ assert((cmp->gtOper == GT_EQ) && ((cmp->gtFlags & GTF_RELOP_NAN_UN) == 0));
+ skipLabel = genCreateTempLabel();
+ jmpTarget = skipLabel;
}
+
+ inst_JMP(jumpKind[0], jmpTarget);
+ }
+
+ if (jumpKind[1] != EJ_NONE)
+ {
+ // the second conditional branch always has to be to the true label
+ assert(branchToTrueLabel[1]);
+ inst_JMP(jumpKind[1], compiler->compCurBB->bbJumpDest);
}
+
+ if (skipLabel != nullptr)
+ {
+ genDefineTempLabel(skipLabel);
+ }
+ }
+ break;
+
+ case GT_JCC:
+ {
+ GenTreeJumpCC* jcc = treeNode->AsJumpCC();
+
+ assert(compiler->compCurBB->bbJumpKind == BBJ_COND);
+
+ CompareKind compareKind = ((jcc->gtFlags & GTF_UNSIGNED) != 0) ? CK_UNSIGNED : CK_SIGNED;
+ emitJumpKind jumpKind = genJumpKindForOper(jcc->gtCondition, compareKind);
+
+ inst_JMP(jumpKind, compiler->compCurBB->bbJumpDest);
}
break;
@@ -2572,12 +1969,13 @@ void CodeGen::genCodeForTreeNode(GenTreePtr treeNode)
break;
case GT_LIST:
+ case GT_FIELD_LIST:
case GT_ARGPLACE:
// Nothing to do
break;
case GT_PUTARG_STK:
- genPutArgStk(treeNode);
+ genPutArgStk(treeNode->AsPutArgStk());
break;
case GT_PUTARG_REG:
@@ -2608,7 +2006,7 @@ void CodeGen::genCodeForTreeNode(GenTreePtr treeNode)
case GT_LOCKADD:
case GT_XCHG:
case GT_XADD:
- genLockedInstructions(treeNode);
+ genLockedInstructions(treeNode->AsOp());
break;
case GT_MEMORYBARRIER:
@@ -2795,7 +2193,8 @@ void CodeGen::genCodeForTreeNode(GenTreePtr treeNode)
{
#ifdef DEBUG
char message[256];
- sprintf(message, "Unimplemented node type %s\n", GenTree::NodeName(treeNode->OperGet()));
+ _snprintf_s(message, _countof(message), _TRUNCATE, "Unimplemented node type %s\n",
+ GenTree::NodeName(treeNode->OperGet()));
#endif
assert(!"Unknown node in codegen");
}
@@ -3330,8 +2729,10 @@ ALLOC_DONE:
BAILOUT:
// Write the lvaLocAllocSPvar stack frame slot
- noway_assert(compiler->lvaLocAllocSPvar != BAD_VAR_NUM);
- getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_SPBASE, compiler->lvaLocAllocSPvar, 0);
+ if (compiler->lvaLocAllocSPvar != BAD_VAR_NUM)
+ {
+ getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_SPBASE, compiler->lvaLocAllocSPvar, 0);
+ }
#if STACK_PROBES
if (compiler->opts.compNeedStackProbes)
@@ -3356,10 +2757,15 @@ BAILOUT:
void CodeGen::genCodeForStoreBlk(GenTreeBlk* storeBlkNode)
{
+#ifdef JIT32_GCENCODER
+ assert(!storeBlkNode->gtBlkOpGcUnsafe);
+#else
if (storeBlkNode->gtBlkOpGcUnsafe)
{
getEmitter()->emitDisableGC();
}
+#endif // JIT32_GCENCODER
+
bool isCopyBlk = storeBlkNode->OperIsCopyBlkOp();
switch (storeBlkNode->gtBlkOpKind)
@@ -3399,23 +2805,40 @@ void CodeGen::genCodeForStoreBlk(GenTreeBlk* storeBlkNode)
default:
unreached();
}
+
+#ifndef JIT32_GCENCODER
if (storeBlkNode->gtBlkOpGcUnsafe)
{
getEmitter()->emitEnableGC();
}
+#endif // !defined(JIT32_GCENCODER)
}
-// Generate code for InitBlk using rep stos.
+//
+//------------------------------------------------------------------------
+// genCodeForInitBlkRepStos: Generate code for InitBlk using rep stos.
+//
+// Arguments:
+// initBlkNode - The Block store for which we are generating code.
+//
// Preconditions:
-// The size of the buffers must be a constant and also less than INITBLK_STOS_LIMIT bytes.
-// Any value larger than that, we'll use the helper even if both the
-// fill byte and the size are integer constants.
+// On x64:
+// The size of the buffers must be a constant and also less than INITBLK_STOS_LIMIT bytes.
+// Any value larger than that, we'll use the helper even if both the fill byte and the
+// size are integer constants.
+// On x86:
+// The size must either be a non-constant or less than INITBLK_STOS_LIMIT bytes.
+//
void CodeGen::genCodeForInitBlkRepStos(GenTreeBlk* initBlkNode)
{
- // Make sure we got the arguments of the initblk/initobj operation in the right registers
+ // Make sure we got the arguments of the initblk/initobj operation in the right registers.
unsigned size = initBlkNode->Size();
GenTreePtr dstAddr = initBlkNode->Addr();
GenTreePtr initVal = initBlkNode->Data();
+ if (initVal->OperIsInitVal())
+ {
+ initVal = initVal->gtGetOp1();
+ }
#ifdef DEBUG
assert(!dstAddr->isContained());
@@ -3428,7 +2851,8 @@ void CodeGen::genCodeForInitBlkRepStos(GenTreeBlk* initBlkNode)
#ifdef _TARGET_AMD64_
assert(size > CPBLK_UNROLL_LIMIT && size < CPBLK_MOVS_LIMIT);
#else
- assert(size > CPBLK_UNROLL_LIMIT);
+ // Note that a size of zero means a non-constant size.
+ assert((size == 0) || (size > CPBLK_UNROLL_LIMIT));
#endif
}
@@ -3449,9 +2873,13 @@ void CodeGen::genCodeForInitBlkUnroll(GenTreeBlk* initBlkNode)
unsigned size = initBlkNode->Size();
GenTreePtr dstAddr = initBlkNode->Addr();
GenTreePtr initVal = initBlkNode->Data();
+ if (initVal->OperIsInitVal())
+ {
+ initVal = initVal->gtGetOp1();
+ }
assert(!dstAddr->isContained());
- assert(!initVal->isContained());
+ assert(!initVal->isContained() || (initVal->IsIntegralConst(0) && ((size & 0xf) == 0)));
assert(size != 0);
assert(size <= INITBLK_UNROLL_LIMIT);
assert(initVal->gtSkipReloadOrCopy()->IsCnsIntOrI());
@@ -3512,9 +2940,11 @@ void CodeGen::genCodeForInitBlkUnroll(GenTreeBlk* initBlkNode)
offset += 4;
emit->emitIns_AR_R(INS_mov, EA_4BYTE, valReg, dstAddr->gtRegNum, offset);
offset += 4;
-#else // !_TARGET_X86_
+#else // !_TARGET_X86_
+
emit->emitIns_AR_R(INS_mov, EA_8BYTE, valReg, dstAddr->gtRegNum, offset);
offset += 8;
+
#endif // !_TARGET_X86_
}
if ((size & 4) != 0)
@@ -3544,6 +2974,10 @@ void CodeGen::genCodeForInitBlk(GenTreeBlk* initBlkNode)
unsigned blockSize = initBlkNode->Size();
GenTreePtr dstAddr = initBlkNode->Addr();
GenTreePtr initVal = initBlkNode->Data();
+ if (initVal->OperIsInitVal())
+ {
+ initVal = initVal->gtGetOp1();
+ }
assert(!dstAddr->isContained());
assert(!initVal->isContained());
@@ -3760,21 +3194,145 @@ void CodeGen::genCodeForCpBlkRepMovs(GenTreeBlk* cpBlkNode)
instGen(INS_r_movsb);
}
-#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+#ifdef FEATURE_PUT_STRUCT_ARG_STK
+//------------------------------------------------------------------------
+// CodeGen::genMove8IfNeeded: Conditionally move 8 bytes of a struct to the argument area
+//
+// Arguments:
+// size - The size of bytes remaining to be moved
+// longTmpReg - The tmp register to be used for the long value
+// srcAddr - The address of the source struct
+// offset - The current offset being copied
+//
+// Return Value:
+// Returns the number of bytes moved (8 or 0).
+//
+// Notes:
+// This is used in the PutArgStkKindUnroll case, to move any bytes that are
+// not an even multiple of 16.
+// On x86, longTmpReg must be an xmm reg; on x64 it must be an integer register.
+// This is checked by genStoreRegToStackArg.
+//
+int CodeGen::genMove8IfNeeded(unsigned size, regNumber longTmpReg, GenTree* srcAddr, unsigned offset)
+{
+#ifdef _TARGET_X86_
+ instruction longMovIns = INS_movq;
+#else // !_TARGET_X86_
+ instruction longMovIns = INS_mov;
+#endif // !_TARGET_X86_
+ if ((size & 8) != 0)
+ {
+ genCodeForLoadOffset(longMovIns, EA_8BYTE, longTmpReg, srcAddr, offset);
+ genStoreRegToStackArg(TYP_LONG, longTmpReg, offset);
+ return 8;
+ }
+ return 0;
+}
+
+//------------------------------------------------------------------------
+// CodeGen::genMove4IfNeeded: Conditionally move 4 bytes of a struct to the argument area
+//
+// Arguments:
+// size - The size of bytes remaining to be moved
+// intTmpReg - The tmp register to be used for the long value
+// srcAddr - The address of the source struct
+// offset - The current offset being copied
+//
+// Return Value:
+// Returns the number of bytes moved (4 or 0).
+//
+// Notes:
+// This is used in the PutArgStkKindUnroll case, to move any bytes that are
+// not an even multiple of 16.
+// intTmpReg must be an integer register.
+// This is checked by genStoreRegToStackArg.
+//
+int CodeGen::genMove4IfNeeded(unsigned size, regNumber intTmpReg, GenTree* srcAddr, unsigned offset)
+{
+ if ((size & 4) != 0)
+ {
+ genCodeForLoadOffset(INS_mov, EA_4BYTE, intTmpReg, srcAddr, offset);
+ genStoreRegToStackArg(TYP_INT, intTmpReg, offset);
+ return 4;
+ }
+ return 0;
+}
+
+//------------------------------------------------------------------------
+// CodeGen::genMove2IfNeeded: Conditionally move 2 bytes of a struct to the argument area
+//
+// Arguments:
+// size - The size of bytes remaining to be moved
+// intTmpReg - The tmp register to be used for the long value
+// srcAddr - The address of the source struct
+// offset - The current offset being copied
+//
+// Return Value:
+// Returns the number of bytes moved (2 or 0).
+//
+// Notes:
+// This is used in the PutArgStkKindUnroll case, to move any bytes that are
+// not an even multiple of 16.
+// intTmpReg must be an integer register.
+// This is checked by genStoreRegToStackArg.
+//
+int CodeGen::genMove2IfNeeded(unsigned size, regNumber intTmpReg, GenTree* srcAddr, unsigned offset)
+{
+ if ((size & 2) != 0)
+ {
+ genCodeForLoadOffset(INS_mov, EA_2BYTE, intTmpReg, srcAddr, offset);
+ genStoreRegToStackArg(TYP_SHORT, intTmpReg, offset);
+ return 2;
+ }
+ return 0;
+}
+
+//------------------------------------------------------------------------
+// CodeGen::genMove1IfNeeded: Conditionally move 1 byte of a struct to the argument area
+//
+// Arguments:
+// size - The size of bytes remaining to be moved
+// intTmpReg - The tmp register to be used for the long value
+// srcAddr - The address of the source struct
+// offset - The current offset being copied
+//
+// Return Value:
+// Returns the number of bytes moved (1 or 0).
+//
+// Notes:
+// This is used in the PutArgStkKindUnroll case, to move any bytes that are
+// not an even multiple of 16.
+// intTmpReg must be an integer register.
+// This is checked by genStoreRegToStackArg.
+//
+int CodeGen::genMove1IfNeeded(unsigned size, regNumber intTmpReg, GenTree* srcAddr, unsigned offset)
+{
+
+ if ((size & 1) != 0)
+ {
+ genCodeForLoadOffset(INS_mov, EA_1BYTE, intTmpReg, srcAddr, offset);
+ genStoreRegToStackArg(TYP_BYTE, intTmpReg, offset);
+ return 1;
+ }
+ return 0;
+}
//---------------------------------------------------------------------------------------------------------------//
// genStructPutArgUnroll: Generates code for passing a struct arg on stack by value using loop unrolling.
//
// Arguments:
// putArgNode - the PutArgStk tree.
-// baseVarNum - the base var number, relative to which the by-val struct will be copied on the stack.
+//
+// Notes:
+// m_stkArgVarNum must be set to the base var number, relative to which the by-val struct will be copied to the
+// stack.
//
// TODO-Amd64-Unix: Try to share code with copyblk.
// Need refactoring of copyblk before it could be used for putarg_stk.
// The difference for now is that a putarg_stk contains its children, while cpyblk does not.
// This creates differences in code. After some significant refactoring it could be reused.
//
-void CodeGen::genStructPutArgUnroll(GenTreePutArgStk* putArgNode, unsigned baseVarNum)
+void CodeGen::genStructPutArgUnroll(GenTreePutArgStk* putArgNode)
{
// We will never call this method for SIMD types, which are stored directly
// in genPutStructArgStk().
@@ -3801,14 +3359,43 @@ void CodeGen::genStructPutArgUnroll(GenTreePutArgStk* putArgNode, unsigned baseV
unsigned offset = 0;
+ regNumber xmmTmpReg = REG_NA;
+ regNumber intTmpReg = REG_NA;
+ regNumber longTmpReg = REG_NA;
+#ifdef _TARGET_X86_
+ // On x86 we use an XMM register for both 16 and 8-byte chunks, but if it's
+ // less than 16 bytes, we will just be using pushes
+ if (size >= 8)
+ {
+ xmmTmpReg = genRegNumFromMask(putArgNode->gtRsvdRegs & RBM_ALLFLOAT);
+ longTmpReg = xmmTmpReg;
+ }
+ if ((size & 0x7) != 0)
+ {
+ intTmpReg = genRegNumFromMask(putArgNode->gtRsvdRegs & RBM_ALLINT);
+ }
+#else // !_TARGET_X86_
+ // On x64 we use an XMM register only for 16-byte chunks.
+ if (size >= XMM_REGSIZE_BYTES)
+ {
+ xmmTmpReg = genRegNumFromMask(putArgNode->gtRsvdRegs & RBM_ALLFLOAT);
+ }
+ if ((size & 0xf) != 0)
+ {
+ intTmpReg = genRegNumFromMask(putArgNode->gtRsvdRegs & RBM_ALLINT);
+ longTmpReg = intTmpReg;
+ }
+#endif // !_TARGET_X86_
+
// If the size of this struct is larger than 16 bytes
// let's use SSE2 to be able to do 16 byte at a time
// loads and stores.
if (size >= XMM_REGSIZE_BYTES)
{
+#ifdef _TARGET_X86_
+ assert(!m_pushStkArg);
+#endif // _TARGET_X86_
assert(putArgNode->gtRsvdRegs != RBM_NONE);
- regNumber xmmReg = genRegNumFromMask(putArgNode->gtRsvdRegs & RBM_ALLFLOAT);
- assert(genIsValidFloatReg(xmmReg));
size_t slots = size / XMM_REGSIZE_BYTES;
assert(putArgNode->gtGetOp1()->isContained());
@@ -3820,11 +3407,10 @@ void CodeGen::genStructPutArgUnroll(GenTreePutArgStk* putArgNode, unsigned baseV
while (slots-- > 0)
{
// Load
- genCodeForLoadOffset(INS_movdqu, EA_8BYTE, xmmReg, src->gtGetOp1(),
- offset); // Load the address of the child of the Obj node.
+ genCodeForLoadOffset(INS_movdqu, EA_8BYTE, xmmTmpReg, src->gtGetOp1(), offset);
// Store
- emit->emitIns_S_R(INS_movdqu, EA_8BYTE, xmmReg, baseVarNum, putArgOffset + offset);
+ genStoreRegToStackArg(TYP_STRUCT, xmmTmpReg, offset);
offset += XMM_REGSIZE_BYTES;
}
@@ -3833,41 +3419,29 @@ void CodeGen::genStructPutArgUnroll(GenTreePutArgStk* putArgNode, unsigned baseV
// Fill the remainder (15 bytes or less) if there's one.
if ((size & 0xf) != 0)
{
- // Grab the integer temp register to emit the remaining loads and stores.
- regNumber tmpReg = genRegNumFromMask(putArgNode->gtRsvdRegs & RBM_ALLINT);
- assert(genIsValidIntReg(tmpReg));
-
- if ((size & 8) != 0)
- {
- genCodeForLoadOffset(INS_mov, EA_8BYTE, tmpReg, src->gtOp.gtOp1, offset);
-
- emit->emitIns_S_R(INS_mov, EA_8BYTE, tmpReg, baseVarNum, putArgOffset + offset);
-
- offset += 8;
- }
-
- if ((size & 4) != 0)
- {
- genCodeForLoadOffset(INS_mov, EA_4BYTE, tmpReg, src->gtOp.gtOp1, offset);
-
- emit->emitIns_S_R(INS_mov, EA_4BYTE, tmpReg, baseVarNum, putArgOffset + offset);
-
- offset += 4;
- }
-
- if ((size & 2) != 0)
- {
- genCodeForLoadOffset(INS_mov, EA_2BYTE, tmpReg, src->gtOp.gtOp1, offset);
-
- emit->emitIns_S_R(INS_mov, EA_2BYTE, tmpReg, baseVarNum, putArgOffset + offset);
-
- offset += 2;
+#ifdef _TARGET_X86_
+ if (m_pushStkArg)
+ {
+ // This case is currently supported only for the case where the total size is
+ // less than XMM_REGSIZE_BYTES. We need to push the remaining chunks in reverse
+ // order. However, morph has ensured that we have a struct that is an even
+ // multiple of TARGET_POINTER_SIZE, so we don't need to worry about alignment.
+ assert(((size & 0xc) == size) && (offset == 0));
+ // If we have a 4 byte chunk, load it from either offset 0 or 8, depending on
+ // whether we've got an 8 byte chunk, and then push it on the stack.
+ unsigned pushedBytes = genMove4IfNeeded(size, intTmpReg, src->gtOp.gtOp1, size & 0x8);
+ // Now if we have an 8 byte chunk, load it from offset 0 (it's the first chunk)
+ // and push it on the stack.
+ pushedBytes += genMove8IfNeeded(size, longTmpReg, src->gtOp.gtOp1, 0);
}
-
- if ((size & 1) != 0)
+ else
+#endif // _TARGET_X86_
{
- genCodeForLoadOffset(INS_mov, EA_1BYTE, tmpReg, src->gtOp.gtOp1, offset);
- emit->emitIns_S_R(INS_mov, EA_1BYTE, tmpReg, baseVarNum, putArgOffset + offset);
+ offset += genMove8IfNeeded(size, longTmpReg, src->gtOp.gtOp1, offset);
+ offset += genMove4IfNeeded(size, intTmpReg, src->gtOp.gtOp1, offset);
+ offset += genMove2IfNeeded(size, intTmpReg, src->gtOp.gtOp1, offset);
+ offset += genMove1IfNeeded(size, intTmpReg, src->gtOp.gtOp1, offset);
+ assert(offset == size);
}
}
}
@@ -3877,17 +3451,16 @@ void CodeGen::genStructPutArgUnroll(GenTreePutArgStk* putArgNode, unsigned baseV
//
// Arguments:
// putArgNode - the PutArgStk tree.
-// baseVarNum - the base var number, relative to which the by-val struct bits will go.
//
// Preconditions:
// The size argument of the PutArgStk (for structs) is a constant and is between
// CPBLK_UNROLL_LIMIT and CPBLK_MOVS_LIMIT bytes.
+// m_stkArgVarNum must be set to the base var number, relative to which the by-val struct bits will go.
//
-void CodeGen::genStructPutArgRepMovs(GenTreePutArgStk* putArgNode, unsigned baseVarNum)
+void CodeGen::genStructPutArgRepMovs(GenTreePutArgStk* putArgNode)
{
assert(putArgNode->TypeGet() == TYP_STRUCT);
assert(putArgNode->getArgSize() > CPBLK_UNROLL_LIMIT);
- assert(baseVarNum != BAD_VAR_NUM);
// Make sure we got the arguments of the cpblk operation in the right registers
GenTreePtr dstAddr = putArgNode;
@@ -3897,7 +3470,7 @@ void CodeGen::genStructPutArgRepMovs(GenTreePutArgStk* putArgNode, unsigned base
assert(putArgNode->gtRsvdRegs == (RBM_RDI | RBM_RCX | RBM_RSI));
assert(srcAddr->isContained());
- genConsumePutStructArgStk(putArgNode, REG_RDI, REG_RSI, REG_RCX, baseVarNum);
+ genConsumePutStructArgStk(putArgNode, REG_RDI, REG_RSI, REG_RCX);
instGen(INS_r_movsb);
}
@@ -3906,12 +3479,14 @@ void CodeGen::genStructPutArgRepMovs(GenTreePutArgStk* putArgNode, unsigned base
// must be cleared to zeroes. The native compiler doesn't clear the upper bits
// and there is no way to know if the caller is native or not. So, the upper
// 32 bits of Vector argument on stack are always cleared to zero.
-#ifdef FEATURE_SIMD
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) && defined(FEATURE_SIMD)
void CodeGen::genClearStackVec3ArgUpperBits()
{
#ifdef DEBUG
if (verbose)
+ {
printf("*************** In genClearStackVec3ArgUpperBits()\n");
+ }
#endif
assert(compiler->compGeneratingProlog);
@@ -3948,12 +3523,13 @@ void CodeGen::genClearStackVec3ArgUpperBits()
}
}
}
-#endif // FEATURE_SIMD
-#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) && defined(FEATURE_SIMD)
+#endif // FEATURE_PUT_STRUCT_ARG_STK
// Generate code for CpObj nodes wich copy structs that have interleaved
// GC pointers.
-// This will generate a sequence of movsq instructions for the cases of non-gc members
+// This will generate a sequence of movsp instructions for the cases of non-gc members.
+// Note that movsp is an alias for movsd on x86 and movsq on x64.
// and calls to the BY_REF_ASSIGN helper otherwise.
void CodeGen::genCodeForCpObj(GenTreeObj* cpObjNode)
{
@@ -3961,6 +3537,7 @@ void CodeGen::genCodeForCpObj(GenTreeObj* cpObjNode)
GenTreePtr dstAddr = cpObjNode->Addr();
GenTreePtr source = cpObjNode->Data();
GenTreePtr srcAddr = nullptr;
+ var_types srcAddrType = TYP_BYREF;
bool sourceIsLocal = false;
assert(source->isContained());
@@ -3973,24 +3550,12 @@ void CodeGen::genCodeForCpObj(GenTreeObj* cpObjNode)
{
noway_assert(source->IsLocal());
sourceIsLocal = true;
- // TODO: Consider making the addrForm() method in Rationalize public, e.g. in GenTree.
- // OR: transform source to GT_IND(GT_LCL_VAR_ADDR)
- if (source->OperGet() == GT_LCL_VAR)
- {
- source->SetOper(GT_LCL_VAR_ADDR);
- }
- else
- {
- assert(source->OperGet() == GT_LCL_FLD);
- source->SetOper(GT_LCL_FLD_ADDR);
- }
- srcAddr = source;
}
bool dstOnStack = dstAddr->OperIsLocalAddr();
#ifdef DEBUG
- bool isRepMovsqUsed = false;
+ bool isRepMovspUsed = false;
assert(!dstAddr->isContained());
@@ -3998,44 +3563,40 @@ void CodeGen::genCodeForCpObj(GenTreeObj* cpObjNode)
// with CpObj, so this requires special logic.
assert(cpObjNode->gtGcPtrCount > 0);
- // MovSq instruction is used for copying non-gcref fields and it needs
- // src = RSI and dst = RDI.
+ // MovSp (alias for movsq on x64 and movsd on x86) instruction is used for copying non-gcref fields
+ // and it needs src = RSI and dst = RDI.
// Either these registers must not contain lclVars, or they must be dying or marked for spill.
// This is because these registers are incremented as we go through the struct.
- GenTree* actualSrcAddr = srcAddr->gtSkipReloadOrCopy();
- GenTree* actualDstAddr = dstAddr->gtSkipReloadOrCopy();
- unsigned srcLclVarNum = BAD_VAR_NUM;
- unsigned dstLclVarNum = BAD_VAR_NUM;
- bool isSrcAddrLiveOut = false;
- bool isDstAddrLiveOut = false;
- if (genIsRegCandidateLocal(actualSrcAddr))
- {
- srcLclVarNum = actualSrcAddr->AsLclVarCommon()->gtLclNum;
- isSrcAddrLiveOut = ((actualSrcAddr->gtFlags & (GTF_VAR_DEATH | GTF_SPILL)) == 0);
- }
- if (genIsRegCandidateLocal(actualDstAddr))
- {
- dstLclVarNum = actualDstAddr->AsLclVarCommon()->gtLclNum;
- isDstAddrLiveOut = ((actualDstAddr->gtFlags & (GTF_VAR_DEATH | GTF_SPILL)) == 0);
- }
- assert((actualSrcAddr->gtRegNum != REG_RSI) || !isSrcAddrLiveOut ||
- ((srcLclVarNum == dstLclVarNum) && !isDstAddrLiveOut));
- assert((actualDstAddr->gtRegNum != REG_RDI) || !isDstAddrLiveOut ||
- ((srcLclVarNum == dstLclVarNum) && !isSrcAddrLiveOut));
+ if (!sourceIsLocal)
+ {
+ GenTree* actualSrcAddr = srcAddr->gtSkipReloadOrCopy();
+ GenTree* actualDstAddr = dstAddr->gtSkipReloadOrCopy();
+ unsigned srcLclVarNum = BAD_VAR_NUM;
+ unsigned dstLclVarNum = BAD_VAR_NUM;
+ bool isSrcAddrLiveOut = false;
+ bool isDstAddrLiveOut = false;
+ if (genIsRegCandidateLocal(actualSrcAddr))
+ {
+ srcLclVarNum = actualSrcAddr->AsLclVarCommon()->gtLclNum;
+ isSrcAddrLiveOut = ((actualSrcAddr->gtFlags & (GTF_VAR_DEATH | GTF_SPILL)) == 0);
+ }
+ if (genIsRegCandidateLocal(actualDstAddr))
+ {
+ dstLclVarNum = actualDstAddr->AsLclVarCommon()->gtLclNum;
+ isDstAddrLiveOut = ((actualDstAddr->gtFlags & (GTF_VAR_DEATH | GTF_SPILL)) == 0);
+ }
+ assert((actualSrcAddr->gtRegNum != REG_RSI) || !isSrcAddrLiveOut ||
+ ((srcLclVarNum == dstLclVarNum) && !isDstAddrLiveOut));
+ assert((actualDstAddr->gtRegNum != REG_RDI) || !isDstAddrLiveOut ||
+ ((srcLclVarNum == dstLclVarNum) && !isSrcAddrLiveOut));
+ srcAddrType = srcAddr->TypeGet();
+ }
#endif // DEBUG
- // Consume these registers.
+ // Consume the operands and get them into the right registers.
// They may now contain gc pointers (depending on their type; gcMarkRegPtrVal will "do the right thing").
- if (sourceIsLocal)
- {
- inst_RV_TT(INS_lea, REG_RSI, source, 0, EA_BYREF);
- genConsumeBlockOp(cpObjNode, REG_RDI, REG_NA, REG_NA);
- }
- else
- {
- genConsumeBlockOp(cpObjNode, REG_RDI, REG_RSI, REG_NA);
- }
- gcInfo.gcMarkRegPtrVal(REG_RSI, srcAddr->TypeGet());
+ genConsumeBlockOp(cpObjNode, REG_RDI, REG_RSI, REG_NA);
+ gcInfo.gcMarkRegPtrVal(REG_RSI, srcAddrType);
gcInfo.gcMarkRegPtrVal(REG_RDI, dstAddr->TypeGet());
unsigned slots = cpObjNode->gtSlots;
@@ -4046,23 +3607,23 @@ void CodeGen::genCodeForCpObj(GenTreeObj* cpObjNode)
if (slots >= CPOBJ_NONGC_SLOTS_LIMIT)
{
#ifdef DEBUG
- // If the destination of the CpObj is on the stack
- // make sure we allocated RCX to emit rep movsq.
- regNumber tmpReg = genRegNumFromMask(cpObjNode->gtRsvdRegs & RBM_ALLINT);
- assert(tmpReg == REG_RCX);
- isRepMovsqUsed = true;
+ // If the destination of the CpObj is on the stack, make sure we allocated
+ // RCX to emit the movsp (alias for movsd or movsq for 32 and 64 bits respectively).
+ assert((cpObjNode->gtRsvdRegs & RBM_RCX) != 0);
+ regNumber tmpReg = REG_RCX;
+ isRepMovspUsed = true;
#endif // DEBUG
getEmitter()->emitIns_R_I(INS_mov, EA_4BYTE, REG_RCX, slots);
- instGen(INS_r_movsq);
+ instGen(INS_r_movsp);
}
else
{
- // For small structs, it's better to emit a sequence of movsq than to
- // emit a rep movsq instruction.
+ // For small structs, it's better to emit a sequence of movsp than to
+ // emit a rep movsp instruction.
while (slots > 0)
{
- instGen(INS_movsq);
+ instGen(INS_movsp);
slots--;
}
}
@@ -4078,7 +3639,7 @@ void CodeGen::genCodeForCpObj(GenTreeObj* cpObjNode)
switch (gcPtrs[i])
{
case TYPE_GC_NONE:
- // Let's see if we can use rep movsq instead of a sequence of movsq instructions
+ // Let's see if we can use rep movsp instead of a sequence of movsp instructions
// to save cycles and code size.
{
unsigned nonGcSlotCount = 0;
@@ -4090,12 +3651,12 @@ void CodeGen::genCodeForCpObj(GenTreeObj* cpObjNode)
} while (i < slots && gcPtrs[i] == TYPE_GC_NONE);
// If we have a very small contiguous non-gc region, it's better just to
- // emit a sequence of movsq instructions
+ // emit a sequence of movsp instructions
if (nonGcSlotCount < CPOBJ_NONGC_SLOTS_LIMIT)
{
while (nonGcSlotCount > 0)
{
- instGen(INS_movsq);
+ instGen(INS_movsp);
nonGcSlotCount--;
}
}
@@ -4103,13 +3664,13 @@ void CodeGen::genCodeForCpObj(GenTreeObj* cpObjNode)
{
#ifdef DEBUG
// Otherwise, we can save code-size and improve CQ by emitting
- // rep movsq
- regNumber tmpReg = genRegNumFromMask(cpObjNode->gtRsvdRegs & RBM_ALLINT);
- assert(tmpReg == REG_RCX);
- isRepMovsqUsed = true;
+ // rep movsp (alias for movsd/movsq for x86/x64)
+ assert((cpObjNode->gtRsvdRegs & RBM_RCX) != 0);
+ regNumber tmpReg = REG_RCX;
+ isRepMovspUsed = true;
#endif // DEBUG
getEmitter()->emitIns_R_I(INS_mov, EA_4BYTE, REG_RCX, nonGcSlotCount);
- instGen(INS_r_movsq);
+ instGen(INS_r_movsp);
}
}
break;
@@ -4235,7 +3796,7 @@ void CodeGen::genJumpTable(GenTree* treeNode)
// generate code for the locked operations:
// GT_LOCKADD, GT_XCHG, GT_XADD
-void CodeGen::genLockedInstructions(GenTree* treeNode)
+void CodeGen::genLockedInstructions(GenTreeOp* treeNode)
{
GenTree* data = treeNode->gtOp.gtOp2;
GenTree* addr = treeNode->gtOp.gtOp1;
@@ -4244,11 +3805,6 @@ void CodeGen::genLockedInstructions(GenTree* treeNode)
regNumber addrReg = addr->gtRegNum;
instruction ins;
- // all of these nodes implicitly do an indirection on op1
- // so create a temporary node to feed into the pattern matching
- GenTreeIndir i = indirForm(data->TypeGet(), addr);
- genConsumeReg(addr);
-
// The register allocator should have extended the lifetime of the address
// so that it is not used as the target.
noway_assert(addrReg != targetReg);
@@ -4258,7 +3814,7 @@ void CodeGen::genLockedInstructions(GenTree* treeNode)
assert(targetReg != REG_NA || treeNode->OperGet() == GT_LOCKADD || !genIsRegCandidateLocal(data) ||
(data->gtFlags & GTF_VAR_DEATH) != 0);
- genConsumeIfReg(data);
+ genConsumeOperands(treeNode);
if (targetReg != REG_NA && dataReg != REG_NA && dataReg != targetReg)
{
inst_RV_RV(ins_Copy(data->TypeGet()), targetReg, dataReg);
@@ -4284,6 +3840,10 @@ void CodeGen::genLockedInstructions(GenTree* treeNode)
default:
unreached();
}
+
+ // all of these nodes implicitly do an indirection on op1
+ // so create a temporary node to feed into the pattern matching
+ GenTreeIndir i = indirForm(data->TypeGet(), addr);
getEmitter()->emitInsBinary(ins, emitTypeSize(data), &i, data);
if (treeNode->gtRegNum != REG_NA)
@@ -4459,22 +4019,22 @@ void CodeGen::genCodeForArrOffset(GenTreeArrOffs* arrOffset)
GenTreePtr arrObj = arrOffset->gtArrObj;
regNumber tgtReg = arrOffset->gtRegNum;
-
- noway_assert(tgtReg != REG_NA);
+ assert(tgtReg != REG_NA);
unsigned dim = arrOffset->gtCurrDim;
unsigned rank = arrOffset->gtArrRank;
var_types elemType = arrOffset->gtArrElemType;
- // We will use a temp register for the offset*scale+effectiveIndex computation.
- regMaskTP tmpRegMask = arrOffset->gtRsvdRegs;
- regNumber tmpReg = genRegNumFromMask(tmpRegMask);
-
// First, consume the operands in the correct order.
regNumber offsetReg = REG_NA;
+ regNumber tmpReg = REG_NA;
if (!offsetNode->IsIntegralConst(0))
{
offsetReg = genConsumeReg(offsetNode);
+
+ // We will use a temp register for the offset*scale+effectiveIndex computation.
+ regMaskTP tmpRegMask = arrOffset->gtRsvdRegs;
+ tmpReg = genRegNumFromMask(tmpRegMask);
}
else
{
@@ -4495,6 +4055,9 @@ void CodeGen::genCodeForArrOffset(GenTreeArrOffs* arrOffset)
if (!offsetNode->IsIntegralConst(0))
{
+ assert(tmpReg != REG_NA);
+ assert(arrReg != REG_NA);
+
// Evaluate tgtReg = offsetReg*dim_size + indexReg.
// tmpReg is used to load dim_size and the result of the multiplication.
// Note that dim_size will never be negative.
@@ -4617,6 +4180,12 @@ instruction CodeGen::genGetInsForOper(genTreeOps oper, var_types type)
case GT_SUB_HI:
ins = INS_sbb;
break;
+ case GT_LSH_HI:
+ ins = INS_shld;
+ break;
+ case GT_RSH_LO:
+ ins = INS_shrd;
+ break;
#endif // !defined(_TARGET_64BIT_)
default:
unreached();
@@ -4654,6 +4223,7 @@ void CodeGen::genCodeForShift(GenTreePtr tree)
regNumber operandReg = operand->gtRegNum;
GenTreePtr shiftBy = tree->gtGetOp2();
+
if (shiftBy->isContainedIntOrIImmed())
{
// First, move the operand to the destination register and
@@ -4672,12 +4242,7 @@ void CodeGen::genCodeForShift(GenTreePtr tree)
// We must have the number of bits to shift stored in ECX, since we constrained this node to
// sit in ECX. In case this didn't happen, LSRA expects the code generator to move it since it's a single
// register destination requirement.
- regNumber shiftReg = shiftBy->gtRegNum;
- if (shiftReg != REG_RCX)
- {
- // Issue the mov to RCX:
- inst_RV_RV(INS_mov, REG_RCX, shiftReg, shiftBy->TypeGet());
- }
+ genCopyRegIfNeeded(shiftBy, REG_RCX);
// The operand to be shifted must not be in ECX
noway_assert(operandReg != REG_RCX);
@@ -4692,6 +4257,67 @@ void CodeGen::genCodeForShift(GenTreePtr tree)
genProduceReg(tree);
}
+#ifdef _TARGET_X86_
+//------------------------------------------------------------------------
+// genCodeForShiftLong: Generates the code sequence for a GenTree node that
+// represents a three operand bit shift or rotate operation (<<Hi, >>Lo).
+//
+// Arguments:
+// tree - the bit shift node (that specifies the type of bit shift to perform).
+//
+// Assumptions:
+// a) All GenTrees are register allocated.
+// b) The shift-by-amount in tree->gtOp.gtOp2 is a contained constant
+//
+void CodeGen::genCodeForShiftLong(GenTreePtr tree)
+{
+ // Only the non-RMW case here.
+ genTreeOps oper = tree->OperGet();
+ assert(oper == GT_LSH_HI || oper == GT_RSH_LO);
+
+ GenTree* operand = tree->gtOp.gtOp1;
+ assert(operand->OperGet() == GT_LONG);
+ assert(!operand->gtOp.gtOp1->isContained());
+ assert(!operand->gtOp.gtOp2->isContained());
+
+ GenTree* operandLo = operand->gtGetOp1();
+ GenTree* operandHi = operand->gtGetOp2();
+
+ regNumber regLo = operandLo->gtRegNum;
+ regNumber regHi = operandHi->gtRegNum;
+
+ genConsumeOperands(tree->AsOp());
+
+ var_types targetType = tree->TypeGet();
+ instruction ins = genGetInsForOper(oper, targetType);
+
+ GenTreePtr shiftBy = tree->gtGetOp2();
+
+ assert(shiftBy->isContainedIntOrIImmed());
+
+ unsigned int count = shiftBy->AsIntConCommon()->IconValue();
+
+ regNumber regResult = (oper == GT_LSH_HI) ? regHi : regLo;
+
+ if (regResult != tree->gtRegNum)
+ {
+ inst_RV_RV(INS_mov, tree->gtRegNum, regResult, targetType);
+ }
+
+ if (oper == GT_LSH_HI)
+ {
+ inst_RV_RV_IV(ins, emitTypeSize(targetType), tree->gtRegNum, regLo, count);
+ }
+ else
+ {
+ assert(oper == GT_RSH_LO);
+ inst_RV_RV_IV(ins, emitTypeSize(targetType), tree->gtRegNum, regHi, count);
+ }
+
+ genProduceReg(tree);
+}
+#endif
+
//------------------------------------------------------------------------
// genCodeForShiftRMW: Generates the code sequence for a GT_STOREIND GenTree node that
// represents a RMW bit shift or rotate operation (<<, >>, >>>, rol, ror), for example:
@@ -4739,182 +4365,13 @@ void CodeGen::genCodeForShiftRMW(GenTreeStoreInd* storeInd)
// sit in ECX. In case this didn't happen, LSRA expects the code generator to move it since it's a single
// register destination requirement.
regNumber shiftReg = shiftBy->gtRegNum;
- if (shiftReg != REG_RCX)
- {
- // Issue the mov to RCX:
- inst_RV_RV(INS_mov, REG_RCX, shiftReg, shiftBy->TypeGet());
- }
+ genCopyRegIfNeeded(shiftBy, REG_RCX);
// The shiftBy operand is implicit, so call the unary version of emitInsRMW.
getEmitter()->emitInsRMW(ins, attr, storeInd);
}
}
-void CodeGen::genUnspillRegIfNeeded(GenTree* tree)
-{
- regNumber dstReg = tree->gtRegNum;
- GenTree* unspillTree = tree;
-
- if (tree->gtOper == GT_RELOAD)
- {
- unspillTree = tree->gtOp.gtOp1;
- }
-
- if ((unspillTree->gtFlags & GTF_SPILLED) != 0)
- {
- if (genIsRegCandidateLocal(unspillTree))
- {
- // Reset spilled flag, since we are going to load a local variable from its home location.
- unspillTree->gtFlags &= ~GTF_SPILLED;
-
- GenTreeLclVarCommon* lcl = unspillTree->AsLclVarCommon();
- LclVarDsc* varDsc = &compiler->lvaTable[lcl->gtLclNum];
-
- // Load local variable from its home location.
- // In most cases the tree type will indicate the correct type to use for the load.
- // However, if it is NOT a normalizeOnLoad lclVar (i.e. NOT a small int that always gets
- // widened when loaded into a register), and its size is not the same as genActualType of
- // the type of the lclVar, then we need to change the type of the tree node when loading.
- // This situation happens due to "optimizations" that avoid a cast and
- // simply retype the node when using long type lclVar as an int.
- // While loading the int in that case would work for this use of the lclVar, if it is
- // later used as a long, we will have incorrectly truncated the long.
- // In the normalizeOnLoad case ins_Load will return an appropriate sign- or zero-
- // extending load.
-
- var_types treeType = unspillTree->TypeGet();
- if (treeType != genActualType(varDsc->lvType) && !varTypeIsGC(treeType) && !varDsc->lvNormalizeOnLoad())
- {
- assert(!varTypeIsGC(varDsc));
- var_types spillType = genActualType(varDsc->lvType);
- unspillTree->gtType = spillType;
- inst_RV_TT(ins_Load(spillType, compiler->isSIMDTypeLocalAligned(lcl->gtLclNum)), dstReg, unspillTree);
- unspillTree->gtType = treeType;
- }
- else
- {
- inst_RV_TT(ins_Load(treeType, compiler->isSIMDTypeLocalAligned(lcl->gtLclNum)), dstReg, unspillTree);
- }
-
- unspillTree->SetInReg();
-
- // TODO-Review: We would like to call:
- // genUpdateRegLife(varDsc, /*isBorn*/ true, /*isDying*/ false DEBUGARG(tree));
- // instead of the following code, but this ends up hitting this assert:
- // assert((regSet.rsMaskVars & regMask) == 0);
- // due to issues with LSRA resolution moves.
- // So, just force it for now. This probably indicates a condition that creates a GC hole!
- //
- // Extra note: I think we really want to call something like gcInfo.gcUpdateForRegVarMove,
- // because the variable is not really going live or dead, but that method is somewhat poorly
- // factored because it, in turn, updates rsMaskVars which is part of RegSet not GCInfo.
- // TODO-Cleanup: This code exists in other CodeGen*.cpp files, and should be moved to CodeGenCommon.cpp.
-
- // Don't update the variable's location if we are just re-spilling it again.
-
- if ((unspillTree->gtFlags & GTF_SPILL) == 0)
- {
- genUpdateVarReg(varDsc, tree);
-#ifdef DEBUG
- if (VarSetOps::IsMember(compiler, gcInfo.gcVarPtrSetCur, varDsc->lvVarIndex))
- {
- JITDUMP("\t\t\t\t\t\t\tRemoving V%02u from gcVarPtrSetCur\n", lcl->gtLclNum);
- }
-#endif // DEBUG
- VarSetOps::RemoveElemD(compiler, gcInfo.gcVarPtrSetCur, varDsc->lvVarIndex);
-
-#ifdef DEBUG
- if (compiler->verbose)
- {
- printf("\t\t\t\t\t\t\tV%02u in reg ", lcl->gtLclNum);
- varDsc->PrintVarReg();
- printf(" is becoming live ");
- compiler->printTreeID(unspillTree);
- printf("\n");
- }
-#endif // DEBUG
-
- regSet.AddMaskVars(genGetRegMask(varDsc));
- }
-
- gcInfo.gcMarkRegPtrVal(dstReg, unspillTree->TypeGet());
- }
- else if (unspillTree->IsMultiRegCall())
- {
- GenTreeCall* call = unspillTree->AsCall();
- ReturnTypeDesc* retTypeDesc = call->GetReturnTypeDesc();
- unsigned regCount = retTypeDesc->GetReturnRegCount();
- GenTreeCopyOrReload* reloadTree = nullptr;
- if (tree->OperGet() == GT_RELOAD)
- {
- reloadTree = tree->AsCopyOrReload();
- }
-
- // In case of multi-reg call node, GTF_SPILLED flag on it indicates that
- // one or more of its result regs are spilled. Call node needs to be
- // queried to know which specific result regs to be unspilled.
- for (unsigned i = 0; i < regCount; ++i)
- {
- unsigned flags = call->GetRegSpillFlagByIdx(i);
- if ((flags & GTF_SPILLED) != 0)
- {
- var_types dstType = retTypeDesc->GetReturnRegType(i);
- regNumber unspillTreeReg = call->GetRegNumByIdx(i);
-
- if (reloadTree != nullptr)
- {
- dstReg = reloadTree->GetRegNumByIdx(i);
- if (dstReg == REG_NA)
- {
- dstReg = unspillTreeReg;
- }
- }
- else
- {
- dstReg = unspillTreeReg;
- }
-
- TempDsc* t = regSet.rsUnspillInPlace(call, unspillTreeReg, i);
- getEmitter()->emitIns_R_S(ins_Load(dstType), emitActualTypeSize(dstType), dstReg, t->tdTempNum(),
- 0);
- compiler->tmpRlsTemp(t);
- gcInfo.gcMarkRegPtrVal(dstReg, dstType);
- }
- }
-
- unspillTree->gtFlags &= ~GTF_SPILLED;
- unspillTree->SetInReg();
- }
- else
- {
- TempDsc* t = regSet.rsUnspillInPlace(unspillTree, unspillTree->gtRegNum);
- getEmitter()->emitIns_R_S(ins_Load(unspillTree->gtType), emitActualTypeSize(unspillTree->TypeGet()), dstReg,
- t->tdTempNum(), 0);
- compiler->tmpRlsTemp(t);
-
- unspillTree->gtFlags &= ~GTF_SPILLED;
- unspillTree->SetInReg();
- gcInfo.gcMarkRegPtrVal(dstReg, unspillTree->TypeGet());
- }
- }
-}
-
-// Do Liveness update for a subnodes that is being consumed by codegen
-// including the logic for reload in case is needed and also takes care
-// of locating the value on the desired register.
-void CodeGen::genConsumeRegAndCopy(GenTree* tree, regNumber needReg)
-{
- if (needReg == REG_NA)
- {
- return;
- }
- regNumber treeReg = genConsumeReg(tree);
- if (treeReg != needReg)
- {
- inst_RV_RV(INS_mov, needReg, treeReg, tree->TypeGet());
- }
-}
-
void CodeGen::genRegCopy(GenTree* treeNode)
{
assert(treeNode->OperGet() == GT_COPY);
@@ -5022,662 +4479,6 @@ void CodeGen::genRegCopy(GenTree* treeNode)
genProduceReg(treeNode);
}
-// Check that registers are consumed in the right order for the current node being generated.
-#ifdef DEBUG
-void CodeGen::genCheckConsumeNode(GenTree* treeNode)
-{
- // GT_PUTARG_REG is consumed out of order.
- if (treeNode->gtSeqNum != 0 && treeNode->OperGet() != GT_PUTARG_REG)
- {
- if (lastConsumedNode != nullptr)
- {
- if (treeNode == lastConsumedNode)
- {
- if (verbose)
- {
- printf("Node was consumed twice:\n ");
- compiler->gtDispTree(treeNode, nullptr, nullptr, true);
- }
- }
- else
- {
- if (verbose && (lastConsumedNode->gtSeqNum > treeNode->gtSeqNum))
- {
- printf("Nodes were consumed out-of-order:\n");
- compiler->gtDispTree(lastConsumedNode, nullptr, nullptr, true);
- compiler->gtDispTree(treeNode, nullptr, nullptr, true);
- }
- // assert(lastConsumedNode->gtSeqNum < treeNode->gtSeqNum);
- }
- }
- lastConsumedNode = treeNode;
- }
-}
-#endif // DEBUG
-
-//--------------------------------------------------------------------
-// genConsumeReg: Do liveness update for a subnode that is being
-// consumed by codegen.
-//
-// Arguments:
-// tree - GenTree node
-//
-// Return Value:
-// Returns the reg number of tree.
-// In case of multi-reg call node returns the first reg number
-// of the multi-reg return.
-regNumber CodeGen::genConsumeReg(GenTree* tree)
-{
- if (tree->OperGet() == GT_COPY)
- {
- genRegCopy(tree);
- }
-
- // Handle the case where we have a lclVar that needs to be copied before use (i.e. because it
- // interferes with one of the other sources (or the target, if it's a "delayed use" register)).
- // TODO-Cleanup: This is a special copyReg case in LSRA - consider eliminating these and
- // always using GT_COPY to make the lclVar location explicit.
- // Note that we have to do this before calling genUpdateLife because otherwise if we spill it
- // the lvRegNum will be set to REG_STK and we will lose track of what register currently holds
- // the lclVar (normally when a lclVar is spilled it is then used from its former register
- // location, which matches the gtRegNum on the node).
- // (Note that it doesn't matter if we call this before or after genUnspillRegIfNeeded
- // because if it's on the stack it will always get reloaded into tree->gtRegNum).
- if (genIsRegCandidateLocal(tree))
- {
- GenTreeLclVarCommon* lcl = tree->AsLclVarCommon();
- LclVarDsc* varDsc = &compiler->lvaTable[lcl->GetLclNum()];
- if (varDsc->lvRegNum != REG_STK && varDsc->lvRegNum != tree->gtRegNum)
- {
- inst_RV_RV(INS_mov, tree->gtRegNum, varDsc->lvRegNum);
- }
- }
-
- genUnspillRegIfNeeded(tree);
-
- // genUpdateLife() will also spill local var if marked as GTF_SPILL by calling CodeGen::genSpillVar
- genUpdateLife(tree);
-
- assert(tree->gtHasReg());
-
- // there are three cases where consuming a reg means clearing the bit in the live mask
- // 1. it was not produced by a local
- // 2. it was produced by a local that is going dead
- // 3. it was produced by a local that does not live in that reg (like one allocated on the stack)
-
- if (genIsRegCandidateLocal(tree))
- {
- GenTreeLclVarCommon* lcl = tree->AsLclVarCommon();
- LclVarDsc* varDsc = &compiler->lvaTable[lcl->GetLclNum()];
- assert(varDsc->lvLRACandidate);
-
- if ((tree->gtFlags & GTF_VAR_DEATH) != 0)
- {
- gcInfo.gcMarkRegSetNpt(genRegMask(varDsc->lvRegNum));
- }
- else if (varDsc->lvRegNum == REG_STK)
- {
- // We have loaded this into a register only temporarily
- gcInfo.gcMarkRegSetNpt(genRegMask(tree->gtRegNum));
- }
- }
- else
- {
- gcInfo.gcMarkRegSetNpt(tree->gtGetRegMask());
- }
-
- genCheckConsumeNode(tree);
- return tree->gtRegNum;
-}
-
-// Do liveness update for an address tree: one of GT_LEA, GT_LCL_VAR, or GT_CNS_INT (for call indirect).
-void CodeGen::genConsumeAddress(GenTree* addr)
-{
- if (!addr->isContained())
- {
- genConsumeReg(addr);
- }
- else if (addr->OperGet() == GT_LEA)
- {
- genConsumeAddrMode(addr->AsAddrMode());
- }
-}
-
-// do liveness update for a subnode that is being consumed by codegen
-void CodeGen::genConsumeAddrMode(GenTreeAddrMode* addr)
-{
- genConsumeOperands(addr);
-}
-
-void CodeGen::genConsumeRegs(GenTree* tree)
-{
-#if !defined(_TARGET_64BIT_)
- if (tree->OperGet() == GT_LONG)
- {
- genConsumeRegs(tree->gtGetOp1());
- genConsumeRegs(tree->gtGetOp2());
- return;
- }
-#endif // !defined(_TARGET_64BIT_)
-
- if (tree->isContained())
- {
- if (tree->isContainedSpillTemp())
- {
- // spill temps are un-tracked and hence no need to update life
- }
- else if (tree->isIndir())
- {
- genConsumeAddress(tree->AsIndir()->Addr());
- }
- else if (tree->OperGet() == GT_AND)
- {
- // This is the special contained GT_AND that we created in Lowering::LowerCmp()
- // Now we need to consume the operands of the GT_AND node.
- genConsumeOperands(tree->AsOp());
- }
- else if (tree->OperGet() == GT_LCL_VAR)
- {
- // A contained lcl var must be living on stack and marked as reg optional.
- unsigned varNum = tree->AsLclVarCommon()->GetLclNum();
- LclVarDsc* varDsc = compiler->lvaTable + varNum;
-
- noway_assert(varDsc->lvRegNum == REG_STK);
- noway_assert(tree->IsRegOptional());
-
- // Update the life of reg optional lcl var.
- genUpdateLife(tree);
- }
- else
- {
- assert(tree->OperIsLeaf());
- }
- }
- else
- {
- genConsumeReg(tree);
- }
-}
-
-//------------------------------------------------------------------------
-// genConsumeOperands: Do liveness update for the operands of a unary or binary tree
-//
-// Arguments:
-// tree - the GenTreeOp whose operands will have their liveness updated.
-//
-// Return Value:
-// None.
-//
-// Notes:
-// Note that this logic is localized here because we must do the liveness update in
-// the correct execution order. This is important because we may have two operands
-// that involve the same lclVar, and if one is marked "lastUse" we must handle it
-// after the first.
-
-void CodeGen::genConsumeOperands(GenTreeOp* tree)
-{
- GenTree* firstOp = tree->gtOp1;
- GenTree* secondOp = tree->gtOp2;
- if ((tree->gtFlags & GTF_REVERSE_OPS) != 0)
- {
- assert(secondOp != nullptr);
- firstOp = secondOp;
- secondOp = tree->gtOp1;
- }
- if (firstOp != nullptr)
- {
- genConsumeRegs(firstOp);
- }
- if (secondOp != nullptr)
- {
- genConsumeRegs(secondOp);
- }
-}
-
-#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
-//------------------------------------------------------------------------
-// genConsumePutStructArgStk: Do liveness update for the operands of a PutArgStk node.
-// Also loads in the right register the addresses of the
-// src/dst for rep mov operation.
-//
-// Arguments:
-// putArgNode - the PUTARG_STK tree.
-// dstReg - the dstReg for the rep move operation.
-// srcReg - the srcReg for the rep move operation.
-// sizeReg - the sizeReg for the rep move operation.
-// baseVarNum - the varnum for the local used for placing the "by-value" args on the stack.
-//
-// Return Value:
-// None.
-//
-// Note: sizeReg can be REG_NA when this function is used to consume the dstReg and srcReg
-// for copying on the stack a struct with references.
-// The source address/offset is determined from the address on the GT_OBJ node, while
-// the destination address is the address contained in 'baseVarNum' plus the offset
-// provided in the 'putArgNode'.
-
-void CodeGen::genConsumePutStructArgStk(
- GenTreePutArgStk* putArgNode, regNumber dstReg, regNumber srcReg, regNumber sizeReg, unsigned baseVarNum)
-{
- assert(varTypeIsStruct(putArgNode));
- assert(baseVarNum != BAD_VAR_NUM);
-
- // The putArgNode children are always contained. We should not consume any registers.
- assert(putArgNode->gtGetOp1()->isContained());
-
- GenTree* dstAddr = putArgNode;
-
- // Get the source address.
- GenTree* src = putArgNode->gtGetOp1();
- assert((src->gtOper == GT_OBJ) || ((src->gtOper == GT_IND && varTypeIsSIMD(src))));
- GenTree* srcAddr = src->gtGetOp1();
-
- size_t size = putArgNode->getArgSize();
-
- assert(dstReg != REG_NA);
- assert(srcReg != REG_NA);
-
- // Consume the registers only if they are not contained or set to REG_NA.
- if (srcAddr->gtRegNum != REG_NA)
- {
- genConsumeReg(srcAddr);
- }
-
- // If the op1 is already in the dstReg - nothing to do.
- // Otherwise load the op1 (GT_ADDR) into the dstReg to copy the struct on the stack by value.
- if (dstAddr->gtRegNum != dstReg)
- {
- // Generate LEA instruction to load the stack of the outgoing var + SlotNum offset (or the incoming arg area
- // for tail calls) in RDI.
- // Destination is always local (on the stack) - use EA_PTRSIZE.
- getEmitter()->emitIns_R_S(INS_lea, EA_PTRSIZE, dstReg, baseVarNum, putArgNode->getArgOffset());
- }
-
- if (srcAddr->gtRegNum != srcReg)
- {
- if (srcAddr->OperIsLocalAddr())
- {
- // The OperLocalAddr is always contained.
- assert(srcAddr->isContained());
- GenTreeLclVarCommon* lclNode = srcAddr->AsLclVarCommon();
-
- // Generate LEA instruction to load the LclVar address in RSI.
- // Source is known to be on the stack. Use EA_PTRSIZE.
- unsigned int offset = 0;
- if (srcAddr->OperGet() == GT_LCL_FLD_ADDR)
- {
- offset = srcAddr->AsLclFld()->gtLclOffs;
- }
- getEmitter()->emitIns_R_S(INS_lea, EA_PTRSIZE, srcReg, lclNode->gtLclNum, offset);
- }
- else
- {
- assert(srcAddr->gtRegNum != REG_NA);
- // Source is not known to be on the stack. Use EA_BYREF.
- getEmitter()->emitIns_R_R(INS_mov, EA_BYREF, srcReg, srcAddr->gtRegNum);
- }
- }
-
- if (sizeReg != REG_NA)
- {
- inst_RV_IV(INS_mov, sizeReg, size, EA_8BYTE);
- }
-}
-#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
-
-//------------------------------------------------------------------------
-// genConsumeBlockSize: Ensure that the block size is in the given register
-//
-// Arguments:
-// blkNode - The block node
-// sizeReg - The register into which the block's size should go
-//
-
-void CodeGen::genConsumeBlockSize(GenTreeBlk* blkNode, regNumber sizeReg)
-{
- if (sizeReg != REG_NA)
- {
- unsigned blockSize = blkNode->Size();
- if (blockSize != 0)
- {
- assert(blkNode->gtRsvdRegs == genRegMask(sizeReg));
- genSetRegToIcon(sizeReg, blockSize);
- }
- else
- {
- noway_assert(blkNode->gtOper == GT_STORE_DYN_BLK);
- genConsumeReg(blkNode->AsDynBlk()->gtDynamicSize);
- }
- }
-}
-
-//------------------------------------------------------------------------
-// genConsumeBlockDst: Ensure that the block destination address is in its
-// allocated register.
-// Arguments:
-// blkNode - The block node
-//
-
-void CodeGen::genConsumeBlockDst(GenTreeBlk* blkNode)
-{
- GenTree* dstAddr = blkNode->Addr();
- genConsumeReg(dstAddr);
-}
-
-//------------------------------------------------------------------------
-// genConsumeBlockSrc: Ensure that the block source address is in its
-// allocated register if it is non-local.
-// Arguments:
-// blkNode - The block node
-//
-// Return Value:
-// Returns the source address node, if it is non-local,
-// and nullptr otherwise.
-
-GenTree* CodeGen::genConsumeBlockSrc(GenTreeBlk* blkNode)
-{
- GenTree* src = blkNode->Data();
- if (blkNode->OperIsCopyBlkOp())
- {
- // For a CopyBlk we need the address of the source.
- if (src->OperGet() == GT_IND)
- {
- src = src->gtOp.gtOp1;
- }
- else
- {
- // This must be a local.
- // For this case, there is no source address register, as it is a
- // stack-based address.
- assert(src->OperIsLocal());
- return nullptr;
- }
- }
- genConsumeReg(src);
- return src;
-}
-
-//------------------------------------------------------------------------
-// genConsumeBlockOp: Ensure that the block's operands are enregistered
-// as needed.
-// Arguments:
-// blkNode - The block node
-//
-// Notes:
-// This ensures that the operands are consumed in the proper order to
-// obey liveness modeling.
-
-void CodeGen::genConsumeBlockOp(GenTreeBlk* blkNode, regNumber dstReg, regNumber srcReg, regNumber sizeReg)
-{
- // We have to consume the registers, and perform any copies, in the actual execution order.
- // The nominal order is: dst, src, size. However this may have been changed
- // with reverse flags on the blkNode and the setting of gtEvalSizeFirst in the case of a dynamic
- // block size.
- // Note that the register allocator ensures that the registers ON THE NODES will not interfere
- // with one another if consumed (i.e. reloaded or moved to their ASSIGNED reg) in execution order.
- // Further, it ensures that they will not interfere with one another if they are then copied
- // to the REQUIRED register (if a fixed register requirement) in execution order. This requires,
- // then, that we first consume all the operands, then do any necessary moves.
-
- GenTree* dstAddr = blkNode->Addr();
- GenTree* src = nullptr;
- unsigned blockSize = blkNode->Size();
- GenTree* size = nullptr;
- bool evalSizeFirst = true;
-
- if (blkNode->OperGet() == GT_STORE_DYN_BLK)
- {
- evalSizeFirst = blkNode->AsDynBlk()->gtEvalSizeFirst;
- size = blkNode->AsDynBlk()->gtDynamicSize;
- }
-
- // First, consusme all the sources in order
- if (evalSizeFirst)
- {
- genConsumeBlockSize(blkNode, sizeReg);
- }
- if (blkNode->IsReverseOp())
- {
- src = genConsumeBlockSrc(blkNode);
- genConsumeBlockDst(blkNode);
- }
- else
- {
- genConsumeBlockDst(blkNode);
- src = genConsumeBlockSrc(blkNode);
- }
- if (!evalSizeFirst)
- {
- genConsumeBlockSize(blkNode, sizeReg);
- }
- // Next, perform any necessary moves.
- if (evalSizeFirst && (size != nullptr) && (size->gtRegNum != sizeReg))
- {
- inst_RV_RV(INS_mov, sizeReg, size->gtRegNum, size->TypeGet());
- }
- if (blkNode->IsReverseOp())
- {
- if ((src != nullptr) && (src->gtRegNum != srcReg))
- {
- inst_RV_RV(INS_mov, srcReg, src->gtRegNum, src->TypeGet());
- }
- if (dstAddr->gtRegNum != dstReg)
- {
- inst_RV_RV(INS_mov, dstReg, dstAddr->gtRegNum, dstAddr->TypeGet());
- }
- }
- else
- {
- if (dstAddr->gtRegNum != dstReg)
- {
- inst_RV_RV(INS_mov, dstReg, dstAddr->gtRegNum, dstAddr->TypeGet());
- }
- if ((src != nullptr) && (src->gtRegNum != srcReg))
- {
- inst_RV_RV(INS_mov, srcReg, src->gtRegNum, src->TypeGet());
- }
- }
- if (!evalSizeFirst && size != nullptr && (size->gtRegNum != sizeReg))
- {
- inst_RV_RV(INS_mov, sizeReg, size->gtRegNum, size->TypeGet());
- }
-}
-
-//-------------------------------------------------------------------------
-// genProduceReg: do liveness update for register produced by the current
-// node in codegen.
-//
-// Arguments:
-// tree - Gentree node
-//
-// Return Value:
-// None.
-void CodeGen::genProduceReg(GenTree* tree)
-{
- if (tree->gtFlags & GTF_SPILL)
- {
- // Code for GT_COPY node gets generated as part of consuming regs by its parent.
- // A GT_COPY node in turn produces reg result and it should never be marked to
- // spill.
- //
- // Similarly GT_RELOAD node gets generated as part of consuming regs by its
- // parent and should never be marked for spilling.
- noway_assert(!tree->IsCopyOrReload());
-
- if (genIsRegCandidateLocal(tree))
- {
- // Store local variable to its home location.
- tree->gtFlags &= ~GTF_REG_VAL;
- // Ensure that lclVar stores are typed correctly.
- unsigned varNum = tree->gtLclVarCommon.gtLclNum;
- assert(!compiler->lvaTable[varNum].lvNormalizeOnStore() ||
- (tree->TypeGet() == genActualType(compiler->lvaTable[varNum].TypeGet())));
- inst_TT_RV(ins_Store(tree->gtType, compiler->isSIMDTypeLocalAligned(varNum)), tree, tree->gtRegNum);
- }
- else
- {
- // In case of multi-reg call node, spill flag on call node
- // indicates that one or more of its allocated regs need to
- // be spilled. Call node needs to be further queried to
- // know which of its result regs needs to be spilled.
- if (tree->IsMultiRegCall())
- {
- GenTreeCall* call = tree->AsCall();
- ReturnTypeDesc* retTypeDesc = call->GetReturnTypeDesc();
- unsigned regCount = retTypeDesc->GetReturnRegCount();
-
- for (unsigned i = 0; i < regCount; ++i)
- {
- unsigned flags = call->GetRegSpillFlagByIdx(i);
- if ((flags & GTF_SPILL) != 0)
- {
- regNumber reg = call->GetRegNumByIdx(i);
- call->SetInReg();
- regSet.rsSpillTree(reg, call, i);
- gcInfo.gcMarkRegSetNpt(genRegMask(reg));
- }
- }
- }
- else
- {
- tree->SetInReg();
- regSet.rsSpillTree(tree->gtRegNum, tree);
- gcInfo.gcMarkRegSetNpt(genRegMask(tree->gtRegNum));
- }
-
- tree->gtFlags |= GTF_SPILLED;
- tree->gtFlags &= ~GTF_SPILL;
-
- return;
- }
- }
-
- genUpdateLife(tree);
-
- // If we've produced a register, mark it as a pointer, as needed.
- if (tree->gtHasReg())
- {
- // We only mark the register in the following cases:
- // 1. It is not a register candidate local. In this case, we're producing a
- // register from a local, but the local is not a register candidate. Thus,
- // we must be loading it as a temp register, and any "last use" flag on
- // the register wouldn't be relevant.
- // 2. The register candidate local is going dead. There's no point to mark
- // the register as live, with a GC pointer, if the variable is dead.
- if (!genIsRegCandidateLocal(tree) || ((tree->gtFlags & GTF_VAR_DEATH) == 0))
- {
- // Multi-reg call node will produce more than one register result.
- // Mark all the regs produced by call node.
- if (tree->IsMultiRegCall())
- {
- GenTreeCall* call = tree->AsCall();
- ReturnTypeDesc* retTypeDesc = call->GetReturnTypeDesc();
- unsigned regCount = retTypeDesc->GetReturnRegCount();
-
- for (unsigned i = 0; i < regCount; ++i)
- {
- regNumber reg = call->GetRegNumByIdx(i);
- var_types type = retTypeDesc->GetReturnRegType(i);
- gcInfo.gcMarkRegPtrVal(reg, type);
- }
- }
- else if (tree->IsCopyOrReloadOfMultiRegCall())
- {
- // we should never see reload of multi-reg call here
- // because GT_RELOAD gets generated in reg consuming path.
- noway_assert(tree->OperGet() == GT_COPY);
-
- // A multi-reg GT_COPY node produces those regs to which
- // copy has taken place.
- GenTreeCopyOrReload* copy = tree->AsCopyOrReload();
- GenTreeCall* call = copy->gtGetOp1()->AsCall();
- ReturnTypeDesc* retTypeDesc = call->GetReturnTypeDesc();
- unsigned regCount = retTypeDesc->GetReturnRegCount();
-
- for (unsigned i = 0; i < regCount; ++i)
- {
- var_types type = retTypeDesc->GetReturnRegType(i);
- regNumber fromReg = call->GetRegNumByIdx(i);
- regNumber toReg = copy->GetRegNumByIdx(i);
-
- if (toReg != REG_NA)
- {
- gcInfo.gcMarkRegPtrVal(toReg, type);
- }
- }
- }
- else
- {
- gcInfo.gcMarkRegPtrVal(tree->gtRegNum, tree->TypeGet());
- }
- }
- }
- tree->SetInReg();
-}
-
-// transfer gc/byref status of src reg to dst reg
-void CodeGen::genTransferRegGCState(regNumber dst, regNumber src)
-{
- regMaskTP srcMask = genRegMask(src);
- regMaskTP dstMask = genRegMask(dst);
-
- if (gcInfo.gcRegGCrefSetCur & srcMask)
- {
- gcInfo.gcMarkRegSetGCref(dstMask);
- }
- else if (gcInfo.gcRegByrefSetCur & srcMask)
- {
- gcInfo.gcMarkRegSetByref(dstMask);
- }
- else
- {
- gcInfo.gcMarkRegSetNpt(dstMask);
- }
-}
-
-// generates an ip-relative call or indirect call via reg ('call reg')
-// pass in 'addr' for a relative call or 'base' for a indirect register call
-// methHnd - optional, only used for pretty printing
-// retSize - emitter type of return for GC purposes, should be EA_BYREF, EA_GCREF, or EA_PTRSIZE(not GC)
-void CodeGen::genEmitCall(int callType,
- CORINFO_METHOD_HANDLE methHnd,
- INDEBUG_LDISASM_COMMA(CORINFO_SIG_INFO* sigInfo) void* addr X86_ARG(ssize_t argSize),
- emitAttr retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(emitAttr secondRetSize),
- IL_OFFSETX ilOffset,
- regNumber base,
- bool isJump,
- bool isNoGC)
-{
-#if !defined(_TARGET_X86_)
- ssize_t argSize = 0;
-#endif // !defined(_TARGET_X86_)
- getEmitter()->emitIns_Call(emitter::EmitCallType(callType), methHnd, INDEBUG_LDISASM_COMMA(sigInfo) addr, argSize,
- retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize), gcInfo.gcVarPtrSetCur,
- gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur, ilOffset, base, REG_NA, 0, 0, isJump,
- emitter::emitNoGChelper(compiler->eeGetHelperNum(methHnd)));
-}
-
-// generates an indirect call via addressing mode (call []) given an indir node
-// methHnd - optional, only used for pretty printing
-// retSize - emitter type of return for GC purposes, should be EA_BYREF, EA_GCREF, or EA_PTRSIZE(not GC)
-void CodeGen::genEmitCall(int callType,
- CORINFO_METHOD_HANDLE methHnd,
- INDEBUG_LDISASM_COMMA(CORINFO_SIG_INFO* sigInfo) GenTreeIndir* indir X86_ARG(ssize_t argSize),
- emitAttr retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(emitAttr secondRetSize),
- IL_OFFSETX ilOffset)
-{
-#if !defined(_TARGET_X86_)
- ssize_t argSize = 0;
-#endif // !defined(_TARGET_X86_)
- genConsumeAddress(indir->Addr());
-
- getEmitter()->emitIns_Call(emitter::EmitCallType(callType), methHnd, INDEBUG_LDISASM_COMMA(sigInfo) nullptr,
- argSize, retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize),
- gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur, ilOffset,
- indir->Base() ? indir->Base()->gtRegNum : REG_NA,
- indir->Index() ? indir->Index()->gtRegNum : REG_NA, indir->Scale(), indir->Offset());
-}
-
//------------------------------------------------------------------------
// genStoreInd: Generate code for a GT_STOREIND node.
//
@@ -5724,16 +4525,10 @@ void CodeGen::genStoreInd(GenTreePtr node)
noway_assert(data->gtRegNum != REG_ARG_0);
// addr goes in REG_ARG_0
- if (addr->gtRegNum != REG_ARG_0)
- {
- inst_RV_RV(INS_mov, REG_ARG_0, addr->gtRegNum, addr->TypeGet());
- }
+ genCopyRegIfNeeded(addr, REG_ARG_0);
// data goes in REG_ARG_1
- if (data->gtRegNum != REG_ARG_1)
- {
- inst_RV_RV(INS_mov, REG_ARG_1, data->gtRegNum, data->TypeGet());
- }
+ genCopyRegIfNeeded(data, REG_ARG_1);
genGCWriteBarrier(storeInd, writeBarrierForm);
}
@@ -5821,6 +4616,23 @@ void CodeGen::genStoreInd(GenTreePtr node)
assert(rmwSrc == data->gtGetOp2());
genCodeForShiftRMW(storeInd);
}
+ else if (!compiler->opts.compDbgCode && data->OperGet() == GT_ADD &&
+ (rmwSrc->IsIntegralConst(1) || rmwSrc->IsIntegralConst(-1)))
+ {
+ // Generate "inc/dec [mem]" instead of "add/sub [mem], 1".
+ //
+ // Notes:
+ // 1) Global morph transforms GT_SUB(x, +/-1) into GT_ADD(x, -/+1).
+ // 2) TODO-AMD64: Debugger routine NativeWalker::Decode() runs into
+ // an assert while decoding ModR/M byte of "inc dword ptr [rax]".
+ // It is not clear whether Decode() can handle all possible
+ // addr modes with inc/dec. For this reason, inc/dec [mem]
+ // is not generated while generating debuggable code. Update
+ // the above if condition once Decode() routine is fixed.
+ assert(rmwSrc->isContainedIntOrIImmed());
+ instruction ins = rmwSrc->IsIntegralConst(1) ? INS_inc : INS_dec;
+ getEmitter()->emitInsRMW(ins, emitTypeSize(storeInd), storeInd);
+ }
else
{
// generate code for remaining binary RMW memory ops like add/sub/and/or/xor
@@ -5905,10 +4717,7 @@ bool CodeGen::genEmitOptimizedGCWriteBarrier(GCInfo::WriteBarrierForm writeBarri
// call write_barrier_helper_reg
// addr goes in REG_ARG_0
- if (addr->gtRegNum != REG_WRITE_BARRIER) // REVIEW: can it ever not already by in this register?
- {
- inst_RV_RV(INS_mov, REG_WRITE_BARRIER, addr->gtRegNum, addr->TypeGet());
- }
+ genCopyRegIfNeeded(addr, REG_WRITE_BARRIER);
unsigned tgtAnywhere = 0;
if (writeBarrierForm != GCInfo::WBF_BarrierUnchecked)
@@ -5943,10 +4752,28 @@ void CodeGen::genCallInstruction(GenTreePtr node)
// all virtuals should have been expanded into a control expression
assert(!call->IsVirtual() || call->gtControlExpr || call->gtCallAddr);
+ // Insert a GS check if necessary
+ if (call->IsTailCallViaHelper())
+ {
+ if (compiler->getNeedsGSSecurityCookie())
+ {
+#if FEATURE_FIXED_OUT_ARGS
+ // If either of the conditions below is true, we will need a temporary register in order to perform the GS
+ // cookie check. When FEATURE_FIXED_OUT_ARGS is disabled, we save and restore the temporary register using
+ // push/pop. When FEATURE_FIXED_OUT_ARGS is enabled, however, we need an alternative solution. For now,
+ // though, the tail prefix is ignored on all platforms that use fixed out args, so we should never hit this
+ // case.
+ assert(compiler->gsGlobalSecurityCookieAddr == nullptr);
+ assert((int)compiler->gsGlobalSecurityCookieVal == (ssize_t)compiler->gsGlobalSecurityCookieVal);
+#endif
+ genEmitGSCookieCheck(true);
+ }
+ }
+
// Consume all the arg regs
for (GenTreePtr list = call->gtCallLateArgs; list; list = list->MoveNext())
{
- assert(list->IsList());
+ assert(list->OperIsList());
GenTreePtr argNode = list->Current();
@@ -5960,13 +4787,13 @@ void CodeGen::genCallInstruction(GenTreePtr node)
#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
// Deal with multi register passed struct args.
- if (argNode->OperGet() == GT_LIST)
+ if (argNode->OperGet() == GT_FIELD_LIST)
{
- GenTreeArgList* argListPtr = argNode->AsArgList();
- unsigned iterationNum = 0;
- for (; argListPtr != nullptr; argListPtr = argListPtr->Rest(), iterationNum++)
+ GenTreeFieldList* fieldListPtr = argNode->AsFieldList();
+ unsigned iterationNum = 0;
+ for (; fieldListPtr != nullptr; fieldListPtr = fieldListPtr->Rest(), iterationNum++)
{
- GenTreePtr putArgRegNode = argListPtr->gtOp.gtOp1;
+ GenTreePtr putArgRegNode = fieldListPtr->gtOp.gtOp1;
assert(putArgRegNode->gtOper == GT_PUTARG_REG);
regNumber argReg = REG_NA;
@@ -6036,20 +4863,34 @@ void CodeGen::genCallInstruction(GenTreePtr node)
{
assert((arg->gtGetOp1()->OperGet() == GT_PUTARG_STK) && (arg->gtGetOp2()->OperGet() == GT_PUTARG_STK));
}
+ if ((arg->OperGet() == GT_PUTARG_STK) && (arg->gtGetOp1()->OperGet() == GT_FIELD_LIST))
+ {
+ fgArgTabEntryPtr curArgTabEntry = compiler->gtArgEntryByNode(call, arg);
+ assert(curArgTabEntry);
+ stackArgBytes += curArgTabEntry->numSlots * TARGET_POINTER_SIZE;
+ }
+ else
#endif // defined(_TARGET_X86_)
-#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
- if (genActualType(arg->TypeGet()) == TYP_STRUCT)
+#ifdef FEATURE_PUT_STRUCT_ARG_STK
+ if (genActualType(arg->TypeGet()) == TYP_STRUCT)
{
assert(arg->OperGet() == GT_PUTARG_STK);
- GenTreeObj* obj = arg->gtGetOp1()->AsObj();
- stackArgBytes = compiler->info.compCompHnd->getClassSize(obj->gtClass);
+ GenTreeObj* obj = arg->gtGetOp1()->AsObj();
+ unsigned argBytes = (unsigned)roundUp(obj->gtBlkSize, TARGET_POINTER_SIZE);
+#ifdef DEBUG
+ fgArgTabEntryPtr curArgTabEntry = compiler->gtArgEntryByNode(call, arg);
+ assert((curArgTabEntry->numSlots * TARGET_POINTER_SIZE) == argBytes);
+#endif // DEBUG
+ stackArgBytes += argBytes;
}
else
-#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+ {
+#endif // FEATURE_PUT_STRUCT_ARG_STK
stackArgBytes += genTypeSize(genActualType(arg->TypeGet()));
+ }
}
args = args->gtOp.gtOp2;
}
@@ -6098,10 +4939,7 @@ void CodeGen::genCallInstruction(GenTreePtr node)
assert(target != nullptr);
genConsumeReg(target);
- if (target->gtRegNum != REG_RAX)
- {
- inst_RV_RV(INS_mov, REG_RAX, target->gtRegNum);
- }
+ genCopyRegIfNeeded(target, REG_RAX);
return;
}
@@ -6141,7 +4979,6 @@ void CodeGen::genCallInstruction(GenTreePtr node)
bool fPossibleSyncHelperCall = false;
CorInfoHelpFunc helperNum = CORINFO_HELP_UNDEF;
-#ifdef DEBUGGING_SUPPORT
// We need to propagate the IL offset information to the call instruction, so we can emit
// an IL to native mapping record for the call, to support managed return value debugging.
// We don't want tail call helper calls that were converted from normal calls to get a record,
@@ -6150,7 +4987,6 @@ void CodeGen::genCallInstruction(GenTreePtr node)
{
(void)compiler->genCallSite2ILOffsetMap->Lookup(call, &ilOffset);
}
-#endif // DEBUGGING_SUPPORT
#if defined(_TARGET_X86_)
// If the callee pops the arguments, we pass a positive value as the argSize, and the emitter will
@@ -6167,7 +5003,38 @@ void CodeGen::genCallInstruction(GenTreePtr node)
if (target != nullptr)
{
- if (target->isContainedIndir())
+#ifdef _TARGET_X86_
+ if (call->IsVirtualStub() && (call->gtCallType == CT_INDIRECT))
+ {
+ // On x86, we need to generate a very specific pattern for indirect VSD calls:
+ //
+ // 3-byte nop
+ // call dword ptr [eax]
+ //
+ // Where EAX is also used as an argument to the stub dispatch helper. Make
+ // sure that the call target address is computed into EAX in this case.
+
+ assert(REG_VIRTUAL_STUB_PARAM == REG_VIRTUAL_STUB_TARGET);
+
+ assert(target->isContainedIndir());
+ assert(target->OperGet() == GT_IND);
+
+ GenTree* addr = target->AsIndir()->Addr();
+ assert(!addr->isContained());
+
+ genConsumeReg(addr);
+ genCopyRegIfNeeded(addr, REG_VIRTUAL_STUB_TARGET);
+
+ getEmitter()->emitIns_Nop(3);
+ getEmitter()->emitIns_Call(emitter::EmitCallType(emitter::EC_INDIR_ARD), methHnd,
+ INDEBUG_LDISASM_COMMA(sigInfo) nullptr, argSizeForEmitter,
+ retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize),
+ gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur,
+ ilOffset, REG_VIRTUAL_STUB_TARGET, REG_NA, 1, 0);
+ }
+ else
+#endif
+ if (target->isContainedIndir())
{
if (target->AsIndir()->HasBase() && target->AsIndir()->Base()->isContainedIntOrIImmed())
{
@@ -6977,8 +5844,6 @@ void CodeGen::genCompareLong(GenTreePtr treeNode)
genConsumeOperands(tree);
- assert(targetReg != REG_NA);
-
GenTreePtr loOp1 = op1->gtGetOp1();
GenTreePtr hiOp1 = op1->gtGetOp2();
GenTreePtr loOp2 = op2->gtGetOp1();
@@ -6992,6 +5857,12 @@ void CodeGen::genCompareLong(GenTreePtr treeNode)
// Emit the compare instruction
getEmitter()->emitInsBinary(ins, cmpAttr, hiOp1, hiOp2);
+ // If the result is not being materialized in a register, we're done.
+ if (targetReg == REG_NA)
+ {
+ return;
+ }
+
// Generate the first jump for the high compare
CompareKind compareKind = ((tree->gtFlags & GTF_UNSIGNED) != 0) ? CK_UNSIGNED : CK_SIGNED;
@@ -7015,10 +5886,6 @@ void CodeGen::genCompareLong(GenTreePtr treeNode)
emitJumpKind jumpKindLo = genJumpKindForOper(tree->gtOper, CK_UNSIGNED);
inst_SET(jumpKindLo, targetReg);
- // Set the higher bytes to 0
- inst_RV_RV(ins_Move_Extend(TYP_UBYTE, true), targetReg, targetReg, TYP_UBYTE, emitTypeSize(TYP_UBYTE));
- genProduceReg(tree);
-
inst_JMP(EJ_jmp, labelFinal);
// Define the label for hi jump target here. If we have jumped here, we want to set
@@ -7027,11 +5894,10 @@ void CodeGen::genCompareLong(GenTreePtr treeNode)
genDefineTempLabel(labelHi);
inst_SET(genJumpKindForOper(tree->gtOper, compareKind), targetReg);
+ genDefineTempLabel(labelFinal);
// Set the higher bytes to 0
inst_RV_RV(ins_Move_Extend(TYP_UBYTE, true), targetReg, targetReg, TYP_UBYTE, emitTypeSize(TYP_UBYTE));
genProduceReg(tree);
-
- genDefineTempLabel(labelFinal);
}
else
{
@@ -7062,152 +5928,6 @@ void CodeGen::genCompareLong(GenTreePtr treeNode)
genProduceReg(tree);
}
}
-
-//------------------------------------------------------------------------
-// genJTrueLong: Generate code for comparing two longs on x86 for the case where the result
-// is not manifested in a register.
-//
-// Arguments:
-// treeNode - the compare tree
-//
-// Return Value:
-// None.
-// Comments:
-// For long compares, we need to compare the high parts of operands first, then the low parts.
-// We only have to do the low compare if the high parts of the operands are equal.
-//
-// In the case where the result of a rel-op is not realized in a register, we generate:
-//
-// Opcode x86 equivalent Comment
-// ------ -------------- -------
-//
-// GT_LT; unsigned cmp hiOp1,hiOp2
-// jb trueLabel
-// ja falseLabel
-// cmp loOp1,loOp2
-// jb trueLabel
-// falseLabel:
-//
-// GT_LE; unsigned cmp hiOp1,hiOp2
-// jb trueLabel
-// ja falseLabel
-// cmp loOp1,loOp2
-// jbe trueLabel
-// falseLabel:
-//
-// GT_GT; unsigned cmp hiOp1,hiOp2
-// ja trueLabel
-// jb falseLabel
-// cmp loOp1,loOp2
-// ja trueLabel
-// falseLabel:
-//
-// GT_GE; unsigned cmp hiOp1,hiOp2
-// ja trueLabel
-// jb falseLabel
-// cmp loOp1,loOp2
-// jae trueLabel
-// falseLabel:
-//
-// GT_LT; signed cmp hiOp1,hiOp2
-// jl trueLabel
-// jg falseLabel
-// cmp loOp1,loOp2
-// jb trueLabel
-// falseLabel:
-//
-// GT_LE; signed cmp hiOp1,hiOp2
-// jl trueLabel
-// jg falseLabel
-// cmp loOp1,loOp2
-// jbe trueLabel
-// falseLabel:
-//
-// GT_GT; signed cmp hiOp1,hiOp2
-// jg trueLabel
-// jl falseLabel
-// cmp loOp1,loOp2
-// ja trueLabel
-// falseLabel:
-//
-// GT_GE; signed cmp hiOp1,hiOp2
-// jg trueLabel
-// jl falseLabel
-// cmp loOp1,loOp2
-// jae trueLabel
-// falseLabel:
-//
-// GT_EQ; cmp hiOp1,hiOp2
-// jne falseLabel
-// cmp loOp1,loOp2
-// je trueLabel
-// falseLabel:
-//
-// GT_NE; cmp hiOp1,hiOp2
-// jne labelTrue
-// cmp loOp1,loOp2
-// jne trueLabel
-// falseLabel:
-//
-// TODO-X86-CQ: Check if hi or lo parts of op2 are 0 and change the compare to a test.
-void CodeGen::genJTrueLong(GenTreePtr treeNode)
-{
- assert(treeNode->OperIsCompare());
-
- GenTreeOp* tree = treeNode->AsOp();
- GenTreePtr op1 = tree->gtOp1;
- GenTreePtr op2 = tree->gtOp2;
-
- assert(varTypeIsLong(op1->TypeGet()));
- assert(varTypeIsLong(op2->TypeGet()));
-
- regNumber targetReg = treeNode->gtRegNum;
-
- assert(targetReg == REG_NA);
-
- GenTreePtr loOp1 = op1->gtGetOp1();
- GenTreePtr hiOp1 = op1->gtGetOp2();
- GenTreePtr loOp2 = op2->gtGetOp1();
- GenTreePtr hiOp2 = op2->gtGetOp2();
-
- // Emit the compare instruction
- getEmitter()->emitInsBinary(INS_cmp, EA_4BYTE, hiOp1, hiOp2);
-
- // Generate the first jump for the high compare
- CompareKind compareKind = ((tree->gtFlags & GTF_UNSIGNED) != 0) ? CK_UNSIGNED : CK_SIGNED;
-
- // TODO-X86-CQ: If the next block is a BBJ_ALWAYS, we can set falseLabel = compiler->compCurBB->bbNext->bbJumpDest.
- BasicBlock* falseLabel = genCreateTempLabel();
-
- emitJumpKind jumpKindHi[2];
-
- // Generate the jumps for the high compare
- genJumpKindsForTreeLongHi(tree, jumpKindHi);
-
- BasicBlock* trueLabel = compiler->compCurBB->bbJumpDest;
-
- if (jumpKindHi[0] != EJ_NONE)
- {
- inst_JMP(jumpKindHi[0], trueLabel);
- }
-
- if (jumpKindHi[1] != EJ_NONE)
- {
- inst_JMP(jumpKindHi[1], falseLabel);
- }
-
- // The low jump must be unsigned
- emitJumpKind jumpKindLo = genJumpKindForOper(tree->gtOper, CK_UNSIGNED);
-
- // Emit the comparison and the jump to the trueLabel
- getEmitter()->emitInsBinary(INS_cmp, EA_4BYTE, loOp1, loOp2);
-
- inst_JMP(jumpKindLo, trueLabel);
-
- // Generate falseLabel, which is the false path. We will jump here if the high compare is false
- // or fall through if the low compare is false.
- genDefineTempLabel(falseLabel);
-}
#endif //! defined(_TARGET_64BIT_)
//------------------------------------------------------------------------
@@ -7339,19 +6059,77 @@ void CodeGen::genCompareInt(GenTreePtr treeNode)
{
assert(treeNode->OperIsCompare());
- GenTreeOp* tree = treeNode->AsOp();
- GenTreePtr op1 = tree->gtOp1;
- GenTreePtr op2 = tree->gtOp2;
- var_types op1Type = op1->TypeGet();
- var_types op2Type = op2->TypeGet();
+ GenTreeOp* tree = treeNode->AsOp();
+ GenTreePtr op1 = tree->gtOp1;
+ GenTreePtr op2 = tree->gtOp2;
+ var_types op1Type = op1->TypeGet();
+ var_types op2Type = op2->TypeGet();
+ regNumber targetReg = treeNode->gtRegNum;
+
+ // Case of op1 == 0 or op1 != 0:
+ // Optimize generation of 'test' instruction if op1 sets flags.
+ //
+ // Note that if LSRA has inserted any GT_RELOAD/GT_COPY before
+ // op1, it will not modify the flags set by codegen of op1.
+ // Similarly op1 could also be reg-optional at its use and
+ // it was spilled after producing its result in a register.
+ // Spill code too will not modify the flags set by op1.
+ GenTree* realOp1 = op1->gtSkipReloadOrCopy();
+ if (realOp1->gtSetFlags())
+ {
+ // op1 must set ZF and SF flags
+ assert(realOp1->gtSetZSFlags());
+
+ // Must be (in)equality against zero.
+ assert(tree->OperGet() == GT_EQ || tree->OperGet() == GT_NE);
+ assert(op2->IsIntegralConst(0));
+ assert(op2->isContained());
+
+ // Just consume the operands
+ genConsumeOperands(tree);
+
+ // No need to generate test instruction since
+ // op1 sets flags
+
+ // Are we evaluating this into a register?
+ if (targetReg != REG_NA)
+ {
+ genSetRegToCond(targetReg, tree);
+ genProduceReg(tree);
+ }
+
+ return;
+ }
+
+#ifdef FEATURE_SIMD
+ // If we have GT_JTRUE(GT_EQ/NE(GT_SIMD((in)Equality, v1, v2), true/false)),
+ // then we don't need to generate code for GT_EQ/GT_NE, since SIMD (in)Equality intrinsic
+ // would set or clear Zero flag.
+ if ((targetReg == REG_NA) && (tree->OperGet() == GT_EQ || tree->OperGet() == GT_NE))
+ {
+ // Is it a SIMD (in)Equality that doesn't need to materialize result into a register?
+ if ((op1->gtRegNum == REG_NA) && op1->IsSIMDEqualityOrInequality())
+ {
+ // Must be comparing against true or false.
+ assert(op2->IsIntegralConst(0) || op2->IsIntegralConst(1));
+ assert(op2->isContainedIntOrIImmed());
+
+ // In this case SIMD (in)Equality will set or clear
+ // Zero flag, based on which GT_JTRUE would generate
+ // the right conditional jump.
+ return;
+ }
+ }
+#endif // FEATURE_SIMD
genConsumeOperands(tree);
instruction ins;
emitAttr cmpAttr;
- regNumber targetReg = treeNode->gtRegNum;
- assert(!op1->isContainedIntOrIImmed()); // We no longer support swapping op1 and op2 to generate cmp reg, imm
+ // TODO-CQ: We should be able to support swapping op1 and op2 to generate cmp reg, imm.
+ // https://github.com/dotnet/coreclr/issues/7270
+ assert(!op1->isContainedIntOrIImmed()); // We no longer support
assert(!varTypeIsFloating(op2Type));
#ifdef _TARGET_X86_
@@ -7387,7 +6165,7 @@ void CodeGen::genCompareInt(GenTreePtr treeNode)
{
// Do we have a short compare against a constant in op2?
//
- // We checked for this case in LowerCmp() and if we can perform a small
+ // We checked for this case in TreeNodeInfoInitCmp() and if we can perform a small
// compare immediate we labeled this compare with a GTF_RELOP_SMALL
// and for unsigned small non-equality compares the GTF_UNSIGNED flag.
//
@@ -7442,12 +6220,11 @@ void CodeGen::genCompareInt(GenTreePtr treeNode)
if (op1->isContained())
{
// op1 can be a contained memory op
- // or the special contained GT_AND that we created in Lowering::LowerCmp()
+ // or the special contained GT_AND that we created in Lowering::TreeNodeInfoInitCmp()
//
- if ((op1->OperGet() == GT_AND))
+ if ((op1->OperGet() == GT_AND) && op1->gtGetOp2()->isContainedIntOrIImmed() &&
+ ((tree->OperGet() == GT_EQ) || (tree->OperGet() == GT_NE)))
{
- noway_assert(op1->gtOp.gtOp2->isContainedIntOrIImmed());
-
ins = INS_test; // we will generate "test andOp1, andOp2CnsVal"
op2 = op1->gtOp.gtOp2; // must assign op2 before we overwrite op1
op1 = op1->gtOp.gtOp1; // overwrite op1
@@ -7561,6 +6338,93 @@ void CodeGen::genSetRegToCond(regNumber dstReg, GenTreePtr tree)
}
}
+#if !defined(_TARGET_64BIT_)
+//------------------------------------------------------------------------
+// genIntToIntCast: Generate code for long to int casts on x86.
+//
+// Arguments:
+// cast - The GT_CAST node
+//
+// Return Value:
+// None.
+//
+// Assumptions:
+// The cast node and its sources (via GT_LONG) must have been assigned registers.
+// The destination cannot be a floating point type or a small integer type.
+//
+void CodeGen::genLongToIntCast(GenTree* cast)
+{
+ assert(cast->OperGet() == GT_CAST);
+
+ GenTree* src = cast->gtGetOp1();
+ noway_assert(src->OperGet() == GT_LONG);
+
+ genConsumeRegs(src);
+
+ var_types srcType = ((cast->gtFlags & GTF_UNSIGNED) != 0) ? TYP_ULONG : TYP_LONG;
+ var_types dstType = cast->CastToType();
+ regNumber loSrcReg = src->gtGetOp1()->gtRegNum;
+ regNumber hiSrcReg = src->gtGetOp2()->gtRegNum;
+ regNumber dstReg = cast->gtRegNum;
+
+ assert((dstType == TYP_INT) || (dstType == TYP_UINT));
+ assert(genIsValidIntReg(loSrcReg));
+ assert(genIsValidIntReg(hiSrcReg));
+ assert(genIsValidIntReg(dstReg));
+
+ if (cast->gtOverflow())
+ {
+ //
+ // Generate an overflow check for [u]long to [u]int casts:
+ //
+ // long -> int - check if the upper 33 bits are all 0 or all 1
+ //
+ // ulong -> int - check if the upper 33 bits are all 0
+ //
+ // long -> uint - check if the upper 32 bits are all 0
+ // ulong -> uint - check if the upper 32 bits are all 0
+ //
+
+ if ((srcType == TYP_LONG) && (dstType == TYP_INT))
+ {
+ BasicBlock* allOne = genCreateTempLabel();
+ BasicBlock* success = genCreateTempLabel();
+
+ inst_RV_RV(INS_test, loSrcReg, loSrcReg, TYP_INT, EA_4BYTE);
+ inst_JMP(EJ_js, allOne);
+
+ inst_RV_RV(INS_test, hiSrcReg, hiSrcReg, TYP_INT, EA_4BYTE);
+ genJumpToThrowHlpBlk(EJ_jne, SCK_OVERFLOW);
+ inst_JMP(EJ_jmp, success);
+
+ genDefineTempLabel(allOne);
+ inst_RV_IV(INS_cmp, hiSrcReg, -1, EA_4BYTE);
+ genJumpToThrowHlpBlk(EJ_jne, SCK_OVERFLOW);
+
+ genDefineTempLabel(success);
+ }
+ else
+ {
+ if ((srcType == TYP_ULONG) && (dstType == TYP_INT))
+ {
+ inst_RV_RV(INS_test, loSrcReg, loSrcReg, TYP_INT, EA_4BYTE);
+ genJumpToThrowHlpBlk(EJ_js, SCK_OVERFLOW);
+ }
+
+ inst_RV_RV(INS_test, hiSrcReg, hiSrcReg, TYP_INT, EA_4BYTE);
+ genJumpToThrowHlpBlk(EJ_jne, SCK_OVERFLOW);
+ }
+ }
+
+ if (dstReg != loSrcReg)
+ {
+ inst_RV_RV(INS_mov, dstReg, loSrcReg, TYP_INT, EA_4BYTE);
+ }
+
+ genProduceReg(cast);
+}
+#endif
+
//------------------------------------------------------------------------
// genIntToIntCast: Generate code for an integer cast
// This method handles integer overflow checking casts
@@ -7584,13 +6448,22 @@ void CodeGen::genIntToIntCast(GenTreePtr treeNode)
{
assert(treeNode->OperGet() == GT_CAST);
- GenTreePtr castOp = treeNode->gtCast.CastOp();
- regNumber targetReg = treeNode->gtRegNum;
- regNumber sourceReg = castOp->gtRegNum;
- var_types dstType = treeNode->CastToType();
- bool isUnsignedDst = varTypeIsUnsigned(dstType);
- var_types srcType = genActualType(castOp->TypeGet());
- bool isUnsignedSrc = varTypeIsUnsigned(srcType);
+ GenTreePtr castOp = treeNode->gtCast.CastOp();
+ var_types srcType = genActualType(castOp->TypeGet());
+
+#if !defined(_TARGET_64BIT_)
+ if (varTypeIsLong(srcType))
+ {
+ genLongToIntCast(treeNode);
+ return;
+ }
+#endif // !defined(_TARGET_64BIT_)
+
+ regNumber targetReg = treeNode->gtRegNum;
+ regNumber sourceReg = castOp->gtRegNum;
+ var_types dstType = treeNode->CastToType();
+ bool isUnsignedDst = varTypeIsUnsigned(dstType);
+ bool isUnsignedSrc = varTypeIsUnsigned(srcType);
// if necessary, force the srcType to unsigned when the GT_UNSIGNED flag is set
if (!isUnsignedSrc && (treeNode->gtFlags & GTF_UNSIGNED) != 0)
@@ -7948,7 +6821,7 @@ void CodeGen::genFloatToFloatCast(GenTreePtr treeNode)
assert(varTypeIsFloating(srcType) && varTypeIsFloating(dstType));
genConsumeOperands(treeNode->AsOp());
- if (srcType == dstType && targetReg == op1->gtRegNum)
+ if (srcType == dstType && (!op1->isContained() && (targetReg == op1->gtRegNum)))
{
// source and destinations types are the same and also reside in the same register.
// we just need to consume and produce the reg in this case.
@@ -7999,7 +6872,8 @@ void CodeGen::genIntToFloatCast(GenTreePtr treeNode)
assert(!varTypeIsFloating(srcType) && varTypeIsFloating(dstType));
#if !defined(_TARGET_64BIT_)
- NYI_IF(varTypeIsLong(srcType), "Conversion from long to float");
+ // We expect morph to replace long to float/double casts with helper calls
+ noway_assert(!varTypeIsLong(srcType));
#endif // !defined(_TARGET_64BIT_)
// Since xarch emitter doesn't handle reporting gc-info correctly while casting away gc-ness we
@@ -8225,27 +7099,27 @@ void CodeGen::genCkfinite(GenTreePtr treeNode)
//
// For TYP_DOUBLE, we'll generate (for targetReg != op1->gtRegNum):
// movaps targetReg, op1->gtRegNum
- // shufps targetReg, targetReg, 0xB1 // WZYX => ZWXY
- // mov_xmm2i tmpReg, targetReg // tmpReg <= Y
+ // shufps targetReg, targetReg, 0xB1 // WZYX => ZWXY
+ // mov_xmm2i tmpReg, targetReg // tmpReg <= Y
// and tmpReg, <mask>
// cmp tmpReg, <mask>
// je <throw block>
// movaps targetReg, op1->gtRegNum // copy the value again, instead of un-shuffling it
//
// For TYP_DOUBLE with (targetReg == op1->gtRegNum):
- // shufps targetReg, targetReg, 0xB1 // WZYX => ZWXY
- // mov_xmm2i tmpReg, targetReg // tmpReg <= Y
+ // shufps targetReg, targetReg, 0xB1 // WZYX => ZWXY
+ // mov_xmm2i tmpReg, targetReg // tmpReg <= Y
// and tmpReg, <mask>
// cmp tmpReg, <mask>
// je <throw block>
- // shufps targetReg, targetReg, 0xB1 // ZWXY => WZYX
+ // shufps targetReg, targetReg, 0xB1 // ZWXY => WZYX
//
// For TYP_FLOAT, it's the same as _TARGET_64BIT_:
- // mov_xmm2i tmpReg, targetReg // tmpReg <= low 32 bits
+ // mov_xmm2i tmpReg, targetReg // tmpReg <= low 32 bits
// and tmpReg, <mask>
// cmp tmpReg, <mask>
// je <throw block>
- // movaps targetReg, op1->gtRegNum // only if targetReg != op1->gtRegNum
+ // movaps targetReg, op1->gtRegNum // only if targetReg != op1->gtRegNum
regNumber copyToTmpSrcReg; // The register we'll copy to the integer temp.
@@ -8613,7 +7487,7 @@ unsigned CodeGen::getBaseVarForPutArgStk(GenTreePtr treeNode)
#if FEATURE_FIXED_OUT_ARGS
baseVarNum = compiler->lvaOutgoingArgSpaceVar;
#else // !FEATURE_FIXED_OUT_ARGS
- NYI_X86("Stack args for x86/RyuJIT");
+ assert(!"No BaseVarForPutArgStk on x86");
baseVarNum = BAD_VAR_NUM;
#endif // !FEATURE_FIXED_OUT_ARGS
}
@@ -8621,8 +7495,74 @@ unsigned CodeGen::getBaseVarForPutArgStk(GenTreePtr treeNode)
return baseVarNum;
}
-//--------------------------------------------------------------------- //
-// genPutStructArgStk - generate code for passing an arg on the stack.
+#ifdef _TARGET_X86_
+//---------------------------------------------------------------------
+// adjustStackForPutArgStk:
+// adjust the stack pointer for a putArgStk node if necessary.
+//
+// Arguments:
+// putArgStk - the putArgStk node.
+//
+// Returns: true if the stack pointer was adjusted; false otherwise.
+//
+bool CodeGen::genAdjustStackForPutArgStk(GenTreePutArgStk* putArgStk)
+{
+#ifdef FEATURE_SIMD
+ if (varTypeIsSIMD(putArgStk))
+ {
+ const unsigned argSize = genTypeSize(putArgStk);
+ inst_RV_IV(INS_sub, REG_SPBASE, argSize, EA_PTRSIZE);
+ genStackLevel += argSize;
+ m_pushStkArg = false;
+ return true;
+ }
+#endif // FEATURE_SIMD
+
+ const unsigned argSize = putArgStk->getArgSize();
+
+ // If the gtPutArgStkKind is one of the push types, we do not pre-adjust the stack.
+ // This is set in Lowering, and is true if and only if:
+ // - This argument contains any GC pointers OR
+ // - It is a GT_FIELD_LIST OR
+ // - It is less than 16 bytes in size.
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef DEBUG
+ switch (putArgStk->gtPutArgStkKind)
+ {
+ case GenTreePutArgStk::Kind::RepInstr:
+ case GenTreePutArgStk::Kind::Unroll:
+ assert((putArgStk->gtNumberReferenceSlots == 0) && (putArgStk->gtGetOp1()->OperGet() != GT_FIELD_LIST) &&
+ (argSize >= 16));
+ break;
+ case GenTreePutArgStk::Kind::Push:
+ case GenTreePutArgStk::Kind::PushAllSlots:
+ assert((putArgStk->gtNumberReferenceSlots != 0) || (putArgStk->gtGetOp1()->OperGet() == GT_FIELD_LIST) ||
+ (argSize < 16));
+ break;
+ case GenTreePutArgStk::Kind::Invalid:
+ default:
+ assert(!"Uninitialized GenTreePutArgStk::Kind");
+ break;
+ }
+#endif // DEBUG
+
+ if (putArgStk->isPushKind())
+ {
+ m_pushStkArg = true;
+ return false;
+ }
+ else
+ {
+ m_pushStkArg = false;
+ inst_RV_IV(INS_sub, REG_SPBASE, argSize, EA_PTRSIZE);
+ genStackLevel += argSize;
+ return true;
+ }
+}
+
+//---------------------------------------------------------------------
+// genPutArgStkFieldList - generate code for passing an arg on the stack.
//
// Arguments
// treeNode - the GT_PUTARG_STK node
@@ -8631,25 +7571,224 @@ unsigned CodeGen::getBaseVarForPutArgStk(GenTreePtr treeNode)
// Return value:
// None
//
-void CodeGen::genPutArgStk(GenTreePtr treeNode)
+void CodeGen::genPutArgStkFieldList(GenTreePutArgStk* putArgStk)
{
- var_types targetType = treeNode->TypeGet();
+ GenTreeFieldList* const fieldList = putArgStk->gtOp1->AsFieldList();
+ assert(fieldList != nullptr);
+
+ // Set m_pushStkArg and pre-adjust the stack if necessary.
+ const bool preAdjustedStack = genAdjustStackForPutArgStk(putArgStk);
+ // For now, we only support the "push" case; we will push a full slot for the first field of each slot
+ // within the struct.
+ assert((putArgStk->isPushKind()) && !preAdjustedStack && m_pushStkArg);
+
+ // If we have pre-adjusted the stack and are simply storing the fields in order) set the offset to 0.
+ // (Note that this mode is not currently being used.)
+ // If we are pushing the arguments (i.e. we have not pre-adjusted the stack), then we are pushing them
+ // in reverse order, so we start with the current field offset at the size of the struct arg (which must be
+ // a multiple of the target pointer size).
+ unsigned currentOffset = (preAdjustedStack) ? 0 : putArgStk->getArgSize();
+ unsigned prevFieldOffset = currentOffset;
+ regNumber tmpReg = REG_NA;
+ if (putArgStk->gtRsvdRegs != RBM_NONE)
+ {
+ assert(genCountBits(putArgStk->gtRsvdRegs) == 1);
+ tmpReg = genRegNumFromMask(putArgStk->gtRsvdRegs);
+ assert(genIsValidIntReg(tmpReg));
+ }
+ for (GenTreeFieldList* current = fieldList; current != nullptr; current = current->Rest())
+ {
+ GenTree* const fieldNode = current->Current();
+ const unsigned fieldOffset = current->gtFieldOffset;
+ var_types fieldType = current->gtFieldType;
+
+ // Long-typed nodes should have been handled by the decomposition pass, and lowering should have sorted the
+ // field list in descending order by offset.
+ assert(!varTypeIsLong(fieldType));
+ assert(fieldOffset <= prevFieldOffset);
+
+ // Consume the register, if any, for this field. Note that genConsumeRegs() will appropriately
+ // update the liveness info for a lclVar that has been marked RegOptional, which hasn't been
+ // assigned a register, and which is therefore contained.
+ // Unlike genConsumeReg(), it handles the case where no registers are being consumed.
+ genConsumeRegs(fieldNode);
+ regNumber argReg = fieldNode->isContainedSpillTemp() ? REG_NA : fieldNode->gtRegNum;
+
+ // If the field is slot-like, we can use a push instruction to store the entire register no matter the type.
+ //
+ // The GC encoder requires that the stack remain 4-byte aligned at all times. Round the adjustment up
+ // to the next multiple of 4. If we are going to generate a `push` instruction, the adjustment must
+ // not require rounding.
+ // NOTE: if the field is of GC type, we must use a push instruction, since the emitter is not otherwise
+ // able to detect stores into the outgoing argument area of the stack on x86.
+ const bool fieldIsSlot = ((fieldOffset % 4) == 0) && ((prevFieldOffset - fieldOffset) >= 4);
+ int adjustment = roundUp(currentOffset - fieldOffset, 4);
+ if (fieldIsSlot)
+ {
+ fieldType = genActualType(fieldType);
+ unsigned pushSize = genTypeSize(fieldType);
+ assert((pushSize % 4) == 0);
+ adjustment -= pushSize;
+ while (adjustment != 0)
+ {
+ inst_IV(INS_push, 0);
+ currentOffset -= pushSize;
+ genStackLevel += pushSize;
+ adjustment -= pushSize;
+ }
+ m_pushStkArg = true;
+ }
+ else
+ {
+ m_pushStkArg = false;
+ // We always "push" floating point fields (i.e. they are full slot values that don't
+ // require special handling).
+ assert(varTypeIsIntegralOrI(fieldNode));
+ // If we can't push this field, it needs to be in a register so that we can store
+ // it to the stack location.
+ assert(tmpReg != REG_NA);
+ if (adjustment != 0)
+ {
+ // This moves the stack pointer to fieldOffset.
+ // For this case, we must adjust the stack and generate stack-relative stores rather than pushes.
+ // Adjust the stack pointer to the next slot boundary.
+ inst_RV_IV(INS_sub, REG_SPBASE, adjustment, EA_PTRSIZE);
+ currentOffset -= adjustment;
+ genStackLevel += adjustment;
+ }
+
+ // Does it need to be in a byte register?
+ // If so, we'll use tmpReg, which must have been allocated as a byte register.
+ // If it's already in a register, but not a byteable one, then move it.
+ if (varTypeIsByte(fieldType) && ((argReg == REG_NA) || ((genRegMask(argReg) & RBM_BYTE_REGS) == 0)))
+ {
+ noway_assert((genRegMask(tmpReg) & RBM_BYTE_REGS) != 0);
+ if (argReg != REG_NA)
+ {
+ inst_RV_RV(INS_mov, tmpReg, argReg, fieldType);
+ argReg = tmpReg;
+ }
+ }
+ }
+
+ if (argReg == REG_NA)
+ {
+ if (m_pushStkArg)
+ {
+ if (fieldNode->isContainedSpillTemp())
+ {
+ assert(fieldNode->IsRegOptional());
+ TempDsc* tmp = getSpillTempDsc(fieldNode);
+ getEmitter()->emitIns_S(INS_push, emitActualTypeSize(fieldNode->TypeGet()), tmp->tdTempNum(), 0);
+ compiler->tmpRlsTemp(tmp);
+ }
+ else
+ {
+ assert(varTypeIsIntegralOrI(fieldNode));
+ switch (fieldNode->OperGet())
+ {
+ case GT_LCL_VAR:
+ inst_TT(INS_push, fieldNode, 0, 0, emitActualTypeSize(fieldNode->TypeGet()));
+ break;
+ case GT_CNS_INT:
+ if (fieldNode->IsIconHandle())
+ {
+ inst_IV_handle(INS_push, fieldNode->gtIntCon.gtIconVal);
+ }
+ else
+ {
+ inst_IV(INS_push, fieldNode->gtIntCon.gtIconVal);
+ }
+ break;
+ default:
+ unreached();
+ }
+ }
+ currentOffset -= TARGET_POINTER_SIZE;
+ genStackLevel += TARGET_POINTER_SIZE;
+ }
+ else
+ {
+ // The stack has been adjusted and we will load the field to tmpReg and then store it on the stack.
+ assert(varTypeIsIntegralOrI(fieldNode));
+ switch (fieldNode->OperGet())
+ {
+ case GT_LCL_VAR:
+ inst_RV_TT(INS_mov, tmpReg, fieldNode);
+ break;
+ case GT_CNS_INT:
+ genSetRegToConst(tmpReg, fieldNode->TypeGet(), fieldNode);
+ break;
+ default:
+ unreached();
+ }
+ genStoreRegToStackArg(fieldType, tmpReg, fieldOffset - currentOffset);
+ }
+ }
+ else
+ {
+ genStoreRegToStackArg(fieldType, argReg, fieldOffset - currentOffset);
+ if (m_pushStkArg)
+ {
+ // We always push a slot-rounded size
+ currentOffset -= genTypeSize(fieldType);
+ }
+ }
+
+ prevFieldOffset = fieldOffset;
+ }
+ if (currentOffset != 0)
+ {
+ // We don't expect padding at the beginning of a struct, but it could happen with explicit layout.
+ inst_RV_IV(INS_sub, REG_SPBASE, currentOffset, EA_PTRSIZE);
+ genStackLevel += currentOffset;
+ }
+}
+#endif // _TARGET_X86_
+
+//---------------------------------------------------------------------
+// genPutArgStk - generate code for passing an arg on the stack.
+//
+// Arguments
+// treeNode - the GT_PUTARG_STK node
+// targetType - the type of the treeNode
+//
+// Return value:
+// None
+//
+void CodeGen::genPutArgStk(GenTreePutArgStk* putArgStk)
+{
+ var_types targetType = putArgStk->TypeGet();
+
#ifdef _TARGET_X86_
- noway_assert(targetType != TYP_STRUCT);
+
+#ifdef FEATURE_SIMD
+ if (targetType == TYP_SIMD12)
+ {
+ genPutArgStkSIMD12(putArgStk);
+ return;
+ }
+#endif // FEATURE_SIMD
+
+ if (varTypeIsStruct(targetType))
+ {
+ (void)genAdjustStackForPutArgStk(putArgStk);
+ genPutStructArgStk(putArgStk);
+ return;
+ }
// The following logic is applicable for x86 arch.
- assert(!varTypeIsFloating(targetType) || (targetType == treeNode->gtGetOp1()->TypeGet()));
+ assert(!varTypeIsFloating(targetType) || (targetType == putArgStk->gtOp1->TypeGet()));
- GenTreePtr data = treeNode->gtOp.gtOp1;
+ GenTreePtr data = putArgStk->gtOp1;
// On a 32-bit target, all of the long arguments have been decomposed into
// a separate putarg_stk for each of the upper and lower halves.
noway_assert(targetType != TYP_LONG);
- int argSize = genTypeSize(genActualType(targetType));
- genStackLevel += argSize;
+ const unsigned argSize = putArgStk->getArgSize();
+ assert((argSize % TARGET_POINTER_SIZE) == 0);
- // TODO-Cleanup: Handle this in emitInsMov() in emitXArch.cpp?
if (data->isContainedIntOrIImmed())
{
if (data->IsIconHandle())
@@ -8660,53 +7799,50 @@ void CodeGen::genPutArgStk(GenTreePtr treeNode)
{
inst_IV(INS_push, data->gtIntCon.gtIconVal);
}
+ genStackLevel += argSize;
}
- else if (data->isContained())
+ else if (data->OperGet() == GT_FIELD_LIST)
{
- NYI_X86("Contained putarg_stk of non-constant");
+ genPutArgStkFieldList(putArgStk);
}
else
{
+ // We should not see any contained nodes that are not immediates.
+ assert(!data->isContained());
genConsumeReg(data);
- if (varTypeIsIntegralOrI(targetType))
- {
- inst_RV(INS_push, data->gtRegNum, targetType);
- }
- else
- {
- // Decrement SP.
- inst_RV_IV(INS_sub, REG_SPBASE, argSize, emitActualTypeSize(TYP_I_IMPL));
- getEmitter()->emitIns_AR_R(ins_Store(targetType), emitTypeSize(targetType), data->gtRegNum, REG_SPBASE, 0);
- }
+ genPushReg(targetType, data->gtRegNum);
}
#else // !_TARGET_X86_
{
- unsigned baseVarNum = getBaseVarForPutArgStk(treeNode);
+ unsigned baseVarNum = getBaseVarForPutArgStk(putArgStk);
#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
if (varTypeIsStruct(targetType))
{
- genPutStructArgStk(treeNode, baseVarNum);
+ m_stkArgVarNum = baseVarNum;
+ m_stkArgOffset = putArgStk->getArgOffset();
+ genPutStructArgStk(putArgStk);
+ m_stkArgVarNum = BAD_VAR_NUM;
return;
}
#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
noway_assert(targetType != TYP_STRUCT);
- assert(!varTypeIsFloating(targetType) || (targetType == treeNode->gtGetOp1()->TypeGet()));
+ assert(!varTypeIsFloating(targetType) || (targetType == putArgStk->gtOp1->TypeGet()));
// Get argument offset on stack.
// Here we cross check that argument offset hasn't changed from lowering to codegen since
// we are storing arg slot number in GT_PUTARG_STK node in lowering phase.
- int argOffset = treeNode->AsPutArgStk()->getArgOffset();
+ int argOffset = putArgStk->getArgOffset();
#ifdef DEBUG
- fgArgTabEntryPtr curArgTabEntry = compiler->gtArgEntryByNode(treeNode->AsPutArgStk()->gtCall, treeNode);
+ fgArgTabEntryPtr curArgTabEntry = compiler->gtArgEntryByNode(putArgStk->gtCall, putArgStk);
assert(curArgTabEntry);
assert(argOffset == (int)curArgTabEntry->slotNum * TARGET_POINTER_SIZE);
#endif
- GenTreePtr data = treeNode->gtGetOp1();
+ GenTreePtr data = putArgStk->gtOp1;
if (data->isContained())
{
@@ -8723,7 +7859,125 @@ void CodeGen::genPutArgStk(GenTreePtr treeNode)
#endif // !_TARGET_X86_
}
-#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+#ifdef _TARGET_X86_
+// genPushReg: Push a register value onto the stack and adjust the stack level
+//
+// Arguments:
+// type - the type of value to be stored
+// reg - the register containing the value
+//
+// Notes:
+// For TYP_LONG, the srcReg must be a floating point register.
+// Otherwise, the register type must be consistent with the given type.
+//
+void CodeGen::genPushReg(var_types type, regNumber srcReg)
+{
+ unsigned size = genTypeSize(type);
+ if (varTypeIsIntegralOrI(type) && type != TYP_LONG)
+ {
+ assert(genIsValidIntReg(srcReg));
+ inst_RV(INS_push, srcReg, type);
+ }
+ else
+ {
+ instruction ins;
+ emitAttr attr = emitTypeSize(type);
+ if (type == TYP_LONG)
+ {
+ // On x86, the only way we can push a TYP_LONG from a register is if it is in an xmm reg.
+ // This is only used when we are pushing a struct from memory to memory, and basically is
+ // handling an 8-byte "chunk", as opposed to strictly a long type.
+ ins = INS_movq;
+ }
+ else
+ {
+ ins = ins_Store(type);
+ }
+ assert(genIsValidFloatReg(srcReg));
+ inst_RV_IV(INS_sub, REG_SPBASE, size, EA_PTRSIZE);
+ getEmitter()->emitIns_AR_R(ins, attr, srcReg, REG_SPBASE, 0);
+ }
+ genStackLevel += size;
+}
+#endif // _TARGET_X86_
+
+#if defined(FEATURE_PUT_STRUCT_ARG_STK)
+// genStoreRegToStackArg: Store a register value into the stack argument area
+//
+// Arguments:
+// type - the type of value to be stored
+// reg - the register containing the value
+// offset - the offset from the base (see Assumptions below)
+//
+// Notes:
+// A type of TYP_STRUCT instructs this method to store a 16-byte chunk
+// at the given offset (i.e. not the full struct).
+//
+// Assumptions:
+// The caller must set the context appropriately before calling this method:
+// - On x64, m_stkArgVarNum must be set according to whether this is a regular or tail call.
+// - On x86, the caller must set m_pushStkArg if this method should push the argument.
+// Otherwise, the argument is stored at the given offset from sp.
+//
+// TODO: In the below code the load and store instructions are for 16 bytes, but the
+// type is EA_8BYTE. The movdqa/u are 16 byte instructions, so it works, but
+// this probably needs to be changed.
+//
+void CodeGen::genStoreRegToStackArg(var_types type, regNumber srcReg, int offset)
+{
+ assert(srcReg != REG_NA);
+ instruction ins;
+ emitAttr attr;
+ unsigned size;
+
+ if (type == TYP_STRUCT)
+ {
+ ins = INS_movdqu;
+ // This should be changed!
+ attr = EA_8BYTE;
+ size = 16;
+ }
+ else
+ {
+#ifdef FEATURE_SIMD
+ if (varTypeIsSIMD(type))
+ {
+ assert(genIsValidFloatReg(srcReg));
+ ins = ins_Store(type); // TODO-CQ: pass 'aligned' correctly
+ }
+ else
+#endif // FEATURE_SIMD
+#ifdef _TARGET_X86_
+ if (type == TYP_LONG)
+ {
+ assert(genIsValidFloatReg(srcReg));
+ ins = INS_movq;
+ }
+ else
+#endif // _TARGET_X86_
+ {
+ assert((varTypeIsFloating(type) && genIsValidFloatReg(srcReg)) ||
+ (varTypeIsIntegralOrI(type) && genIsValidIntReg(srcReg)));
+ ins = ins_Store(type);
+ }
+ attr = emitTypeSize(type);
+ size = genTypeSize(type);
+ }
+
+#ifdef _TARGET_X86_
+ if (m_pushStkArg)
+ {
+ genPushReg(type, srcReg);
+ }
+ else
+ {
+ getEmitter()->emitIns_AR_R(ins, attr, srcReg, REG_SPBASE, offset);
+ }
+#else // !_TARGET_X86_
+ assert(m_stkArgVarNum != BAD_VAR_NUM);
+ getEmitter()->emitIns_S_R(ins, attr, srcReg, m_stkArgVarNum, m_stkArgOffset + offset);
+#endif // !_TARGET_X86_
+}
//---------------------------------------------------------------------
// genPutStructArgStk - generate code for copying a struct arg on the stack by value.
@@ -8731,42 +7985,39 @@ void CodeGen::genPutArgStk(GenTreePtr treeNode)
// it generates the gcinfo as well.
//
// Arguments
-// treeNode - the GT_PUTARG_STK node
-// baseVarNum - the variable number relative to which to put the argument on the stack.
-// For tail calls this is the baseVarNum = 0.
-// For non tail calls this is the outgoingArgSpace.
-//
-// Return value:
-// None
+// putArgStk - the GT_PUTARG_STK node
//
-void CodeGen::genPutStructArgStk(GenTreePtr treeNode, unsigned baseVarNum)
+// Notes:
+// In the case of fixed out args, the caller must have set m_stkArgVarNum to the variable number
+// corresponding to the argument area (where we will put the argument on the stack).
+// For tail calls this is the baseVarNum = 0.
+// For non tail calls this is the outgoingArgSpace.
+void CodeGen::genPutStructArgStk(GenTreePutArgStk* putArgStk)
{
- assert(treeNode->OperGet() == GT_PUTARG_STK);
- assert(baseVarNum != BAD_VAR_NUM);
-
- var_types targetType = treeNode->TypeGet();
+ var_types targetType = putArgStk->TypeGet();
if (varTypeIsSIMD(targetType))
{
- regNumber srcReg = genConsumeReg(treeNode->gtGetOp1());
+ regNumber srcReg = genConsumeReg(putArgStk->gtGetOp1());
assert((srcReg != REG_NA) && (genIsValidFloatReg(srcReg)));
- getEmitter()->emitIns_S_R(ins_Store(targetType), emitTypeSize(targetType), srcReg, baseVarNum,
- treeNode->AsPutArgStk()->getArgOffset());
+ genStoreRegToStackArg(targetType, srcReg, 0);
return;
}
assert(targetType == TYP_STRUCT);
- GenTreePutArgStk* putArgStk = treeNode->AsPutArgStk();
if (putArgStk->gtNumberReferenceSlots == 0)
{
switch (putArgStk->gtPutArgStkKind)
{
- case GenTreePutArgStk::PutArgStkKindRepInstr:
- genStructPutArgRepMovs(putArgStk, baseVarNum);
+ case GenTreePutArgStk::Kind::RepInstr:
+ genStructPutArgRepMovs(putArgStk);
break;
- case GenTreePutArgStk::PutArgStkKindUnroll:
- genStructPutArgUnroll(putArgStk, baseVarNum);
+ case GenTreePutArgStk::Kind::Unroll:
+ genStructPutArgUnroll(putArgStk);
+ break;
+ case GenTreePutArgStk::Kind::Push:
+ genStructPutArgUnroll(putArgStk);
break;
default:
unreached();
@@ -8775,108 +8026,150 @@ void CodeGen::genPutStructArgStk(GenTreePtr treeNode, unsigned baseVarNum)
else
{
// No need to disable GC the way COPYOBJ does. Here the refs are copied in atomic operations always.
+ CLANG_FORMAT_COMMENT_ANCHOR;
- // Consume these registers.
- // They may now contain gc pointers (depending on their type; gcMarkRegPtrVal will "do the right thing").
- genConsumePutStructArgStk(putArgStk, REG_RDI, REG_RSI, REG_NA, baseVarNum);
- GenTreePtr dstAddr = putArgStk;
- GenTreePtr src = putArgStk->gtOp.gtOp1;
- assert(src->OperGet() == GT_OBJ);
- GenTreePtr srcAddr = src->gtGetOp1();
+#ifdef _TARGET_X86_
+ // On x86, any struct that has contains GC references must be stored to the stack using `push` instructions so
+ // that the emitter properly detects the need to update the method's GC information.
+ //
+ // Strictly speaking, it is only necessary to use `push` to store the GC references themselves, so for structs
+ // with large numbers of consecutive non-GC-ref-typed fields, we may be able to improve the code size in the
+ // future.
+ assert(m_pushStkArg);
- unsigned slots = putArgStk->gtNumSlots;
+ GenTree* srcAddr = putArgStk->gtGetOp1()->gtGetOp1();
+ BYTE* gcPtrs = putArgStk->gtGcPtrs;
+ const unsigned numSlots = putArgStk->gtNumSlots;
- // We are always on the stack we don't need to use the write barrier.
- BYTE* gcPtrs = putArgStk->gtGcPtrs;
- unsigned gcPtrCount = putArgStk->gtNumberReferenceSlots;
+ regNumber srcRegNum = srcAddr->gtRegNum;
+ const bool srcAddrInReg = srcRegNum != REG_NA;
- unsigned i = 0;
- unsigned copiedSlots = 0;
- while (i < slots)
+ unsigned srcLclNum = 0;
+ unsigned srcLclOffset = 0;
+ if (srcAddrInReg)
{
- switch (gcPtrs[i])
+ genConsumeReg(srcAddr);
+ }
+ else
+ {
+ assert(srcAddr->OperIsLocalAddr());
+
+ srcLclNum = srcAddr->AsLclVarCommon()->gtLclNum;
+ if (srcAddr->OperGet() == GT_LCL_FLD_ADDR)
{
- case TYPE_GC_NONE:
- // Let's see if we can use rep movsq instead of a sequence of movsq instructions
- // to save cycles and code size.
- {
- unsigned nonGcSlotCount = 0;
+ srcLclOffset = srcAddr->AsLclFld()->gtLclOffs;
+ }
+ }
- do
- {
- nonGcSlotCount++;
- i++;
- } while (i < slots && gcPtrs[i] == TYPE_GC_NONE);
+ for (int i = numSlots - 1; i >= 0; --i)
+ {
+ emitAttr slotAttr;
+ if (gcPtrs[i] == TYPE_GC_NONE)
+ {
+ slotAttr = EA_4BYTE;
+ }
+ else if (gcPtrs[i] == TYPE_GC_REF)
+ {
+ slotAttr = EA_GCREF;
+ }
+ else
+ {
+ assert(gcPtrs[i] == TYPE_GC_BYREF);
+ slotAttr = EA_BYREF;
+ }
- // If we have a very small contiguous non-gc region, it's better just to
- // emit a sequence of movsq instructions
- if (nonGcSlotCount < CPOBJ_NONGC_SLOTS_LIMIT)
- {
- copiedSlots += nonGcSlotCount;
- while (nonGcSlotCount > 0)
- {
- instGen(INS_movsq);
- nonGcSlotCount--;
- }
- }
- else
- {
- getEmitter()->emitIns_R_I(INS_mov, EA_4BYTE, REG_RCX, nonGcSlotCount);
- copiedSlots += nonGcSlotCount;
- instGen(INS_r_movsq);
- }
- }
- break;
+ const unsigned offset = i * 4;
+ if (srcAddrInReg)
+ {
+ getEmitter()->emitIns_AR_R(INS_push, slotAttr, REG_NA, srcRegNum, offset);
+ }
+ else
+ {
+ getEmitter()->emitIns_S(INS_push, slotAttr, srcLclNum, srcLclOffset + offset);
+ }
+ genStackLevel += 4;
+ }
+#else // !defined(_TARGET_X86_)
+
+ // Consume these registers.
+ // They may now contain gc pointers (depending on their type; gcMarkRegPtrVal will "do the right thing").
+ genConsumePutStructArgStk(putArgStk, REG_RDI, REG_RSI, REG_NA);
+
+ const bool srcIsLocal = putArgStk->gtOp1->AsObj()->gtOp1->OperIsLocalAddr();
+ const emitAttr srcAddrAttr = srcIsLocal ? EA_PTRSIZE : EA_BYREF;
+
+#if DEBUG
+ unsigned numGCSlotsCopied = 0;
+#endif // DEBUG
+
+ BYTE* gcPtrs = putArgStk->gtGcPtrs;
+ const unsigned numSlots = putArgStk->gtNumSlots;
+ for (unsigned i = 0; i < numSlots;)
+ {
+ if (gcPtrs[i] == TYPE_GC_NONE)
+ {
+ // Let's see if we can use rep movsp (alias for movsd or movsq for 32 and 64 bits respectively)
+ // instead of a sequence of movsp instructions to save cycles and code size.
+ unsigned adjacentNonGCSlotCount = 0;
+ do
+ {
+ adjacentNonGCSlotCount++;
+ i++;
+ } while ((i < numSlots) && (gcPtrs[i] == TYPE_GC_NONE));
- case TYPE_GC_REF: // Is an object ref
- case TYPE_GC_BYREF: // Is an interior pointer - promote it but don't scan it
+ // If we have a very small contiguous non-ref region, it's better just to
+ // emit a sequence of movsp instructions
+ if (adjacentNonGCSlotCount < CPOBJ_NONGC_SLOTS_LIMIT)
{
- // We have a GC (byref or ref) pointer
- // TODO-Amd64-Unix: Here a better solution (for code size and CQ) would be to use movsq instruction,
- // but the logic for emitting a GC info record is not available (it is internal for the emitter
- // only.) See emitGCVarLiveUpd function. If we could call it separately, we could do
- // instGen(INS_movsq); and emission of gc info.
-
- var_types memType;
- if (gcPtrs[i] == TYPE_GC_REF)
- {
- memType = TYP_REF;
- }
- else
+ for (; adjacentNonGCSlotCount > 0; adjacentNonGCSlotCount--)
{
- assert(gcPtrs[i] == TYPE_GC_BYREF);
- memType = TYP_BYREF;
+ instGen(INS_movsp);
}
+ }
+ else
+ {
+ getEmitter()->emitIns_R_I(INS_mov, EA_4BYTE, REG_RCX, adjacentNonGCSlotCount);
+ instGen(INS_r_movsp);
+ }
+ }
+ else
+ {
+ assert((gcPtrs[i] == TYPE_GC_REF) || (gcPtrs[i] == TYPE_GC_BYREF));
+
+ // We have a GC (byref or ref) pointer
+ // TODO-Amd64-Unix: Here a better solution (for code size and CQ) would be to use movsp instruction,
+ // but the logic for emitting a GC info record is not available (it is internal for the emitter
+ // only.) See emitGCVarLiveUpd function. If we could call it separately, we could do
+ // instGen(INS_movsp); and emission of gc info.
- getEmitter()->emitIns_R_AR(ins_Load(memType), emitTypeSize(memType), REG_RCX, REG_RSI, 0);
- getEmitter()->emitIns_S_R(ins_Store(memType), emitTypeSize(memType), REG_RCX, baseVarNum,
- ((copiedSlots + putArgStk->gtSlotNum) * TARGET_POINTER_SIZE));
+ var_types memType = (gcPtrs[i] == TYPE_GC_REF) ? TYP_REF : TYP_BYREF;
+ getEmitter()->emitIns_R_AR(ins_Load(memType), emitTypeSize(memType), REG_RCX, REG_RSI, 0);
+ genStoreRegToStackArg(memType, REG_RCX, i * TARGET_POINTER_SIZE);
+
+#ifdef DEBUG
+ numGCSlotsCopied++;
+#endif // DEBUG
+ i++;
+ if (i < numSlots)
+ {
// Source for the copy operation.
// If a LocalAddr, use EA_PTRSIZE - copy from stack.
// If not a LocalAddr, use EA_BYREF - the source location is not on the stack.
- getEmitter()->emitIns_R_I(INS_add, ((src->OperIsLocalAddr()) ? EA_PTRSIZE : EA_BYREF), REG_RSI,
- TARGET_POINTER_SIZE);
+ getEmitter()->emitIns_R_I(INS_add, srcAddrAttr, REG_RSI, TARGET_POINTER_SIZE);
// Always copying to the stack - outgoing arg area
// (or the outgoing arg area of the caller for a tail call) - use EA_PTRSIZE.
getEmitter()->emitIns_R_I(INS_add, EA_PTRSIZE, REG_RDI, TARGET_POINTER_SIZE);
- copiedSlots++;
- gcPtrCount--;
- i++;
}
- break;
-
- default:
- unreached();
- break;
}
}
- assert(gcPtrCount == 0);
+ assert(numGCSlotsCopied == putArgStk->gtNumberReferenceSlots);
+#endif // _TARGET_X86_
}
}
-#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+#endif // defined(FEATURE_PUT_STRUCT_ARG_STK)
/*****************************************************************************
*
@@ -9043,7 +8336,7 @@ void* CodeGen::genCreateAndStoreGCInfoJIT32(unsigned codeSize,
return infoPtr;
}
-#else // !JIT32_GCENCODER
+#else // !JIT32_GCENCODER
void CodeGen::genCreateAndStoreGCInfoX64(unsigned codeSize, unsigned prologSize DEBUGARG(void* codePtr))
{
IAllocator* allowZeroAlloc = new (compiler, CMK_GC) AllowZeroAllocator(compiler->getAllocatorGC());
@@ -9061,7 +8354,6 @@ void CodeGen::genCreateAndStoreGCInfoX64(unsigned codeSize, unsigned prologSize
// Now we can actually use those slot ID's to declare live ranges.
gcInfo.gcMakeRegPtrTable(gcInfoEncoder, codeSize, prologSize, GCInfo::MAKE_REG_PTR_MODE_DO_WORK);
-#if defined(DEBUGGING_SUPPORT)
if (compiler->opts.compDbgEnC)
{
// what we have to preserve is called the "frame header" (see comments in VM\eetwain.cpp)
@@ -9088,7 +8380,6 @@ void CodeGen::genCreateAndStoreGCInfoX64(unsigned codeSize, unsigned prologSize
// frame
gcInfoEncoder->SetSizeOfEditAndContinuePreservedArea(preservedAreaSize);
}
-#endif
gcInfoEncoder->Build();
@@ -9203,18 +8494,33 @@ void CodeGen::genStoreLongLclVar(GenTree* treeNode)
assert(varDsc->TypeGet() == TYP_LONG);
assert(!varDsc->lvPromoted);
GenTreePtr op1 = treeNode->gtOp.gtOp1;
- noway_assert(op1->OperGet() == GT_LONG);
+ noway_assert(op1->OperGet() == GT_LONG || op1->OperGet() == GT_MUL_LONG);
genConsumeRegs(op1);
- // Definitions of register candidates will have been lowered to 2 int lclVars.
- assert(!treeNode->InReg());
+ if (op1->OperGet() == GT_LONG)
+ {
+ // Definitions of register candidates will have been lowered to 2 int lclVars.
+ assert(!treeNode->InReg());
+
+ GenTreePtr loVal = op1->gtGetOp1();
+ GenTreePtr hiVal = op1->gtGetOp2();
+
+ // NYI: Contained immediates.
+ NYI_IF((loVal->gtRegNum == REG_NA) || (hiVal->gtRegNum == REG_NA),
+ "Store of long lclVar with contained immediate");
+
+ emit->emitIns_S_R(ins_Store(TYP_INT), EA_4BYTE, loVal->gtRegNum, lclNum, 0);
+ emit->emitIns_S_R(ins_Store(TYP_INT), EA_4BYTE, hiVal->gtRegNum, lclNum, genTypeSize(TYP_INT));
+ }
+ else if (op1->OperGet() == GT_MUL_LONG)
+ {
+ assert((op1->gtFlags & GTF_MUL_64RSLT) != 0);
- GenTreePtr loVal = op1->gtGetOp1();
- GenTreePtr hiVal = op1->gtGetOp2();
- // NYI: Contained immediates.
- NYI_IF((loVal->gtRegNum == REG_NA) || (hiVal->gtRegNum == REG_NA), "Store of long lclVar with contained immediate");
- emit->emitIns_R_S(ins_Store(TYP_INT), EA_4BYTE, loVal->gtRegNum, lclNum, 0);
- emit->emitIns_R_S(ins_Store(TYP_INT), EA_4BYTE, hiVal->gtRegNum, lclNum, genTypeSize(TYP_INT));
+ // Stack store
+ getEmitter()->emitIns_S_R(ins_Store(TYP_INT), emitTypeSize(TYP_INT), REG_LNGRET_LO, lclNum, 0);
+ getEmitter()->emitIns_S_R(ins_Store(TYP_INT), emitTypeSize(TYP_INT), REG_LNGRET_HI, lclNum,
+ genTypeSize(TYP_INT));
+ }
}
#endif // !defined(_TARGET_64BIT_)
@@ -9332,57 +8638,6 @@ void CodeGen::genAmd64EmitterUnitTests()
#endif // defined(DEBUG) && defined(LATE_DISASM) && defined(_TARGET_AMD64_)
-/*****************************************************************************/
-#ifdef DEBUGGING_SUPPORT
-/*****************************************************************************
- * genSetScopeInfo
- *
- * Called for every scope info piece to record by the main genSetScopeInfo()
- */
-
-void CodeGen::genSetScopeInfo(unsigned which,
- UNATIVE_OFFSET startOffs,
- UNATIVE_OFFSET length,
- unsigned varNum,
- unsigned LVnum,
- bool avail,
- Compiler::siVarLoc& varLoc)
-{
- /* We need to do some mapping while reporting back these variables */
-
- unsigned ilVarNum = compiler->compMap2ILvarNum(varNum);
- noway_assert((int)ilVarNum != ICorDebugInfo::UNKNOWN_ILNUM);
-
- VarName name = nullptr;
-
-#ifdef DEBUG
-
- for (unsigned scopeNum = 0; scopeNum < compiler->info.compVarScopesCount; scopeNum++)
- {
- if (LVnum == compiler->info.compVarScopes[scopeNum].vsdLVnum)
- {
- name = compiler->info.compVarScopes[scopeNum].vsdName;
- }
- }
-
- // Hang on to this compiler->info.
-
- TrnslLocalVarInfo& tlvi = genTrnslLocalVarInfo[which];
-
- tlvi.tlviVarNum = ilVarNum;
- tlvi.tlviLVnum = LVnum;
- tlvi.tlviName = name;
- tlvi.tlviStartPC = startOffs;
- tlvi.tlviLength = length;
- tlvi.tlviAvailable = avail;
- tlvi.tlviVarLoc = varLoc;
-
-#endif // DEBUG
-
- compiler->eeSetLVinfo(which, startOffs, length, ilVarNum, LVnum, name, avail, varLoc);
-}
-#endif // DEBUGGING_SUPPORT
-
#endif // _TARGET_AMD64_
#endif // !LEGACY_BACKEND