author     Carol Eidt <carol.eidt@microsoft.com>  2016-09-01 21:27:46 -0700
committer  Carol Eidt <carol.eidt@microsoft.com>  2016-09-01 21:27:46 -0700
commit     e21aeafc31927708972ea301b2155f8313925f04 (patch)
tree       05d8f6540ccbce21b000aa2bcf736d6b2b55b93d
parent     f71493a5ad04ec2579052afcc606ee5e62f5a3b8 (diff)
More PR Feedback
-rw-r--r--  src/jit/assertionprop.cpp      1
-rw-r--r--  src/jit/codegenarm64.cpp       1
-rw-r--r--  src/jit/codegenlegacy.cpp  44107
-rw-r--r--  src/jit/codegenxarch.cpp   18773
-rw-r--r--  src/jit/compiler.h             4
-rw-r--r--  src/jit/gentree.cpp            4
-rw-r--r--  src/jit/gentree.h             11
-rw-r--r--  src/jit/gtlist.h               4
-rw-r--r--  src/jit/optimizer.cpp          1
-rw-r--r--  src/jit/rationalize.cpp        2
10 files changed, 31451 insertions, 31457 deletions
diff --git a/src/jit/assertionprop.cpp b/src/jit/assertionprop.cpp
index 3b071290c2..fe35c3b780 100644
--- a/src/jit/assertionprop.cpp
+++ b/src/jit/assertionprop.cpp
@@ -3424,7 +3424,6 @@ GenTreePtr Compiler::optAssertionProp_Ind(ASSERT_VALARG_TP assertions, const Gen
// TODO-1stClassStructs: All indirections should be handled here, but
// previously, when these indirections were GT_OBJ, or implicit due to a block
// copy or init, they were not being handled.
- bool propagateIndir = true;
if (tree->TypeGet() == TYP_STRUCT)
{
if (tree->OperIsBlk())
diff --git a/src/jit/codegenarm64.cpp b/src/jit/codegenarm64.cpp
index 5374a3142f..edd869367d 100644
--- a/src/jit/codegenarm64.cpp
+++ b/src/jit/codegenarm64.cpp
@@ -3524,7 +3524,6 @@ void CodeGen::genCodeForTreeNode(GenTreePtr treeNode)
assert(treeNode->AsObj()->gtGcPtrCount != 0);
genCodeForCpObj(treeNode->AsObj());
}
- break;
__fallthrough;
case GT_STORE_DYN_BLK:
diff --git a/src/jit/codegenlegacy.cpp b/src/jit/codegenlegacy.cpp
index c49bc644da..ea40eb2aff 100644
--- a/src/jit/codegenlegacy.cpp
+++ b/src/jit/codegenlegacy.cpp
@@ -1,22050 +1,22057 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-// See the LICENSE file in the project root for more information.
-
-/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
-XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
-XX XX
-XX CodeGenerator XX
-XX XX
-XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
-XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
-*/
-#include "jitpch.h"
-#ifdef _MSC_VER
-#pragma hdrstop
-#endif
-#include "codegen.h"
-
-#ifdef LEGACY_BACKEND // This file is NOT used for the '!LEGACY_BACKEND' that uses the linear scan register allocator
-
-#ifdef _TARGET_AMD64_
-#error AMD64 must be !LEGACY_BACKEND
-#endif
-
-#ifdef _TARGET_ARM64_
-#error ARM64 must be !LEGACY_BACKEND
-#endif
-
-#include "gcinfo.h"
-#include "emit.h"
-
-#ifndef JIT32_GCENCODER
-#include "gcinfoencoder.h"
-#endif
-
-/*****************************************************************************
- *
- * Determine what variables die between beforeSet and afterSet, and
- * update the liveness globals accordingly:
- * compiler->compCurLife, gcInfo.gcVarPtrSetCur, regSet.rsMaskVars, gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur
- */
-
-void CodeGen::genDyingVars(VARSET_VALARG_TP beforeSet, VARSET_VALARG_TP afterSet)
-{
- unsigned varNum;
- LclVarDsc* varDsc;
- regMaskTP regBit;
- VARSET_TP VARSET_INIT_NOCOPY(deadSet, VarSetOps::Diff(compiler, beforeSet, afterSet));
-
- if (VarSetOps::IsEmpty(compiler, deadSet))
- return;
-
- /* iterate through the dead variables */
-
- VARSET_ITER_INIT(compiler, iter, deadSet, varIndex);
- while (iter.NextElem(compiler, &varIndex))
- {
- varNum = compiler->lvaTrackedToVarNum[varIndex];
- varDsc = compiler->lvaTable + varNum;
-
- /* Remove this variable from the 'deadSet' bit set */
-
- noway_assert(VarSetOps::IsMember(compiler, compiler->compCurLife, varIndex));
-
- VarSetOps::RemoveElemD(compiler, compiler->compCurLife, varIndex);
-
- noway_assert(!VarSetOps::IsMember(compiler, gcInfo.gcTrkStkPtrLcls, varIndex) ||
- VarSetOps::IsMember(compiler, gcInfo.gcVarPtrSetCur, varIndex));
-
- VarSetOps::RemoveElemD(compiler, gcInfo.gcVarPtrSetCur, varIndex);
-
- /* We are done if the variable is not enregistered */
-
- if (!varDsc->lvRegister)
- {
-#ifdef DEBUG
- if (compiler->verbose)
- {
- printf("\t\t\t\t\t\t\tV%02u,T%02u is a dyingVar\n", varNum, varDsc->lvVarIndex);
- }
-#endif
- continue;
- }
-
-#if !FEATURE_FP_REGALLOC
- // We don't do FP-enreg of vars whose liveness changes in GTF_COLON_COND
- if (!varDsc->IsFloatRegType())
-#endif
- {
- /* Get hold of the appropriate register bit(s) */
-
- if (varTypeIsFloating(varDsc->TypeGet()))
- {
- regBit = genRegMaskFloat(varDsc->lvRegNum, varDsc->TypeGet());
- }
- else
- {
- regBit = genRegMask(varDsc->lvRegNum);
- if (isRegPairType(varDsc->lvType) && varDsc->lvOtherReg != REG_STK)
- regBit |= genRegMask(varDsc->lvOtherReg);
- }
-
-#ifdef DEBUG
- if (compiler->verbose)
- {
- printf("\t\t\t\t\t\t\tV%02u,T%02u in reg %s is a dyingVar\n", varNum, varDsc->lvVarIndex,
- compiler->compRegVarName(varDsc->lvRegNum));
- }
-#endif
- noway_assert((regSet.rsMaskVars & regBit) != 0);
-
- regSet.RemoveMaskVars(regBit);
-
- // Remove GC tracking if any for this register
-
- if ((regBit & regSet.rsMaskUsed) == 0) // The register may be multi-used
- gcInfo.gcMarkRegSetNpt(regBit);
- }
- }
-}
-
-/*****************************************************************************
- *
- * Change the given enregistered local variable node to a register variable node
- */
-
-void CodeGenInterface::genBashLclVar(GenTreePtr tree, unsigned varNum, LclVarDsc* varDsc)
-{
- noway_assert(tree->gtOper == GT_LCL_VAR);
- noway_assert(varDsc->lvRegister);
-
- if (isRegPairType(varDsc->lvType))
- {
- /* Check for the case of a variable that was narrowed to an int */
-
- if (isRegPairType(tree->gtType))
- {
- genMarkTreeInRegPair(tree, gen2regs2pair(varDsc->lvRegNum, varDsc->lvOtherReg));
- return;
- }
-
- noway_assert(tree->gtFlags & GTF_VAR_CAST);
- noway_assert(tree->gtType == TYP_INT);
- }
- else
- {
- noway_assert(!isRegPairType(tree->gtType));
- }
-
- /* It's a register variable -- modify the node */
-
- unsigned livenessFlags = (tree->gtFlags & GTF_LIVENESS_MASK);
-
- ValueNumPair vnp = tree->gtVNPair; // Save the ValueNumPair
- tree->SetOper(GT_REG_VAR);
- tree->gtVNPair = vnp; // Preserve the ValueNumPair, as SetOper will clear it.
-
- tree->gtFlags |= livenessFlags;
- tree->gtFlags |= GTF_REG_VAL;
- tree->gtRegNum = varDsc->lvRegNum;
- tree->gtRegVar.gtRegNum = varDsc->lvRegNum;
- tree->gtRegVar.SetLclNum(varNum);
-}
-
-// inline
-void CodeGen::saveLiveness(genLivenessSet* ls)
-{
- VarSetOps::Assign(compiler, ls->liveSet, compiler->compCurLife);
- VarSetOps::Assign(compiler, ls->varPtrSet, gcInfo.gcVarPtrSetCur);
- ls->maskVars = (regMaskSmall)regSet.rsMaskVars;
- ls->gcRefRegs = (regMaskSmall)gcInfo.gcRegGCrefSetCur;
- ls->byRefRegs = (regMaskSmall)gcInfo.gcRegByrefSetCur;
-}
-
-// inline
-void CodeGen::restoreLiveness(genLivenessSet* ls)
-{
- VarSetOps::Assign(compiler, compiler->compCurLife, ls->liveSet);
- VarSetOps::Assign(compiler, gcInfo.gcVarPtrSetCur, ls->varPtrSet);
- regSet.rsMaskVars = ls->maskVars;
- gcInfo.gcRegGCrefSetCur = ls->gcRefRegs;
- gcInfo.gcRegByrefSetCur = ls->byRefRegs;
-}
-
-// inline
-void CodeGen::checkLiveness(genLivenessSet* ls)
-{
- assert(VarSetOps::Equal(compiler, compiler->compCurLife, ls->liveSet));
- assert(VarSetOps::Equal(compiler, gcInfo.gcVarPtrSetCur, ls->varPtrSet));
- assert(regSet.rsMaskVars == ls->maskVars);
- assert(gcInfo.gcRegGCrefSetCur == ls->gcRefRegs);
- assert(gcInfo.gcRegByrefSetCur == ls->byRefRegs);
-}
-
-// inline
-bool CodeGenInterface::genMarkLclVar(GenTreePtr tree)
-{
- unsigned varNum;
- LclVarDsc* varDsc;
-
- assert(tree->gtOper == GT_LCL_VAR);
-
- /* Does the variable live in a register? */
-
- varNum = tree->gtLclVarCommon.gtLclNum;
- assert(varNum < compiler->lvaCount);
- varDsc = compiler->lvaTable + varNum;
-
- if (varDsc->lvRegister)
- {
- genBashLclVar(tree, varNum, varDsc);
- return true;
- }
- else
- {
- return false;
- }
-}
-
-// inline
-GenTreePtr CodeGen::genGetAddrModeBase(GenTreePtr tree)
-{
- bool rev;
- unsigned mul;
- unsigned cns;
- GenTreePtr adr;
- GenTreePtr idx;
-
- if (genCreateAddrMode(tree, // address
- 0, // mode
- false, // fold
- RBM_NONE, // reg mask
- &rev, // reverse ops
- &adr, // base addr
- &idx, // index val
-#if SCALED_ADDR_MODES
- &mul, // scaling
-#endif
- &cns, // displacement
- true)) // don't generate code
- return adr;
- else
- return NULL;
-}
-
-// inline
-void CodeGen::genSinglePush()
-{
- genStackLevel += sizeof(void*);
-}
-
-// inline
-void CodeGen::genSinglePop()
-{
- genStackLevel -= sizeof(void*);
-}
-
-#if FEATURE_STACK_FP_X87
-// inline
-void CodeGenInterface::genResetFPstkLevel(unsigned newValue /* = 0 */)
-{
- genFPstkLevel = newValue;
-}
-
-// inline
-unsigned CodeGenInterface::genGetFPstkLevel()
-{
- return genFPstkLevel;
-}
-
-// inline
-void CodeGenInterface::genIncrementFPstkLevel(unsigned inc /* = 1 */)
-{
- noway_assert((inc == 0) || genFPstkLevel + inc > genFPstkLevel);
- genFPstkLevel += inc;
-}
-
-// inline
-void CodeGenInterface::genDecrementFPstkLevel(unsigned dec /* = 1 */)
-{
- noway_assert((dec == 0) || genFPstkLevel - dec < genFPstkLevel);
- genFPstkLevel -= dec;
-}
-
-#endif // FEATURE_STACK_FP_X87
-
-/*****************************************************************************
- *
- * Generate code that will set the given register to the integer constant.
- */
-
-void CodeGen::genSetRegToIcon(regNumber reg, ssize_t val, var_types type, insFlags flags)
-{
- noway_assert(type != TYP_REF || val == NULL);
-
- /* Does the reg already hold this constant? */
-
- if (!regTracker.rsIconIsInReg(val, reg))
- {
- if (val == 0)
- {
- instGen_Set_Reg_To_Zero(emitActualTypeSize(type), reg, flags);
- }
-#ifdef _TARGET_ARM_
- // If we can set a register to a constant with a small encoding, then do that.
- else if (arm_Valid_Imm_For_Small_Mov(reg, val, flags))
- {
- instGen_Set_Reg_To_Imm(emitActualTypeSize(type), reg, val, flags);
- }
-#endif
- else
- {
- /* See if a register holds the value or a close value? */
- bool constantLoaded = false;
- ssize_t delta;
- regNumber srcReg = regTracker.rsIconIsInReg(val, &delta);
-
- if (srcReg != REG_NA)
- {
- if (delta == 0)
- {
- inst_RV_RV(INS_mov, reg, srcReg, type, emitActualTypeSize(type), flags);
- constantLoaded = true;
- }
- else
- {
-#if defined(_TARGET_XARCH_)
- /* delta should fit inside a byte */
- if (delta == (signed char)delta)
- {
- /* use an lea instruction to set reg */
- getEmitter()->emitIns_R_AR(INS_lea, emitTypeSize(type), reg, srcReg, (int)delta);
- constantLoaded = true;
- }
-#elif defined(_TARGET_ARM_)
- /* We found a register 'regS' that has the value we need, modulo a small delta.
- That is, the value we need is 'regS + delta'.
- We want to generate one of the following instructions, listed in order of preference:
-
- adds regD, delta ; 2 bytes. if regD == regS, regD is a low register, and
- 0<=delta<=255
- subs regD, delta ; 2 bytes. if regD == regS, regD is a low register, and
- -255<=delta<=0
- adds regD, regS, delta ; 2 bytes. if regD and regS are low registers and 0<=delta<=7
- subs regD, regS, delta ; 2 bytes. if regD and regS are low registers and -7<=delta<=0
- mov regD, icon ; 4 bytes. icon is a wacky Thumb 12-bit immediate.
- movw regD, icon ; 4 bytes. 0<=icon<=65535
- add.w regD, regS, delta ; 4 bytes. delta is a wacky Thumb 12-bit immediate.
- sub.w regD, regS, delta ; 4 bytes. delta is a wacky Thumb 12-bit immediate.
- addw regD, regS, delta ; 4 bytes. 0<=delta<=4095
- subw regD, regS, delta ; 4 bytes. -4095<=delta<=0
-
- If it wasn't for the desire to generate the "mov reg,icon" forms if possible (and no bigger
- than necessary), this would be a lot simpler. Note that we might set the overflow flag: we
- can have regS containing the largest signed int 0x7fffffff and need the smallest signed int
- 0x80000000. In this case, delta will be 1.
- */
-
- bool useAdd = false;
- regMaskTP regMask = genRegMask(reg);
- regMaskTP srcRegMask = genRegMask(srcReg);
-
- if ((flags != INS_FLAGS_NOT_SET) && (reg == srcReg) && (regMask & RBM_LOW_REGS) &&
- (unsigned_abs(delta) <= 255))
- {
- useAdd = true;
- }
- else if ((flags != INS_FLAGS_NOT_SET) && (regMask & RBM_LOW_REGS) && (srcRegMask & RBM_LOW_REGS) &&
- (unsigned_abs(delta) <= 7))
- {
- useAdd = true;
- }
- else if (arm_Valid_Imm_For_Mov(val))
- {
- // fall through to general "!constantLoaded" case below
- }
- else if (arm_Valid_Imm_For_Add(delta, flags))
- {
- useAdd = true;
- }
-
- if (useAdd)
- {
- getEmitter()->emitIns_R_R_I(INS_add, EA_4BYTE, reg, srcReg, delta, flags);
- constantLoaded = true;
- }
-#else
- assert(!"Codegen missing");
-#endif
- }
- }
-
- if (!constantLoaded) // Have we loaded it yet?
- {
-#ifdef _TARGET_X86_
- if (val == -1)
- {
- /* or reg,-1 takes 3 bytes */
- inst_RV_IV(INS_OR, reg, val, emitActualTypeSize(type));
- }
- else
- /* For SMALL_CODE it is smaller to push a small immediate and
- then pop it into the dest register */
- if ((compiler->compCodeOpt() == Compiler::SMALL_CODE) && val == (signed char)val)
- {
- /* "mov" has no s(sign)-bit and so always takes 6 bytes,
- whereas push+pop takes 2+1 bytes */
-
- inst_IV(INS_push, val);
- genSinglePush();
-
- inst_RV(INS_pop, reg, type);
- genSinglePop();
- }
- else
-#endif // _TARGET_X86_
- {
- instGen_Set_Reg_To_Imm(emitActualTypeSize(type), reg, val, flags);
- }
- }
- }
- }
- regTracker.rsTrackRegIntCns(reg, val);
- gcInfo.gcMarkRegPtrVal(reg, type);
-}
-
-/*****************************************************************************
- *
- * Find an existing register set to the given integer constant, or
- * pick a register and generate code that will set it to the integer constant.
- *
- * If no existing register is set to the constant, it will use regSet.rsPickReg(regBest)
- * to pick some register to set. NOTE that this means the returned regNumber
- * might *not* be in regBest. It also implies that you should lock any registers
- * you don't want spilled (not just mark as used).
- *
- */
-
-regNumber CodeGen::genGetRegSetToIcon(ssize_t val, regMaskTP regBest /* = 0 */, var_types type /* = TYP_INT */)
-{
- regNumber regCns;
-#if REDUNDANT_LOAD
-
- // Is there already a register holding this constant that we can use?
- regCns = regTracker.rsIconIsInReg(val);
-
- if (regCns == REG_NA)
-#endif
- {
- // If not, grab a register to hold the constant, preferring
- // any register besides RBM_TMP_0 so it can hopefully be re-used
- regCns = regSet.rsPickReg(regBest, regBest & ~RBM_TMP_0);
-
- // Now set the constant
- genSetRegToIcon(regCns, val, type);
- }
-
- // NOTE: there is no guarantee that regCns is in regBest's mask
- return regCns;
-}
-
-/*****************************************************************************/
-/*****************************************************************************
- *
- * Add the given constant to the specified register.
- * 'tree' is the resulting tree
- */
-
-void CodeGen::genIncRegBy(regNumber reg, ssize_t ival, GenTreePtr tree, var_types dstType, bool ovfl)
-{
- bool setFlags = (tree != NULL) && tree->gtSetFlags();
-
-#ifdef _TARGET_XARCH_
- /* First check to see if we can generate inc or dec instruction(s) */
- /* But avoid inc/dec on P4 in general for fast code or inside loops for blended code */
- if (!ovfl && !compiler->optAvoidIncDec(compiler->compCurBB->getBBWeight(compiler)))
- {
- emitAttr size = emitTypeSize(dstType);
-
- switch (ival)
- {
- case 2:
- inst_RV(INS_inc, reg, dstType, size);
- __fallthrough;
- case 1:
- inst_RV(INS_inc, reg, dstType, size);
-
- goto UPDATE_LIVENESS;
-
- case -2:
- inst_RV(INS_dec, reg, dstType, size);
- __fallthrough;
- case -1:
- inst_RV(INS_dec, reg, dstType, size);
-
- goto UPDATE_LIVENESS;
- }
- }
-#endif
-
- insFlags flags = setFlags ? INS_FLAGS_SET : INS_FLAGS_DONT_CARE;
- inst_RV_IV(INS_add, reg, ival, emitActualTypeSize(dstType), flags);
-
-#ifdef _TARGET_XARCH_
-UPDATE_LIVENESS:
-#endif
-
- if (setFlags)
- genFlagsEqualToReg(tree, reg);
-
- regTracker.rsTrackRegTrash(reg);
-
- gcInfo.gcMarkRegSetNpt(genRegMask(reg));
-
- if (tree != NULL)
- {
- if (!tree->OperIsAssignment())
- {
- genMarkTreeInReg(tree, reg);
- if (varTypeIsGC(tree->TypeGet()))
- gcInfo.gcMarkRegSetByref(genRegMask(reg));
- }
- }
-}
-
-/*****************************************************************************
- *
- * Subtract the given constant from the specified register.
- * Should only be used for unsigned sub with overflow. Else
- * genIncRegBy() can be used with -ival. We shouldn't use genIncRegBy()
- * for these cases as the flags are set differently, and the following
- * check for overflow won't work correctly.
- * 'tree' is the resulting tree.
- */
-
-void CodeGen::genDecRegBy(regNumber reg, ssize_t ival, GenTreePtr tree)
-{
- noway_assert((tree->gtFlags & GTF_OVERFLOW) &&
- ((tree->gtFlags & GTF_UNSIGNED) || ival == ((tree->gtType == TYP_INT) ? INT32_MIN : SSIZE_T_MIN)));
- noway_assert(tree->gtType == TYP_INT || tree->gtType == TYP_I_IMPL);
-
- regTracker.rsTrackRegTrash(reg);
-
- noway_assert(!varTypeIsGC(tree->TypeGet()));
- gcInfo.gcMarkRegSetNpt(genRegMask(reg));
-
- insFlags flags = tree->gtSetFlags() ? INS_FLAGS_SET : INS_FLAGS_DONT_CARE;
- inst_RV_IV(INS_sub, reg, ival, emitActualTypeSize(tree->TypeGet()), flags);
-
- if (tree->gtSetFlags())
- genFlagsEqualToReg(tree, reg);
-
- if (tree)
- {
- genMarkTreeInReg(tree, reg);
- }
-}
-
-/*****************************************************************************
- *
- * Multiply the specified register by the given value.
- * 'tree' is the resulting tree
- */
-
-void CodeGen::genMulRegBy(regNumber reg, ssize_t ival, GenTreePtr tree, var_types dstType, bool ovfl)
-{
- noway_assert(genActualType(dstType) == TYP_INT || genActualType(dstType) == TYP_I_IMPL);
-
- regTracker.rsTrackRegTrash(reg);
-
- if (tree)
- {
- genMarkTreeInReg(tree, reg);
- }
-
- bool use_shift = false;
- unsigned shift_by = 0;
-
- if ((dstType >= TYP_INT) && !ovfl && (ival > 0) && ((ival & (ival - 1)) == 0))
- {
- use_shift = true;
- BitScanForwardPtr((ULONG*)&shift_by, (ULONG)ival);
- }
-
- if (use_shift)
- {
- if (shift_by != 0)
- {
- insFlags flags = tree->gtSetFlags() ? INS_FLAGS_SET : INS_FLAGS_DONT_CARE;
- inst_RV_SH(INS_SHIFT_LEFT_LOGICAL, emitTypeSize(dstType), reg, shift_by, flags);
- if (tree->gtSetFlags())
- genFlagsEqualToReg(tree, reg);
- }
- }
- else
- {
- instruction ins;
-#ifdef _TARGET_XARCH_
- ins = getEmitter()->inst3opImulForReg(reg);
-#else
- ins = INS_mul;
-#endif
-
- inst_RV_IV(ins, reg, ival, emitActualTypeSize(dstType));
- }
-}
-
-/*****************************************************************************/
-/*****************************************************************************/
-/*****************************************************************************
- *
- * Compute the value 'tree' into a register that's in 'needReg'
- * (or any free register if 'needReg' is RBM_NONE).
- *
- * Note that 'needReg' is just a recommendation unless mustReg==RegSet::EXACT_REG.
- * If keepReg==RegSet::KEEP_REG, we mark the register as being used.
- *
- * If you require that the register returned is trashable, pass true for 'freeOnly'.
- */
-
-void CodeGen::genComputeReg(
- GenTreePtr tree, regMaskTP needReg, RegSet::ExactReg mustReg, RegSet::KeepReg keepReg, bool freeOnly)
-{
- noway_assert(tree->gtType != TYP_VOID);
-
- regNumber reg;
- regNumber rg2;
-
-#if FEATURE_STACK_FP_X87
- noway_assert(genActualType(tree->gtType) == TYP_INT || genActualType(tree->gtType) == TYP_I_IMPL ||
- genActualType(tree->gtType) == TYP_REF || tree->gtType == TYP_BYREF);
-#elif defined(_TARGET_ARM_)
- noway_assert(genActualType(tree->gtType) == TYP_INT || genActualType(tree->gtType) == TYP_I_IMPL ||
- genActualType(tree->gtType) == TYP_REF || tree->gtType == TYP_BYREF ||
- genActualType(tree->gtType) == TYP_FLOAT || genActualType(tree->gtType) == TYP_DOUBLE ||
- genActualType(tree->gtType) == TYP_STRUCT);
-#else
- noway_assert(genActualType(tree->gtType) == TYP_INT || genActualType(tree->gtType) == TYP_I_IMPL ||
- genActualType(tree->gtType) == TYP_REF || tree->gtType == TYP_BYREF ||
- genActualType(tree->gtType) == TYP_FLOAT || genActualType(tree->gtType) == TYP_DOUBLE);
-#endif
-
- /* Generate the value, hopefully into the right register */
-
- genCodeForTree(tree, needReg);
- noway_assert(tree->gtFlags & GTF_REG_VAL);
-
- // There is a workaround in genCodeForTreeLng() that changes the type of the
- // tree of a GT_MUL with 64 bit result to TYP_INT from TYP_LONG, then calls
- // genComputeReg(). genCodeForTree(), above, will put the result in gtRegPair for ARM,
- // or leave it in EAX/EDX for x86, but only set EAX as gtRegNum. There's no point
- // running the rest of this code, because anything looking at gtRegNum on ARM or
- // attempting to move from EAX/EDX will be wrong.
- if ((tree->OperGet() == GT_MUL) && (tree->gtFlags & GTF_MUL_64RSLT))
- goto REG_OK;
-
- reg = tree->gtRegNum;
-
- /* Did the value end up in an acceptable register? */
-
- if ((mustReg == RegSet::EXACT_REG) && needReg && !(genRegMask(reg) & needReg))
- {
- /* Not good enough to satisfy the caller's orders */
-
- if (varTypeIsFloating(tree))
- {
- RegSet::RegisterPreference pref(needReg, RBM_NONE);
- rg2 = regSet.PickRegFloat(tree->TypeGet(), &pref);
- }
- else
- {
- rg2 = regSet.rsGrabReg(needReg);
- }
- }
- else
- {
- /* Do we have to end up with a free register? */
-
- if (!freeOnly)
- goto REG_OK;
-
- /* Did we luck out and the value got computed into an unused reg? */
-
- if (genRegMask(reg) & regSet.rsRegMaskFree())
- goto REG_OK;
-
- /* Register already in use, so spill previous value */
-
- if ((mustReg == RegSet::EXACT_REG) && needReg && (genRegMask(reg) & needReg))
- {
- rg2 = regSet.rsGrabReg(needReg);
- if (rg2 == reg)
- {
- gcInfo.gcMarkRegPtrVal(reg, tree->TypeGet());
- tree->gtRegNum = reg;
- goto REG_OK;
- }
- }
- else
- {
- /* OK, let's find a trashable home for the value */
-
- regMaskTP rv1RegUsed;
-
- regSet.rsLockReg(genRegMask(reg), &rv1RegUsed);
- rg2 = regSet.rsPickReg(needReg);
- regSet.rsUnlockReg(genRegMask(reg), rv1RegUsed);
- }
- }
-
- noway_assert(reg != rg2);
-
- /* Update the value in the target register */
-
- regTracker.rsTrackRegCopy(rg2, reg);
-
- inst_RV_RV(ins_Copy(tree->TypeGet()), rg2, reg, tree->TypeGet());
-
- /* The value has been transferred to 'reg' */
-
- if ((genRegMask(reg) & regSet.rsMaskUsed) == 0)
- gcInfo.gcMarkRegSetNpt(genRegMask(reg));
-
- gcInfo.gcMarkRegPtrVal(rg2, tree->TypeGet());
-
- /* The value is now in an appropriate register */
-
- tree->gtRegNum = rg2;
-
-REG_OK:
-
- /* Does the caller want us to mark the register as used? */
-
- if (keepReg == RegSet::KEEP_REG)
- {
- /* In case we're computing a value into a register variable */
-
- genUpdateLife(tree);
-
- /* Mark the register as 'used' */
-
- regSet.rsMarkRegUsed(tree);
- }
-}
-
-/*****************************************************************************
- *
- * Same as genComputeReg(), the only difference being that the result is
- * guaranteed to end up in a trashable register.
- */
-
-// inline
-void CodeGen::genCompIntoFreeReg(GenTreePtr tree, regMaskTP needReg, RegSet::KeepReg keepReg)
-{
- genComputeReg(tree, needReg, RegSet::ANY_REG, keepReg, true);
-}
-
-/*****************************************************************************
- *
- * The value 'tree' was earlier computed into a register; free up that
- * register (but also make sure the value is presently in a register).
- */
-
-void CodeGen::genReleaseReg(GenTreePtr tree)
-{
- if (tree->gtFlags & GTF_SPILLED)
- {
- /* The register has been spilled -- reload it */
-
- regSet.rsUnspillReg(tree, 0, RegSet::FREE_REG);
- return;
- }
-
- regSet.rsMarkRegFree(genRegMask(tree->gtRegNum));
-}
-
-/*****************************************************************************
- *
- * The value 'tree' was earlier computed into a register. Check whether that
- * register has been spilled (and reload it if so), and if 'keepReg' is RegSet::FREE_REG,
- * free the register. The caller shouldn't need to be setting GCness of the register
- * where tree will be recovered to, so we disallow keepReg==RegSet::FREE_REG for GC type trees.
- */
-
-void CodeGen::genRecoverReg(GenTreePtr tree, regMaskTP needReg, RegSet::KeepReg keepReg)
-{
- if (tree->gtFlags & GTF_SPILLED)
- {
- /* The register has been spilled -- reload it */
-
- regSet.rsUnspillReg(tree, needReg, keepReg);
- return;
- }
- else if (needReg && (needReg & genRegMask(tree->gtRegNum)) == 0)
- {
- /* We need the tree in another register. So move it there */
-
- noway_assert(tree->gtFlags & GTF_REG_VAL);
- regNumber oldReg = tree->gtRegNum;
-
- /* Pick an acceptable register */
-
- regNumber reg = regSet.rsGrabReg(needReg);
-
- /* Copy the value */
-
- inst_RV_RV(INS_mov, reg, oldReg, tree->TypeGet());
- tree->gtRegNum = reg;
-
- gcInfo.gcMarkRegPtrVal(tree);
- regSet.rsMarkRegUsed(tree);
- regSet.rsMarkRegFree(oldReg, tree);
-
- regTracker.rsTrackRegCopy(reg, oldReg);
- }
-
- /* Free the register if the caller desired so */
-
- if (keepReg == RegSet::FREE_REG)
- {
- regSet.rsMarkRegFree(genRegMask(tree->gtRegNum));
- // Can't use RegSet::FREE_REG on a GC type
- noway_assert(!varTypeIsGC(tree->gtType));
- }
- else
- {
- noway_assert(regSet.rsMaskUsed & genRegMask(tree->gtRegNum));
- }
-}
-
-/*****************************************************************************
- *
- * Move one half of a register pair to its new regPair(half).
- */
-
-// inline
-void CodeGen::genMoveRegPairHalf(GenTreePtr tree, regNumber dst, regNumber src, int off)
-{
- if (src == REG_STK)
- {
- // handle long to unsigned long overflow casts
- while (tree->gtOper == GT_CAST)
- {
- noway_assert(tree->gtType == TYP_LONG);
- tree = tree->gtCast.CastOp();
- }
- noway_assert(tree->gtEffectiveVal()->gtOper == GT_LCL_VAR);
- noway_assert(tree->gtType == TYP_LONG);
- inst_RV_TT(ins_Load(TYP_INT), dst, tree, off);
- regTracker.rsTrackRegTrash(dst);
- }
- else
- {
- regTracker.rsTrackRegCopy(dst, src);
- inst_RV_RV(INS_mov, dst, src, TYP_INT);
- }
-}
-
-/*****************************************************************************
- *
- * The given long value is in a register pair, but it's not an acceptable
- * one. We have to move the value into a register pair in 'needReg' (if
- * non-zero) or the pair 'newPair' (when 'newPair != REG_PAIR_NONE').
- *
- * Important note: if 'needReg' is non-zero, we assume the current pair
- * has not been marked as free. If, OTOH, 'newPair' is specified, we
- * assume that the current register pair is marked as used and free it.
- */
-
-void CodeGen::genMoveRegPair(GenTreePtr tree, regMaskTP needReg, regPairNo newPair)
-{
- regPairNo oldPair;
-
- regNumber oldLo;
- regNumber oldHi;
- regNumber newLo;
- regNumber newHi;
-
- /* Either a target set or a specific pair may be requested */
-
- noway_assert((needReg != 0) != (newPair != REG_PAIR_NONE));
-
- /* Get hold of the current pair */
-
- oldPair = tree->gtRegPair;
- noway_assert(oldPair != newPair);
-
- /* Are we supposed to move to a specific pair? */
-
- if (newPair != REG_PAIR_NONE)
- {
- regMaskTP oldMask = genRegPairMask(oldPair);
- regMaskTP loMask = genRegMask(genRegPairLo(newPair));
- regMaskTP hiMask = genRegMask(genRegPairHi(newPair));
- regMaskTP overlap = oldMask & (loMask | hiMask);
-
- /* First lock any registers that are in both pairs */
-
- noway_assert((regSet.rsMaskUsed & overlap) == overlap);
- noway_assert((regSet.rsMaskLock & overlap) == 0);
- regSet.rsMaskLock |= overlap;
-
- /* Make sure any additional registers we need are free */
-
- if ((loMask & regSet.rsMaskUsed) != 0 && (loMask & oldMask) == 0)
- {
- regSet.rsGrabReg(loMask);
- }
-
- if ((hiMask & regSet.rsMaskUsed) != 0 && (hiMask & oldMask) == 0)
- {
- regSet.rsGrabReg(hiMask);
- }
-
- /* Unlock those registers we have temporarily locked */
-
- noway_assert((regSet.rsMaskUsed & overlap) == overlap);
- noway_assert((regSet.rsMaskLock & overlap) == overlap);
- regSet.rsMaskLock -= overlap;
-
- /* We can now free the old pair */
-
- regSet.rsMarkRegFree(oldMask);
- }
- else
- {
- /* Pick the new pair based on the caller's stated preference */
-
- newPair = regSet.rsGrabRegPair(needReg);
- }
-
- // If grabbed pair is the same as old one we're done
- if (newPair == oldPair)
- {
- noway_assert((oldLo = genRegPairLo(oldPair), oldHi = genRegPairHi(oldPair), newLo = genRegPairLo(newPair),
- newHi = genRegPairHi(newPair), newLo != REG_STK && newHi != REG_STK));
- return;
- }
-
- /* Move the values from the old pair into the new one */
-
- oldLo = genRegPairLo(oldPair);
- oldHi = genRegPairHi(oldPair);
- newLo = genRegPairLo(newPair);
- newHi = genRegPairHi(newPair);
-
- noway_assert(newLo != REG_STK && newHi != REG_STK);
-
- /* Careful - the register pairs might overlap */
-
- if (newLo == oldLo)
- {
- /* The low registers are identical, just move the upper half */
-
- noway_assert(newHi != oldHi);
- genMoveRegPairHalf(tree, newHi, oldHi, sizeof(int));
- }
- else
- {
- /* The low registers are different, are the upper ones the same? */
-
- if (newHi == oldHi)
- {
- /* Just move the lower half, then */
- genMoveRegPairHalf(tree, newLo, oldLo, 0);
- }
- else
- {
- /* Both sets are different - is there an overlap? */
-
- if (newLo == oldHi)
- {
- /* Are high and low simply swapped ? */
-
- if (newHi == oldLo)
- {
-#ifdef _TARGET_ARM_
- /* Let's use XOR swap to reduce register pressure. */
- inst_RV_RV(INS_eor, oldLo, oldHi);
- inst_RV_RV(INS_eor, oldHi, oldLo);
- inst_RV_RV(INS_eor, oldLo, oldHi);
-#else
- inst_RV_RV(INS_xchg, oldHi, oldLo);
-#endif
- regTracker.rsTrackRegSwap(oldHi, oldLo);
- }
- else
- {
- /* New lower == old higher, so move higher half first */
-
- noway_assert(newHi != oldLo);
- genMoveRegPairHalf(tree, newHi, oldHi, sizeof(int));
- genMoveRegPairHalf(tree, newLo, oldLo, 0);
- }
- }
- else
- {
- /* Move lower half first */
- genMoveRegPairHalf(tree, newLo, oldLo, 0);
- genMoveRegPairHalf(tree, newHi, oldHi, sizeof(int));
- }
- }
- }
-
- /* Record the fact that we're switching to another pair */
-
- tree->gtRegPair = newPair;
-}
-
-/*****************************************************************************
- *
- * Compute the value 'tree' into the register pair specified by 'needRegPair'
- * if 'needRegPair' is REG_PAIR_NONE then use any free register pair, avoid
- * those in avoidReg.
- * If 'keepReg' is set to RegSet::KEEP_REG then we mark both registers that the
- * value ends up in as being used.
- */
-
-void CodeGen::genComputeRegPair(
- GenTreePtr tree, regPairNo needRegPair, regMaskTP avoidReg, RegSet::KeepReg keepReg, bool freeOnly)
-{
- regMaskTP regMask;
- regPairNo regPair;
- regMaskTP tmpMask;
- regMaskTP tmpUsedMask;
- regNumber rLo;
- regNumber rHi;
-
- noway_assert(isRegPairType(tree->gtType));
-
- if (needRegPair == REG_PAIR_NONE)
- {
- if (freeOnly)
- {
- regMask = regSet.rsRegMaskFree() & ~avoidReg;
- if (genMaxOneBit(regMask))
- regMask = regSet.rsRegMaskFree();
- }
- else
- {
- regMask = RBM_ALLINT & ~avoidReg;
- }
-
- if (genMaxOneBit(regMask))
- regMask = regSet.rsRegMaskCanGrab();
- }
- else
- {
- regMask = genRegPairMask(needRegPair);
- }
-
- /* Generate the value, hopefully into the right register pair */
-
- genCodeForTreeLng(tree, regMask, avoidReg);
-
- noway_assert(tree->gtFlags & GTF_REG_VAL);
-
- regPair = tree->gtRegPair;
- tmpMask = genRegPairMask(regPair);
-
- rLo = genRegPairLo(regPair);
- rHi = genRegPairHi(regPair);
-
- /* At least one half is in a real register */
-
- noway_assert(rLo != REG_STK || rHi != REG_STK);
-
- /* Did the value end up in an acceptable register pair? */
-
- if (needRegPair != REG_PAIR_NONE)
- {
- if (needRegPair != regPair)
- {
- /* This is a workaround. If we specify a regPair for genMoveRegPair */
- /* it expects the source pair to be marked as used */
- regSet.rsMarkRegPairUsed(tree);
- genMoveRegPair(tree, 0, needRegPair);
- }
- }
- else if (freeOnly)
- {
- /* Do we have to end up with a free register pair?
- Something might have gotten freed up above */
- bool mustMoveReg = false;
-
- regMask = regSet.rsRegMaskFree() & ~avoidReg;
-
- if (genMaxOneBit(regMask))
- regMask = regSet.rsRegMaskFree();
-
- if ((tmpMask & regMask) != tmpMask || rLo == REG_STK || rHi == REG_STK)
- {
- /* Note that we must call genMoveRegPair if one of our registers
- comes from the used mask, so that it will be properly spilled. */
-
- mustMoveReg = true;
- }
-
- if (genMaxOneBit(regMask))
- regMask |= regSet.rsRegMaskCanGrab() & ~avoidReg;
-
- if (genMaxOneBit(regMask))
- regMask |= regSet.rsRegMaskCanGrab();
-
- /* Did the value end up in a free register pair? */
-
- if (mustMoveReg)
- {
- /* We'll have to move the value to a free (trashable) pair */
- genMoveRegPair(tree, regMask, REG_PAIR_NONE);
- }
- }
- else
- {
- noway_assert(needRegPair == REG_PAIR_NONE);
- noway_assert(!freeOnly);
-
- /* it is possible to have tmpMask also in the regSet.rsMaskUsed */
- tmpUsedMask = tmpMask & regSet.rsMaskUsed;
- tmpMask &= ~regSet.rsMaskUsed;
-
- /* Make sure that the value is in "real" registers*/
- if (rLo == REG_STK)
- {
- /* Get one of the desired registers, but exclude rHi */
-
- regSet.rsLockReg(tmpMask);
- regSet.rsLockUsedReg(tmpUsedMask);
-
- regNumber reg = regSet.rsPickReg(regMask);
-
- regSet.rsUnlockUsedReg(tmpUsedMask);
- regSet.rsUnlockReg(tmpMask);
-
- inst_RV_TT(ins_Load(TYP_INT), reg, tree, 0);
-
- tree->gtRegPair = gen2regs2pair(reg, rHi);
-
- regTracker.rsTrackRegTrash(reg);
- gcInfo.gcMarkRegSetNpt(genRegMask(reg));
- }
- else if (rHi == REG_STK)
- {
- /* Get one of the desired registers, but exclude rLo */
-
- regSet.rsLockReg(tmpMask);
- regSet.rsLockUsedReg(tmpUsedMask);
-
- regNumber reg = regSet.rsPickReg(regMask);
-
- regSet.rsUnlockUsedReg(tmpUsedMask);
- regSet.rsUnlockReg(tmpMask);
-
- inst_RV_TT(ins_Load(TYP_INT), reg, tree, 4);
-
- tree->gtRegPair = gen2regs2pair(rLo, reg);
-
- regTracker.rsTrackRegTrash(reg);
- gcInfo.gcMarkRegSetNpt(genRegMask(reg));
- }
- }
-
- /* Does the caller want us to mark the register as used? */
-
- if (keepReg == RegSet::KEEP_REG)
- {
- /* In case we're computing a value into a register variable */
-
- genUpdateLife(tree);
-
- /* Mark the register as 'used' */
-
- regSet.rsMarkRegPairUsed(tree);
- }
-}
-
-/*****************************************************************************
- *
- * Same as genComputeRegPair(), the only difference being that the result
- * is guaranteed to end up in a trashable register pair.
- */
-
-// inline
-void CodeGen::genCompIntoFreeRegPair(GenTreePtr tree, regMaskTP avoidReg, RegSet::KeepReg keepReg)
-{
- genComputeRegPair(tree, REG_PAIR_NONE, avoidReg, keepReg, true);
-}
-
-/*****************************************************************************
- *
- * The value 'tree' was earlier computed into a register pair; free up that
- * register pair (but also make sure the value is presently in a register
- * pair).
- */
-
-void CodeGen::genReleaseRegPair(GenTreePtr tree)
-{
- if (tree->gtFlags & GTF_SPILLED)
- {
- /* The register has been spilled -- reload it */
-
- regSet.rsUnspillRegPair(tree, 0, RegSet::FREE_REG);
- return;
- }
-
- regSet.rsMarkRegFree(genRegPairMask(tree->gtRegPair));
-}
-
-/*****************************************************************************
- *
- * The value 'tree' was earlier computed into a register pair. Check whether
- * either register of that pair has been spilled (and reload it if so), and
- * if 'keepReg' is 0, free the register pair.
- */
-
-void CodeGen::genRecoverRegPair(GenTreePtr tree, regPairNo regPair, RegSet::KeepReg keepReg)
-{
- if (tree->gtFlags & GTF_SPILLED)
- {
- regMaskTP regMask;
-
- if (regPair == REG_PAIR_NONE)
- regMask = RBM_NONE;
- else
- regMask = genRegPairMask(regPair);
-
- /* The register pair has been spilled -- reload it */
-
- regSet.rsUnspillRegPair(tree, regMask, RegSet::KEEP_REG);
- }
-
- /* Does the caller insist on the value being in a specific place? */
-
- if (regPair != REG_PAIR_NONE && regPair != tree->gtRegPair)
- {
- /* No good -- we'll have to move the value to a new place */
-
- genMoveRegPair(tree, 0, regPair);
-
- /* Mark the pair as used if appropriate */
-
- if (keepReg == RegSet::KEEP_REG)
- regSet.rsMarkRegPairUsed(tree);
-
- return;
- }
-
- /* Free the register pair if the caller desired so */
-
- if (keepReg == RegSet::FREE_REG)
- regSet.rsMarkRegFree(genRegPairMask(tree->gtRegPair));
-}
-
-/*****************************************************************************
- *
- * Compute the given long value into the specified register pair; don't mark
- * the register pair as used.
- */
-
-// inline
-void CodeGen::genEvalIntoFreeRegPair(GenTreePtr tree, regPairNo regPair, regMaskTP avoidReg)
-{
- genComputeRegPair(tree, regPair, avoidReg, RegSet::KEEP_REG);
- genRecoverRegPair(tree, regPair, RegSet::FREE_REG);
-}
-
-/*****************************************************************************
- * This helper makes sure that the regpair target of an assignment is
- * available for use. This needs to be called in genCodeForTreeLng just before
- * a long assignment, but must not be called until everything has been
- * evaluated, or else we might try to spill enregistered variables.
- *
- */
-
-// inline
-void CodeGen::genMakeRegPairAvailable(regPairNo regPair)
-{
- /* Make sure the target of the store is available */
-
- regNumber regLo = genRegPairLo(regPair);
- regNumber regHi = genRegPairHi(regPair);
-
- if ((regHi != REG_STK) && (regSet.rsMaskUsed & genRegMask(regHi)))
- regSet.rsSpillReg(regHi);
-
- if ((regLo != REG_STK) && (regSet.rsMaskUsed & genRegMask(regLo)))
- regSet.rsSpillReg(regLo);
-}
-
-/*****************************************************************************/
-/*****************************************************************************
- *
- * Return true if the given tree 'addr' can be computed via an addressing mode,
- * such as "[ebx+esi*4+20]". If the expression isn't an address mode already
- * try to make it so (but we don't try 'too hard' to accomplish this).
- *
- * If we end up needing a register (or two registers) to hold some part(s) of the
- * address, we return the use register mask via '*useMaskPtr'.
- *
- * If keepReg==RegSet::KEEP_REG, the registers (viz. *useMaskPtr) will be marked as
- * in use. The caller would then be responsible for calling
- * regSet.rsMarkRegFree(*useMaskPtr).
- *
- * If keepReg==RegSet::FREE_REG, then the caller needs to update the GC-tracking by
- * calling genDoneAddressable(addr, *useMaskPtr, RegSet::FREE_REG);
- */
-
-bool CodeGen::genMakeIndAddrMode(GenTreePtr addr,
- GenTreePtr oper,
- bool forLea,
- regMaskTP regMask,
- RegSet::KeepReg keepReg,
- regMaskTP* useMaskPtr,
- bool deferOK)
-{
- if (addr->gtOper == GT_ARR_ELEM)
- {
- regMaskTP regs = genMakeAddrArrElem(addr, oper, RBM_ALLINT, keepReg);
- *useMaskPtr = regs;
- return true;
- }
-
- bool rev;
- GenTreePtr rv1;
- GenTreePtr rv2;
- bool operIsArrIndex; // is oper an array index
- GenTreePtr scaledIndex; // If scaled addressing mode can't be used
-
- regMaskTP anyMask = RBM_ALLINT;
-
- unsigned cns;
- unsigned mul;
-
- GenTreePtr tmp;
- int ixv = INT_MAX; // unset value
-
- GenTreePtr scaledIndexVal;
-
- regMaskTP newLiveMask;
- regMaskTP rv1Mask;
- regMaskTP rv2Mask;
-
- /* Deferred address mode forming NYI for x86 */
-
- noway_assert(deferOK == false);
-
- noway_assert(oper == NULL ||
- ((oper->OperIsIndir() || oper->OperIsAtomicOp()) &&
- ((oper->gtOper == GT_CMPXCHG && oper->gtCmpXchg.gtOpLocation == addr) || oper->gtOp.gtOp1 == addr)));
- operIsArrIndex = (oper != nullptr && oper->OperGet() == GT_IND && (oper->gtFlags & GTF_IND_ARR_INDEX) != 0);
-
- if (addr->gtOper == GT_LEA)
- {
- rev = (addr->gtFlags & GTF_REVERSE_OPS) != 0;
- GenTreeAddrMode* lea = addr->AsAddrMode();
- rv1 = lea->Base();
- rv2 = lea->Index();
- mul = lea->gtScale;
- cns = lea->gtOffset;
-
- if (rv1 != NULL && rv2 == NULL && cns == 0 && (rv1->gtFlags & GTF_REG_VAL) != 0)
- {
- scaledIndex = NULL;
- goto YES;
- }
- }
- else
- {
- // NOTE: FOR NOW THIS ISN'T APPROPRIATELY INDENTED - THIS IS TO MAKE IT
- // EASIER TO MERGE
-
- /* Is the complete address already sitting in a register? */
-
- if ((addr->gtFlags & GTF_REG_VAL) || (addr->gtOper == GT_LCL_VAR && genMarkLclVar(addr)))
- {
- genUpdateLife(addr);
-
- rv1 = addr;
- rv2 = scaledIndex = 0;
- cns = 0;
-
- goto YES;
- }
-
- /* Is it an absolute address */
-
- if (addr->IsCnsIntOrI())
- {
- rv1 = rv2 = scaledIndex = 0;
- // along this code path cns is never used, so place a BOGUS value in it as proof
- // cns = addr->gtIntCon.gtIconVal;
- cns = UINT_MAX;
-
- goto YES;
- }
-
- /* Is there a chance of forming an address mode? */
-
- if (!genCreateAddrMode(addr, forLea ? 1 : 0, false, regMask, &rev, &rv1, &rv2, &mul, &cns))
- {
- /* This better not be an array index */
- noway_assert(!operIsArrIndex);
-
- return false;
- }
- // THIS IS THE END OF THE INAPPROPRIATELY INDENTED SECTION
- }
-
- /* For scaled array access, RV2 may not be pointing to the index of the
- array if the CPU does not support the needed scaling factor. We will
- make it point to the actual index, and scaledIndex will point to
- the scaled value */
-
- scaledIndex = NULL;
- scaledIndexVal = NULL;
-
- if (operIsArrIndex && rv2 != NULL && (rv2->gtOper == GT_MUL || rv2->gtOper == GT_LSH) &&
- rv2->gtOp.gtOp2->IsIntCnsFitsInI32())
- {
- scaledIndex = rv2;
- compiler->optGetArrayRefScaleAndIndex(scaledIndex, &scaledIndexVal DEBUGARG(true));
-
- noway_assert(scaledIndex->gtOp.gtOp2->IsIntCnsFitsInI32());
- }
-
- /* Has the address already been computed? */
-
- if (addr->gtFlags & GTF_REG_VAL)
- {
- if (forLea)
- return true;
-
- rv1 = addr;
- rv2 = NULL;
- scaledIndex = NULL;
- genUpdateLife(addr);
- goto YES;
- }
-
- /*
- Here we have the following operands:
-
- rv1 ..... base address
- rv2 ..... offset value (or NULL)
- mul ..... multiplier for rv2 (or 0)
- cns ..... additional constant (or 0)
-
- The first operand must be present (and be an address) unless we're
- computing an expression via 'LEA'. The scaled operand is optional,
- but must not be a pointer if present.
- */
-
- noway_assert(rv2 == NULL || !varTypeIsGC(rv2->TypeGet()));
-
- /*-------------------------------------------------------------------------
- *
- * Make sure both rv1 and rv2 (if present) are in registers
- *
- */
-
- // Trivial case : Is either rv1 or rv2 a NULL ?
-
- if (!rv2)
- {
- /* A single operand, make sure it's in a register */
-
- if (cns != 0)
- {
- // In the case where "rv1" is already in a register, there's no reason to get into a
- // register in "regMask" yet, if there's a non-zero constant that we're going to add;
- // if there is, we can do an LEA.
- genCodeForTree(rv1, RBM_NONE);
- }
- else
- {
- genCodeForTree(rv1, regMask);
- }
- goto DONE_REGS;
- }
- else if (!rv1)
- {
- /* A single (scaled) operand, make sure it's in a register */
-
- genCodeForTree(rv2, 0);
- goto DONE_REGS;
- }
-
- /* At this point, both rv1 and rv2 are non-NULL and we have to make sure
- they are in registers */
-
- noway_assert(rv1 && rv2);
-
- /* If we have to check a constant array index, compare it against
- the array dimension (see below) but then fold the index with a
- scaling factor (if any) and additional offset (if any).
- */
-
- if (rv2->gtOper == GT_CNS_INT || (scaledIndex != NULL && scaledIndexVal->gtOper == GT_CNS_INT))
- {
- if (scaledIndex != NULL)
- {
- assert(rv2 == scaledIndex && scaledIndexVal != NULL);
- rv2 = scaledIndexVal;
- }
- /* We must have a range-checked index operation */
-
- noway_assert(operIsArrIndex);
-
- /* Get hold of the index value and see if it's a constant */
-
- if (rv2->IsIntCnsFitsInI32())
- {
- ixv = (int)rv2->gtIntCon.gtIconVal;
- // Maybe I should just set "fold" true in the call to genMakeAddressable above.
- if (scaledIndex != NULL)
- {
- int scale = 1 << ((int)scaledIndex->gtOp.gtOp2->gtIntCon.gtIconVal); // If this truncates, that's OK --
- // multiple of 2^6.
- if (mul == 0)
- {
- mul = scale;
- }
- else
- {
- mul *= scale;
- }
- }
- rv2 = scaledIndex = NULL;
-
- /* Add the scaled index into the added value */
-
- if (mul)
- cns += ixv * mul;
- else
- cns += ixv;
-
- /* Make sure 'rv1' is in a register */
-
- genCodeForTree(rv1, regMask);
-
- goto DONE_REGS;
- }
- }
-
- if (rv1->gtFlags & GTF_REG_VAL)
- {
- /* op1 already in register - how about op2? */
-
- if (rv2->gtFlags & GTF_REG_VAL)
- {
- /* Great - both operands are in registers already. Just update
- the liveness and we are done. */
-
- if (rev)
- {
- genUpdateLife(rv2);
- genUpdateLife(rv1);
- }
- else
- {
- genUpdateLife(rv1);
- genUpdateLife(rv2);
- }
-
- goto DONE_REGS;
- }
-
- /* rv1 is in a register, but rv2 isn't */
-
- if (!rev)
- {
- /* rv1 is already materialized in a register. Just update liveness
- to rv1 and generate code for rv2 */
-
- genUpdateLife(rv1);
- regSet.rsMarkRegUsed(rv1, oper);
- }
-
- goto GEN_RV2;
- }
- else if (rv2->gtFlags & GTF_REG_VAL)
- {
- /* rv2 is in a register, but rv1 isn't */
-
- noway_assert(rv2->gtOper == GT_REG_VAR);
-
- if (rev)
- {
- /* rv2 is already materialized in a register. Update liveness
- to after rv2 and then hang on to rv2 */
-
- genUpdateLife(rv2);
- regSet.rsMarkRegUsed(rv2, oper);
- }
-
- /* Generate the code for the first operand */
-
- genCodeForTree(rv1, regMask);
-
- if (rev)
- {
- // Free up rv2 in the right fashion (it might be re-marked if keepReg)
- regSet.rsMarkRegUsed(rv1, oper);
- regSet.rsLockUsedReg(genRegMask(rv1->gtRegNum));
- genReleaseReg(rv2);
- regSet.rsUnlockUsedReg(genRegMask(rv1->gtRegNum));
- genReleaseReg(rv1);
- }
- else
- {
- /* We have evaluated rv1, and now we just need to update liveness
- to rv2 which was already in a register */
-
- genUpdateLife(rv2);
- }
-
- goto DONE_REGS;
- }
-
- if (forLea && !cns)
- return false;
-
- /* Make sure we preserve the correct operand order */
-
- if (rev)
- {
- /* Generate the second operand first */
-
- // Determine what registers go live between rv2 and rv1
- newLiveMask = genNewLiveRegMask(rv2, rv1);
-
- rv2Mask = regMask & ~newLiveMask;
- rv2Mask &= ~rv1->gtRsvdRegs;
-
- if (rv2Mask == RBM_NONE)
- {
- // The regMask hint cannot be honored
- // We probably have a call that trashes the register(s) in regMask
- // so ignore the regMask hint, but try to avoid using
- // the registers in newLiveMask and the rv1->gtRsvdRegs
- //
- rv2Mask = RBM_ALLINT & ~newLiveMask;
- rv2Mask = regSet.rsMustExclude(rv2Mask, rv1->gtRsvdRegs);
- }
-
- genCodeForTree(rv2, rv2Mask);
- regMask &= ~genRegMask(rv2->gtRegNum);
-
- regSet.rsMarkRegUsed(rv2, oper);
-
- /* Generate the first operand second */
-
- genCodeForTree(rv1, regMask);
- regSet.rsMarkRegUsed(rv1, oper);
-
- /* Free up both operands in the right order (they might be
- re-marked as used below)
- */
- regSet.rsLockUsedReg(genRegMask(rv1->gtRegNum));
- genReleaseReg(rv2);
- regSet.rsUnlockUsedReg(genRegMask(rv1->gtRegNum));
- genReleaseReg(rv1);
- }
- else
- {
- /* Get the first operand into a register */
-
- // Determine what registers go live between rv1 and rv2
- newLiveMask = genNewLiveRegMask(rv1, rv2);
-
- rv1Mask = regMask & ~newLiveMask;
- rv1Mask &= ~rv2->gtRsvdRegs;
-
- if (rv1Mask == RBM_NONE)
- {
- // The regMask hint cannot be honored
- // We probably have a call that trashes the register(s) in regMask
- // so ignore the regMask hint, but try to avoid using
- // the registers in liveMask and the rv2->gtRsvdRegs
- //
- rv1Mask = RBM_ALLINT & ~newLiveMask;
- rv1Mask = regSet.rsMustExclude(rv1Mask, rv2->gtRsvdRegs);
- }
-
- genCodeForTree(rv1, rv1Mask);
- regSet.rsMarkRegUsed(rv1, oper);
-
- GEN_RV2:
-
- /* Here, we need to get rv2 in a register. We have either already
- materialized rv1 into a register, or it was already in one */
-
- noway_assert(rv1->gtFlags & GTF_REG_VAL);
- noway_assert(rev || regSet.rsIsTreeInReg(rv1->gtRegNum, rv1));
-
- /* Generate the second operand as well */
-
- regMask &= ~genRegMask(rv1->gtRegNum);
- genCodeForTree(rv2, regMask);
-
- if (rev)
- {
- /* rev==true means the evaluation order is rv2,rv1. We just
- evaluated rv2, and rv1 was already in a register. Just
- update liveness to rv1 and we are done. */
-
- genUpdateLife(rv1);
- }
- else
- {
- /* We have evaluated rv1 and rv2. Free up both operands in
- the right order (they might be re-marked as used below) */
-
- /* Even though we have not explicitly marked rv2 as used,
- rv2->gtRegNum may be used if rv2 is a multi-use or
- an enregistered variable. */
- regMaskTP rv2Used;
- regSet.rsLockReg(genRegMask(rv2->gtRegNum), &rv2Used);
-
- /* Check for special case both rv1 and rv2 are the same register */
- if (rv2Used != genRegMask(rv1->gtRegNum))
- {
- genReleaseReg(rv1);
- regSet.rsUnlockReg(genRegMask(rv2->gtRegNum), rv2Used);
- }
- else
- {
- regSet.rsUnlockReg(genRegMask(rv2->gtRegNum), rv2Used);
- genReleaseReg(rv1);
- }
- }
- }
-
-/*-------------------------------------------------------------------------
- *
- * At this point, both rv1 and rv2 (if present) are in registers
- *
- */
-
-DONE_REGS:
-
- /* We must verify that 'rv1' and 'rv2' are both sitting in registers */
-
- if (rv1 && !(rv1->gtFlags & GTF_REG_VAL))
- return false;
- if (rv2 && !(rv2->gtFlags & GTF_REG_VAL))
- return false;
-
-YES:
-
- // *(intVar1+intVar1) causes problems as we
- // call regSet.rsMarkRegUsed(op1) and regSet.rsMarkRegUsed(op2). So the calling function
- // needs to know that it has to call rsFreeReg(reg1) twice. We can't do
- // that currently as we return a single mask in useMaskPtr.
-
- if ((keepReg == RegSet::KEEP_REG) && oper && rv1 && rv2 && (rv1->gtFlags & rv2->gtFlags & GTF_REG_VAL))
- {
- if (rv1->gtRegNum == rv2->gtRegNum)
- {
- noway_assert(!operIsArrIndex);
- return false;
- }
- }
-
- /* Check either register operand to see if it needs to be saved */
-
- if (rv1)
- {
- noway_assert(rv1->gtFlags & GTF_REG_VAL);
-
- if (keepReg == RegSet::KEEP_REG)
- {
- regSet.rsMarkRegUsed(rv1, oper);
- }
- else
- {
- /* If the register holds an address, mark it */
-
- gcInfo.gcMarkRegPtrVal(rv1->gtRegNum, rv1->TypeGet());
- }
- }
-
- if (rv2)
- {
- noway_assert(rv2->gtFlags & GTF_REG_VAL);
-
- if (keepReg == RegSet::KEEP_REG)
- regSet.rsMarkRegUsed(rv2, oper);
- }
-
- if (deferOK)
- {
- noway_assert(!scaledIndex);
- return true;
- }
-
- /* Compute the set of registers the address depends on */
-
- regMaskTP useMask = RBM_NONE;
-
- if (rv1)
- {
- if (rv1->gtFlags & GTF_SPILLED)
- regSet.rsUnspillReg(rv1, 0, RegSet::KEEP_REG);
-
- noway_assert(rv1->gtFlags & GTF_REG_VAL);
- useMask |= genRegMask(rv1->gtRegNum);
- }
-
- if (rv2)
- {
- if (rv2->gtFlags & GTF_SPILLED)
- {
- if (rv1)
- {
- regMaskTP lregMask = genRegMask(rv1->gtRegNum);
- regMaskTP used;
-
- regSet.rsLockReg(lregMask, &used);
- regSet.rsUnspillReg(rv2, 0, RegSet::KEEP_REG);
- regSet.rsUnlockReg(lregMask, used);
- }
- else
- regSet.rsUnspillReg(rv2, 0, RegSet::KEEP_REG);
- }
- noway_assert(rv2->gtFlags & GTF_REG_VAL);
- useMask |= genRegMask(rv2->gtRegNum);
- }
-
- /* Tell the caller which registers we need to hang on to */
-
- *useMaskPtr = useMask;
-
- return true;
-}
-
-/*****************************************************************************
- *
- * 'oper' is an array bounds check (a GT_ARR_BOUNDS_CHECK node).
- */
-
-void CodeGen::genRangeCheck(GenTreePtr oper)
-{
- noway_assert(oper->OperGet() == GT_ARR_BOUNDS_CHECK);
- GenTreeBoundsChk* bndsChk = oper->AsBoundsChk();
-
- GenTreePtr arrLen = bndsChk->gtArrLen;
- GenTreePtr arrRef = NULL;
- int lenOffset = 0;
-
- // If "arrLen" is a ARR_LENGTH operation, get the array whose length that takes in a register.
- // Otherwise, if the length is not a constant, get it (the length, not the arr reference) in
- // a register.
-
- if (arrLen->OperGet() == GT_ARR_LENGTH)
- {
- GenTreeArrLen* arrLenExact = arrLen->AsArrLen();
- lenOffset = arrLenExact->ArrLenOffset();
-
-#if !CPU_LOAD_STORE_ARCH && !defined(_TARGET_64BIT_)
- // We always load the length into a register on ARM and x64.
-
- // 64-bit has to act like LOAD_STORE_ARCH because the array only holds 32-bit
- // lengths, but the index expression *can* be native int (64-bits)
- arrRef = arrLenExact->ArrRef();
- genCodeForTree(arrRef, RBM_ALLINT);
- noway_assert(arrRef->gtFlags & GTF_REG_VAL);
- regSet.rsMarkRegUsed(arrRef);
- noway_assert(regSet.rsMaskUsed & genRegMask(arrRef->gtRegNum));
-#endif
- }
-#if !CPU_LOAD_STORE_ARCH && !defined(_TARGET_64BIT_)
- // This is another form in which we have an array reference and a constant length. Don't use
- // on LOAD_STORE or 64BIT.
- else if (arrLen->OperGet() == GT_IND && arrLen->gtOp.gtOp1->IsAddWithI32Const(&arrRef, &lenOffset))
- {
- genCodeForTree(arrRef, RBM_ALLINT);
- noway_assert(arrRef->gtFlags & GTF_REG_VAL);
- regSet.rsMarkRegUsed(arrRef);
- noway_assert(regSet.rsMaskUsed & genRegMask(arrRef->gtRegNum));
- }
-#endif
-
- // If we didn't find one of the special forms above, generate code to evaluate the array length to a register.
- if (arrRef == NULL)
- {
- // (Unless it's a constant.)
- if (!arrLen->IsCnsIntOrI())
- {
- genCodeForTree(arrLen, RBM_ALLINT);
- regSet.rsMarkRegUsed(arrLen);
-
- noway_assert(arrLen->gtFlags & GTF_REG_VAL);
- noway_assert(regSet.rsMaskUsed & genRegMask(arrLen->gtRegNum));
- }
- }
-
- /* Is the array index a constant value? */
- GenTreePtr index = bndsChk->gtIndex;
- if (!index->IsCnsIntOrI())
- {
- // No, it's not a constant.
- genCodeForTree(index, RBM_ALLINT);
- regSet.rsMarkRegUsed(index);
-
- // If we need "arrRef" or "arrLen", and evaluating "index" displaced whichever of them we're using
- // from its register, get it back in a register.
- if (arrRef != NULL)
- genRecoverReg(arrRef, ~genRegMask(index->gtRegNum), RegSet::KEEP_REG);
- else if (!arrLen->IsCnsIntOrI())
- genRecoverReg(arrLen, ~genRegMask(index->gtRegNum), RegSet::KEEP_REG);
-
- /* Make sure we have the values we expect */
- noway_assert(index->gtFlags & GTF_REG_VAL);
- noway_assert(regSet.rsMaskUsed & genRegMask(index->gtRegNum));
-
- noway_assert(index->TypeGet() == TYP_I_IMPL ||
- (varTypeIsIntegral(index->TypeGet()) && !varTypeIsLong(index->TypeGet())));
- var_types indxType = index->TypeGet();
- if (indxType != TYP_I_IMPL)
- indxType = TYP_INT;
-
- if (arrRef != NULL)
- { // _TARGET_X86_ or X64 when we have a TYP_INT (32-bit) index expression in the index register
-
- /* Generate "cmp index, [arrRef+LenOffs]" */
- inst_RV_AT(INS_cmp, emitTypeSize(indxType), indxType, index->gtRegNum, arrRef, lenOffset);
- }
- else if (arrLen->IsCnsIntOrI())
- {
- ssize_t len = arrLen->AsIntConCommon()->IconValue();
- inst_RV_IV(INS_cmp, index->gtRegNum, len, EA_4BYTE);
- }
- else
- {
- inst_RV_RV(INS_cmp, index->gtRegNum, arrLen->gtRegNum, indxType, emitTypeSize(indxType));
- }
-
- /* Generate "jae <fail_label>" */
-
- noway_assert(oper->gtOper == GT_ARR_BOUNDS_CHECK);
- emitJumpKind jmpGEU = genJumpKindForOper(GT_GE, CK_UNSIGNED);
- genJumpToThrowHlpBlk(jmpGEU, SCK_RNGCHK_FAIL, bndsChk->gtIndRngFailBB);
- }
- else
- {
- /* Generate "cmp [rv1+LenOffs], cns" */
-
- bool indIsInt = true;
-#ifdef _TARGET_64BIT_
- int ixv = 0;
- ssize_t ixvFull = index->AsIntConCommon()->IconValue();
- if (ixvFull > INT32_MAX)
- {
- indIsInt = false;
- }
- else
- {
- ixv = (int)ixvFull;
- }
-#else
- ssize_t ixvFull = index->AsIntConCommon()->IconValue();
- int ixv = (int)ixvFull;
-#endif
- if (arrRef != NULL && indIsInt)
- { // _TARGET_X86_ or X64 when we have a TYP_INT (32-bit) index expression in the index register
- /* Generate "cmp [arrRef+LenOffs], ixv" */
- inst_AT_IV(INS_cmp, EA_4BYTE, arrRef, ixv, lenOffset);
- // Generate "jbe <fail_label>"
- emitJumpKind jmpLEU = genJumpKindForOper(GT_LE, CK_UNSIGNED);
- genJumpToThrowHlpBlk(jmpLEU, SCK_RNGCHK_FAIL, bndsChk->gtIndRngFailBB);
- }
- else if (arrLen->IsCnsIntOrI())
- {
- ssize_t lenv = arrLen->AsIntConCommon()->IconValue();
- // Both are constants; decide at compile time.
- if (!(0 <= ixvFull && ixvFull < lenv))
- {
- genJumpToThrowHlpBlk(EJ_jmp, SCK_RNGCHK_FAIL, bndsChk->gtIndRngFailBB);
- }
- }
- else if (!indIsInt)
- {
- genJumpToThrowHlpBlk(EJ_jmp, SCK_RNGCHK_FAIL, bndsChk->gtIndRngFailBB);
- }
- else
- {
- /* Generate "cmp arrLen, ixv" */
- inst_RV_IV(INS_cmp, arrLen->gtRegNum, ixv, EA_4BYTE);
- // Generate "jbe <fail_label>"
- emitJumpKind jmpLEU = genJumpKindForOper(GT_LE, CK_UNSIGNED);
- genJumpToThrowHlpBlk(jmpLEU, SCK_RNGCHK_FAIL, bndsChk->gtIndRngFailBB);
- }
- }
-
- // Free the registers that were used.
- if (arrRef != NULL)
- {
- regSet.rsMarkRegFree(arrRef->gtRegNum, arrRef);
- }
- else if (!arrLen->IsCnsIntOrI())
- {
- regSet.rsMarkRegFree(arrLen->gtRegNum, arrLen);
- }
-
- if (!index->IsCnsIntOrI())
- {
- regSet.rsMarkRegFree(index->gtRegNum, index);
- }
-}
-
-/*****************************************************************************
- *
- * If compiling without REDUNDANT_LOAD, same as genMakeAddressable().
- * Otherwise, check if the rvalue is already in a register. If so, mark it. Then
- * call genMakeAddressable(). Needed because genMakeAddressable() is used
- * for both lvalues and rvalues, and we can only do this for an rvalue.
- */
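-
-/*
- * Illustrative sketch (hypothetical caller; the operand names, mask, and
- * instruction are placeholders): an rvalue operand is made addressable,
- * read once, and then released:
- *
- *     regMaskTP addrReg = genMakeRvalueAddressable(op2, RBM_NONE, RegSet::KEEP_REG, false, true);
- *     inst_RV_TT(INS_cmp, op1->gtRegNum, op2);             // op2 is only read, never written
- *     genDoneAddressable(op2, addrReg, RegSet::KEEP_REG);  // free the address registers
- */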
-
-// inline
-regMaskTP CodeGen::genMakeRvalueAddressable(
- GenTreePtr tree, regMaskTP needReg, RegSet::KeepReg keepReg, bool forLoadStore, bool smallOK)
-{
- regNumber reg;
-
-#if REDUNDANT_LOAD
-
- if (tree->gtOper == GT_LCL_VAR)
- {
- reg = findStkLclInReg(tree->gtLclVarCommon.gtLclNum);
-
- if (reg != REG_NA && (needReg == 0 || (genRegMask(reg) & needReg) != 0))
- {
- noway_assert(!isRegPairType(tree->gtType));
-
- genMarkTreeInReg(tree, reg);
- }
- }
-
-#endif
-
- return genMakeAddressable2(tree, needReg, keepReg, forLoadStore, smallOK);
-}
-
-/*****************************************************************************/
-
-bool CodeGen::genIsLocalLastUse(GenTreePtr tree)
-{
- const LclVarDsc* varDsc = &compiler->lvaTable[tree->gtLclVarCommon.gtLclNum];
-
- noway_assert(tree->OperGet() == GT_LCL_VAR);
- noway_assert(varDsc->lvTracked);
-
- return ((tree->gtFlags & GTF_VAR_DEATH) != 0);
-}
-
-/*****************************************************************************
- *
- * This is genMakeAddressable(GT_ARR_ELEM).
- * Makes the array-element addressable and returns the addressability registers.
- * It also marks them as used if keepReg==RegSet::KEEP_REG.
- * tree is the dependent tree.
- *
- * Note that an array-element needs 2 registers to be addressable: the
- * array-object and the offset. This function marks gtArrObj and gtArrInds[0]
- * with the 2 registers so that other functions (like instGetAddrMode()) know
- * where to look for the offset to use.
- */
-
-regMaskTP CodeGen::genMakeAddrArrElem(GenTreePtr arrElem, GenTreePtr tree, regMaskTP needReg, RegSet::KeepReg keepReg)
-{
- noway_assert(arrElem->gtOper == GT_ARR_ELEM);
- noway_assert(!tree || tree->gtOper == GT_IND || tree == arrElem);
-
- /* Evaluate all the operands. We don't evaluate them into registers yet
- as GT_ARR_ELEM does not reorder the evaluation of the operands, and
- hence may use a sub-optimal ordering. We try to improve this
- situation somewhat by accessing the operands in stages
- (genMakeAddressable2 + genComputeAddressable and
- genCompIntoFreeReg + genRecoverReg).
-
- Note: we compute operands into free regs to avoid multiple uses of
- the same register. Multi-use would cause problems when we free
- registers in FIFO order instead of the assumed LIFO order that
- applies to all types of tree nodes except for GT_ARR_ELEM.
- */
-
- GenTreePtr arrObj = arrElem->gtArrElem.gtArrObj;
- unsigned rank = arrElem->gtArrElem.gtArrRank;
- var_types elemType = arrElem->gtArrElem.gtArrElemType;
- regMaskTP addrReg = RBM_NONE;
- regMaskTP regNeed = RBM_ALLINT;
-
-#if FEATURE_WRITE_BARRIER && !NOGC_WRITE_BARRIERS
- // In CodeGen::WriteBarrier we set up ARG_1 followed by ARG_0.
- // Since the arrObj participates in the lea/add instruction
- // that computes ARG_0, we should avoid putting it in ARG_1.
- //
- if (varTypeIsGC(elemType))
- {
- regNeed &= ~RBM_ARG_1;
- }
-#endif
-
- // Strip off any comma expression.
- arrObj = genCodeForCommaTree(arrObj);
-
- // Having generated the code for the comma, we don't care about it anymore.
- arrElem->gtArrElem.gtArrObj = arrObj;
-
- // If the array ref is a stack var that's dying here we have to move it
- // into a register (regalloc already counts on this): if it's a GC pointer
- // it can be collected from here on. This is not an issue for locals that are
- // in a register, as they get marked as used and will be tracked.
- // The bug that caused this is #100776. (untracked vars?)
- if (arrObj->OperGet() == GT_LCL_VAR && compiler->optIsTrackedLocal(arrObj) && genIsLocalLastUse(arrObj) &&
- !genMarkLclVar(arrObj))
- {
- genCodeForTree(arrObj, regNeed);
- regSet.rsMarkRegUsed(arrObj, 0);
- addrReg = genRegMask(arrObj->gtRegNum);
- }
- else
- {
- addrReg = genMakeAddressable2(arrObj, regNeed, RegSet::KEEP_REG,
- true, // forLoadStore
- false, // smallOK
- false, // deferOK
- true); // evalSideEffs
- }
-
- unsigned dim;
- for (dim = 0; dim < rank; dim++)
- genCompIntoFreeReg(arrElem->gtArrElem.gtArrInds[dim], RBM_NONE, RegSet::KEEP_REG);
-
- /* Ensure that the array-object is in a register */
-
- addrReg = genKeepAddressable(arrObj, addrReg);
- genComputeAddressable(arrObj, addrReg, RegSet::KEEP_REG, regNeed, RegSet::KEEP_REG);
-
- regNumber arrReg = arrObj->gtRegNum;
- regMaskTP arrRegMask = genRegMask(arrReg);
- regMaskTP indRegMask = RBM_ALLINT & ~arrRegMask;
- regSet.rsLockUsedReg(arrRegMask);
-
- /* Now process all the indices, do the range check, and compute
- the offset of the element */
-
- regNumber accReg = DUMMY_INIT(REG_CORRUPT); // accumulates the offset calculation
-
- for (dim = 0; dim < rank; dim++)
- {
- GenTreePtr index = arrElem->gtArrElem.gtArrInds[dim];
-
- /* Get the index into a free register (other than the register holding the array) */
-
- genRecoverReg(index, indRegMask, RegSet::KEEP_REG);
-
-#if CPU_LOAD_STORE_ARCH
- /* Subtract the lower bound, and do the range check */
-
- regNumber valueReg = regSet.rsGrabReg(RBM_ALLINT & ~genRegMask(arrReg) & ~genRegMask(index->gtRegNum));
- getEmitter()->emitIns_R_AR(INS_ldr, EA_4BYTE, valueReg, arrReg,
- compiler->eeGetArrayDataOffset(elemType) + sizeof(int) * (dim + rank));
- regTracker.rsTrackRegTrash(valueReg);
- getEmitter()->emitIns_R_R(INS_sub, EA_4BYTE, index->gtRegNum, valueReg);
- regTracker.rsTrackRegTrash(index->gtRegNum);
-
- getEmitter()->emitIns_R_AR(INS_ldr, EA_4BYTE, valueReg, arrReg,
- compiler->eeGetArrayDataOffset(elemType) + sizeof(int) * dim);
- getEmitter()->emitIns_R_R(INS_cmp, EA_4BYTE, index->gtRegNum, valueReg);
-#else
- /* Subtract the lower bound, and do the range check */
- getEmitter()->emitIns_R_AR(INS_sub, EA_4BYTE, index->gtRegNum, arrReg,
- compiler->eeGetArrayDataOffset(elemType) + sizeof(int) * (dim + rank));
- regTracker.rsTrackRegTrash(index->gtRegNum);
-
- getEmitter()->emitIns_R_AR(INS_cmp, EA_4BYTE, index->gtRegNum, arrReg,
- compiler->eeGetArrayDataOffset(elemType) + sizeof(int) * dim);
-#endif
- emitJumpKind jmpGEU = genJumpKindForOper(GT_GE, CK_UNSIGNED);
- genJumpToThrowHlpBlk(jmpGEU, SCK_RNGCHK_FAIL);
-
- if (dim == 0)
- {
- /* Hang on to the register of the first index */
-
- noway_assert(accReg == DUMMY_INIT(REG_CORRUPT));
- accReg = index->gtRegNum;
- noway_assert(accReg != arrReg);
- regSet.rsLockUsedReg(genRegMask(accReg));
- }
- else
- {
- /* Evaluate accReg = accReg*dim_size + index */
-
- noway_assert(accReg != DUMMY_INIT(REG_CORRUPT));
-#if CPU_LOAD_STORE_ARCH
- getEmitter()->emitIns_R_AR(INS_ldr, EA_4BYTE, valueReg, arrReg,
- compiler->eeGetArrayDataOffset(elemType) + sizeof(int) * dim);
- regTracker.rsTrackRegTrash(valueReg);
- getEmitter()->emitIns_R_R(INS_MUL, EA_4BYTE, accReg, valueReg);
-#else
- getEmitter()->emitIns_R_AR(INS_MUL, EA_4BYTE, accReg, arrReg,
- compiler->eeGetArrayDataOffset(elemType) + sizeof(int) * dim);
-#endif
-
- inst_RV_RV(INS_add, accReg, index->gtRegNum);
- regSet.rsMarkRegFree(index->gtRegNum, index);
- regTracker.rsTrackRegTrash(accReg);
- }
- }
-
- if (!jitIsScaleIndexMul(arrElem->gtArrElem.gtArrElemSize))
- {
- regNumber sizeReg = genGetRegSetToIcon(arrElem->gtArrElem.gtArrElemSize);
-
- getEmitter()->emitIns_R_R(INS_MUL, EA_4BYTE, accReg, sizeReg);
- regTracker.rsTrackRegTrash(accReg);
- }
-
- regSet.rsUnlockUsedReg(genRegMask(arrReg));
- regSet.rsUnlockUsedReg(genRegMask(accReg));
-
- regSet.rsMarkRegFree(genRegMask(arrReg));
- regSet.rsMarkRegFree(genRegMask(accReg));
-
- if (keepReg == RegSet::KEEP_REG)
- {
- /* We mark the addressability registers on arrObj and gtArrInds[0].
- instGetAddrMode() knows how to work with this. */
-
- regSet.rsMarkRegUsed(arrObj, tree);
- regSet.rsMarkRegUsed(arrElem->gtArrElem.gtArrInds[0], tree);
- }
-
- return genRegMask(arrReg) | genRegMask(accReg);
-}
-
-/*****************************************************************************
- *
- * Make sure the given tree is addressable. 'needReg' is a mask that indicates
- * the set of registers we would prefer the destination tree to be computed
- * into (RBM_NONE means no preference).
- *
- * 'tree' can subsequently be used with the inst_XX_TT() family of functions.
- *
- * If 'keepReg' is RegSet::KEEP_REG, we mark any registers the addressability depends
- * on as used, and return the mask for that register set (if no registers
- * are marked as used, RBM_NONE is returned).
- *
- * If 'smallOK' is not true and the datatype being addressed is a byte or short,
- * then the tree is forced into a register. This is useful when the machine
- * instruction being emitted does not have a byte or short version.
- *
- * The "deferOK" parameter indicates the mode of operation - when it's false,
- * upon returning an actual address mode must have been formed (i.e. it must
- * be possible to immediately call one of the inst_TT methods to operate on
- * the value). When "deferOK" is true, we do whatever it takes to be ready
- * to form the address mode later - for example, if an index address mode on
- * a particular CPU requires the use of a specific register, we usually don't
- * want to immediately grab that register for an address mode that will only
- * be needed later. The convention is to call genMakeAddressable() with
- * "deferOK" equal to true, do whatever work is needed to prepare the other
- * operand, call genMakeAddressable() with "deferOK" equal to false, and
- * finally call one of the inst_TT methods right after that.
- *
- * If we do any other codegen after genMakeAddressable(tree) which can
- * potentially spill the addressability registers, genKeepAddressable()
- * needs to be called before accessing the tree again.
- *
- * genDoneAddressable() needs to be called when we are done with the tree
- * to free the addressability registers.
- */
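-
-/*
- * Illustrative sketch (hypothetical caller, for exposition only); the masks,
- * instruction, and operand names are placeholders:
- *
- *     regMaskTP addrReg = genMakeAddressable(op1, RBM_NONE, RegSet::KEEP_REG,
- *                                            false,  // smallOK
- *                                            false); // deferOK
- *     ... other codegen that might spill the address registers ...
- *     addrReg = genKeepAddressable(op1, addrReg);          // re-establish addressability
- *     inst_RV_TT(INS_add, op2->gtRegNum, op1);             // use one of the inst_XX_TT methods
- *     genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);  // free the address registers
- */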
-
-regMaskTP CodeGen::genMakeAddressable(
- GenTreePtr tree, regMaskTP needReg, RegSet::KeepReg keepReg, bool smallOK, bool deferOK)
-{
- GenTreePtr addr = NULL;
- regMaskTP regMask;
-
- /* Is the value simply sitting in a register? */
-
- if (tree->gtFlags & GTF_REG_VAL)
- {
- genUpdateLife(tree);
-
- goto GOT_VAL;
- }
-
- // TODO: If the value is for example a cast of float -> int, compute
- // TODO: the converted value into a stack temp, and leave it there,
- // TODO: since stack temps are always addressable. This would require
- // TODO: recording the fact that a particular tree is in a stack temp.
-
- /* byte/char/short operand -- is this acceptable to the caller? */
-
- if (varTypeIsSmall(tree->TypeGet()) && !smallOK)
- goto EVAL_TREE;
-
- // Evaluate non-last elements of comma expressions, to get to the last.
- tree = genCodeForCommaTree(tree);
-
- switch (tree->gtOper)
- {
- case GT_LCL_FLD:
-
- // We only use GT_LCL_FLD for lvDoNotEnregister vars, so we don't have
- // to worry about it being enregistered.
- noway_assert(compiler->lvaTable[tree->gtLclFld.gtLclNum].lvRegister == 0);
-
- genUpdateLife(tree);
- return 0;
-
- case GT_LCL_VAR:
-
- if (!genMarkLclVar(tree))
- {
- genUpdateLife(tree);
- return 0;
- }
-
- __fallthrough; // it turns out the variable lives in a register
-
- case GT_REG_VAR:
-
- genUpdateLife(tree);
-
- goto GOT_VAL;
-
- case GT_CLS_VAR:
-
- return 0;
-
- case GT_CNS_INT:
-#ifdef _TARGET_64BIT_
- // Non-relocs will be sign extended, so we don't have to enregister
- // constants that are equivalent to a sign-extended int.
- // Relocs can be left alone if they are RIP-relative.
- if ((genTypeSize(tree->TypeGet()) > 4) &&
- (!tree->IsIntCnsFitsInI32() ||
- (tree->IsIconHandle() &&
- (IMAGE_REL_BASED_REL32 != compiler->eeGetRelocTypeHint((void*)tree->gtIntCon.gtIconVal)))))
- {
- break;
- }
-#endif // _TARGET_64BIT_
- __fallthrough;
-
- case GT_CNS_LNG:
- case GT_CNS_DBL:
- // For MinOpts, we don't do constant folding, so we have
- // constants showing up in places we don't like.
- // Force them into a register now to prevent that.
- if (compiler->opts.OptEnabled(CLFLG_CONSTANTFOLD))
- return 0;
- break;
-
- case GT_IND:
- case GT_NULLCHECK:
-
- /* Try to make the address directly addressable */
-
- if (genMakeIndAddrMode(tree->gtOp.gtOp1, tree, false, /* not for LEA */
- needReg, keepReg, &regMask, deferOK))
- {
- genUpdateLife(tree);
- return regMask;
- }
-
- /* No good, we'll have to load the address into a register */
-
- addr = tree;
- tree = tree->gtOp.gtOp1;
- break;
-
- default:
- break;
- }
-
-EVAL_TREE:
-
- /* Here we need to compute the value 'tree' into a register */
-
- genCodeForTree(tree, needReg);
-
-GOT_VAL:
-
- noway_assert(tree->gtFlags & GTF_REG_VAL);
-
- if (isRegPairType(tree->gtType))
- {
- /* Are we supposed to hang on to the register? */
-
- if (keepReg == RegSet::KEEP_REG)
- regSet.rsMarkRegPairUsed(tree);
-
- regMask = genRegPairMask(tree->gtRegPair);
- }
- else
- {
- /* Are we supposed to hang on to the register? */
-
- if (keepReg == RegSet::KEEP_REG)
- regSet.rsMarkRegUsed(tree, addr);
-
- regMask = genRegMask(tree->gtRegNum);
- }
-
- return regMask;
-}
-
-/*****************************************************************************
- * Compute a tree (which was previously made addressable using
- * genMakeAddressable()) into a register.
- * needReg - mask of preferred registers.
- * keepReg - should the computed register be marked as used by the tree
- * freeOnly - target register needs to be a scratch register
- */
-
-void CodeGen::genComputeAddressable(GenTreePtr tree,
- regMaskTP addrReg,
- RegSet::KeepReg keptReg,
- regMaskTP needReg,
- RegSet::KeepReg keepReg,
- bool freeOnly)
-{
- noway_assert(genStillAddressable(tree));
- noway_assert(varTypeIsIntegralOrI(tree->TypeGet()));
-
- genDoneAddressable(tree, addrReg, keptReg);
-
- regNumber reg;
-
- if (tree->gtFlags & GTF_REG_VAL)
- {
- reg = tree->gtRegNum;
-
- if (freeOnly && !(genRegMask(reg) & regSet.rsRegMaskFree()))
- goto MOVE_REG;
- }
- else
- {
- if (tree->OperIsConst())
- {
- /* Need to handle consts separately as we don't want to emit
- "mov reg, 0" (emitter doesn't like that). Also, genSetRegToIcon()
- handles consts better for SMALL_CODE */
-
- noway_assert(tree->IsCnsIntOrI());
- reg = genGetRegSetToIcon(tree->gtIntCon.gtIconVal, needReg, tree->gtType);
- }
- else
- {
- MOVE_REG:
- reg = regSet.rsPickReg(needReg);
-
- inst_RV_TT(INS_mov, reg, tree);
- regTracker.rsTrackRegTrash(reg);
- }
- }
-
- genMarkTreeInReg(tree, reg);
-
- if (keepReg == RegSet::KEEP_REG)
- regSet.rsMarkRegUsed(tree);
- else
- gcInfo.gcMarkRegPtrVal(tree);
-}
-
-/*****************************************************************************
- * Should be similar to genMakeAddressable() but gives more control.
- */
-
-regMaskTP CodeGen::genMakeAddressable2(GenTreePtr tree,
- regMaskTP needReg,
- RegSet::KeepReg keepReg,
- bool forLoadStore,
- bool smallOK,
- bool deferOK,
- bool evalSideEffs)
-
-{
- bool evalToReg = false;
-
- if (evalSideEffs && (tree->gtOper == GT_IND) && (tree->gtFlags & GTF_EXCEPT))
- evalToReg = true;
-
-#if CPU_LOAD_STORE_ARCH
- if (!forLoadStore)
- evalToReg = true;
-#endif
-
- if (evalToReg)
- {
- genCodeForTree(tree, needReg);
-
- noway_assert(tree->gtFlags & GTF_REG_VAL);
-
- if (isRegPairType(tree->gtType))
- {
- /* Are we supposed to hang on to the register? */
-
- if (keepReg == RegSet::KEEP_REG)
- regSet.rsMarkRegPairUsed(tree);
-
- return genRegPairMask(tree->gtRegPair);
- }
- else
- {
- /* Are we supposed to hang on to the register? */
-
- if (keepReg == RegSet::KEEP_REG)
- regSet.rsMarkRegUsed(tree);
-
- return genRegMask(tree->gtRegNum);
- }
- }
- else
- {
- return genMakeAddressable(tree, needReg, keepReg, smallOK, deferOK);
- }
-}
-
-/*****************************************************************************
- *
- * The given tree was previously passed to genMakeAddressable(); return
- * 'true' if the operand is still addressable.
- */
-
-// inline
-bool CodeGen::genStillAddressable(GenTreePtr tree)
-{
- /* Has the value (or one or more of its sub-operands) been spilled? */
-
- if (tree->gtFlags & (GTF_SPILLED | GTF_SPILLED_OPER))
- return false;
-
- return true;
-}
-
-/*****************************************************************************
- *
- * Recursive helper to restore complex address modes. The 'lockPhase'
- * argument indicates whether we're in the 'lock' or 'reload' phase.
- */
-
-regMaskTP CodeGen::genRestoreAddrMode(GenTreePtr addr, GenTreePtr tree, bool lockPhase)
-{
- regMaskTP regMask = RBM_NONE;
-
- /* Have we found a spilled value? */
-
- if (tree->gtFlags & GTF_SPILLED)
- {
- /* Do nothing if we're locking, otherwise reload and lock */
-
- if (!lockPhase)
- {
- /* Unspill the register */
-
- regSet.rsUnspillReg(tree, 0, RegSet::FREE_REG);
-
- /* The value should now be sitting in a register */
-
- noway_assert(tree->gtFlags & GTF_REG_VAL);
- regMask = genRegMask(tree->gtRegNum);
-
- /* Mark the register as used for the address */
-
- regSet.rsMarkRegUsed(tree, addr);
-
- /* Lock the register until we're done with the entire address */
-
- regSet.rsMaskLock |= regMask;
- }
-
- return regMask;
- }
-
- /* Is this sub-tree sitting in a register? */
-
- if (tree->gtFlags & GTF_REG_VAL)
- {
- regMask = genRegMask(tree->gtRegNum);
-
- /* Lock the register if we're in the locking phase */
-
- if (lockPhase)
- regSet.rsMaskLock |= regMask;
- }
- else
- {
- /* Process any sub-operands of this node */
-
- unsigned kind = tree->OperKind();
-
- if (kind & GTK_SMPOP)
- {
- /* Unary/binary operator */
-
- if (tree->gtOp.gtOp1)
- regMask |= genRestoreAddrMode(addr, tree->gtOp.gtOp1, lockPhase);
- if (tree->gtGetOp2())
- regMask |= genRestoreAddrMode(addr, tree->gtOp.gtOp2, lockPhase);
- }
- else if (tree->gtOper == GT_ARR_ELEM)
- {
- /* gtArrObj is the array-object and gtArrInds[0] is marked with the register
- which holds the offset-calculation */
-
- regMask |= genRestoreAddrMode(addr, tree->gtArrElem.gtArrObj, lockPhase);
- regMask |= genRestoreAddrMode(addr, tree->gtArrElem.gtArrInds[0], lockPhase);
- }
- else if (tree->gtOper == GT_CMPXCHG)
- {
- regMask |= genRestoreAddrMode(addr, tree->gtCmpXchg.gtOpLocation, lockPhase);
- }
- else
- {
- /* Must be a leaf/constant node */
-
- noway_assert(kind & (GTK_LEAF | GTK_CONST));
- }
- }
-
- return regMask;
-}
-
-/*****************************************************************************
- *
- * The given tree was previously passed to genMakeAddressable, but since then
- * some of its registers are known to have been spilled; do whatever it takes
- * to make the operand addressable again (typically by reloading any spilled
- * registers).
- */
-
-regMaskTP CodeGen::genRestAddressable(GenTreePtr tree, regMaskTP addrReg, regMaskTP lockMask)
-{
- noway_assert((regSet.rsMaskLock & lockMask) == lockMask);
-
- /* Is this a 'simple' register spill? */
-
- if (tree->gtFlags & GTF_SPILLED)
- {
- /* The mask must match the original register/regpair */
-
- if (isRegPairType(tree->gtType))
- {
- noway_assert(addrReg == genRegPairMask(tree->gtRegPair));
-
- regSet.rsUnspillRegPair(tree, /* restore it anywhere */ RBM_NONE, RegSet::KEEP_REG);
-
- addrReg = genRegPairMask(tree->gtRegPair);
- }
- else
- {
- noway_assert(addrReg == genRegMask(tree->gtRegNum));
-
- regSet.rsUnspillReg(tree, /* restore it anywhere */ RBM_NONE, RegSet::KEEP_REG);
-
- addrReg = genRegMask(tree->gtRegNum);
- }
-
- noway_assert((regSet.rsMaskLock & lockMask) == lockMask);
- regSet.rsMaskLock -= lockMask;
-
- return addrReg;
- }
-
- /* We have a complex address mode with some of its sub-operands spilled */
-
- noway_assert((tree->gtFlags & GTF_REG_VAL) == 0);
- noway_assert((tree->gtFlags & GTF_SPILLED_OPER) != 0);
-
- /*
- We'll proceed in several phases:
-
- 1. Lock any registers that are part of the address mode and
- have not been spilled. This prevents these registers from
- getting spilled in step 2.
-
- 2. Reload any registers that have been spilled; lock each
- one right after it is reloaded.
-
- 3. Unlock all the registers.
- */
-
- addrReg = genRestoreAddrMode(tree, tree, true);
- addrReg |= genRestoreAddrMode(tree, tree, false);
-
- /* Unlock all registers that the address mode uses */
-
- lockMask |= addrReg;
-
- noway_assert((regSet.rsMaskLock & lockMask) == lockMask);
- regSet.rsMaskLock -= lockMask;
-
- return addrReg;
-}
-
-/*****************************************************************************
- *
- * The given tree was previously passed to genMakeAddressable, but since then
- * some of its registers might have been spilled ('addrReg' is the set of
- * registers used by the address). This function makes sure the operand is
- * still addressable (while avoiding any of the registers in 'avoidMask'),
- * and returns the (possibly modified) set of registers that are used by
- * the address (these will be marked as used on exit).
- */
-
-regMaskTP CodeGen::genKeepAddressable(GenTreePtr tree, regMaskTP addrReg, regMaskTP avoidMask)
-{
- /* Is the operand still addressable? */
-
- tree = tree->gtEffectiveVal(/*commaOnly*/ true); // Strip off commas for this purpose.
-
- if (!genStillAddressable(tree))
- {
- if (avoidMask)
- {
- // Temporarily lock 'avoidMask' while we restore addressability
- // genRestAddressable will unlock the 'avoidMask' for us
- // avoidMask must already be marked as a used reg in regSet.rsMaskUsed
- // In regSet.rsRegMaskFree() we require that all locked registers be marked as used
- //
- regSet.rsLockUsedReg(avoidMask);
- }
-
- addrReg = genRestAddressable(tree, addrReg, avoidMask);
-
- noway_assert((regSet.rsMaskLock & avoidMask) == 0);
- }
-
- return addrReg;
-}
-
-/*****************************************************************************
- *
- * After we're finished with the given operand (which was previously marked
- * by calling genMakeAddressable), this function must be called to free any
- * registers that may have been used by the address.
- * keptReg indicates if the addressability registers were marked as used
- * by genMakeAddressable().
- */
-
-void CodeGen::genDoneAddressable(GenTreePtr tree, regMaskTP addrReg, RegSet::KeepReg keptReg)
-{
- if (keptReg == RegSet::FREE_REG)
- {
- // We exclude regSet.rsMaskUsed since the registers may be multi-used.
- // i.e. there may be a pending use in a higher-up tree.
-
- addrReg &= ~regSet.rsMaskUsed;
-
- /* addrReg was not marked as used. So just reset its GC info */
- if (addrReg)
- {
- gcInfo.gcMarkRegSetNpt(addrReg);
- }
- }
- else
- {
- /* addrReg was marked as used. So we need to free it up (which
- will also reset its GC info) */
-
- regSet.rsMarkRegFree(addrReg);
- }
-}
-
-/*****************************************************************************/
-/*****************************************************************************
- *
- * Make sure the given floating point value is addressable, and return a tree
- * that will yield the value as an addressing mode (this tree may differ from
- * the one passed in, BTW). If the only way to make the value addressable is
- * to evaluate it onto the FP stack, we do this and return zero.
- */
-
-GenTreePtr CodeGen::genMakeAddrOrFPstk(GenTreePtr tree, regMaskTP* regMaskPtr, bool roundResult)
-{
- *regMaskPtr = 0;
-
- switch (tree->gtOper)
- {
- case GT_LCL_VAR:
- case GT_LCL_FLD:
- case GT_CLS_VAR:
- return tree;
-
- case GT_CNS_DBL:
- if (tree->gtType == TYP_FLOAT)
- {
- float f = forceCastToFloat(tree->gtDblCon.gtDconVal);
- return genMakeConst(&f, TYP_FLOAT, tree, false);
- }
- return genMakeConst(&tree->gtDblCon.gtDconVal, tree->gtType, tree, true);
-
- case GT_IND:
- case GT_NULLCHECK:
-
- /* Try to make the address directly addressable */
-
- if (genMakeIndAddrMode(tree->gtOp.gtOp1, tree, false, /* not for LEA */
- 0, RegSet::FREE_REG, regMaskPtr, false))
- {
- genUpdateLife(tree);
- return tree;
- }
-
- break;
-
- default:
- break;
- }
-#if FEATURE_STACK_FP_X87
- /* We have no choice but to compute the value 'tree' onto the FP stack */
-
- genCodeForTreeFlt(tree);
-#endif
- return 0;
-}
-
-/*****************************************************************************/
-/*****************************************************************************
- *
- * Display a string literal value (debug only).
- */
-
-#ifdef DEBUG
-#endif
-
-/*****************************************************************************
- *
- * Generate code to check that the GS cookie wasn't thrashed by a buffer
- * overrun. If pushReg is true, preserve all registers around the code sequence.
- * Otherwise, ECX may be modified.
- *
- * TODO-ARM-Bug?: pushReg is not implemented (is it needed for ARM?)
- */
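-/*
- * Rough shape of the emitted check (x86 JIT case with a constant cookie; the
- * stack reference and label are placeholders):
- *
- *     cmp   dword ptr [lvaGSSecurityCookie], gsGlobalSecurityCookieVal
- *     je    <ok>
- *     call  CORINFO_HELP_FAIL_FAST
- *   <ok>:
- */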
-void CodeGen::genEmitGSCookieCheck(bool pushReg)
-{
- // Make sure that EAX didn't die in the return expression
- if (!pushReg && (compiler->info.compRetType == TYP_REF))
- gcInfo.gcRegGCrefSetCur |= RBM_INTRET;
-
- // Add cookie check code for unsafe buffers
- BasicBlock* gsCheckBlk;
- regMaskTP byrefPushedRegs = RBM_NONE;
- regMaskTP norefPushedRegs = RBM_NONE;
- regMaskTP pushedRegs = RBM_NONE;
-
- noway_assert(compiler->gsGlobalSecurityCookieAddr || compiler->gsGlobalSecurityCookieVal);
-
- if (compiler->gsGlobalSecurityCookieAddr == NULL)
- {
- // JIT case
- CLANG_FORMAT_COMMENT_ANCHOR;
-
-#if CPU_LOAD_STORE_ARCH
-
- regNumber reg = regSet.rsGrabReg(RBM_ALLINT);
- getEmitter()->emitIns_R_S(ins_Load(TYP_INT), EA_4BYTE, reg, compiler->lvaGSSecurityCookie, 0);
- regTracker.rsTrackRegTrash(reg);
-
- if (arm_Valid_Imm_For_Alu(compiler->gsGlobalSecurityCookieVal) ||
- arm_Valid_Imm_For_Alu(~compiler->gsGlobalSecurityCookieVal))
- {
- getEmitter()->emitIns_R_I(INS_cmp, EA_4BYTE, reg, compiler->gsGlobalSecurityCookieVal);
- }
- else
- {
- // Load CookieVal into a register
- regNumber immReg = regSet.rsGrabReg(RBM_ALLINT & ~genRegMask(reg));
- instGen_Set_Reg_To_Imm(EA_4BYTE, immReg, compiler->gsGlobalSecurityCookieVal);
- getEmitter()->emitIns_R_R(INS_cmp, EA_4BYTE, reg, immReg);
- }
-#else
- getEmitter()->emitIns_S_I(INS_cmp, EA_PTRSIZE, compiler->lvaGSSecurityCookie, 0,
- (int)compiler->gsGlobalSecurityCookieVal);
-#endif
- }
- else
- {
- regNumber regGSCheck;
- regMaskTP regMaskGSCheck;
-#if CPU_LOAD_STORE_ARCH
- regGSCheck = regSet.rsGrabReg(RBM_ALLINT);
- regMaskGSCheck = genRegMask(regGSCheck);
-#else
- // Don't pick the 'this' register
- if (compiler->lvaKeepAliveAndReportThis() && compiler->lvaTable[compiler->info.compThisArg].lvRegister &&
- (compiler->lvaTable[compiler->info.compThisArg].lvRegNum == REG_ECX))
- {
- regGSCheck = REG_EDX;
- regMaskGSCheck = RBM_EDX;
- }
- else
- {
- regGSCheck = REG_ECX;
- regMaskGSCheck = RBM_ECX;
- }
-
- // NGen case
- if (pushReg && (regMaskGSCheck & (regSet.rsMaskUsed | regSet.rsMaskVars | regSet.rsMaskLock)))
- {
- pushedRegs = genPushRegs(regMaskGSCheck, &byrefPushedRegs, &norefPushedRegs);
- }
- else
- {
- noway_assert((regMaskGSCheck & (regSet.rsMaskUsed | regSet.rsMaskVars | regSet.rsMaskLock)) == 0);
- }
-#endif
-#if defined(_TARGET_ARM_)
- instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, regGSCheck, (ssize_t)compiler->gsGlobalSecurityCookieAddr);
- getEmitter()->emitIns_R_R_I(INS_ldr, EA_PTRSIZE, regGSCheck, regGSCheck, 0);
-#else
- getEmitter()->emitIns_R_C(ins_Load(TYP_I_IMPL), EA_PTR_DSP_RELOC, regGSCheck, FLD_GLOBAL_DS,
- (ssize_t)compiler->gsGlobalSecurityCookieAddr);
-#endif // !_TARGET_ARM_
- regTracker.rsTrashRegSet(regMaskGSCheck);
-#ifdef _TARGET_ARM_
- regNumber regTmp = regSet.rsGrabReg(RBM_ALLINT & ~genRegMask(regGSCheck));
- getEmitter()->emitIns_R_S(INS_ldr, EA_PTRSIZE, regTmp, compiler->lvaGSSecurityCookie, 0);
- regTracker.rsTrackRegTrash(regTmp);
- getEmitter()->emitIns_R_R(INS_cmp, EA_PTRSIZE, regTmp, regGSCheck);
-#else
- getEmitter()->emitIns_S_R(INS_cmp, EA_PTRSIZE, regGSCheck, compiler->lvaGSSecurityCookie, 0);
-#endif
- }
-
- gsCheckBlk = genCreateTempLabel();
- emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED);
- inst_JMP(jmpEqual, gsCheckBlk);
- genEmitHelperCall(CORINFO_HELP_FAIL_FAST, 0, EA_UNKNOWN);
- genDefineTempLabel(gsCheckBlk);
-
- genPopRegs(pushedRegs, byrefPushedRegs, norefPushedRegs);
-}
-
-/*****************************************************************************
- *
- * Generate any side effects within the given expression tree.
- */
-
-void CodeGen::genEvalSideEffects(GenTreePtr tree)
-{
- genTreeOps oper;
- unsigned kind;
-
-AGAIN:
-
- /* Does this sub-tree contain any side-effects? */
- if (tree->gtFlags & GTF_SIDE_EFFECT)
- {
-#if FEATURE_STACK_FP_X87
- /* Remember the current FP stack level */
- int iTemps = genNumberTemps();
-#endif
- if (tree->OperIsIndir())
- {
- regMaskTP addrReg = genMakeAddressable(tree, RBM_ALLINT, RegSet::KEEP_REG, true, false);
-
- if (tree->gtFlags & GTF_REG_VAL)
- {
- gcInfo.gcMarkRegPtrVal(tree);
- genDoneAddressable(tree, addrReg, RegSet::KEEP_REG);
- }
- // GTF_IND_RNGCHK trees have already de-referenced the pointer, and so
- // do not need an additional null-check
- /* Do this only if the GTF_EXCEPT or GTF_IND_VOLATILE flag is set on the indir */
- else if ((tree->gtFlags & GTF_IND_ARR_INDEX) == 0 && ((tree->gtFlags & (GTF_EXCEPT | GTF_IND_VOLATILE)) != 0))
- {
- /* Compare against any register to do null-check */
- CLANG_FORMAT_COMMENT_ANCHOR;
-
-#if defined(_TARGET_XARCH_)
- inst_TT_RV(INS_cmp, tree, REG_TMP_0, 0, EA_1BYTE);
- genDoneAddressable(tree, addrReg, RegSet::KEEP_REG);
-#elif CPU_LOAD_STORE_ARCH
- if (varTypeIsFloating(tree->TypeGet()))
- {
- genComputeAddressableFloat(tree, addrReg, RBM_NONE, RegSet::KEEP_REG, RBM_ALLFLOAT,
- RegSet::FREE_REG);
- }
- else
- {
- genComputeAddressable(tree, addrReg, RegSet::KEEP_REG, RBM_NONE, RegSet::FREE_REG);
- }
-#ifdef _TARGET_ARM_
- if (tree->gtFlags & GTF_IND_VOLATILE)
- {
- // Emit a memory barrier instruction after the load
- instGen_MemoryBarrier();
- }
-#endif
-#else
- NYI("TARGET");
-#endif
- }
- else
- {
- genDoneAddressable(tree, addrReg, RegSet::KEEP_REG);
- }
- }
- else
- {
- /* Generate the expression and throw it away */
- genCodeForTree(tree, RBM_ALL(tree->TypeGet()));
- if (tree->gtFlags & GTF_REG_VAL)
- {
- gcInfo.gcMarkRegPtrVal(tree);
- }
- }
-#if FEATURE_STACK_FP_X87
- /* If the tree computed a value on the FP stack, pop the stack */
- if (genNumberTemps() > iTemps)
- {
- noway_assert(genNumberTemps() == iTemps + 1);
- genDiscardStackFP(tree);
- }
-#endif
- return;
- }
-
- noway_assert(tree->gtOper != GT_ASG);
-
- /* Walk the tree, just to mark any dead values appropriately */
-
- oper = tree->OperGet();
- kind = tree->OperKind();
-
- /* Is this a constant or leaf node? */
-
- if (kind & (GTK_CONST | GTK_LEAF))
- {
-#if FEATURE_STACK_FP_X87
- if (tree->IsRegVar() && isFloatRegType(tree->gtType) && tree->IsRegVarDeath())
- {
- genRegVarDeathStackFP(tree);
- FlatFPX87_Unload(&compCurFPState, tree->gtRegNum);
- }
-#endif
- genUpdateLife(tree);
- gcInfo.gcMarkRegPtrVal(tree);
- return;
- }
-
- /* Must be a 'simple' unary/binary operator */
-
- noway_assert(kind & GTK_SMPOP);
-
- if (tree->gtGetOp2())
- {
- genEvalSideEffects(tree->gtOp.gtOp1);
-
- tree = tree->gtOp.gtOp2;
- goto AGAIN;
- }
- else
- {
- tree = tree->gtOp.gtOp1;
- if (tree)
- goto AGAIN;
- }
-}
-
-/*****************************************************************************
- *
- * A persistent pointer value is being overwritten, record it for the GC.
- *
- * tgt : the destination being written to
- * assignVal : the value being assigned (the source). It must currently be in a register.
- * tgtAddrReg : the set of registers being used by "tgt"
- *
- * Returns : the mask of the scratch register that was used.
- * RBM_NONE if a write-barrier is not needed.
- */
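-
-/*
- * Illustrative use (hypothetical caller, for exposition only): storing a GC
- * ref 'assignVal' (already in a register) through an addressable 'tgt':
- *
- *     regMaskTP wbReg = WriteBarrier(tgt, assignVal, tgtAddrReg);
- *     if (wbReg == RBM_NONE)
- *     {
- *         // No barrier was needed: the caller still holds 'tgtAddrReg', must
- *         // emit the store itself, and then free the registers with
- *         // genDoneAddressable(tgt, tgtAddrReg, RegSet::KEEP_REG).
- *     }
- *     // Otherwise the helper call performed the store and 'tgtAddrReg' was freed.
- */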
-
-regMaskTP CodeGen::WriteBarrier(GenTreePtr tgt, GenTreePtr assignVal, regMaskTP tgtAddrReg)
-{
- noway_assert(assignVal->gtFlags & GTF_REG_VAL);
-
- GCInfo::WriteBarrierForm wbf = gcInfo.gcIsWriteBarrierCandidate(tgt, assignVal);
- if (wbf == GCInfo::WBF_NoBarrier)
- return RBM_NONE;
-
- regMaskTP resultRegMask = RBM_NONE;
-
-#if FEATURE_WRITE_BARRIER
-
- regNumber reg = assignVal->gtRegNum;
-
-#if defined(_TARGET_X86_) && NOGC_WRITE_BARRIERS
-#ifdef DEBUG
- if (wbf != GCInfo::WBF_NoBarrier_CheckNotHeapInDebug) // This one is always a call to a C++ method.
- {
-#endif
- const static int regToHelper[2][8] = {
- // If the target is known to be in managed memory
- {
- CORINFO_HELP_ASSIGN_REF_EAX, CORINFO_HELP_ASSIGN_REF_ECX, -1, CORINFO_HELP_ASSIGN_REF_EBX, -1,
- CORINFO_HELP_ASSIGN_REF_EBP, CORINFO_HELP_ASSIGN_REF_ESI, CORINFO_HELP_ASSIGN_REF_EDI,
- },
-
- // Don't know if the target is in managed memory
- {
- CORINFO_HELP_CHECKED_ASSIGN_REF_EAX, CORINFO_HELP_CHECKED_ASSIGN_REF_ECX, -1,
- CORINFO_HELP_CHECKED_ASSIGN_REF_EBX, -1, CORINFO_HELP_CHECKED_ASSIGN_REF_EBP,
- CORINFO_HELP_CHECKED_ASSIGN_REF_ESI, CORINFO_HELP_CHECKED_ASSIGN_REF_EDI,
- },
- };
-
- noway_assert(regToHelper[0][REG_EAX] == CORINFO_HELP_ASSIGN_REF_EAX);
- noway_assert(regToHelper[0][REG_ECX] == CORINFO_HELP_ASSIGN_REF_ECX);
- noway_assert(regToHelper[0][REG_EBX] == CORINFO_HELP_ASSIGN_REF_EBX);
- noway_assert(regToHelper[0][REG_ESP] == -1);
- noway_assert(regToHelper[0][REG_EBP] == CORINFO_HELP_ASSIGN_REF_EBP);
- noway_assert(regToHelper[0][REG_ESI] == CORINFO_HELP_ASSIGN_REF_ESI);
- noway_assert(regToHelper[0][REG_EDI] == CORINFO_HELP_ASSIGN_REF_EDI);
-
- noway_assert(regToHelper[1][REG_EAX] == CORINFO_HELP_CHECKED_ASSIGN_REF_EAX);
- noway_assert(regToHelper[1][REG_ECX] == CORINFO_HELP_CHECKED_ASSIGN_REF_ECX);
- noway_assert(regToHelper[1][REG_EBX] == CORINFO_HELP_CHECKED_ASSIGN_REF_EBX);
- noway_assert(regToHelper[1][REG_ESP] == -1);
- noway_assert(regToHelper[1][REG_EBP] == CORINFO_HELP_CHECKED_ASSIGN_REF_EBP);
- noway_assert(regToHelper[1][REG_ESI] == CORINFO_HELP_CHECKED_ASSIGN_REF_ESI);
- noway_assert(regToHelper[1][REG_EDI] == CORINFO_HELP_CHECKED_ASSIGN_REF_EDI);
-
- noway_assert((reg != REG_ESP) && (reg != REG_WRITE_BARRIER));
-
- /*
- Generate the following code:
-
- lea edx, tgt
- call write_barrier_helper_reg
-
- First grab the RBM_WRITE_BARRIER register for the target address.
- */
-
- regNumber rg1;
- bool trashOp1;
-
- if ((tgtAddrReg & RBM_WRITE_BARRIER) == 0)
- {
- rg1 = regSet.rsGrabReg(RBM_WRITE_BARRIER);
-
- regSet.rsMaskUsed |= RBM_WRITE_BARRIER;
- regSet.rsMaskLock |= RBM_WRITE_BARRIER;
-
- trashOp1 = false;
- }
- else
- {
- rg1 = REG_WRITE_BARRIER;
-
- trashOp1 = true;
- }
-
- noway_assert(rg1 == REG_WRITE_BARRIER);
-
- /* Generate "lea EDX, [addr-mode]" */
-
- noway_assert(tgt->gtType == TYP_REF);
- tgt->gtType = TYP_BYREF;
- inst_RV_TT(INS_lea, rg1, tgt, 0, EA_BYREF);
-
- /* Free up anything that was tied up by the LHS */
- genDoneAddressable(tgt, tgtAddrReg, RegSet::KEEP_REG);
-
- // In case "tgt" was a comma:
- tgt = tgt->gtEffectiveVal();
-
- regTracker.rsTrackRegTrash(rg1);
- gcInfo.gcMarkRegSetNpt(genRegMask(rg1));
- gcInfo.gcMarkRegPtrVal(rg1, TYP_BYREF);
-
- /* Call the proper vm helper */
-
- // enforced by gcIsWriteBarrierCandidate
- noway_assert(tgt->gtOper == GT_IND || tgt->gtOper == GT_CLS_VAR);
-
- unsigned tgtAnywhere = 0;
- if ((tgt->gtOper == GT_IND) &&
- ((tgt->gtFlags & GTF_IND_TGTANYWHERE) || (tgt->gtOp.gtOp1->TypeGet() == TYP_I_IMPL)))
- {
- tgtAnywhere = 1;
- }
-
- int helper = regToHelper[tgtAnywhere][reg];
- resultRegMask = genRegMask(reg);
-
- gcInfo.gcMarkRegSetNpt(RBM_WRITE_BARRIER); // byref EDX is killed in the call
-
- genEmitHelperCall(helper,
- 0, // argSize
- EA_PTRSIZE); // retSize
-
- if (!trashOp1)
- {
- regSet.rsMaskUsed &= ~RBM_WRITE_BARRIER;
- regSet.rsMaskLock &= ~RBM_WRITE_BARRIER;
- }
-
- return resultRegMask;
-
-#ifdef DEBUG
- }
- else
-#endif
-#endif // defined(_TARGET_X86_) && NOGC_WRITE_BARRIERS
-
-#if defined(DEBUG) || !(defined(_TARGET_X86_) && NOGC_WRITE_BARRIERS)
- {
- /*
- Generate the following code (or its equivalent on the given target):
-
- mov arg1, srcReg
- lea arg0, tgt
- call write_barrier_helper
-
- First, set up REG_ARG_1 with the GC ref that we are storing via the Write Barrier
- */
-
- if (reg != REG_ARG_1)
- {
- // We may need to spill whatever is in the ARG_1 register
- //
- if ((regSet.rsMaskUsed & RBM_ARG_1) != 0)
- {
- regSet.rsSpillReg(REG_ARG_1);
- }
-
- inst_RV_RV(INS_mov, REG_ARG_1, reg, TYP_REF);
- }
- resultRegMask = RBM_ARG_1;
-
- regTracker.rsTrackRegTrash(REG_ARG_1);
- gcInfo.gcMarkRegSetNpt(REG_ARG_1);
- gcInfo.gcMarkRegSetGCref(RBM_ARG_1); // gcref in ARG_1
-
- bool free_arg1 = false;
- if ((regSet.rsMaskUsed & RBM_ARG_1) == 0)
- {
- regSet.rsMaskUsed |= RBM_ARG_1;
- free_arg1 = true;
- }
-
- // Then we set up REG_ARG_0 with the target address to store into via the Write Barrier
-
- /* Generate "lea R0, [addr-mode]" */
-
- noway_assert(tgt->gtType == TYP_REF);
- tgt->gtType = TYP_BYREF;
-
- tgtAddrReg = genKeepAddressable(tgt, tgtAddrReg);
-
- // We may need to spill whatever is in the ARG_0 register
- //
- if (((tgtAddrReg & RBM_ARG_0) == 0) && // tgtAddrReg does not contain REG_ARG_0
- ((regSet.rsMaskUsed & RBM_ARG_0) != 0) && // and regSet.rsMaskUsed contains REG_ARG_0
- (reg != REG_ARG_0)) // unless REG_ARG_0 contains the REF value being written, which we're finished with.
- {
- regSet.rsSpillReg(REG_ARG_0);
- }
-
- inst_RV_TT(INS_lea, REG_ARG_0, tgt, 0, EA_BYREF);
-
- /* Free up anything that was tied up by the LHS */
- genDoneAddressable(tgt, tgtAddrReg, RegSet::KEEP_REG);
-
- regTracker.rsTrackRegTrash(REG_ARG_0);
- gcInfo.gcMarkRegSetNpt(REG_ARG_0);
- gcInfo.gcMarkRegSetByref(RBM_ARG_0); // byref in ARG_0
-
-#ifdef _TARGET_ARM_
-#if NOGC_WRITE_BARRIERS
- // Finally, we may be required to spill whatever is in the further argument registers
- // trashed by the call. The write barrier trashes some further registers --
- // either the standard volatile var set, or, if we're using assembly barriers, a more specialized set.
-
- regMaskTP volatileRegsTrashed = RBM_CALLEE_TRASH_NOGC;
-#else
- regMaskTP volatileRegsTrashed = RBM_CALLEE_TRASH;
-#endif
- // Spill any other registers trashed by the write barrier call and currently in use.
- regMaskTP mustSpill = (volatileRegsTrashed & regSet.rsMaskUsed & ~(RBM_ARG_0 | RBM_ARG_1));
- if (mustSpill)
- regSet.rsSpillRegs(mustSpill);
-#endif // _TARGET_ARM_
-
- bool free_arg0 = false;
- if ((regSet.rsMaskUsed & RBM_ARG_0) == 0)
- {
- regSet.rsMaskUsed |= RBM_ARG_0;
- free_arg0 = true;
- }
-
- // genEmitHelperCall might need to grab a register
- // so don't let it spill one of the arguments
- //
- regMaskTP reallyUsedRegs = RBM_NONE;
- regSet.rsLockReg(RBM_ARG_0 | RBM_ARG_1, &reallyUsedRegs);
-
- genGCWriteBarrier(tgt, wbf);
-
- regSet.rsUnlockReg(RBM_ARG_0 | RBM_ARG_1, reallyUsedRegs);
- gcInfo.gcMarkRegSetNpt(RBM_ARG_0 | RBM_ARG_1); // byref ARG_0 and reg ARG_1 are killed by the call
-
- if (free_arg0)
- {
- regSet.rsMaskUsed &= ~RBM_ARG_0;
- }
- if (free_arg1)
- {
- regSet.rsMaskUsed &= ~RBM_ARG_1;
- }
-
- return resultRegMask;
- }
-#endif // defined(DEBUG) || !(defined(_TARGET_X86_) && NOGC_WRITE_BARRIERS)
-
-#else // !FEATURE_WRITE_BARRIER
-
- NYI("FEATURE_WRITE_BARRIER unimplemented");
- return resultRegMask;
-
-#endif // !FEATURE_WRITE_BARRIER
-}
-
-#ifdef _TARGET_X86_
-/*****************************************************************************
- *
- * Generate the appropriate conditional jump(s) right after the high 32 bits
- * of two long values have been compared.
- */
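-
-/*
- * Worked example (illustrative): for an unsigned 'a < b' comparison of two
- * TYP_LONG values, genCondJumpLng() emits roughly:
- *
- *     cmp  a.hi, b.hi
- *     ja   <false>        ; genJccLongHi: high parts decide, not less
- *     jb   <true>         ;               high parts decide, definitely less
- *     cmp  a.lo, b.lo
- *     jb   <true>         ; genJccLongLo: high parts equal, low parts decide
- *     ; fall through to <false>
- */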
-
-void CodeGen::genJccLongHi(genTreeOps cmp, BasicBlock* jumpTrue, BasicBlock* jumpFalse, bool isUnsigned)
-{
- if (cmp != GT_NE)
- {
- jumpFalse->bbFlags |= BBF_JMP_TARGET | BBF_HAS_LABEL;
- }
-
- switch (cmp)
- {
- case GT_EQ:
- inst_JMP(EJ_jne, jumpFalse);
- break;
-
- case GT_NE:
- inst_JMP(EJ_jne, jumpTrue);
- break;
-
- case GT_LT:
- case GT_LE:
- if (isUnsigned)
- {
- inst_JMP(EJ_ja, jumpFalse);
- inst_JMP(EJ_jb, jumpTrue);
- }
- else
- {
- inst_JMP(EJ_jg, jumpFalse);
- inst_JMP(EJ_jl, jumpTrue);
- }
- break;
-
- case GT_GE:
- case GT_GT:
- if (isUnsigned)
- {
- inst_JMP(EJ_jb, jumpFalse);
- inst_JMP(EJ_ja, jumpTrue);
- }
- else
- {
- inst_JMP(EJ_jl, jumpFalse);
- inst_JMP(EJ_jg, jumpTrue);
- }
- break;
-
- default:
- noway_assert(!"expected a comparison operator");
- }
-}
-
-/*****************************************************************************
- *
- * Generate the appropriate conditional jump(s) right after the low 32 bits
- * of two long values have been compared.
- */
-
-void CodeGen::genJccLongLo(genTreeOps cmp, BasicBlock* jumpTrue, BasicBlock* jumpFalse)
-{
- switch (cmp)
- {
- case GT_EQ:
- inst_JMP(EJ_je, jumpTrue);
- break;
-
- case GT_NE:
- inst_JMP(EJ_jne, jumpTrue);
- break;
-
- case GT_LT:
- inst_JMP(EJ_jb, jumpTrue);
- break;
-
- case GT_LE:
- inst_JMP(EJ_jbe, jumpTrue);
- break;
-
- case GT_GE:
- inst_JMP(EJ_jae, jumpTrue);
- break;
-
- case GT_GT:
- inst_JMP(EJ_ja, jumpTrue);
- break;
-
- default:
- noway_assert(!"expected comparison");
- }
-}
-#elif defined(_TARGET_ARM_)
-/*****************************************************************************
-*
-* Generate the appropriate conditional jump(s) right after the high 32 bits
-* of two long values have been compared.
-*/
-
-void CodeGen::genJccLongHi(genTreeOps cmp, BasicBlock* jumpTrue, BasicBlock* jumpFalse, bool isUnsigned)
-{
- if (cmp != GT_NE)
- {
- jumpFalse->bbFlags |= BBF_JMP_TARGET | BBF_HAS_LABEL;
- }
-
- switch (cmp)
- {
- case GT_EQ:
- inst_JMP(EJ_ne, jumpFalse);
- break;
-
- case GT_NE:
- inst_JMP(EJ_ne, jumpTrue);
- break;
-
- case GT_LT:
- case GT_LE:
- if (isUnsigned)
- {
- inst_JMP(EJ_hi, jumpFalse);
- inst_JMP(EJ_lo, jumpTrue);
- }
- else
- {
- inst_JMP(EJ_gt, jumpFalse);
- inst_JMP(EJ_lt, jumpTrue);
- }
- break;
-
- case GT_GE:
- case GT_GT:
- if (isUnsigned)
- {
- inst_JMP(EJ_lo, jumpFalse);
- inst_JMP(EJ_hi, jumpTrue);
- }
- else
- {
- inst_JMP(EJ_lt, jumpFalse);
- inst_JMP(EJ_gt, jumpTrue);
- }
- break;
-
- default:
- noway_assert(!"expected a comparison operator");
- }
-}
-
-/*****************************************************************************
-*
-* Generate the appropriate conditional jump(s) right after the low 32 bits
-* of two long values have been compared.
-*/
-
-void CodeGen::genJccLongLo(genTreeOps cmp, BasicBlock* jumpTrue, BasicBlock* jumpFalse)
-{
- switch (cmp)
- {
- case GT_EQ:
- inst_JMP(EJ_eq, jumpTrue);
- break;
-
- case GT_NE:
- inst_JMP(EJ_ne, jumpTrue);
- break;
-
- case GT_LT:
- inst_JMP(EJ_lo, jumpTrue);
- break;
-
- case GT_LE:
- inst_JMP(EJ_ls, jumpTrue);
- break;
-
- case GT_GE:
- inst_JMP(EJ_hs, jumpTrue);
- break;
-
- case GT_GT:
- inst_JMP(EJ_hi, jumpTrue);
- break;
-
- default:
- noway_assert(!"expected comparison");
- }
-}
-#endif
-/*****************************************************************************
- *
- * Called by genCondJump() for TYP_LONG.
- */
-
-void CodeGen::genCondJumpLng(GenTreePtr cond, BasicBlock* jumpTrue, BasicBlock* jumpFalse, bool bFPTransition)
-{
- noway_assert(jumpTrue && jumpFalse);
- noway_assert((cond->gtFlags & GTF_REVERSE_OPS) == false); // Done in genCondJump()
- noway_assert(cond->gtOp.gtOp1->gtType == TYP_LONG);
-
- GenTreePtr op1 = cond->gtOp.gtOp1;
- GenTreePtr op2 = cond->gtOp.gtOp2;
- genTreeOps cmp = cond->OperGet();
-
- regMaskTP addrReg;
-
- /* Are we comparing against a constant? */
-
- if (op2->gtOper == GT_CNS_LNG)
- {
- __int64 lval = op2->gtLngCon.gtLconVal;
- regNumber rTmp;
-
- // We're "done" evaluating op2; let's strip any commas off op1 before we
- // evaluate it.
- op1 = genCodeForCommaTree(op1);
-
- /* We can generate better code for some special cases */
- instruction ins = INS_invalid;
- bool useIncToSetFlags = false;
- bool specialCaseCmp = false;
-
- if (cmp == GT_EQ)
- {
- if (lval == 0)
- {
- /* op1 == 0 */
- ins = INS_OR;
- useIncToSetFlags = false;
- specialCaseCmp = true;
- }
- else if (lval == -1)
- {
- /* op1 == -1 */
- ins = INS_AND;
- useIncToSetFlags = true;
- specialCaseCmp = true;
- }
- }
- else if (cmp == GT_NE)
- {
- if (lval == 0)
- {
- /* op1 != 0 */
- ins = INS_OR;
- useIncToSetFlags = false;
- specialCaseCmp = true;
- }
- else if (lval == -1)
- {
- /* op1 != -1 */
- ins = INS_AND;
- useIncToSetFlags = true;
- specialCaseCmp = true;
- }
- }
-
- if (specialCaseCmp)
- {
- /* Make the comparand addressable */
-
- addrReg = genMakeRvalueAddressable(op1, 0, RegSet::KEEP_REG, false, true);
-
- regMaskTP tmpMask = regSet.rsRegMaskCanGrab();
- insFlags flags = useIncToSetFlags ? INS_FLAGS_DONT_CARE : INS_FLAGS_SET;
-
- if (op1->gtFlags & GTF_REG_VAL)
- {
- regPairNo regPair = op1->gtRegPair;
- regNumber rLo = genRegPairLo(regPair);
- regNumber rHi = genRegPairHi(regPair);
- if (tmpMask & genRegMask(rLo))
- {
- rTmp = rLo;
- }
- else if (tmpMask & genRegMask(rHi))
- {
- rTmp = rHi;
- rHi = rLo;
- }
- else
- {
- rTmp = regSet.rsGrabReg(tmpMask);
- inst_RV_RV(INS_mov, rTmp, rLo, TYP_INT);
- }
-
- /* The register is now trashed */
- regTracker.rsTrackRegTrash(rTmp);
-
- if (rHi != REG_STK)
- {
- /* Set the flags using INS_AND | INS_OR */
- inst_RV_RV(ins, rTmp, rHi, TYP_INT, EA_4BYTE, flags);
- }
- else
- {
- /* Set the flags using INS_AND | INS_OR */
- inst_RV_TT(ins, rTmp, op1, 4, EA_4BYTE, flags);
- }
- }
- else // op1 is not GTF_REG_VAL
- {
- rTmp = regSet.rsGrabReg(tmpMask);
-
- /* Load the low 32-bits of op1 */
- inst_RV_TT(ins_Load(TYP_INT), rTmp, op1, 0);
-
- /* The register is now trashed */
- regTracker.rsTrackRegTrash(rTmp);
-
- /* Set the flags using INS_AND | INS_OR */
- inst_RV_TT(ins, rTmp, op1, 4, EA_4BYTE, flags);
- }
-
- /* Free up the addrReg(s) if any */
- genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);
-
- /* Comparing against -1 also requires an inc instruction */
- if (useIncToSetFlags)
- {
- /* Make sure the inc will set the flags */
- assert(cond->gtSetFlags());
- genIncRegBy(rTmp, 1, cond, TYP_INT);
- }
-
-#if FEATURE_STACK_FP_X87
- // We may need a transition block
- if (bFPTransition)
- {
- jumpTrue = genTransitionBlockStackFP(&compCurFPState, compiler->compCurBB, jumpTrue);
- }
-#endif
- emitJumpKind jmpKind = genJumpKindForOper(cmp, CK_SIGNED);
- inst_JMP(jmpKind, jumpTrue);
- }
- else // specialCaseCmp == false
- {
- /* Make the comparand addressable */
- addrReg = genMakeRvalueAddressable(op1, 0, RegSet::FREE_REG, false, true);
-
- /* Compare the high part first */
-
- int ival = (int)(lval >> 32);
-
- /* Comparing a register against 0 is easier */
-
- if (!ival && (op1->gtFlags & GTF_REG_VAL) && (rTmp = genRegPairHi(op1->gtRegPair)) != REG_STK)
- {
- /* Generate 'test rTmp, rTmp' */
- instGen_Compare_Reg_To_Zero(emitTypeSize(op1->TypeGet()), rTmp); // set flags
- }
- else
- {
- if (!(op1->gtFlags & GTF_REG_VAL) && (op1->gtOper == GT_CNS_LNG))
- {
- /* Special case: comparison of two constants */
- // Needed as gtFoldExpr() doesn't fold longs
-
- noway_assert(addrReg == 0);
- int op1_hiword = (int)(op1->gtLngCon.gtLconVal >> 32);
-
- /* Get the constant operand into a register */
- rTmp = genGetRegSetToIcon(op1_hiword);
-
- /* Generate 'cmp rTmp, ival' */
-
- inst_RV_IV(INS_cmp, rTmp, ival, EA_4BYTE);
- }
- else
- {
- /* Generate 'cmp op1, ival' */
-
- inst_TT_IV(INS_cmp, op1, ival, 4);
- }
- }
-
-#if FEATURE_STACK_FP_X87
- // We may need a transition block
- if (bFPTransition)
- {
- jumpTrue = genTransitionBlockStackFP(&compCurFPState, compiler->compCurBB, jumpTrue);
- }
-#endif
- /* Generate the appropriate jumps */
-
- if (cond->gtFlags & GTF_UNSIGNED)
- genJccLongHi(cmp, jumpTrue, jumpFalse, true);
- else
- genJccLongHi(cmp, jumpTrue, jumpFalse);
-
- /* Compare the low part second */
-
- ival = (int)lval;
-
- /* Comparing a register against 0 is easier */
-
- if (!ival && (op1->gtFlags & GTF_REG_VAL) && (rTmp = genRegPairLo(op1->gtRegPair)) != REG_STK)
- {
- /* Generate 'test rTmp, rTmp' */
- instGen_Compare_Reg_To_Zero(emitTypeSize(op1->TypeGet()), rTmp); // set flags
- }
- else
- {
- if (!(op1->gtFlags & GTF_REG_VAL) && (op1->gtOper == GT_CNS_LNG))
- {
- /* Special case: comparison of two constants */
- // Needed as gtFoldExpr() doesn't fold longs
-
- noway_assert(addrReg == 0);
- int op1_loword = (int)op1->gtLngCon.gtLconVal;
-
- /* get the constant operand into a register */
- rTmp = genGetRegSetToIcon(op1_loword);
-
- /* Generate 'cmp rTmp, ival' */
-
- inst_RV_IV(INS_cmp, rTmp, ival, EA_4BYTE);
- }
- else
- {
- /* Generate 'cmp op1, ival' */
-
- inst_TT_IV(INS_cmp, op1, ival, 0);
- }
- }
-
- /* Generate the appropriate jumps */
- genJccLongLo(cmp, jumpTrue, jumpFalse);
-
- genDoneAddressable(op1, addrReg, RegSet::FREE_REG);
- }
- }
- else // (op2->gtOper != GT_CNS_LNG)
- {
-
- /* The operands would be reversed by physically swapping them */
-
- noway_assert((cond->gtFlags & GTF_REVERSE_OPS) == 0);
-
- /* Generate the first operand into a register pair */
-
- genComputeRegPair(op1, REG_PAIR_NONE, op2->gtRsvdRegs, RegSet::KEEP_REG, false);
- noway_assert(op1->gtFlags & GTF_REG_VAL);
-
-#if CPU_LOAD_STORE_ARCH
- /* Generate the second operand into a register pair */
- // Fix 388442 ARM JitStress WP7
- genComputeRegPair(op2, REG_PAIR_NONE, genRegPairMask(op1->gtRegPair), RegSet::KEEP_REG, false);
- noway_assert(op2->gtFlags & GTF_REG_VAL);
- regSet.rsLockUsedReg(genRegPairMask(op2->gtRegPair));
-#else
- /* Make the second operand addressable */
-
- addrReg = genMakeRvalueAddressable(op2, RBM_ALLINT & ~genRegPairMask(op1->gtRegPair), RegSet::KEEP_REG, false);
-#endif
- /* Make sure the first operand hasn't been spilled */
-
- genRecoverRegPair(op1, REG_PAIR_NONE, RegSet::KEEP_REG);
- noway_assert(op1->gtFlags & GTF_REG_VAL);
-
- regPairNo regPair = op1->gtRegPair;
-
-#if !CPU_LOAD_STORE_ARCH
- /* Make sure 'op2' is still addressable while avoiding 'op1' (regPair) */
-
- addrReg = genKeepAddressable(op2, addrReg, genRegPairMask(regPair));
-#endif
-
-#if FEATURE_STACK_FP_X87
- // We may need a transition block
- if (bFPTransition)
- {
- jumpTrue = genTransitionBlockStackFP(&compCurFPState, compiler->compCurBB, jumpTrue);
- }
-#endif
-
- /* Perform the comparison - high parts */
-
- inst_RV_TT(INS_cmp, genRegPairHi(regPair), op2, 4);
-
- if (cond->gtFlags & GTF_UNSIGNED)
- genJccLongHi(cmp, jumpTrue, jumpFalse, true);
- else
- genJccLongHi(cmp, jumpTrue, jumpFalse);
-
- /* Compare the low parts */
-
- inst_RV_TT(INS_cmp, genRegPairLo(regPair), op2, 0);
- genJccLongLo(cmp, jumpTrue, jumpFalse);
-
- /* Free up anything that was tied up by either operand */
- CLANG_FORMAT_COMMENT_ANCHOR;
-
-#if CPU_LOAD_STORE_ARCH
-
- // Fix 388442 ARM JitStress WP7
- regSet.rsUnlockUsedReg(genRegPairMask(op2->gtRegPair));
- genReleaseRegPair(op2);
-#else
- genDoneAddressable(op2, addrReg, RegSet::KEEP_REG);
-#endif
- genReleaseRegPair(op1);
- }
-}
-
-/*****************************************************************************
- * gen_fcomp_FN, gen_fcomp_FS_TT, gen_fcompp_FS
- * Called by genCondJumpFlt() to generate the fcomp instruction appropriate
- * to the architecture we're running on.
- *
- * P5:
- * gen_fcomp_FN: fcomp ST(0), stk
- * gen_fcomp_FS_TT: fcomp ST(0), addr
- * gen_fcompp_FS: fcompp
- * These are followed by fnstsw, sahf to get the flags in EFLAGS.
- *
- * P6:
- * gen_fcomp_FN: fcomip ST(0), stk
- * gen_fcomp_FS_TT: fld addr, fcomip ST(0), ST(1), fstp ST(0)
- * (and reverse the branch condition since addr comes first)
- * gen_fcompp_FS: fcomip, fstp
- * These instructions will correctly set the EFLAGS register.
- *
- * Return value: These functions return true if the instruction has
- * already placed its result in the EFLAGS register.
- */
-
-bool CodeGen::genUse_fcomip()
-{
- return compiler->opts.compUseFCOMI;
-}
-
-/*****************************************************************************
- *
- * Sets the flag for the TYP_INT/TYP_REF comparison.
- * We try to use the flags if they have already been set by a prior
- * instruction.
- * e.g. i++; if(i<0) {} Here, the "i++;" will have set the sign flag. We don't
- * need to compare against zero again. Just use an "INS_js".
- *
- * Returns the flags the following jump/set instruction should use.
- */
-
-emitJumpKind CodeGen::genCondSetFlags(GenTreePtr cond)
-{
- noway_assert(cond->OperIsCompare());
- noway_assert(varTypeIsI(genActualType(cond->gtOp.gtOp1->gtType)));
-
- GenTreePtr op1 = cond->gtOp.gtOp1;
- GenTreePtr op2 = cond->gtOp.gtOp2;
- genTreeOps cmp = cond->OperGet();
-
- if (cond->gtFlags & GTF_REVERSE_OPS)
- {
- /* Don't forget to modify the condition as well */
-
- cond->gtOp.gtOp1 = op2;
- cond->gtOp.gtOp2 = op1;
- cond->SetOper(GenTree::SwapRelop(cmp));
- cond->gtFlags &= ~GTF_REVERSE_OPS;
-
- /* Get hold of the new values */
-
- cmp = cond->OperGet();
- op1 = cond->gtOp.gtOp1;
- op2 = cond->gtOp.gtOp2;
- }
-
- // Note that op1's type may get bashed. So save it early
-
- var_types op1Type = op1->TypeGet();
- bool unsignedCmp = (cond->gtFlags & GTF_UNSIGNED) != 0;
- emitAttr size = EA_UNKNOWN;
-
- regMaskTP regNeed;
- regMaskTP addrReg1 = RBM_NONE;
- regMaskTP addrReg2 = RBM_NONE;
- emitJumpKind jumpKind = EJ_COUNT; // Initialize with an invalid value
-
- bool byteCmp;
- bool shortCmp;
-
- regMaskTP newLiveMask;
- regNumber op1Reg;
-
- /* Are we comparing against a constant? */
-
- if (op2->IsCnsIntOrI())
- {
- ssize_t ival = op2->gtIntConCommon.IconValue();
-
- /* Unsigned less-than comparisons with 1 ('< 1')
- should be transformed into '== 0' to potentially
- suppress a tst instruction.
- */
- if ((ival == 1) && (cmp == GT_LT) && unsignedCmp)
- {
- op2->gtIntCon.gtIconVal = ival = 0;
- cond->gtOper = cmp = GT_EQ;
- }
-
- /* Comparisons against 0 can be easier */
-
- if (ival == 0)
- {
- // if we can safely change the comparison to unsigned we do so
- if (!unsignedCmp && varTypeIsSmall(op1->TypeGet()) && varTypeIsUnsigned(op1->TypeGet()))
- {
- unsignedCmp = true;
- }
-
- /* unsigned comparisons with 0 should be transformed into
- '== 0' or '!= 0' to potentially suppress a tst instruction. */
-
- if (unsignedCmp)
- {
- if (cmp == GT_GT)
- cond->gtOper = cmp = GT_NE;
- else if (cmp == GT_LE)
- cond->gtOper = cmp = GT_EQ;
- }
-
- /* Is this a simple zero/non-zero test? */
-
- if (cmp == GT_EQ || cmp == GT_NE)
- {
- /* Is the operand an "AND" operation? */
-
- if (op1->gtOper == GT_AND)
- {
- GenTreePtr an1 = op1->gtOp.gtOp1;
- GenTreePtr an2 = op1->gtOp.gtOp2;
-
- /* Check for the case "expr & icon" */
-
- if (an2->IsIntCnsFitsInI32())
- {
- int iVal = (int)an2->gtIntCon.gtIconVal;
-
- /* make sure that constant is not out of an1's range */
-
- switch (an1->gtType)
- {
- case TYP_BOOL:
- case TYP_BYTE:
- if (iVal & 0xffffff00)
- goto NO_TEST_FOR_AND;
- break;
- case TYP_CHAR:
- case TYP_SHORT:
- if (iVal & 0xffff0000)
- goto NO_TEST_FOR_AND;
- break;
- default:
- break;
- }
-
- if (an1->IsCnsIntOrI())
- {
- // Special case - Both operands of AND are consts
- genComputeReg(an1, 0, RegSet::EXACT_REG, RegSet::KEEP_REG);
- addrReg1 = genRegMask(an1->gtRegNum);
- }
- else
- {
- addrReg1 = genMakeAddressable(an1, RBM_NONE, RegSet::KEEP_REG, true);
- }
-#if CPU_LOAD_STORE_ARCH
- if ((an1->gtFlags & GTF_REG_VAL) == 0)
- {
- genComputeAddressable(an1, addrReg1, RegSet::KEEP_REG, RBM_NONE, RegSet::KEEP_REG);
- if (arm_Valid_Imm_For_Alu(iVal))
- {
- inst_RV_IV(INS_TEST, an1->gtRegNum, iVal, emitActualTypeSize(an1->gtType));
- }
- else
- {
- regNumber regTmp = regSet.rsPickFreeReg();
- instGen_Set_Reg_To_Imm(EmitSize(an2), regTmp, iVal);
- inst_RV_RV(INS_TEST, an1->gtRegNum, regTmp);
- }
- genReleaseReg(an1);
- addrReg1 = RBM_NONE;
- }
- else
-#endif
- {
-#ifdef _TARGET_XARCH_
- // Check to see if we can use a smaller immediate.
- if ((an1->gtFlags & GTF_REG_VAL) && ((iVal & 0x0000FFFF) == iVal))
- {
- var_types testType =
- (var_types)(((iVal & 0x000000FF) == iVal) ? TYP_UBYTE : TYP_USHORT);
-#if CPU_HAS_BYTE_REGS
- // if we don't have a byte-able register, switch to the 2-byte form
- if ((testType == TYP_UBYTE) && !(genRegMask(an1->gtRegNum) & RBM_BYTE_REGS))
- {
- testType = TYP_USHORT;
- }
-#endif // CPU_HAS_BYTE_REGS
-
- inst_TT_IV(INS_TEST, an1, iVal, testType);
- }
- else
-#endif // _TARGET_XARCH_
- {
- inst_TT_IV(INS_TEST, an1, iVal);
- }
- }
-
- goto DONE;
-
- NO_TEST_FOR_AND:;
- }
-
- // TODO: Check for other cases that can generate 'test',
- // TODO: also check for a 64-bit integer zero test which
- // TODO: could generate 'or lo, hi' followed by jz/jnz.
- }
- }
-
- // See what Jcc instruction we would use if we can take advantage of
- // the knowledge of EFLAGS.
-
- if (unsignedCmp)
- {
- /*
- Unsigned comparison to 0. Using this table:
-
- ----------------------------------------------------
- | Comparison | Flags Checked | Instruction Used |
- ----------------------------------------------------
- | == 0 | ZF = 1 | je |
- ----------------------------------------------------
- | != 0 | ZF = 0 | jne |
- ----------------------------------------------------
- | < 0 | always FALSE | N/A |
- ----------------------------------------------------
- | <= 0 | ZF = 1 | je |
- ----------------------------------------------------
- | >= 0 | always TRUE | N/A |
- ----------------------------------------------------
- | > 0 | ZF = 0 | jne |
- ----------------------------------------------------
- */
- switch (cmp)
- {
-#ifdef _TARGET_ARM_
- case GT_EQ:
- jumpKind = EJ_eq;
- break;
- case GT_NE:
- jumpKind = EJ_ne;
- break;
- case GT_LT:
- jumpKind = EJ_NONE;
- break;
- case GT_LE:
- jumpKind = EJ_eq;
- break;
- case GT_GE:
- jumpKind = EJ_NONE;
- break;
- case GT_GT:
- jumpKind = EJ_ne;
- break;
-#elif defined(_TARGET_X86_)
- case GT_EQ:
- jumpKind = EJ_je;
- break;
- case GT_NE:
- jumpKind = EJ_jne;
- break;
- case GT_LT:
- jumpKind = EJ_NONE;
- break;
- case GT_LE:
- jumpKind = EJ_je;
- break;
- case GT_GE:
- jumpKind = EJ_NONE;
- break;
- case GT_GT:
- jumpKind = EJ_jne;
- break;
-#endif // TARGET
- default:
- noway_assert(!"Unexpected comparison OpCode");
- break;
- }
- }
- else
- {
- /*
- Signed comparison to 0. Using this table:
-
- -----------------------------------------------------
- | Comparison | Flags Checked | Instruction Used |
- -----------------------------------------------------
- | == 0 | ZF = 1 | je |
- -----------------------------------------------------
- | != 0 | ZF = 0 | jne |
- -----------------------------------------------------
- | < 0 | SF = 1 | js |
- -----------------------------------------------------
- | <= 0 | N/A | N/A |
- -----------------------------------------------------
- | >= 0 | SF = 0 | jns |
- -----------------------------------------------------
- | > 0 | N/A | N/A |
- -----------------------------------------------------
- */
-
- switch (cmp)
- {
-#ifdef _TARGET_ARM_
- case GT_EQ:
- jumpKind = EJ_eq;
- break;
- case GT_NE:
- jumpKind = EJ_ne;
- break;
- case GT_LT:
- jumpKind = EJ_mi;
- break;
- case GT_LE:
- jumpKind = EJ_NONE;
- break;
- case GT_GE:
- jumpKind = EJ_pl;
- break;
- case GT_GT:
- jumpKind = EJ_NONE;
- break;
-#elif defined(_TARGET_X86_)
- case GT_EQ:
- jumpKind = EJ_je;
- break;
- case GT_NE:
- jumpKind = EJ_jne;
- break;
- case GT_LT:
- jumpKind = EJ_js;
- break;
- case GT_LE:
- jumpKind = EJ_NONE;
- break;
- case GT_GE:
- jumpKind = EJ_jns;
- break;
- case GT_GT:
- jumpKind = EJ_NONE;
- break;
-#endif // TARGET
- default:
- noway_assert(!"Unexpected comparison OpCode");
- break;
- }
- assert(jumpKind == genJumpKindForOper(cmp, CK_LOGICAL));
- }
- assert(jumpKind != EJ_COUNT); // Ensure that it was assigned a valid value above
-
- /* Is the value a simple local variable? */
-
- if (op1->gtOper == GT_LCL_VAR)
- {
- /* Is the flags register set to the value? */
-
- if (genFlagsAreVar(op1->gtLclVarCommon.gtLclNum))
- {
- if (jumpKind != EJ_NONE)
- {
- addrReg1 = RBM_NONE;
- genUpdateLife(op1);
- goto DONE_FLAGS;
- }
- }
- }
-
- /* Make the comparand addressable */
- addrReg1 = genMakeRvalueAddressable(op1, RBM_NONE, RegSet::KEEP_REG, false, true);
-
- /* Are the condition flags set based on the value? */
-
- unsigned flags = (op1->gtFlags & GTF_ZSF_SET);
-
- if (op1->gtFlags & GTF_REG_VAL)
- {
- if (genFlagsAreReg(op1->gtRegNum))
- {
- flags |= GTF_ZSF_SET;
- }
- }
-
- if (flags)
- {
- if (jumpKind != EJ_NONE)
- {
- goto DONE_FLAGS;
- }
- }
-
- /* Is the value in a register? */
-
- if (op1->gtFlags & GTF_REG_VAL)
- {
- regNumber reg = op1->gtRegNum;
-
- /* With a 'test' we can do any signed test or any test for equality */
-
- if (!(cond->gtFlags & GTF_UNSIGNED) || cmp == GT_EQ || cmp == GT_NE)
- {
- emitAttr compareSize = emitTypeSize(op1->TypeGet());
-
-            // If we have a GT_REG_VAR then the register will be properly sign/zero extended
- // But only up to 4 bytes
- if ((op1->gtOper == GT_REG_VAR) && (compareSize < EA_4BYTE))
- {
- compareSize = EA_4BYTE;
- }
-
-#if CPU_HAS_BYTE_REGS
- // Make sure if we require a byte compare that we have a byte-able register
- if ((compareSize != EA_1BYTE) || ((genRegMask(op1->gtRegNum) & RBM_BYTE_REGS) != 0))
-#endif // CPU_HAS_BYTE_REGS
- {
- /* Generate 'test reg, reg' */
- instGen_Compare_Reg_To_Zero(compareSize, reg);
- goto DONE;
- }
- }
- }
- }
-
- else // if (ival != 0)
- {
- bool smallOk = true;
-
-        /* Make sure that the constant is not out of op1's range.
-           If it is, we need to perform an int-with-int comparison,
-           so we set smallOk to false and op1 gets loaded into a
-           register.
-        */
-
- /* If op1 is TYP_SHORT, and is followed by an unsigned
- * comparison, we can use smallOk. But we don't know which
- * flags will be needed. This probably doesn't happen often.
- */
- var_types gtType = op1->TypeGet();
-
- switch (gtType)
- {
- case TYP_BYTE:
- if (ival != (signed char)ival)
- smallOk = false;
- break;
- case TYP_BOOL:
- case TYP_UBYTE:
- if (ival != (unsigned char)ival)
- smallOk = false;
- break;
-
- case TYP_SHORT:
- if (ival != (signed short)ival)
- smallOk = false;
- break;
- case TYP_CHAR:
- if (ival != (unsigned short)ival)
- smallOk = false;
- break;
-
-#ifdef _TARGET_64BIT_
- case TYP_INT:
- if (!FitsIn<INT32>(ival))
- smallOk = false;
- break;
- case TYP_UINT:
- if (!FitsIn<UINT32>(ival))
- smallOk = false;
- break;
-#endif // _TARGET_64BIT_
-
- default:
- break;
- }
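-        // For example, comparing a TYP_SHORT operand against the constant 100000 cannot be
-        // encoded as a 16-bit compare, so smallOk becomes false and op1 will be loaded into
-        // a register for a full-width compare.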
-
- if (smallOk && // constant is in op1's range
- !unsignedCmp && // signed comparison
- varTypeIsSmall(gtType) && // smalltype var
- varTypeIsUnsigned(gtType)) // unsigned type
- {
- unsignedCmp = true;
- }
-
- /* Make the comparand addressable */
- addrReg1 = genMakeRvalueAddressable(op1, RBM_NONE, RegSet::KEEP_REG, false, smallOk);
- }
-
- // #if defined(DEBUGGING_SUPPORT)
-
- /* Special case: comparison of two constants */
-
- // Needed if Importer doesn't call gtFoldExpr()
-
- if (!(op1->gtFlags & GTF_REG_VAL) && (op1->IsCnsIntOrI()))
- {
- // noway_assert(compiler->opts.MinOpts() || compiler->opts.compDbgCode);
-
- /* Workaround: get the constant operand into a register */
- genComputeReg(op1, RBM_NONE, RegSet::ANY_REG, RegSet::KEEP_REG);
-
- noway_assert(addrReg1 == RBM_NONE);
- noway_assert(op1->gtFlags & GTF_REG_VAL);
-
- addrReg1 = genRegMask(op1->gtRegNum);
- }
-
- // #endif
-
- /* Compare the operand against the constant */
-
- if (op2->IsIconHandle())
- {
- inst_TT_IV(INS_cmp, op1, ival, 0, EA_HANDLE_CNS_RELOC);
- }
- else
- {
- inst_TT_IV(INS_cmp, op1, ival);
- }
- goto DONE;
- }
-
- //---------------------------------------------------------------------
- //
- // We reach here if op2 was not a GT_CNS_INT
- //
-
- byteCmp = false;
- shortCmp = false;
-
- if (op1Type == op2->gtType)
- {
- shortCmp = varTypeIsShort(op1Type);
- byteCmp = varTypeIsByte(op1Type);
- }
-
- noway_assert(op1->gtOper != GT_CNS_INT);
-
- if (op2->gtOper == GT_LCL_VAR)
- genMarkLclVar(op2);
-
- assert(((addrReg1 | addrReg2) & regSet.rsMaskUsed) == (addrReg1 | addrReg2));
- assert(((addrReg1 & addrReg2) & regSet.rsMaskMult) == (addrReg1 & addrReg2));
-
- /* Are we comparing against a register? */
-
- if (op2->gtFlags & GTF_REG_VAL)
- {
- /* Make the comparands addressable and mark as used */
-
- assert(addrReg1 == RBM_NONE);
- addrReg1 = genMakeAddressable2(op1, RBM_NONE, RegSet::KEEP_REG, false, true);
-
- /* Is the size of the comparison byte/char/short ? */
-
- if (varTypeIsSmall(op1->TypeGet()))
- {
- /* Is op2 sitting in an appropriate register? */
-
- if (varTypeIsByte(op1->TypeGet()) && !isByteReg(op2->gtRegNum))
- goto NO_SMALL_CMP;
-
- /* Is op2 of the right type for a small comparison */
-
- if (op2->gtOper == GT_REG_VAR)
- {
- if (op1->gtType != compiler->lvaGetRealType(op2->gtRegVar.gtLclNum))
- goto NO_SMALL_CMP;
- }
- else
- {
- if (op1->gtType != op2->gtType)
- goto NO_SMALL_CMP;
- }
-
- if (varTypeIsUnsigned(op1->TypeGet()))
- unsignedCmp = true;
- }
-
- assert(addrReg2 == RBM_NONE);
-
- genComputeReg(op2, RBM_NONE, RegSet::ANY_REG, RegSet::KEEP_REG);
- addrReg2 = genRegMask(op2->gtRegNum);
- addrReg1 = genKeepAddressable(op1, addrReg1, addrReg2);
- assert(((addrReg1 | addrReg2) & regSet.rsMaskUsed) == (addrReg1 | addrReg2));
- assert(((addrReg1 & addrReg2) & regSet.rsMaskMult) == (addrReg1 & addrReg2));
-
- /* Compare against the register */
-
- inst_TT_RV(INS_cmp, op1, op2->gtRegNum);
-
- goto DONE;
-
- NO_SMALL_CMP:
-
- // op1 has been made addressable and is marked as in use
- // op2 is un-generated
- assert(addrReg2 == 0);
-
- if ((op1->gtFlags & GTF_REG_VAL) == 0)
- {
- regNumber reg1 = regSet.rsPickReg();
-
- noway_assert(varTypeIsSmall(op1->TypeGet()));
- instruction ins = ins_Move_Extend(op1->TypeGet(), (op1->gtFlags & GTF_REG_VAL) != 0);
-
- // regSet.rsPickReg can cause one of the trees within this address mode to get spilled
- // so we need to make sure it is still valid. Note that at this point, reg1 is
- // *not* marked as in use, and it is possible for it to be used in the address
-            // mode expression, but that is OK, because we are done with the expression after
- // this. We only need reg1.
- addrReg1 = genKeepAddressable(op1, addrReg1);
- inst_RV_TT(ins, reg1, op1);
- regTracker.rsTrackRegTrash(reg1);
-
- genDoneAddressable(op1, addrReg1, RegSet::KEEP_REG);
- addrReg1 = 0;
-
- genMarkTreeInReg(op1, reg1);
-
- regSet.rsMarkRegUsed(op1);
- addrReg1 = genRegMask(op1->gtRegNum);
- }
-
- assert(((addrReg1 | addrReg2) & regSet.rsMaskUsed) == (addrReg1 | addrReg2));
- assert(((addrReg1 & addrReg2) & regSet.rsMaskMult) == (addrReg1 & addrReg2));
-
- goto DONE_OP1;
- }
-
- // We come here if op2 is not enregistered or not in a "good" register.
-
- assert(addrReg1 == 0);
-
- // Determine what registers go live between op1 and op2
- newLiveMask = genNewLiveRegMask(op1, op2);
-
- // Setup regNeed with the set of register that we suggest for op1 to be in
- //
- regNeed = RBM_ALLINT;
-
- // avoid selecting registers that get newly born in op2
- regNeed = regSet.rsNarrowHint(regNeed, ~newLiveMask);
-
- // avoid selecting op2 reserved regs
- regNeed = regSet.rsNarrowHint(regNeed, ~op2->gtRsvdRegs);
-
-#if CPU_HAS_BYTE_REGS
- // if necessary setup regNeed to select just the byte-able registers
- if (byteCmp)
- regNeed = regSet.rsNarrowHint(RBM_BYTE_REGS, regNeed);
-#endif // CPU_HAS_BYTE_REGS
-
-    // Compute the first comparand into some register; regNeed here is simply a hint because RegSet::ANY_REG is used.
- //
- genComputeReg(op1, regNeed, RegSet::ANY_REG, RegSet::FREE_REG);
- noway_assert(op1->gtFlags & GTF_REG_VAL);
-
- op1Reg = op1->gtRegNum;
-
- // Setup regNeed with the set of register that we require for op1 to be in
- //
- regNeed = RBM_ALLINT;
-
-#if CPU_HAS_BYTE_REGS
- // if necessary setup regNeed to select just the byte-able registers
- if (byteCmp)
- regNeed &= RBM_BYTE_REGS;
-#endif // CPU_HAS_BYTE_REGS
-
- // avoid selecting registers that get newly born in op2, as using them will force a spill temp to be used.
- regNeed = regSet.rsMustExclude(regNeed, newLiveMask);
-
- // avoid selecting op2 reserved regs, as using them will force a spill temp to be used.
- regNeed = regSet.rsMustExclude(regNeed, op2->gtRsvdRegs);
-
- // Did we end up in an acceptable register?
- // and do we have an acceptable free register available to grab?
- //
- if (((genRegMask(op1Reg) & regNeed) == 0) && ((regSet.rsRegMaskFree() & regNeed) != 0))
- {
- // Grab an acceptable register
- regNumber newReg = regSet.rsGrabReg(regNeed);
-
- noway_assert(op1Reg != newReg);
-
- /* Update the value in the target register */
-
- regTracker.rsTrackRegCopy(newReg, op1Reg);
-
- inst_RV_RV(ins_Copy(op1->TypeGet()), newReg, op1Reg, op1->TypeGet());
-
-        /* The value has been transferred to 'newReg' */
-
- if ((genRegMask(op1Reg) & regSet.rsMaskUsed) == 0)
- gcInfo.gcMarkRegSetNpt(genRegMask(op1Reg));
-
- gcInfo.gcMarkRegPtrVal(newReg, op1->TypeGet());
-
- /* The value is now in an appropriate register */
-
- op1->gtRegNum = newReg;
- }
- noway_assert(op1->gtFlags & GTF_REG_VAL);
- op1Reg = op1->gtRegNum;
-
- genUpdateLife(op1);
-
- /* Mark the register as 'used' */
- regSet.rsMarkRegUsed(op1);
-
- addrReg1 = genRegMask(op1Reg);
-
- assert(((addrReg1 | addrReg2) & regSet.rsMaskUsed) == (addrReg1 | addrReg2));
- assert(((addrReg1 & addrReg2) & regSet.rsMaskMult) == (addrReg1 & addrReg2));
-
-DONE_OP1:
-
- assert(((addrReg1 | addrReg2) & regSet.rsMaskUsed) == (addrReg1 | addrReg2));
- assert(((addrReg1 & addrReg2) & regSet.rsMaskMult) == (addrReg1 & addrReg2));
- noway_assert(op1->gtFlags & GTF_REG_VAL);
-
- // Setup regNeed with either RBM_ALLINT or the RBM_BYTE_REGS subset
- // when byteCmp is true we will perform a byte sized cmp instruction
- // and that instruction requires that any registers used are byte-able ones.
- //
- regNeed = RBM_ALLINT;
-
-#if CPU_HAS_BYTE_REGS
- // if necessary setup regNeed to select just the byte-able registers
- if (byteCmp)
- regNeed &= RBM_BYTE_REGS;
-#endif // CPU_HAS_BYTE_REGS
-
- /* Make the comparand addressable */
- assert(addrReg2 == 0);
- addrReg2 = genMakeRvalueAddressable(op2, regNeed, RegSet::KEEP_REG, false, (byteCmp | shortCmp));
-
- /* Make sure the first operand is still in a register; if
- it's been spilled, we have to make sure it's reloaded
- into a byte-addressable register if needed.
-       Pass keepReg=RegSet::KEEP_REG; otherwise we get the pointer lifetimes wrong.
- */
-
- assert(addrReg1 != 0);
- genRecoverReg(op1, regNeed, RegSet::KEEP_REG);
-
- noway_assert(op1->gtFlags & GTF_REG_VAL);
- noway_assert(!byteCmp || isByteReg(op1->gtRegNum));
-
- addrReg1 = genRegMask(op1->gtRegNum);
- regSet.rsLockUsedReg(addrReg1);
-
- /* Make sure that op2 is addressable. If we are going to do a
- byte-comparison, we need it to be in a byte register. */
-
- if (byteCmp && (op2->gtFlags & GTF_REG_VAL))
- {
- genRecoverReg(op2, regNeed, RegSet::KEEP_REG);
- addrReg2 = genRegMask(op2->gtRegNum);
- }
- else
- {
- addrReg2 = genKeepAddressable(op2, addrReg2);
- }
-
- regSet.rsUnlockUsedReg(addrReg1);
-
- assert(((addrReg1 | addrReg2) & regSet.rsMaskUsed) == (addrReg1 | addrReg2));
- assert(((addrReg1 & addrReg2) & regSet.rsMaskMult) == (addrReg1 & addrReg2));
-
- if (byteCmp || shortCmp)
- {
- size = emitTypeSize(op2->TypeGet());
- if (varTypeIsUnsigned(op1Type))
- unsignedCmp = true;
- }
- else
- {
- size = emitActualTypeSize(op2->TypeGet());
- }
-
- /* Perform the comparison */
- inst_RV_TT(INS_cmp, op1->gtRegNum, op2, 0, size);
-
-DONE:
-
- jumpKind = genJumpKindForOper(cmp, unsignedCmp ? CK_UNSIGNED : CK_SIGNED);
-
-DONE_FLAGS: // We have determined what jumpKind to use
-
- genUpdateLife(cond);
-
- /* The condition value is dead at the jump that follows */
-
- assert(((addrReg1 | addrReg2) & regSet.rsMaskUsed) == (addrReg1 | addrReg2));
- assert(((addrReg1 & addrReg2) & regSet.rsMaskMult) == (addrReg1 & addrReg2));
- genDoneAddressable(op1, addrReg1, RegSet::KEEP_REG);
- genDoneAddressable(op2, addrReg2, RegSet::KEEP_REG);
-
- noway_assert(jumpKind != EJ_COUNT); // Ensure that it was assigned a valid value
-
- return jumpKind;
-}
-
-/*****************************************************************************/
-/*****************************************************************************/
-/*****************************************************************************
- *
- * Generate code to jump to the jump target of the current basic block if
- * the given relational operator yields 'true'.
- */
-
-void CodeGen::genCondJump(GenTreePtr cond, BasicBlock* destTrue, BasicBlock* destFalse, bool bStackFPFixup)
-{
- BasicBlock* jumpTrue;
- BasicBlock* jumpFalse;
-
- GenTreePtr op1 = cond->gtOp.gtOp1;
- GenTreePtr op2 = cond->gtOp.gtOp2;
- genTreeOps cmp = cond->OperGet();
-
- if (destTrue)
- {
- jumpTrue = destTrue;
- jumpFalse = destFalse;
- }
- else
- {
- noway_assert(compiler->compCurBB->bbJumpKind == BBJ_COND);
-
- jumpTrue = compiler->compCurBB->bbJumpDest;
- jumpFalse = compiler->compCurBB->bbNext;
- }
-
- noway_assert(cond->OperIsCompare());
-
- /* Make sure the more expensive operand is 'op1' */
- noway_assert((cond->gtFlags & GTF_REVERSE_OPS) == 0);
-
- if (cond->gtFlags & GTF_REVERSE_OPS) // TODO: note that this is now dead code, since the above is a noway_assert()
- {
- /* Don't forget to modify the condition as well */
-
- cond->gtOp.gtOp1 = op2;
- cond->gtOp.gtOp2 = op1;
- cond->SetOper(GenTree::SwapRelop(cmp));
- cond->gtFlags &= ~GTF_REVERSE_OPS;
-
- /* Get hold of the new values */
-
- cmp = cond->OperGet();
- op1 = cond->gtOp.gtOp1;
- op2 = cond->gtOp.gtOp2;
- }
-
- /* What is the type of the operand? */
-
- switch (genActualType(op1->gtType))
- {
- case TYP_INT:
- case TYP_REF:
- case TYP_BYREF:
- emitJumpKind jumpKind;
-
- // Check if we can use the currently set flags. Else set them
-
- jumpKind = genCondSetFlags(cond);
-
-#if FEATURE_STACK_FP_X87
- if (bStackFPFixup)
- {
- genCondJmpInsStackFP(jumpKind, jumpTrue, jumpFalse);
- }
- else
-#endif
- {
- /* Generate the conditional jump */
- inst_JMP(jumpKind, jumpTrue);
- }
-
- return;
-
- case TYP_LONG:
-#if FEATURE_STACK_FP_X87
- if (bStackFPFixup)
- {
- genCondJumpLngStackFP(cond, jumpTrue, jumpFalse);
- }
- else
-#endif
- {
- genCondJumpLng(cond, jumpTrue, jumpFalse);
- }
- return;
-
- case TYP_FLOAT:
- case TYP_DOUBLE:
-#if FEATURE_STACK_FP_X87
- genCondJumpFltStackFP(cond, jumpTrue, jumpFalse, bStackFPFixup);
-#else
- genCondJumpFloat(cond, jumpTrue, jumpFalse);
-#endif
- return;
-
- default:
-#ifdef DEBUG
- compiler->gtDispTree(cond);
-#endif
- unreached(); // unexpected/unsupported 'jtrue' operands type
- }
-}
-
-/*****************************************************************************
- * Spill registers to check that callers can handle it.
- */
-
-#ifdef DEBUG
-
-void CodeGen::genStressRegs(GenTreePtr tree)
-{
- if (regSet.rsStressRegs() < 2)
- return;
-
- /* Spill as many registers as possible. Callers should be prepared
- to handle this case.
- But don't spill trees with no size (TYP_STRUCT comes to mind) */
-
- {
- regMaskTP spillRegs = regSet.rsRegMaskCanGrab() & regSet.rsMaskUsed;
- regNumber regNum;
- regMaskTP regBit;
-
- for (regNum = REG_FIRST, regBit = 1; regNum < REG_COUNT; regNum = REG_NEXT(regNum), regBit <<= 1)
- {
- if ((spillRegs & regBit) && (regSet.rsUsedTree[regNum] != NULL) &&
- (genTypeSize(regSet.rsUsedTree[regNum]->TypeGet()) > 0))
- {
- regSet.rsSpillReg(regNum);
-
- spillRegs &= regSet.rsMaskUsed;
-
- if (!spillRegs)
- break;
- }
- }
- }
-
- regMaskTP trashRegs = regSet.rsRegMaskFree();
-
- if (trashRegs == RBM_NONE)
- return;
-
- /* It is sometimes reasonable to expect that calling genCodeForTree()
- on certain trees won't spill anything */
-
- if ((compiler->compCurStmt == compiler->compCurBB->bbTreeList) && (compiler->compCurBB->bbCatchTyp) &&
- handlerGetsXcptnObj(compiler->compCurBB->bbCatchTyp))
- {
- trashRegs &= ~(RBM_EXCEPTION_OBJECT);
- }
-
- // If genCodeForTree() effectively gets called a second time on the same tree
-
- if (tree->gtFlags & GTF_REG_VAL)
- {
- noway_assert(varTypeIsIntegralOrI(tree->TypeGet()));
- trashRegs &= ~genRegMask(tree->gtRegNum);
- }
-
- if (tree->gtType == TYP_INT && tree->OperIsSimple())
- {
- GenTreePtr op1 = tree->gtOp.gtOp1;
- GenTreePtr op2 = tree->gtOp.gtOp2;
- if (op1 && (op1->gtFlags & GTF_REG_VAL))
- trashRegs &= ~genRegMask(op1->gtRegNum);
- if (op2 && (op2->gtFlags & GTF_REG_VAL))
- trashRegs &= ~genRegMask(op2->gtRegNum);
- }
-
- if (compiler->compCurBB == compiler->genReturnBB)
- {
- if (compiler->info.compCallUnmanaged)
- {
- LclVarDsc* varDsc = &compiler->lvaTable[compiler->info.compLvFrameListRoot];
- if (varDsc->lvRegister)
- trashRegs &= ~genRegMask(varDsc->lvRegNum);
- }
- }
-
- /* Now trash the registers. We use regSet.rsModifiedRegsMask, else we will have
- to save/restore the register. We try to be as unintrusive
- as possible */
-
- noway_assert((REG_INT_LAST - REG_INT_FIRST) == 7);
- // This is obviously false for ARM, but this function is never called.
- for (regNumber reg = REG_INT_FIRST; reg <= REG_INT_LAST; reg = REG_NEXT(reg))
- {
- regMaskTP regMask = genRegMask(reg);
-
- if (regSet.rsRegsModified(regMask & trashRegs))
- genSetRegToIcon(reg, 0);
- }
-}
-
-#endif // DEBUG
-
-/*****************************************************************************
- *
- * Generate code for a GTK_CONST tree
- */
-
-void CodeGen::genCodeForTreeConst(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg)
-{
- noway_assert(tree->IsCnsIntOrI());
-
- ssize_t ival = tree->gtIntConCommon.IconValue();
- regMaskTP needReg = destReg;
- regNumber reg;
- bool needReloc = compiler->opts.compReloc && tree->IsIconHandle();
-
-#if REDUNDANT_LOAD
-
- /* If we are targeting destReg and ival is zero */
- /* we would rather xor needReg than copy another register */
-
- if (!needReloc)
- {
- bool reuseConstantInReg = false;
-
- if (destReg == RBM_NONE)
- reuseConstantInReg = true;
-
-#ifdef _TARGET_ARM_
- // If we can set a register to a constant with a small encoding, then do that.
- // Assume we'll get a low register if needReg has low registers as options.
- if (!reuseConstantInReg &&
- !arm_Valid_Imm_For_Small_Mov((needReg & RBM_LOW_REGS) ? REG_R0 : REG_R8, ival, INS_FLAGS_DONT_CARE))
- {
- reuseConstantInReg = true;
- }
-#else
- if (!reuseConstantInReg && ival != 0)
- reuseConstantInReg = true;
-#endif
-
- if (reuseConstantInReg)
- {
- /* Is the constant already in register? If so, use this register */
-
- reg = regTracker.rsIconIsInReg(ival);
- if (reg != REG_NA)
- goto REG_LOADED;
- }
- }
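-    // If the register tracker already knows a register that holds this exact constant, the
-    // lookup above jumps to REG_LOADED and no new load instruction is emitted.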
-
-#endif // REDUNDANT_LOAD
-
- reg = regSet.rsPickReg(needReg, bestReg);
-
- /* If the constant is a handle, we need a reloc to be applied to it */
-
- if (needReloc)
- {
- instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, reg, ival);
- regTracker.rsTrackRegTrash(reg);
- }
- else
- {
- genSetRegToIcon(reg, ival, tree->TypeGet());
- }
-
-REG_LOADED:
-
-#ifdef DEBUG
- /* Special case: GT_CNS_INT - Restore the current live set if it was changed */
-
- if (!genTempLiveChg)
- {
- VarSetOps::Assign(compiler, compiler->compCurLife, genTempOldLife);
- genTempLiveChg = true;
- }
-#endif
-
-    gcInfo.gcMarkRegPtrVal(reg, tree->TypeGet()); // In case the handle is a GC object (e.g., frozen strings)
- genCodeForTree_DONE(tree, reg);
-}
-
-/*****************************************************************************
- *
- * Generate code for a GTK_LEAF tree
- */
-
-void CodeGen::genCodeForTreeLeaf(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg)
-{
- genTreeOps oper = tree->OperGet();
- regNumber reg = DUMMY_INIT(REG_CORRUPT);
- regMaskTP regs = regSet.rsMaskUsed;
- regMaskTP needReg = destReg;
- size_t size;
-
- noway_assert(tree->OperKind() & GTK_LEAF);
-
- switch (oper)
- {
- case GT_REG_VAR:
- NO_WAY("GT_REG_VAR should have been caught above");
- break;
-
- case GT_LCL_VAR:
-
- /* Does the variable live in a register? */
-
- if (genMarkLclVar(tree))
- {
- genCodeForTree_REG_VAR1(tree);
- return;
- }
-
-#if REDUNDANT_LOAD
-
- /* Is the local variable already in register? */
-
- reg = findStkLclInReg(tree->gtLclVarCommon.gtLclNum);
-
- if (reg != REG_NA)
- {
- /* Use the register the variable happens to be in */
- regMaskTP regMask = genRegMask(reg);
-
- // If the register that it was in isn't one of the needRegs
- // then try to move it into a needReg register
-
- if (((regMask & needReg) == 0) && (regSet.rsRegMaskCanGrab() & needReg))
- {
- regNumber rg2 = reg;
- reg = regSet.rsPickReg(needReg, bestReg);
- if (reg != rg2)
- {
- regMask = genRegMask(reg);
- inst_RV_RV(INS_mov, reg, rg2, tree->TypeGet());
- }
- }
-
- gcInfo.gcMarkRegPtrVal(reg, tree->TypeGet());
- regTracker.rsTrackRegLclVar(reg, tree->gtLclVarCommon.gtLclNum);
- break;
- }
-
-#endif
- goto MEM_LEAF;
-
- case GT_LCL_FLD:
-
- // We only use GT_LCL_FLD for lvDoNotEnregister vars, so we don't have
- // to worry about it being enregistered.
- noway_assert(compiler->lvaTable[tree->gtLclFld.gtLclNum].lvRegister == 0);
- goto MEM_LEAF;
-
- case GT_CLS_VAR:
-
- MEM_LEAF:
-
- /* Pick a register for the value */
-
- reg = regSet.rsPickReg(needReg, bestReg);
-
- /* Load the variable into the register */
-
- size = genTypeSize(tree->gtType);
-
- if (size < EA_4BYTE)
- {
- instruction ins = ins_Move_Extend(tree->TypeGet(), (tree->gtFlags & GTF_REG_VAL) != 0);
- inst_RV_TT(ins, reg, tree, 0);
-
- /* We've now "promoted" the tree-node to TYP_INT */
-
- tree->gtType = TYP_INT;
- }
- else
- {
- inst_RV_TT(INS_mov, reg, tree, 0);
- }
-
- regTracker.rsTrackRegTrash(reg);
-
- gcInfo.gcMarkRegPtrVal(reg, tree->TypeGet());
-
- switch (oper)
- {
- case GT_CLS_VAR:
- regTracker.rsTrackRegClsVar(reg, tree);
- break;
- case GT_LCL_VAR:
- regTracker.rsTrackRegLclVar(reg, tree->gtLclVarCommon.gtLclNum);
- break;
- case GT_LCL_FLD:
- break;
- default:
- noway_assert(!"Unexpected oper");
- }
-
-#ifdef _TARGET_ARM_
- if (tree->gtFlags & GTF_IND_VOLATILE)
- {
- // Emit a memory barrier instruction after the load
- instGen_MemoryBarrier();
- }
-#endif
-
- break;
-
- case GT_NO_OP:
- // The VM does certain things with actual NOP instructions
- // so generate something small that has no effect, but isn't
- // a typical NOP
- if (tree->gtFlags & GTF_NO_OP_NO)
- {
-#ifdef _TARGET_XARCH_
- // The VM expects 0x66 0x90 for a 2-byte NOP, not 0x90 0x90
- instGen(INS_nop);
- instGen(INS_nop);
-#elif defined(_TARGET_ARM_)
-            // The VM isn't checking yet; when it does, hopefully it will
-            // be fooled by the wider variant.
- instGen(INS_nopw);
-#else
- NYI("Non-nop NO_OP");
-#endif
- }
- else
- {
- instGen(INS_nop);
- }
- reg = REG_STK;
- break;
-
-#if !FEATURE_EH_FUNCLETS
- case GT_END_LFIN:
-
- /* Have to clear the shadowSP of the nesting level which
- encloses the finally */
-
- unsigned finallyNesting;
- finallyNesting = (unsigned)tree->gtVal.gtVal1;
- noway_assert(tree->gtVal.gtVal1 <
- compiler->compHndBBtabCount); // assert we didn't truncate with the cast above.
- noway_assert(finallyNesting < compiler->compHndBBtabCount);
-
- // The last slot is reserved for ICodeManager::FixContext(ppEndRegion)
- unsigned filterEndOffsetSlotOffs;
- PREFIX_ASSUME(compiler->lvaLclSize(compiler->lvaShadowSPslotsVar) >
- sizeof(void*)); // below doesn't underflow.
- filterEndOffsetSlotOffs = (unsigned)(compiler->lvaLclSize(compiler->lvaShadowSPslotsVar) - (sizeof(void*)));
-
- unsigned curNestingSlotOffs;
- curNestingSlotOffs = filterEndOffsetSlotOffs - ((finallyNesting + 1) * sizeof(void*));
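-            // For example, with finallyNesting == 0 this selects the slot just below the
-            // reserved FixContext slot (the last slot of lvaShadowSPslotsVar), which is
-            // then zeroed by the store below.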
- instGen_Store_Imm_Into_Lcl(TYP_I_IMPL, EA_PTRSIZE, 0, compiler->lvaShadowSPslotsVar, curNestingSlotOffs);
- reg = REG_STK;
- break;
-#endif // !FEATURE_EH_FUNCLETS
-
- case GT_CATCH_ARG:
-
- noway_assert(compiler->compCurBB->bbCatchTyp && handlerGetsXcptnObj(compiler->compCurBB->bbCatchTyp));
-
-            /* Catch arguments get passed in a register. genCodeForBBlist()
-               would have marked it as holding a GC object, but not as used. */
-
- noway_assert(gcInfo.gcRegGCrefSetCur & RBM_EXCEPTION_OBJECT);
- reg = REG_EXCEPTION_OBJECT;
- break;
-
- case GT_JMP:
- genCodeForTreeLeaf_GT_JMP(tree);
- return;
-
- case GT_MEMORYBARRIER:
- // Emit the memory barrier instruction
- instGen_MemoryBarrier();
- reg = REG_STK;
- break;
-
- default:
-#ifdef DEBUG
- compiler->gtDispTree(tree);
-#endif
- noway_assert(!"unexpected leaf");
- }
-
- noway_assert(reg != DUMMY_INIT(REG_CORRUPT));
- genCodeForTree_DONE(tree, reg);
-}
-
-GenTreePtr CodeGen::genCodeForCommaTree(GenTreePtr tree)
-{
- while (tree->OperGet() == GT_COMMA)
- {
- GenTreePtr op1 = tree->gtOp.gtOp1;
- genCodeForTree(op1, RBM_NONE);
- gcInfo.gcMarkRegPtrVal(op1);
-
- tree = tree->gtOp.gtOp2;
- }
- return tree;
-}
-
-/*****************************************************************************
- *
- * Generate code for a leaf node of type GT_JMP
- */
-
-void CodeGen::genCodeForTreeLeaf_GT_JMP(GenTreePtr tree)
-{
- noway_assert(compiler->compCurBB->bbFlags & BBF_HAS_JMP);
-
-#ifdef PROFILING_SUPPORTED
- if (compiler->compIsProfilerHookNeeded())
- {
- /* fire the event at the call site */
- unsigned saveStackLvl2 = genStackLevel;
-
- compiler->info.compProfilerCallback = true;
-
-#ifdef _TARGET_X86_
- //
- // Push the profilerHandle
- //
- regMaskTP byrefPushedRegs;
- regMaskTP norefPushedRegs;
- regMaskTP pushedArgRegs =
- genPushRegs(RBM_ARG_REGS & (regSet.rsMaskUsed | regSet.rsMaskVars | regSet.rsMaskLock), &byrefPushedRegs,
- &norefPushedRegs);
-
- if (compiler->compProfilerMethHndIndirected)
- {
- getEmitter()->emitIns_AR_R(INS_push, EA_PTR_DSP_RELOC, REG_NA, REG_NA,
- (ssize_t)compiler->compProfilerMethHnd);
- }
- else
- {
- inst_IV(INS_push, (size_t)compiler->compProfilerMethHnd);
- }
- genSinglePush();
-
- genEmitHelperCall(CORINFO_HELP_PROF_FCN_TAILCALL,
- sizeof(int) * 1, // argSize
- EA_UNKNOWN); // retSize
-
- //
- // Adjust the number of stack slots used by this managed method if necessary.
- //
- if (compiler->fgPtrArgCntMax < 1)
- {
- compiler->fgPtrArgCntMax = 1;
- }
-
- genPopRegs(pushedArgRegs, byrefPushedRegs, norefPushedRegs);
-#elif _TARGET_ARM_
-        // For GT_JMP nodes, when under the ARM profiler, we have added r0 as a used register to evaluate the GT_JMP node.
-        // To emit the tailcall callback we need r0 to pass the profiler handle. Any free register could be used as the call target.
- regNumber argReg = regSet.rsGrabReg(RBM_PROFILER_JMP_USED);
- noway_assert(argReg == REG_PROFILER_JMP_ARG);
- regSet.rsLockReg(RBM_PROFILER_JMP_USED);
-
- if (compiler->compProfilerMethHndIndirected)
- {
- getEmitter()->emitIns_R_AI(INS_ldr, EA_PTR_DSP_RELOC, argReg, (ssize_t)compiler->compProfilerMethHnd);
- regTracker.rsTrackRegTrash(argReg);
- }
- else
- {
- instGen_Set_Reg_To_Imm(EA_4BYTE, argReg, (ssize_t)compiler->compProfilerMethHnd);
- }
-
- genEmitHelperCall(CORINFO_HELP_PROF_FCN_TAILCALL,
- 0, // argSize
- EA_UNKNOWN); // retSize
-
- regSet.rsUnlockReg(RBM_PROFILER_JMP_USED);
-#else
- NYI("Pushing the profilerHandle & caller's sp for the profiler callout and locking 'arguments'");
-#endif //_TARGET_X86_
-
- /* Restore the stack level */
- genStackLevel = saveStackLvl2;
- }
-#endif // PROFILING_SUPPORTED
-
- /* This code is cloned from the regular processing of GT_RETURN values. We have to remember to
- * call genPInvokeMethodEpilog anywhere that we have a method return. We should really
- * generate trees for the PInvoke prolog and epilog so we can remove these special cases.
- */
-
- if (compiler->info.compCallUnmanaged)
- {
- genPInvokeMethodEpilog();
- }
-
- // Make sure register arguments are in their initial registers
- // and stack arguments are put back as well.
- //
- // This does not deal with circular dependencies of register
- // arguments, which is safe because RegAlloc prevents that by
- // not enregistering any RegArgs when a JMP opcode is used.
-
- if (compiler->info.compArgsCount == 0)
- {
- return;
- }
-
- unsigned varNum;
- LclVarDsc* varDsc;
-
- // First move any enregistered stack arguments back to the stack
- for (varNum = 0, varDsc = compiler->lvaTable; varNum < compiler->info.compArgsCount; varNum++, varDsc++)
- {
- noway_assert(varDsc->lvIsParam);
- if (varDsc->lvIsRegArg || !varDsc->lvRegister)
- continue;
-
- /* Argument was passed on the stack, but ended up in a register
- * Store it back to the stack */
- CLANG_FORMAT_COMMENT_ANCHOR;
-
-#ifndef _TARGET_64BIT_
- if (varDsc->TypeGet() == TYP_LONG)
- {
- /* long - at least the low half must be enregistered */
-
- getEmitter()->emitIns_S_R(ins_Store(TYP_INT), EA_4BYTE, varDsc->lvRegNum, varNum, 0);
-
- /* Is the upper half also enregistered? */
-
- if (varDsc->lvOtherReg != REG_STK)
- {
- getEmitter()->emitIns_S_R(ins_Store(TYP_INT), EA_4BYTE, varDsc->lvOtherReg, varNum, sizeof(int));
- }
- }
- else
-#endif // _TARGET_64BIT_
- {
- getEmitter()->emitIns_S_R(ins_Store(varDsc->TypeGet()), emitTypeSize(varDsc->TypeGet()), varDsc->lvRegNum,
- varNum, 0);
- }
- }
-
-#ifdef _TARGET_ARM_
- regMaskTP fixedArgsMask = RBM_NONE;
-#endif
-
- // Next move any un-enregistered register arguments back to their register
- for (varNum = 0, varDsc = compiler->lvaTable; varNum < compiler->info.compArgsCount; varNum++, varDsc++)
- {
- /* Is this variable a register arg? */
-
- if (!varDsc->lvIsRegArg)
- continue;
-
- /* Register argument */
-
- noway_assert(isRegParamType(genActualType(varDsc->TypeGet())));
- noway_assert(!varDsc->lvRegister);
-
- /* Reload it from the stack */
- CLANG_FORMAT_COMMENT_ANCHOR;
-
-#ifndef _TARGET_64BIT_
- if (varDsc->TypeGet() == TYP_LONG)
- {
- /* long - at least the low half must be enregistered */
-
- getEmitter()->emitIns_R_S(ins_Load(TYP_INT), EA_4BYTE, varDsc->lvArgReg, varNum, 0);
- regTracker.rsTrackRegTrash(varDsc->lvArgReg);
-
-            /* Assume that the upper half is also enregistered */
-
- getEmitter()->emitIns_R_S(ins_Load(TYP_INT), EA_4BYTE, genRegArgNext(varDsc->lvArgReg), varNum,
- sizeof(int));
- regTracker.rsTrackRegTrash(genRegArgNext(varDsc->lvArgReg));
-
-#ifdef _TARGET_ARM_
- fixedArgsMask |= genRegMask(varDsc->lvArgReg);
- fixedArgsMask |= genRegMask(genRegArgNext(varDsc->lvArgReg));
-#endif
- }
- else
-#endif // _TARGET_64BIT_
-#ifdef _TARGET_ARM_
- if (varDsc->lvIsHfaRegArg())
- {
- const var_types elemType = varDsc->GetHfaType();
- const instruction loadOp = ins_Load(elemType);
- const emitAttr size = emitTypeSize(elemType);
- regNumber argReg = varDsc->lvArgReg;
- const unsigned maxSize = min(varDsc->lvSize(), (LAST_FP_ARGREG + 1 - argReg) * REGSIZE_BYTES);
-
- for (unsigned ofs = 0; ofs < maxSize; ofs += (unsigned)size)
- {
- getEmitter()->emitIns_R_S(loadOp, size, argReg, varNum, ofs);
- assert(genIsValidFloatReg(argReg)); // we don't use register tracking for FP
- argReg = regNextOfType(argReg, elemType);
- }
- }
- else if (varDsc->TypeGet() == TYP_STRUCT)
- {
- const var_types elemType = TYP_INT; // we pad everything out to at least 4 bytes
- const instruction loadOp = ins_Load(elemType);
- const emitAttr size = emitTypeSize(elemType);
- regNumber argReg = varDsc->lvArgReg;
- const unsigned maxSize = min(varDsc->lvSize(), (REG_ARG_LAST + 1 - argReg) * REGSIZE_BYTES);
-
- for (unsigned ofs = 0; ofs < maxSize; ofs += (unsigned)size)
- {
- getEmitter()->emitIns_R_S(loadOp, size, argReg, varNum, ofs);
- regTracker.rsTrackRegTrash(argReg);
-
- fixedArgsMask |= genRegMask(argReg);
-
- argReg = genRegArgNext(argReg);
- }
- }
- else
-#endif //_TARGET_ARM_
- {
- var_types loadType = varDsc->TypeGet();
- regNumber argReg = varDsc->lvArgReg; // incoming arg register
- bool twoParts = false;
-
- if (compiler->info.compIsVarArgs && isFloatRegType(loadType))
- {
-#ifndef _TARGET_64BIT_
- if (loadType == TYP_DOUBLE)
- twoParts = true;
-#endif
- loadType = TYP_I_IMPL;
- assert(isValidIntArgReg(argReg));
- }
-
- getEmitter()->emitIns_R_S(ins_Load(loadType), emitTypeSize(loadType), argReg, varNum, 0);
- regTracker.rsTrackRegTrash(argReg);
-
-#ifdef _TARGET_ARM_
- fixedArgsMask |= genRegMask(argReg);
-#endif
- if (twoParts)
- {
- argReg = genRegArgNext(argReg);
- assert(isValidIntArgReg(argReg));
-
- getEmitter()->emitIns_R_S(ins_Load(loadType), emitTypeSize(loadType), argReg, varNum, REGSIZE_BYTES);
- regTracker.rsTrackRegTrash(argReg);
-
-#ifdef _TARGET_ARM_
- fixedArgsMask |= genRegMask(argReg);
-#endif
- }
- }
- }
-
-#ifdef _TARGET_ARM_
- // Check if we have any non-fixed args possibly in the arg registers.
- if (compiler->info.compIsVarArgs && (fixedArgsMask & RBM_ARG_REGS) != RBM_ARG_REGS)
- {
- noway_assert(compiler->lvaTable[compiler->lvaVarargsHandleArg].lvOnFrame);
-
- regNumber regDeclArgs = REG_ARG_FIRST;
-
- // Skip the 'this' pointer.
- if (!compiler->info.compIsStatic)
- {
- regDeclArgs = REG_NEXT(regDeclArgs);
- }
-
- // Skip the 'generic context.'
- if (compiler->info.compMethodInfo->args.callConv & CORINFO_CALLCONV_PARAMTYPE)
- {
- regDeclArgs = REG_NEXT(regDeclArgs);
- }
-
- // Skip any 'return buffer arg.'
- if (compiler->info.compRetBuffArg != BAD_VAR_NUM)
- {
- regDeclArgs = REG_NEXT(regDeclArgs);
- }
-
- // Skip the 'vararg cookie.'
- regDeclArgs = REG_NEXT(regDeclArgs);
-
- // Also add offset for the vararg cookie.
- int offset = REGSIZE_BYTES;
-
- // Load all the variable arguments in registers back to their registers.
- for (regNumber reg = regDeclArgs; reg <= REG_ARG_LAST; reg = REG_NEXT(reg))
- {
- if (!(fixedArgsMask & genRegMask(reg)))
- {
- getEmitter()->emitIns_R_S(ins_Load(TYP_INT), EA_4BYTE, reg, compiler->lvaVarargsHandleArg, offset);
- regTracker.rsTrackRegTrash(reg);
- }
- offset += REGSIZE_BYTES;
- }
- }
-#endif // _TARGET_ARM_
-}
-
-/*****************************************************************************
- *
- * Check if a variable is assigned to in a tree. The variable number is
- * passed in pCallbackData. If the variable is assigned to, return
- * Compiler::WALK_ABORT. Otherwise return Compiler::WALK_CONTINUE.
- */
-Compiler::fgWalkResult CodeGen::fgIsVarAssignedTo(GenTreePtr* pTree, Compiler::fgWalkData* data)
-{
- GenTreePtr tree = *pTree;
- if ((tree->OperIsAssignment()) && (tree->gtOp.gtOp1->OperGet() == GT_LCL_VAR) &&
- (tree->gtOp.gtOp1->gtLclVarCommon.gtLclNum == (unsigned)(size_t)data->pCallbackData))
- {
- return Compiler::WALK_ABORT;
- }
-
- return Compiler::WALK_CONTINUE;
-}
-
-regNumber CodeGen::genIsEnregisteredIntVariable(GenTreePtr tree)
-{
- unsigned varNum;
- LclVarDsc* varDsc;
-
- if (tree->gtOper == GT_LCL_VAR)
- {
- /* Does the variable live in a register? */
-
- varNum = tree->gtLclVarCommon.gtLclNum;
- noway_assert(varNum < compiler->lvaCount);
- varDsc = compiler->lvaTable + varNum;
-
- if (!varDsc->IsFloatRegType() && varDsc->lvRegister)
- {
- return varDsc->lvRegNum;
- }
- }
-
- return REG_NA;
-}
-
-// inline
-void CodeGen::unspillLiveness(genLivenessSet* ls)
-{
- // Only try to unspill the registers that are missing from the currentLiveRegs
- //
- regMaskTP cannotSpillMask = ls->maskVars | ls->gcRefRegs | ls->byRefRegs;
- regMaskTP currentLiveRegs = regSet.rsMaskVars | gcInfo.gcRegGCrefSetCur | gcInfo.gcRegByrefSetCur;
- cannotSpillMask &= ~currentLiveRegs;
-
- // Typically this will always be true and we will return
- //
- if (cannotSpillMask == 0)
- return;
-
- for (regNumber reg = REG_INT_FIRST; reg <= REG_INT_LAST; reg = REG_NEXT(reg))
- {
- // Is this a register that we cannot leave in the spilled state?
- //
- if ((cannotSpillMask & genRegMask(reg)) == 0)
- continue;
-
- RegSet::SpillDsc* spill = regSet.rsSpillDesc[reg];
-
- // Was it spilled, if not then skip it.
- //
- if (!spill)
- continue;
-
- noway_assert(spill->spillTree->gtFlags & GTF_SPILLED);
-
- regSet.rsUnspillReg(spill->spillTree, genRegMask(reg), RegSet::KEEP_REG);
- }
-}
-
-/*****************************************************************************
- *
- * Generate code for a qmark colon
- */
-
-void CodeGen::genCodeForQmark(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg)
-{
- GenTreePtr op1 = tree->gtOp.gtOp1;
- GenTreePtr op2 = tree->gtOp.gtOp2;
- regNumber reg;
- regMaskTP regs = regSet.rsMaskUsed;
- regMaskTP needReg = destReg;
-
- noway_assert(compiler->compQmarkUsed);
- noway_assert(tree->gtOper == GT_QMARK);
- noway_assert(op1->OperIsCompare());
- noway_assert(op2->gtOper == GT_COLON);
-
- GenTreePtr thenNode = op2->AsColon()->ThenNode();
- GenTreePtr elseNode = op2->AsColon()->ElseNode();
-
-    /* If elseNode is a Nop node, you must reverse the
-       thenNode and elseNode prior to reaching here!
-       (If both 'else' and 'then' are Nops, the whole qmark will have been optimized away.) */
-
- noway_assert(!elseNode->IsNothingNode());
-
- /* Try to implement the qmark colon using a CMOV. If we can't for
- whatever reason, this will return false and we will implement
- it using regular branching constructs. */
-
- if (genCodeForQmarkWithCMOV(tree, destReg, bestReg))
- return;
-
- /*
- This is a ?: operator; generate code like this:
-
- condition_compare
- jmp_if_true lab_true
-
- lab_false:
- op1 (false = 'else' part)
- jmp lab_done
-
- lab_true:
- op2 (true = 'then' part)
-
- lab_done:
-
-
- NOTE: If no 'then' part we do not generate the 'jmp lab_done'
- or the 'lab_done' label
- */
-
- BasicBlock* lab_true;
- BasicBlock* lab_false;
- BasicBlock* lab_done;
-
- genLivenessSet entryLiveness;
- genLivenessSet exitLiveness;
-
- lab_true = genCreateTempLabel();
- lab_false = genCreateTempLabel();
-
-#if FEATURE_STACK_FP_X87
-    /* Spill any registers that hold partial values so that the exit liveness
-       from both sides is the same */
- CLANG_FORMAT_COMMENT_ANCHOR;
-
-#ifdef DEBUG
- regMaskTP spillMask = regSet.rsMaskUsedFloat | regSet.rsMaskLockedFloat | regSet.rsMaskRegVarFloat;
-
- // spillMask should be the whole FP stack
- noway_assert(compCurFPState.m_uStackSize == genCountBits(spillMask));
-#endif
-
- SpillTempsStackFP(regSet.rsMaskUsedFloat);
- noway_assert(regSet.rsMaskUsedFloat == 0);
-#endif
-
- /* Before we generate code for qmark, we spill all the currently used registers
- that conflict with the registers used in the qmark tree. This is to avoid
- introducing spills that only occur on either the 'then' or 'else' side of
- the tree, but not both identically. We need to be careful with enregistered
- variables that are used; see below.
- */
-
- if (regSet.rsMaskUsed)
- {
- /* If regSet.rsMaskUsed overlaps with regSet.rsMaskVars (multi-use of the enregistered
- variable), then it may not get spilled. However, the variable may
- then go dead within thenNode/elseNode, at which point regSet.rsMaskUsed
- may get spilled from one side and not the other. So unmark regSet.rsMaskVars
- before spilling regSet.rsMaskUsed */
-
- regMaskTP rsAdditionalCandidates = regSet.rsMaskUsed & regSet.rsMaskVars;
- regMaskTP rsAdditional = RBM_NONE;
-
- // For each multi-use of an enregistered variable, we need to determine if
- // it can get spilled inside the qmark colon. This can only happen if
- // its life ends somewhere in the qmark colon. We have the following
- // cases:
- // 1) Variable is dead at the end of the colon -- needs to be spilled
- // 2) Variable is alive at the end of the colon -- needs to be spilled
- // iff it is assigned to in the colon. In order to determine that, we
- // examine the GTF_ASG flag to see if any assignments were made in the
- // colon. If there are any, we need to do a tree walk to see if this
- // variable is the target of an assignment. This treewalk should not
- // happen frequently.
- if (rsAdditionalCandidates)
- {
-#ifdef DEBUG
- if (compiler->verbose)
- {
- Compiler::printTreeID(tree);
- printf(": Qmark-Colon additional spilling candidates are ");
- dspRegMask(rsAdditionalCandidates);
- printf("\n");
- }
-#endif
-
- // If any candidates are not alive at the GT_QMARK node, then they
- // need to be spilled
-
- VARSET_TP VARSET_INIT(compiler, rsLiveNow, compiler->compCurLife);
- VARSET_TP VARSET_INIT_NOCOPY(rsLiveAfter, compiler->fgUpdateLiveSet(compiler->compCurLife,
- compiler->compCurLifeTree, tree));
-
- VARSET_TP VARSET_INIT_NOCOPY(regVarLiveNow,
- VarSetOps::Intersection(compiler, compiler->raRegVarsMask, rsLiveNow));
-
- VARSET_ITER_INIT(compiler, iter, regVarLiveNow, varIndex);
- while (iter.NextElem(compiler, &varIndex))
- {
- // Find the variable in compiler->lvaTable
- unsigned varNum = compiler->lvaTrackedToVarNum[varIndex];
- LclVarDsc* varDsc = compiler->lvaTable + varNum;
-
-#if !FEATURE_FP_REGALLOC
- if (varDsc->IsFloatRegType())
- continue;
-#endif
-
- noway_assert(varDsc->lvRegister);
-
- regMaskTP regBit;
-
- if (varTypeIsFloating(varDsc->TypeGet()))
- {
- regBit = genRegMaskFloat(varDsc->lvRegNum, varDsc->TypeGet());
- }
- else
- {
- regBit = genRegMask(varDsc->lvRegNum);
-
- // For longs we may need to spill both regs
- if (isRegPairType(varDsc->lvType) && varDsc->lvOtherReg != REG_STK)
- regBit |= genRegMask(varDsc->lvOtherReg);
- }
-
- // Is it one of our reg-use vars? If not, we don't need to spill it.
- regBit &= rsAdditionalCandidates;
- if (!regBit)
- continue;
-
- // Is the variable live at the end of the colon?
- if (VarSetOps::IsMember(compiler, rsLiveAfter, varIndex))
- {
- // Variable is alive at the end of the colon. Was it assigned
- // to inside the colon?
-
- if (!(op2->gtFlags & GTF_ASG))
- continue;
-
- if (compiler->fgWalkTreePre(&op2, CodeGen::fgIsVarAssignedTo, (void*)(size_t)varNum) ==
- Compiler::WALK_ABORT)
- {
- // Variable was assigned to, so we need to spill it.
-
- rsAdditional |= regBit;
-#ifdef DEBUG
- if (compiler->verbose)
- {
- Compiler::printTreeID(tree);
- printf(": Qmark-Colon candidate ");
- dspRegMask(regBit);
- printf("\n");
- printf(" is assigned to inside colon and will be spilled\n");
- }
-#endif
- }
- }
- else
- {
- // Variable is not alive at the end of the colon. We need to spill it.
-
- rsAdditional |= regBit;
-#ifdef DEBUG
- if (compiler->verbose)
- {
- Compiler::printTreeID(tree);
- printf(": Qmark-Colon candidate ");
- dspRegMask(regBit);
- printf("\n");
- printf(" is alive at end of colon and will be spilled\n");
- }
-#endif
- }
- }
-
-#ifdef DEBUG
- if (compiler->verbose)
- {
- Compiler::printTreeID(tree);
- printf(": Qmark-Colon approved additional spilling candidates are ");
- dspRegMask(rsAdditional);
- printf("\n");
- }
-#endif
- }
-
- noway_assert((rsAdditionalCandidates | rsAdditional) == rsAdditionalCandidates);
-
- // We only need to spill registers that are modified by the qmark tree, as specified in tree->gtUsedRegs.
- // If we ever need to use and spill a register while generating code that is not in tree->gtUsedRegs,
- // we will have unbalanced spills and generate bad code.
- regMaskTP rsSpill =
- ((regSet.rsMaskUsed & ~(regSet.rsMaskVars | regSet.rsMaskResvd)) | rsAdditional) & tree->gtUsedRegs;
-
-#ifdef DEBUG
- // Under register stress, regSet.rsPickReg() ignores the recommended registers and always picks
- // 'bad' registers, causing spills. So, just force all used registers to get spilled
- // in the stress case, to avoid the problem we're trying to resolve here. Thus, any spills
- // that occur within the qmark condition, 'then' case, or 'else' case, will have to be
- // unspilled while generating that same tree.
-
- if (regSet.rsStressRegs() >= 1)
- {
- rsSpill |= regSet.rsMaskUsed & ~(regSet.rsMaskVars | regSet.rsMaskLock | regSet.rsMaskResvd);
- }
-#endif // DEBUG
-
- if (rsSpill)
- {
- // Remember which registers hold pointers. We will spill
- // them, but the code that follows will fetch reg vars from
- // the registers, so we need that gc compiler->info.
-            // the registers, so we need that GC info.
- regMaskTP gcRegSavedGCRef = gcInfo.gcRegGCrefSetCur & rsAdditional;
-
- // regSet.rsSpillRegs() will assert if we try to spill any enregistered variables.
- // So, pretend there aren't any, and spill them anyway. This will only occur
- // if rsAdditional is non-empty.
- regMaskTP rsTemp = regSet.rsMaskVars;
- regSet.ClearMaskVars();
-
- regSet.rsSpillRegs(rsSpill);
-
- // Restore gc tracking masks.
- gcInfo.gcRegByrefSetCur |= gcRegSavedByref;
- gcInfo.gcRegGCrefSetCur |= gcRegSavedGCRef;
-
- // Set regSet.rsMaskVars back to normal
- regSet.rsMaskVars = rsTemp;
- }
- }
-
- // Generate the conditional jump but without doing any StackFP fixups.
- genCondJump(op1, lab_true, lab_false, false);
-
- /* Save the current liveness, register status, and GC pointers */
- /* This is the liveness information upon entry */
- /* to both the then and else parts of the qmark */
-
- saveLiveness(&entryLiveness);
-
- /* Clear the liveness of any local variables that are dead upon */
- /* entry to the else part. */
-
- /* Subtract the liveSet upon entry of the then part (op1->gtNext) */
- /* from the "colon or op2" liveSet */
- genDyingVars(compiler->compCurLife, tree->gtQmark.gtElseLiveSet);
-
- /* genCondJump() closes the current emitter block */
-
- genDefineTempLabel(lab_false);
-
-#if FEATURE_STACK_FP_X87
- // Store fpstate
-
- QmarkStateStackFP tempFPState;
- bool bHasFPUState = !compCurFPState.IsEmpty();
- genQMarkBeforeElseStackFP(&tempFPState, tree->gtQmark.gtElseLiveSet, op1->gtNext);
-#endif
-
- /* Does the operator yield a value? */
-
- if (tree->gtType == TYP_VOID)
- {
- /* Generate the code for the else part of the qmark */
-
- genCodeForTree(elseNode, needReg, bestReg);
-
- /* The type is VOID, so we shouldn't have computed a value */
-
- noway_assert(!(elseNode->gtFlags & GTF_REG_VAL));
-
- /* Save the current liveness, register status, and GC pointers */
-        /* This is the liveness information upon exit of the else part of the qmark */
-
- saveLiveness(&exitLiveness);
-
- /* Is there a 'then' part? */
-
- if (thenNode->IsNothingNode())
- {
-#if FEATURE_STACK_FP_X87
- if (bHasFPUState)
- {
- // We had FP state on entry just after the condition, so potentially, the else
- // node may have to do transition work.
- lab_done = genCreateTempLabel();
-
- /* Generate jmp lab_done */
-
- inst_JMP(EJ_jmp, lab_done);
-
- /* No 'then' - just generate the 'lab_true' label */
-
- genDefineTempLabel(lab_true);
-
-                // We need to do this after defining the lab_true label
- genQMarkAfterElseBlockStackFP(&tempFPState, compiler->compCurLife, op2->gtNext);
- genQMarkAfterThenBlockStackFP(&tempFPState);
- genDefineTempLabel(lab_done);
- }
- else
-#endif // FEATURE_STACK_FP_X87
- {
- /* No 'then' - just generate the 'lab_true' label */
- genDefineTempLabel(lab_true);
- }
- }
- else
- {
- lab_done = genCreateTempLabel();
-
- /* Generate jmp lab_done */
-
- inst_JMP(EJ_jmp, lab_done);
-
- /* Restore the liveness that we had upon entry of the then part of the qmark */
-
- restoreLiveness(&entryLiveness);
-
- /* Clear the liveness of any local variables that are dead upon */
- /* entry to the then part. */
- genDyingVars(compiler->compCurLife, tree->gtQmark.gtThenLiveSet);
-
- /* Generate lab_true: */
-
- genDefineTempLabel(lab_true);
-#if FEATURE_STACK_FP_X87
-            // We need to do this after defining the lab_true label
- genQMarkAfterElseBlockStackFP(&tempFPState, compiler->compCurLife, op2->gtNext);
-#endif
- /* Enter the then part - trash all registers */
-
- regTracker.rsTrackRegClr();
-
- /* Generate the code for the then part of the qmark */
-
- genCodeForTree(thenNode, needReg, bestReg);
-
- /* The type is VOID, so we shouldn't have computed a value */
-
- noway_assert(!(thenNode->gtFlags & GTF_REG_VAL));
-
- unspillLiveness(&exitLiveness);
-
- /* Verify that the exit liveness information is the same for the two parts of the qmark */
-
- checkLiveness(&exitLiveness);
-#if FEATURE_STACK_FP_X87
- genQMarkAfterThenBlockStackFP(&tempFPState);
-#endif
- /* Define the "result" label */
-
- genDefineTempLabel(lab_done);
- }
-
- /* Join of the two branches - trash all registers */
-
- regTracker.rsTrackRegClr();
-
- /* We're just about done */
-
- genUpdateLife(tree);
- }
- else
- {
- /* Generate code for a qmark that generates a value */
-
- /* Generate the code for the else part of the qmark */
-
- noway_assert(elseNode->IsNothingNode() == false);
-
- /* Compute the elseNode into any free register */
- genComputeReg(elseNode, needReg, RegSet::ANY_REG, RegSet::FREE_REG, true);
- noway_assert(elseNode->gtFlags & GTF_REG_VAL);
- noway_assert(elseNode->gtRegNum != REG_NA);
-
- /* Record the chosen register */
- reg = elseNode->gtRegNum;
- regs = genRegMask(reg);
-
- /* Save the current liveness, register status, and GC pointers */
- /* This is the liveness information upon exit of the else part of the qmark */
-
- saveLiveness(&exitLiveness);
-
- /* Generate jmp lab_done */
- lab_done = genCreateTempLabel();
-
-#ifdef DEBUG
- // We will use this to assert we don't emit instructions if we decide not to
- // do the jmp
- unsigned emittedInstructions = getEmitter()->emitInsCount;
- bool bSkippedJump = false;
-#endif
-        // We would like to know here whether the else node is really going to generate
-        // any code; if it isn't, the jump we emit here just jumps to the next instruction.
-        // Ideally we would be able to go back and remove the jump, but
-        // we have no way of doing that right now.
-
- if (
-#if FEATURE_STACK_FP_X87
- !bHasFPUState && // If there is no FPU state, we won't need an x87 transition
-#endif
- genIsEnregisteredIntVariable(thenNode) == reg)
- {
-#ifdef DEBUG
- // For the moment, fix this easy case (enregistered else node), which
- // is the one that happens all the time.
-
- bSkippedJump = true;
-#endif
- }
- else
- {
- inst_JMP(EJ_jmp, lab_done);
- }
-
- /* Restore the liveness that we had upon entry of the else part of the qmark */
-
- restoreLiveness(&entryLiveness);
-
- /* Clear the liveness of any local variables that are dead upon */
- /* entry to the then part. */
- genDyingVars(compiler->compCurLife, tree->gtQmark.gtThenLiveSet);
-
- /* Generate lab_true: */
- genDefineTempLabel(lab_true);
-#if FEATURE_STACK_FP_X87
- // Store FP state
-
- // We need to do this after defining the lab_true label
- genQMarkAfterElseBlockStackFP(&tempFPState, compiler->compCurLife, op2->gtNext);
-#endif
- /* Enter the then part - trash all registers */
-
- regTracker.rsTrackRegClr();
-
- /* Generate the code for the then part of the qmark */
-
- noway_assert(thenNode->IsNothingNode() == false);
-
- /* This must place a value into the chosen register */
- genComputeReg(thenNode, regs, RegSet::EXACT_REG, RegSet::FREE_REG, true);
-
- noway_assert(thenNode->gtFlags & GTF_REG_VAL);
- noway_assert(thenNode->gtRegNum == reg);
-
- unspillLiveness(&exitLiveness);
-
- /* Verify that the exit liveness information is the same for the two parts of the qmark */
- checkLiveness(&exitLiveness);
-#if FEATURE_STACK_FP_X87
- genQMarkAfterThenBlockStackFP(&tempFPState);
-#endif
-
-#ifdef DEBUG
- noway_assert(bSkippedJump == false || getEmitter()->emitInsCount == emittedInstructions);
-#endif
-
- /* Define the "result" label */
- genDefineTempLabel(lab_done);
-
- /* Join of the two branches - trash all registers */
-
- regTracker.rsTrackRegClr();
-
- /* Check whether this subtree has freed up any variables */
-
- genUpdateLife(tree);
-
- genMarkTreeInReg(tree, reg);
- }
-}
-
-/*****************************************************************************
- *
- * Generate code for a qmark colon using the CMOV instruction. It's OK
- * to return false when we can't easily implement it using a cmov (leading
- * genCodeForQmark to implement it using branches).
- */
-
-bool CodeGen::genCodeForQmarkWithCMOV(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg)
-{
-#ifdef _TARGET_XARCH_
- GenTreePtr cond = tree->gtOp.gtOp1;
- GenTreePtr colon = tree->gtOp.gtOp2;
- // Warning: this naming of the local vars is backwards!
- GenTreePtr thenNode = colon->gtOp.gtOp1;
- GenTreePtr elseNode = colon->gtOp.gtOp2;
- GenTreePtr alwaysNode, predicateNode;
- regNumber reg;
- regMaskTP needReg = destReg;
-
- noway_assert(tree->gtOper == GT_QMARK);
- noway_assert(cond->OperIsCompare());
- noway_assert(colon->gtOper == GT_COLON);
-
-#ifdef DEBUG
- if (JitConfig.JitNoCMOV())
- {
- return false;
- }
-#endif
-
- /* Can only implement CMOV on processors that support it */
-
- if (!compiler->opts.compUseCMOV)
- {
- return false;
- }
-
- /* thenNode better be a local or a constant */
-
- if ((thenNode->OperGet() != GT_CNS_INT) && (thenNode->OperGet() != GT_LCL_VAR))
- {
- return false;
- }
-
- /* elseNode better be a local or a constant or nothing */
-
- if ((elseNode->OperGet() != GT_CNS_INT) && (elseNode->OperGet() != GT_LCL_VAR))
- {
- return false;
- }
-
- /* can't handle two constants here */
-
- if ((thenNode->OperGet() == GT_CNS_INT) && (elseNode->OperGet() == GT_CNS_INT))
- {
- return false;
- }
-
- /* let's not handle comparisons of non-integer types */
-
- if (!varTypeIsI(cond->gtOp.gtOp1->gtType))
- {
- return false;
- }
-
- /* Choose nodes for predicateNode and alwaysNode. Swap cond if necessary.
-       The biggest constraint is that cmov doesn't take an immediate operand.
- */
-
- bool reverseCond = false;
- if (elseNode->OperGet() == GT_CNS_INT)
- {
- // else node is a constant
-
- alwaysNode = elseNode;
- predicateNode = thenNode;
- reverseCond = true;
- }
- else
- {
- alwaysNode = thenNode;
- predicateNode = elseNode;
- }
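-    // Either way, alwaysNode is the value computed unconditionally into the target register
-    // below, and predicateNode is the value conditionally moved over it by the cmov, so the
-    // register ends up holding the correct arm of the ?: in both cases.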
-
- // If the live set in alwaysNode is not the same as in tree, then
- // the variable in predicate node dies here. This is a dangerous
- // case that we don't handle (genComputeReg could overwrite
- // the value of the variable in the predicate node).
-
- // This assert is just paranoid (we've already asserted it above)
- assert(predicateNode->OperGet() == GT_LCL_VAR);
- if ((predicateNode->gtFlags & GTF_VAR_DEATH) != 0)
- {
- return false;
- }
-
-    // Past this point we are committing to use CMOV.
-
- if (reverseCond)
- {
- compiler->gtReverseCond(cond);
- }
-
- emitJumpKind jumpKind = genCondSetFlags(cond);
-
- // Compute the always node into any free register. If it's a constant,
- // we need to generate the mov instruction here (otherwise genComputeReg might
- // modify the flags, as in xor reg,reg).
-
- if (alwaysNode->OperGet() == GT_CNS_INT)
- {
- reg = regSet.rsPickReg(needReg, bestReg);
- inst_RV_IV(INS_mov, reg, alwaysNode->gtIntCon.gtIconVal, emitActualTypeSize(alwaysNode->TypeGet()));
- gcInfo.gcMarkRegPtrVal(reg, alwaysNode->TypeGet());
- regTracker.rsTrackRegTrash(reg);
- }
- else
- {
- genComputeReg(alwaysNode, needReg, RegSet::ANY_REG, RegSet::FREE_REG, true);
- noway_assert(alwaysNode->gtFlags & GTF_REG_VAL);
- noway_assert(alwaysNode->gtRegNum != REG_NA);
-
- // Record the chosen register
-
- reg = alwaysNode->gtRegNum;
- }
-
- regNumber regPredicate = REG_NA;
-
- // Is predicateNode an enregistered variable?
-
- if (genMarkLclVar(predicateNode))
- {
- // Variable lives in a register
-
- regPredicate = predicateNode->gtRegNum;
- }
-#if REDUNDANT_LOAD
- else
- {
- // Checks if the variable happens to be in any of the registers
-
- regPredicate = findStkLclInReg(predicateNode->gtLclVarCommon.gtLclNum);
- }
-#endif
-
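- // Map each emitJumpKind returned by genCondSetFlags to the matching conditional move instruction.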
- const static instruction EJtoCMOV[] = {INS_nop, INS_nop, INS_cmovo, INS_cmovno, INS_cmovb, INS_cmovae,
- INS_cmove, INS_cmovne, INS_cmovbe, INS_cmova, INS_cmovs, INS_cmovns,
- INS_cmovpe, INS_cmovpo, INS_cmovl, INS_cmovge, INS_cmovle, INS_cmovg};
-
- noway_assert((unsigned)jumpKind < (sizeof(EJtoCMOV) / sizeof(EJtoCMOV[0])));
- instruction cmov_ins = EJtoCMOV[jumpKind];
-
- noway_assert(insIsCMOV(cmov_ins));
-
- if (regPredicate != REG_NA)
- {
- // regPredicate is in a register
-
- inst_RV_RV(cmov_ins, reg, regPredicate, predicateNode->TypeGet());
- }
- else
- {
- // regPredicate is in memory
-
- inst_RV_TT(cmov_ins, reg, predicateNode, NULL);
- }
- gcInfo.gcMarkRegPtrVal(reg, predicateNode->TypeGet());
- regTracker.rsTrackRegTrash(reg);
-
- genUpdateLife(alwaysNode);
- genUpdateLife(predicateNode);
- genCodeForTree_DONE_LIFE(tree, reg);
- return true;
-#else
- return false;
-#endif
-}
-
-#ifdef _TARGET_XARCH_
-void CodeGen::genCodeForMultEAX(GenTreePtr tree)
-{
- GenTreePtr op1 = tree->gtOp.gtOp1;
- GenTreePtr op2 = tree->gtGetOp2();
- bool ovfl = tree->gtOverflow();
- regNumber reg = DUMMY_INIT(REG_CORRUPT);
- regMaskTP addrReg;
-
- noway_assert(tree->OperGet() == GT_MUL);
-
- /* We'll evaluate 'op1' first */
-
- regMaskTP op1Mask = regSet.rsMustExclude(RBM_EAX, op2->gtRsvdRegs);
-
- /* Generate the op1 into op1Mask and hold on to it. freeOnly=true */
-
- genComputeReg(op1, op1Mask, RegSet::ANY_REG, RegSet::KEEP_REG, true);
- noway_assert(op1->gtFlags & GTF_REG_VAL);
-
- // If op2 is a constant we need to load the constant into a register
- if (op2->OperKind() & GTK_CONST)
- {
- genCodeForTree(op2, RBM_EDX); // since EDX is going to be spilled anyway
- noway_assert(op2->gtFlags & GTF_REG_VAL);
- regSet.rsMarkRegUsed(op2);
- addrReg = genRegMask(op2->gtRegNum);
- }
- else
- {
- /* Make the second operand addressable */
- // Try to avoid EAX.
- addrReg = genMakeRvalueAddressable(op2, RBM_ALLINT & ~RBM_EAX, RegSet::KEEP_REG, false);
- }
-
- /* Make sure the first operand is still in a register */
- // op1 *must* go into EAX.
- genRecoverReg(op1, RBM_EAX, RegSet::KEEP_REG);
- noway_assert(op1->gtFlags & GTF_REG_VAL);
-
- reg = op1->gtRegNum;
-
- // For 8 bit operations, we need to pick byte addressable registers
-
- if (ovfl && varTypeIsByte(tree->TypeGet()) && !(genRegMask(reg) & RBM_BYTE_REGS))
- {
- regNumber byteReg = regSet.rsGrabReg(RBM_BYTE_REGS);
-
- inst_RV_RV(INS_mov, byteReg, reg);
-
- regTracker.rsTrackRegTrash(byteReg);
- regSet.rsMarkRegFree(genRegMask(reg));
-
- reg = byteReg;
- op1->gtRegNum = reg;
- regSet.rsMarkRegUsed(op1);
- }
-
- /* Make sure the operand is still addressable */
- addrReg = genKeepAddressable(op2, addrReg, genRegMask(reg));
-
- /* Free up the operand, if it's a regvar */
-
- genUpdateLife(op2);
-
- /* The register is about to be trashed */
-
- regTracker.rsTrackRegTrash(reg);
-
- // For overflow instructions, tree->TypeGet() is the accurate type,
- // and gives us the size for the operands.
-
- emitAttr opSize = emitTypeSize(tree->TypeGet());
-
- /* Compute the new value */
-
- noway_assert(op1->gtRegNum == REG_EAX);
-
- // Make sure Edx is free (unless used by op2 itself)
- bool op2Released = false;
-
- if ((addrReg & RBM_EDX) == 0)
- {
- // op2 does not use Edx, so make sure no one else does either
- regSet.rsGrabReg(RBM_EDX);
- }
- else if (regSet.rsMaskMult & RBM_EDX)
- {
- /* Edx is used by op2 and some other trees.
- Spill the other trees besides op2. */
-
- regSet.rsGrabReg(RBM_EDX);
- op2Released = true;
-
- /* keepReg==RegSet::FREE_REG so that the other multi-used trees
- don't get marked as unspilled as well. */
- regSet.rsUnspillReg(op2, RBM_EDX, RegSet::FREE_REG);
- }
-
- instruction ins;
-
- if (tree->gtFlags & GTF_UNSIGNED)
- ins = INS_mulEAX;
- else
- ins = INS_imulEAX;
-
- inst_TT(ins, op2, 0, 0, opSize);
-
- /* Both EAX and EDX are now trashed */
-
- regTracker.rsTrackRegTrash(REG_EAX);
- regTracker.rsTrackRegTrash(REG_EDX);
-
- /* Free up anything that was tied up by the operand */
-
- if (!op2Released)
- genDoneAddressable(op2, addrReg, RegSet::KEEP_REG);
-
- /* The result will be where the first operand is sitting */
-
- /* We must use RegSet::KEEP_REG since op1 can have a GC pointer here */
- genRecoverReg(op1, 0, RegSet::KEEP_REG);
-
- reg = op1->gtRegNum;
- noway_assert(reg == REG_EAX);
-
- genReleaseReg(op1);
-
- /* Do we need an overflow check */
-
- if (ovfl)
- genCheckOverflow(tree);
-
- genCodeForTree_DONE(tree, reg);
-}
-#endif // _TARGET_XARCH_
-
-#ifdef _TARGET_ARM_
-void CodeGen::genCodeForMult64(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg)
-{
- GenTreePtr op1 = tree->gtOp.gtOp1;
- GenTreePtr op2 = tree->gtGetOp2();
-
- noway_assert(tree->OperGet() == GT_MUL);
-
- /* Generate the first operand into some register */
-
- genComputeReg(op1, RBM_ALLINT, RegSet::ANY_REG, RegSet::KEEP_REG);
- noway_assert(op1->gtFlags & GTF_REG_VAL);
-
- /* Generate the second operand into some register */
-
- genComputeReg(op2, RBM_ALLINT, RegSet::ANY_REG, RegSet::KEEP_REG);
- noway_assert(op2->gtFlags & GTF_REG_VAL);
-
- /* Make sure the first operand is still in a register */
- genRecoverReg(op1, 0, RegSet::KEEP_REG);
- noway_assert(op1->gtFlags & GTF_REG_VAL);
-
- /* Free up the operands */
- genUpdateLife(tree);
-
- genReleaseReg(op1);
- genReleaseReg(op2);
-
- regNumber regLo = regSet.rsPickReg(destReg, bestReg);
- regNumber regHi;
-
- regSet.rsLockReg(genRegMask(regLo));
- regHi = regSet.rsPickReg(destReg & ~genRegMask(regLo));
- regSet.rsUnlockReg(genRegMask(regLo));
-
- instruction ins;
- if (tree->gtFlags & GTF_UNSIGNED)
- ins = INS_umull;
- else
- ins = INS_smull;
-
- getEmitter()->emitIns_R_R_R_R(ins, EA_4BYTE, regLo, regHi, op1->gtRegNum, op2->gtRegNum);
- regTracker.rsTrackRegTrash(regHi);
- regTracker.rsTrackRegTrash(regLo);
-
- /* Do we need an overflow check */
-
- if (tree->gtOverflow())
- {
- // Keep regLo [and regHi] locked while generating code for the gtOverflow() case
- //
- regSet.rsLockReg(genRegMask(regLo));
-
- if (tree->gtFlags & GTF_MUL_64RSLT)
- regSet.rsLockReg(genRegMask(regHi));
-
- regNumber regTmpHi = regHi;
- if ((tree->gtFlags & GTF_UNSIGNED) == 0)
- {
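- // For a signed multiply the result fits in 32 bits only if regHi is the sign extension of
- // regLo. Adding the carry from (regLo >= 0x80000000) into regHi folds both the 0 and -1
- // cases into the single compare of regTmpHi against zero below.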
- getEmitter()->emitIns_R_I(INS_cmp, EA_4BYTE, regLo, 0x80000000);
- regTmpHi = regSet.rsPickReg(RBM_ALLINT);
- getEmitter()->emitIns_R_R_I(INS_adc, EA_4BYTE, regTmpHi, regHi, 0);
- regTracker.rsTrackRegTrash(regTmpHi);
- }
- getEmitter()->emitIns_R_I(INS_cmp, EA_4BYTE, regTmpHi, 0);
-
- // Jump to the block which will throw the exception
- emitJumpKind jmpNotEqual = genJumpKindForOper(GT_NE, CK_SIGNED);
- genJumpToThrowHlpBlk(jmpNotEqual, SCK_OVERFLOW);
-
- // Unlock regLo [and regHi] after generating code for the gtOverflow() case
- //
- regSet.rsUnlockReg(genRegMask(regLo));
-
- if (tree->gtFlags & GTF_MUL_64RSLT)
- regSet.rsUnlockReg(genRegMask(regHi));
- }
-
- genUpdateLife(tree);
-
- if (tree->gtFlags & GTF_MUL_64RSLT)
- genMarkTreeInRegPair(tree, gen2regs2pair(regLo, regHi));
- else
- genMarkTreeInReg(tree, regLo);
-}
-#endif // _TARGET_ARM_
-
-/*****************************************************************************
- *
- * Generate code for a simple binary arithmetic or logical operator.
- * Handles GT_AND, GT_OR, GT_XOR, GT_ADD, GT_SUB, GT_MUL.
- */
-
-void CodeGen::genCodeForTreeSmpBinArithLogOp(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg)
-{
- instruction ins;
- genTreeOps oper = tree->OperGet();
- const var_types treeType = tree->TypeGet();
- GenTreePtr op1 = tree->gtOp.gtOp1;
- GenTreePtr op2 = tree->gtGetOp2();
- insFlags flags = tree->gtSetFlags() ? INS_FLAGS_SET : INS_FLAGS_DONT_CARE;
- regNumber reg = DUMMY_INIT(REG_CORRUPT);
- regMaskTP needReg = destReg;
-
- /* Figure out what instruction to generate */
-
- bool isArith;
- switch (oper)
- {
- case GT_AND:
- ins = INS_AND;
- isArith = false;
- break;
- case GT_OR:
- ins = INS_OR;
- isArith = false;
- break;
- case GT_XOR:
- ins = INS_XOR;
- isArith = false;
- break;
- case GT_ADD:
- ins = INS_add;
- isArith = true;
- break;
- case GT_SUB:
- ins = INS_sub;
- isArith = true;
- break;
- case GT_MUL:
- ins = INS_MUL;
- isArith = true;
- break;
- default:
- unreached();
- }
-
-#ifdef _TARGET_XARCH_
- /* Special case: try to use the 3 operand form "imul reg, op1, icon" */
-
- if ((oper == GT_MUL) &&
- op2->IsIntCnsFitsInI32() && // op2 is a constant that fits in a sign-extended 32-bit immediate
- !op1->IsCnsIntOrI() && // op1 is not a constant
- (tree->gtFlags & GTF_MUL_64RSLT) == 0 && // tree not marked with MUL_64RSLT
- !varTypeIsByte(treeType) && // No encoding for say "imul al,al,imm"
- !tree->gtOverflow()) // 3 operand imul doesn't set flags
- {
- /* Make the first operand addressable */
-
- regMaskTP addrReg = genMakeRvalueAddressable(op1, needReg & ~op2->gtRsvdRegs, RegSet::FREE_REG, false);
-
- /* Grab a register for the target */
-
- reg = regSet.rsPickReg(needReg, bestReg);
-
-#if LEA_AVAILABLE
- /* Compute the value into the target: reg=op1*op2_icon */
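- // For multipliers of 3, 5 and 9 a single LEA suffices: reg = regSrc + regSrc * (icon - 1),
- // where (icon & -2) yields the scale factor 2, 4 or 8.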
- if (op2->gtIntCon.gtIconVal == 3 || op2->gtIntCon.gtIconVal == 5 || op2->gtIntCon.gtIconVal == 9)
- {
- regNumber regSrc;
- if (op1->gtFlags & GTF_REG_VAL)
- {
- regSrc = op1->gtRegNum;
- }
- else
- {
- inst_RV_TT(INS_mov, reg, op1, 0, emitActualTypeSize(op1->TypeGet()));
- regSrc = reg;
- }
- getEmitter()->emitIns_R_ARX(INS_lea, emitActualTypeSize(treeType), reg, regSrc, regSrc,
- (op2->gtIntCon.gtIconVal & -2), 0);
- }
- else
-#endif // LEA_AVAILABLE
- {
- /* Compute the value into the target: reg=op1*op2_icon */
- inst_RV_TT_IV(INS_MUL, reg, op1, (int)op2->gtIntCon.gtIconVal);
- }
-
- /* The register has been trashed now */
-
- regTracker.rsTrackRegTrash(reg);
-
- /* The address is no longer live */
-
- genDoneAddressable(op1, addrReg, RegSet::FREE_REG);
-
- genCodeForTree_DONE(tree, reg);
- return;
- }
-#endif // _TARGET_XARCH_
-
- bool ovfl = false;
-
- if (isArith)
- {
- // We only reach here for GT_ADD, GT_SUB and GT_MUL.
- assert((oper == GT_ADD) || (oper == GT_SUB) || (oper == GT_MUL));
-
- ovfl = tree->gtOverflow();
-
- /* We record the accurate (small) types in trees only when we need to
- * check for overflow. Otherwise we record genActualType().
- */
-
- noway_assert(ovfl || (treeType == genActualType(treeType)));
-
-#if LEA_AVAILABLE
-
- /* Can we use an 'lea' to compute the result?
- Can't use 'lea' for overflow as it doesn't set flags
- Can't use 'lea' unless we have at least two free registers */
- {
- bool bEnoughRegs = genRegCountForLiveIntEnregVars(tree) + // Live intreg variables
- genCountBits(regSet.rsMaskLock) + // Locked registers
- 2 // We will need two registers
- <= genCountBits(RBM_ALLINT & ~(doubleAlignOrFramePointerUsed() ? RBM_FPBASE : 0));
-
- regMaskTP regs = RBM_NONE; // OUT argument
- if (!ovfl && bEnoughRegs && genMakeIndAddrMode(tree, NULL, true, needReg, RegSet::FREE_REG, &regs, false))
- {
- emitAttr size;
-
- /* Is the value now computed in some register? */
-
- if (tree->gtFlags & GTF_REG_VAL)
- {
- genCodeForTree_REG_VAR1(tree);
- return;
- }
-
- /* If we can reuse op1/2's register directly, and 'tree' is
- a simple expression (ie. not in scaled index form),
- might as well just use "add" instead of "lea" */
-
- // However, if we're in a context where we want to evaluate "tree" into a specific
- // register different from the reg we'd use in this optimization, then it doesn't
- // make sense to do the "add", since we'd also have to do a "mov."
- if (op1->gtFlags & GTF_REG_VAL)
- {
- reg = op1->gtRegNum;
-
- if ((genRegMask(reg) & regSet.rsRegMaskFree()) && (genRegMask(reg) & needReg))
- {
- if (op2->gtFlags & GTF_REG_VAL)
- {
- /* Simply add op2 to the register */
-
- inst_RV_TT(INS_add, reg, op2, 0, emitTypeSize(treeType), flags);
-
- if (tree->gtSetFlags())
- genFlagsEqualToReg(tree, reg);
-
- goto DONE_LEA_ADD;
- }
- else if (op2->OperGet() == GT_CNS_INT)
- {
- /* Simply add op2 to the register */
-
- genIncRegBy(reg, op2->gtIntCon.gtIconVal, tree, treeType);
-
- goto DONE_LEA_ADD;
- }
- }
- }
-
- if (op2->gtFlags & GTF_REG_VAL)
- {
- reg = op2->gtRegNum;
-
- if ((genRegMask(reg) & regSet.rsRegMaskFree()) && (genRegMask(reg) & needReg))
- {
- if (op1->gtFlags & GTF_REG_VAL)
- {
- /* Simply add op1 to the register */
-
- inst_RV_TT(INS_add, reg, op1, 0, emitTypeSize(treeType), flags);
-
- if (tree->gtSetFlags())
- genFlagsEqualToReg(tree, reg);
-
- goto DONE_LEA_ADD;
- }
- }
- }
-
- // The expression either requires a scaled-index form, or the
- // op1 or op2's register can't be targeted, this can be
- // caused when op1 or op2 are enregistered variables.
-
- reg = regSet.rsPickReg(needReg, bestReg);
- size = emitActualTypeSize(treeType);
-
- /* Generate "lea reg, [addr-mode]" */
-
- inst_RV_AT(INS_lea, size, treeType, reg, tree, 0, flags);
-
-#ifndef _TARGET_XARCH_
- // Don't call genFlagsEqualToReg on x86/x64,
- // as 'lea' does not set the flags there
- if (tree->gtSetFlags())
- genFlagsEqualToReg(tree, reg);
-#endif
-
- DONE_LEA_ADD:
- /* The register has been trashed now */
- regTracker.rsTrackRegTrash(reg);
-
- genDoneAddressable(tree, regs, RegSet::FREE_REG);
-
- /* The following could be an 'inner' pointer!!! */
-
- noway_assert(treeType == TYP_BYREF || !varTypeIsGC(treeType));
-
- if (treeType == TYP_BYREF)
- {
- genUpdateLife(tree);
-
- gcInfo.gcMarkRegSetNpt(genRegMask(reg)); // in case "reg" was a TYP_GCREF before
- gcInfo.gcMarkRegPtrVal(reg, TYP_BYREF);
- }
-
- genCodeForTree_DONE(tree, reg);
- return;
- }
- }
-
-#endif // LEA_AVAILABLE
-
- noway_assert((varTypeIsGC(treeType) == false) || (treeType == TYP_BYREF && (ins == INS_add || ins == INS_sub)));
- }
-
- /* The following makes an assumption about gtSetEvalOrder(this) */
-
- noway_assert((tree->gtFlags & GTF_REVERSE_OPS) == 0);
-
- /* Compute a useful register mask */
- needReg = regSet.rsMustExclude(needReg, op2->gtRsvdRegs);
- needReg = regSet.rsNarrowHint(needReg, regSet.rsRegMaskFree());
-
- // Determine what registers go live between op1 and op2
- // Don't bother checking if op1 is already in a register.
- // This is not just for efficiency; if it's already in a
- // register then it may already be considered "evaluated"
- // for the purposes of liveness, in which case genNewLiveRegMask
- // will assert
- if (!op1->InReg())
- {
- regMaskTP newLiveMask = genNewLiveRegMask(op1, op2);
- if (newLiveMask)
- {
- needReg = regSet.rsNarrowHint(needReg, ~newLiveMask);
- }
- }
-
-#if CPU_HAS_BYTE_REGS
- /* 8-bit operations can only be done in the byte-regs */
- if (varTypeIsByte(treeType))
- needReg = regSet.rsNarrowHint(RBM_BYTE_REGS, needReg);
-#endif // CPU_HAS_BYTE_REGS
-
- // Try selecting one of the 'bestRegs'
- needReg = regSet.rsNarrowHint(needReg, bestReg);
-
- /* Special case: small_val & small_mask */
-
- if (varTypeIsSmall(op1->TypeGet()) && op2->IsCnsIntOrI() && oper == GT_AND)
- {
- size_t and_val = op2->gtIntCon.gtIconVal;
- size_t andMask;
- var_types typ = op1->TypeGet();
-
- switch (typ)
- {
- case TYP_BOOL:
- case TYP_BYTE:
- case TYP_UBYTE:
- andMask = 0x000000FF;
- break;
- case TYP_SHORT:
- case TYP_CHAR:
- andMask = 0x0000FFFF;
- break;
- default:
- noway_assert(!"unexpected type");
- return;
- }
-
- // Is the 'and_val' completely contained within the bits found in 'andMask'
- if ((and_val & ~andMask) == 0)
- {
- // We must use unsigned instructions when loading op1
- if (varTypeIsByte(typ))
- {
- op1->gtType = TYP_UBYTE;
- }
- else // varTypeIsShort(typ)
- {
- assert(varTypeIsShort(typ));
- op1->gtType = TYP_CHAR;
- }
-
- /* Generate the first operand into a scratch register */
-
- op1 = genCodeForCommaTree(op1);
- genComputeReg(op1, needReg, RegSet::ANY_REG, RegSet::FREE_REG, true);
-
- noway_assert(op1->gtFlags & GTF_REG_VAL);
-
- regNumber op1Reg = op1->gtRegNum;
-
- // Did we end up in an acceptable register?
- // and do we have an acceptable free register available to grab?
- //
- if (((genRegMask(op1Reg) & needReg) == 0) && ((regSet.rsRegMaskFree() & needReg) != 0))
- {
- // See if we can pick a register from bestReg
- bestReg &= needReg;
-
- // Grab an acceptable register
- regNumber newReg;
- if ((bestReg & regSet.rsRegMaskFree()) != 0)
- newReg = regSet.rsGrabReg(bestReg);
- else
- newReg = regSet.rsGrabReg(needReg);
-
- noway_assert(op1Reg != newReg);
-
- /* Update the value in the target register */
-
- regTracker.rsTrackRegCopy(newReg, op1Reg);
-
- inst_RV_RV(ins_Copy(op1->TypeGet()), newReg, op1Reg, op1->TypeGet());
-
- /* The value has been transferred to 'newReg' */
-
- if ((genRegMask(op1Reg) & regSet.rsMaskUsed) == 0)
- gcInfo.gcMarkRegSetNpt(genRegMask(op1Reg));
-
- gcInfo.gcMarkRegPtrVal(newReg, op1->TypeGet());
-
- /* The value is now in an appropriate register */
-
- op1->gtRegNum = newReg;
- }
- noway_assert(op1->gtFlags & GTF_REG_VAL);
- genUpdateLife(op1);
-
- /* Mark the register as 'used' */
- regSet.rsMarkRegUsed(op1);
- reg = op1->gtRegNum;
-
- if (and_val != andMask) // Does the "and" mask only cover some of the bits?
- {
- /* "and" the value */
-
- inst_RV_IV(INS_AND, reg, and_val, EA_4BYTE, flags);
- }
-
-#ifdef DEBUG
- /* Update the live set of register variables */
- if (compiler->opts.varNames)
- genUpdateLife(tree);
-#endif
-
- /* Now we can update the register pointer information */
-
- genReleaseReg(op1);
- gcInfo.gcMarkRegPtrVal(reg, treeType);
-
- genCodeForTree_DONE_LIFE(tree, reg);
- return;
- }
- }
-
-#ifdef _TARGET_XARCH_
-
- // Do we have to use the special "imul" instruction
- // which has eax as the implicit operand ?
- //
- bool multEAX = false;
-
- if (oper == GT_MUL)
- {
- if (tree->gtFlags & GTF_MUL_64RSLT)
- {
- /* Only multiplying with EAX will leave the 64-bit
- * result in EDX:EAX */
-
- multEAX = true;
- }
- else if (ovfl)
- {
- if (tree->gtFlags & GTF_UNSIGNED)
- {
- /* "mul reg/mem" always has EAX as default operand */
-
- multEAX = true;
- }
- else if (varTypeIsSmall(treeType))
- {
- /* Only the "imul with EAX" encoding has the 'w' bit
- * to specify the size of the operands */
-
- multEAX = true;
- }
- }
- }
-
- if (multEAX)
- {
- noway_assert(oper == GT_MUL);
-
- return genCodeForMultEAX(tree);
- }
-#endif // _TARGET_XARCH_
-
-#ifdef _TARGET_ARM_
-
- // Do we have to use the special 32x32 => 64 bit multiply
- //
- bool mult64 = false;
-
- if (oper == GT_MUL)
- {
- if (tree->gtFlags & GTF_MUL_64RSLT)
- {
- mult64 = true;
- }
- else if (ovfl)
- {
- // We always must use the 32x32 => 64 bit multiply
- // to detect overflow
- mult64 = true;
- }
- }
-
- if (mult64)
- {
- noway_assert(oper == GT_MUL);
-
- return genCodeForMult64(tree, destReg, bestReg);
- }
-#endif // _TARGET_ARM_
-
- /* Generate the first operand into a scratch register */
-
- op1 = genCodeForCommaTree(op1);
- genComputeReg(op1, needReg, RegSet::ANY_REG, RegSet::FREE_REG, true);
-
- noway_assert(op1->gtFlags & GTF_REG_VAL);
-
- regNumber op1Reg = op1->gtRegNum;
-
- // Setup needReg with the set of register that we require for op1 to be in
- //
- needReg = RBM_ALLINT;
-
- /* Compute a useful register mask */
- needReg = regSet.rsMustExclude(needReg, op2->gtRsvdRegs);
- needReg = regSet.rsNarrowHint(needReg, regSet.rsRegMaskFree());
-
-#if CPU_HAS_BYTE_REGS
- /* 8-bit operations can only be done in the byte-regs */
- if (varTypeIsByte(treeType))
- needReg = regSet.rsNarrowHint(RBM_BYTE_REGS, needReg);
-#endif // CPU_HAS_BYTE_REGS
-
- // Did we end up in an acceptable register?
- // and do we have an acceptable free register available to grab?
- //
- if (((genRegMask(op1Reg) & needReg) == 0) && ((regSet.rsRegMaskFree() & needReg) != 0))
- {
- // See if we can pick a register from bestReg
- bestReg &= needReg;
-
- // Grab an acceptable register
- regNumber newReg;
- if ((bestReg & regSet.rsRegMaskFree()) != 0)
- newReg = regSet.rsGrabReg(bestReg);
- else
- newReg = regSet.rsGrabReg(needReg);
-
- noway_assert(op1Reg != newReg);
-
- /* Update the value in the target register */
-
- regTracker.rsTrackRegCopy(newReg, op1Reg);
-
- inst_RV_RV(ins_Copy(op1->TypeGet()), newReg, op1Reg, op1->TypeGet());
-
- /* The value has been transferred to 'newReg' */
-
- if ((genRegMask(op1Reg) & regSet.rsMaskUsed) == 0)
- gcInfo.gcMarkRegSetNpt(genRegMask(op1Reg));
-
- gcInfo.gcMarkRegPtrVal(newReg, op1->TypeGet());
-
- /* The value is now in an appropriate register */
-
- op1->gtRegNum = newReg;
- }
- noway_assert(op1->gtFlags & GTF_REG_VAL);
- op1Reg = op1->gtRegNum;
-
- genUpdateLife(op1);
-
- /* Mark the register as 'used' */
- regSet.rsMarkRegUsed(op1);
-
- bool isSmallConst = false;
-
-#ifdef _TARGET_ARM_
- if ((op2->gtOper == GT_CNS_INT) && arm_Valid_Imm_For_Instr(ins, op2->gtIntCon.gtIconVal, INS_FLAGS_DONT_CARE))
- {
- isSmallConst = true;
- }
-#endif
- /* Make the second operand addressable */
-
- regMaskTP addrReg = genMakeRvalueAddressable(op2, RBM_ALLINT, RegSet::KEEP_REG, isSmallConst);
-
-#if CPU_LOAD_STORE_ARCH
- genRecoverReg(op1, RBM_ALLINT, RegSet::KEEP_REG);
-#else // !CPU_LOAD_STORE_ARCH
- /* Is op1 spilled and op2 in a register? */
-
- if ((op1->gtFlags & GTF_SPILLED) && (op2->gtFlags & GTF_REG_VAL) && (ins != INS_sub))
- {
- noway_assert(ins == INS_add || ins == INS_MUL || ins == INS_AND || ins == INS_OR || ins == INS_XOR);
-
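- // These operations are commutative, so the result can be computed directly into the
- // register holding op2, using op1's spill temp as the memory operand.
-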
- // genMakeRvalueAddressable(GT_LCL_VAR) shouldn't spill anything
- noway_assert(op2->gtOper != GT_LCL_VAR ||
- varTypeIsSmall(compiler->lvaTable[op2->gtLclVarCommon.gtLclNum].TypeGet()));
-
- reg = op2->gtRegNum;
- regMaskTP regMask = genRegMask(reg);
-
- /* Is the register holding op2 available? */
-
- if (regMask & regSet.rsMaskVars)
- {
- }
- else
- {
- /* Get the temp we spilled into. */
-
- TempDsc* temp = regSet.rsUnspillInPlace(op1, op1->gtRegNum);
-
- /* For 8-bit operations, we need to make sure that op2 is
- in a byte-addressable register */
-
- if (varTypeIsByte(treeType) && !(regMask & RBM_BYTE_REGS))
- {
- regNumber byteReg = regSet.rsGrabReg(RBM_BYTE_REGS);
-
- inst_RV_RV(INS_mov, byteReg, reg);
- regTracker.rsTrackRegTrash(byteReg);
-
- /* op2 couldn't have spilled as it was not sitting in
- RBM_BYTE_REGS, and regSet.rsGrabReg() will only spill its args */
- noway_assert(op2->gtFlags & GTF_REG_VAL);
-
- regSet.rsUnlockReg(regMask);
- regSet.rsMarkRegFree(regMask);
-
- reg = byteReg;
- regMask = genRegMask(reg);
- op2->gtRegNum = reg;
- regSet.rsMarkRegUsed(op2);
- }
-
- inst_RV_ST(ins, reg, temp, 0, treeType);
-
- regTracker.rsTrackRegTrash(reg);
-
- /* Free the temp */
-
- compiler->tmpRlsTemp(temp);
-
- /* 'add'/'sub' set all CC flags, others only ZF */
-
- /* If we need to check overflow, for small types, the
- * flags can't be used as we perform the arithmetic
- * operation (on small registers) and then sign extend it
- *
- * NOTE : If we ever don't need to sign-extend the result,
- * we can use the flags
- */
-
- if (tree->gtSetFlags())
- {
- genFlagsEqualToReg(tree, reg);
- }
-
- /* The result is where the second operand is sitting. Mark result reg as free */
- regSet.rsMarkRegFree(genRegMask(reg));
-
- gcInfo.gcMarkRegPtrVal(reg, treeType);
-
- goto CHK_OVF;
- }
- }
-#endif // !CPU_LOAD_STORE_ARCH
-
- /* Make sure the first operand is still in a register */
- regSet.rsLockUsedReg(addrReg);
- genRecoverReg(op1, 0, RegSet::KEEP_REG);
- noway_assert(op1->gtFlags & GTF_REG_VAL);
- regSet.rsUnlockUsedReg(addrReg);
-
- reg = op1->gtRegNum;
-
- // For 8 bit operations, we need to pick byte addressable registers
-
- if (varTypeIsByte(treeType) && !(genRegMask(reg) & RBM_BYTE_REGS))
- {
- regNumber byteReg = regSet.rsGrabReg(RBM_BYTE_REGS);
-
- inst_RV_RV(INS_mov, byteReg, reg);
-
- regTracker.rsTrackRegTrash(byteReg);
- regSet.rsMarkRegFree(genRegMask(reg));
-
- reg = byteReg;
- op1->gtRegNum = reg;
- regSet.rsMarkRegUsed(op1);
- }
-
- /* Make sure the operand is still addressable */
- addrReg = genKeepAddressable(op2, addrReg, genRegMask(reg));
-
- /* Free up the operand, if it's a regvar */
-
- genUpdateLife(op2);
-
- /* The register is about to be trashed */
-
- regTracker.rsTrackRegTrash(reg);
-
- bool op2Released = false;
-
- // For overflow instructions, tree->gtType is the accurate type,
- // and gives us the size for the operands.
-
- emitAttr opSize = emitTypeSize(treeType);
-
- /* Compute the new value */
-
- if (isArith && !op2->InReg() && (op2->OperKind() & GTK_CONST)
-#if !CPU_HAS_FP_SUPPORT
- && (treeType == TYP_INT || treeType == TYP_I_IMPL)
-#endif
- )
- {
- ssize_t ival = op2->gtIntCon.gtIconVal;
-
- if (oper == GT_ADD)
- {
- genIncRegBy(reg, ival, tree, treeType, ovfl);
- }
- else if (oper == GT_SUB)
- {
- if (ovfl && ((tree->gtFlags & GTF_UNSIGNED) ||
- (ival == ((treeType == TYP_INT) ? INT32_MIN : SSIZE_T_MIN))) // -0x80000000 == 0x80000000.
- // Therefore we can't use -ival.
- )
- {
- /* For unsigned overflow, we have to use INS_sub to set
- the flags correctly */
-
- genDecRegBy(reg, ival, tree);
- }
- else
- {
- /* Else, we simply add the negative of the value */
-
- genIncRegBy(reg, -ival, tree, treeType, ovfl);
- }
- }
- else if (oper == GT_MUL)
- {
- genMulRegBy(reg, ival, tree, treeType, ovfl);
- }
- }
- else
- {
- // op2 could be a GT_COMMA (i.e. an assignment for a CSE def)
- op2 = op2->gtEffectiveVal();
- if (varTypeIsByte(treeType) && op2->InReg())
- {
- noway_assert(genRegMask(reg) & RBM_BYTE_REGS);
-
- regNumber op2reg = op2->gtRegNum;
- regMaskTP op2regMask = genRegMask(op2reg);
-
- if (!(op2regMask & RBM_BYTE_REGS))
- {
- regNumber byteReg = regSet.rsGrabReg(RBM_BYTE_REGS);
-
- inst_RV_RV(INS_mov, byteReg, op2reg);
- regTracker.rsTrackRegTrash(byteReg);
-
- genDoneAddressable(op2, addrReg, RegSet::KEEP_REG);
- op2Released = true;
-
- op2->gtRegNum = byteReg;
- }
- }
-
- inst_RV_TT(ins, reg, op2, 0, opSize, flags);
- }
-
- /* Free up anything that was tied up by the operand */
-
- if (!op2Released)
- genDoneAddressable(op2, addrReg, RegSet::KEEP_REG);
-
- /* The result will be where the first operand is sitting */
-
- /* We must use RegSet::KEEP_REG since op1 can have a GC pointer here */
- genRecoverReg(op1, 0, RegSet::KEEP_REG);
-
- reg = op1->gtRegNum;
-
- /* 'add'/'sub' set all CC flags, others only ZF+SF */
-
- if (tree->gtSetFlags())
- genFlagsEqualToReg(tree, reg);
-
- genReleaseReg(op1);
-
-#if !CPU_LOAD_STORE_ARCH
-CHK_OVF:
-#endif // !CPU_LOAD_STORE_ARCH
-
- /* Do we need an overflow check */
-
- if (ovfl)
- genCheckOverflow(tree);
-
- genCodeForTree_DONE(tree, reg);
-}
-
-/*****************************************************************************
- *
- * Generate code for a simple binary arithmetic or logical assignment operator: x <op>= y.
- * Handles GT_ASG_AND, GT_ASG_OR, GT_ASG_XOR, GT_ASG_ADD, GT_ASG_SUB.
- */
-
-void CodeGen::genCodeForTreeSmpBinArithLogAsgOp(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg)
-{
- instruction ins;
- const genTreeOps oper = tree->OperGet();
- const var_types treeType = tree->TypeGet();
- GenTreePtr op1 = tree->gtOp.gtOp1;
- GenTreePtr op2 = tree->gtGetOp2();
- insFlags flags = tree->gtSetFlags() ? INS_FLAGS_SET : INS_FLAGS_DONT_CARE;
- regNumber reg = DUMMY_INIT(REG_CORRUPT);
- regMaskTP needReg = destReg;
- regMaskTP addrReg;
-
- /* Figure out what instruction to generate */
-
- bool isArith;
- switch (oper)
- {
- case GT_ASG_AND:
- ins = INS_AND;
- isArith = false;
- break;
- case GT_ASG_OR:
- ins = INS_OR;
- isArith = false;
- break;
- case GT_ASG_XOR:
- ins = INS_XOR;
- isArith = false;
- break;
- case GT_ASG_ADD:
- ins = INS_add;
- isArith = true;
- break;
- case GT_ASG_SUB:
- ins = INS_sub;
- isArith = true;
- break;
- default:
- unreached();
- }
-
- bool ovfl = false;
-
- if (isArith)
- {
- // We only reach here for GT_ASG_SUB, GT_ASG_ADD.
-
- ovfl = tree->gtOverflow();
-
- // With overflow checking, we can only update the value in place if the modified
- // value cannot be observed should the "+" raise an overflow exception
- noway_assert(!ovfl ||
- ((op1->gtOper == GT_LCL_VAR || op1->gtOper == GT_LCL_FLD) && !compiler->compCurBB->hasTryIndex()));
-
- /* Do not allow overflow instructions with refs/byrefs */
-
- noway_assert(!ovfl || !varTypeIsGC(treeType));
-
- // We disallow overflow and byte-ops here as it is too much trouble
- noway_assert(!ovfl || !varTypeIsByte(treeType));
-
- /* Is the second operand a constant? */
-
- if (op2->IsIntCnsFitsInI32())
- {
- int ival = (int)op2->gtIntCon.gtIconVal;
-
- /* What is the target of the assignment? */
-
- switch (op1->gtOper)
- {
- case GT_REG_VAR:
-
- REG_VAR4:
-
- reg = op1->gtRegVar.gtRegNum;
-
- /* No registers are needed for addressing */
-
- addrReg = RBM_NONE;
-#if !CPU_LOAD_STORE_ARCH
- INCDEC_REG:
-#endif
- /* We're adding a constant to a register */
-
- if (oper == GT_ASG_ADD)
- genIncRegBy(reg, ival, tree, treeType, ovfl);
- else if (ovfl && ((tree->gtFlags & GTF_UNSIGNED) ||
- ival == ((treeType == TYP_INT) ? INT32_MIN : SSIZE_T_MIN)) // -0x80000000 ==
- // 0x80000000.
- // Therefore we can't
- // use -ival.
- )
- /* For unsigned overflow, we have to use INS_sub to set
- the flags correctly */
- genDecRegBy(reg, ival, tree);
- else
- genIncRegBy(reg, -ival, tree, treeType, ovfl);
-
- break;
-
- case GT_LCL_VAR:
-
- /* Does the variable live in a register? */
-
- if (genMarkLclVar(op1))
- goto REG_VAR4;
-
- __fallthrough;
-
- default:
-
- /* Make the target addressable for load/store */
- addrReg = genMakeAddressable2(op1, needReg, RegSet::KEEP_REG, true, true);
-
-#if !CPU_LOAD_STORE_ARCH
- // For CPU_LOAD_STORE_ARCH, we always load from memory then store to memory
-
- /* For small types with overflow check, we need to
- sign/zero extend the result, so we need it in a reg */
-
- if (ovfl && genTypeSize(treeType) < sizeof(int))
-#endif // !CPU_LOAD_STORE_ARCH
- {
- // Load op1 into a reg
-
- reg = regSet.rsGrabReg(RBM_ALLINT & ~addrReg);
-
- inst_RV_TT(INS_mov, reg, op1);
-
- // Issue the add/sub and the overflow check
-
- inst_RV_IV(ins, reg, ival, emitActualTypeSize(treeType), flags);
- regTracker.rsTrackRegTrash(reg);
-
- if (ovfl)
- {
- genCheckOverflow(tree);
- }
-
- /* Store the (sign/zero extended) result back to
- the stack location of the variable */
-
- inst_TT_RV(ins_Store(op1->TypeGet()), op1, reg);
-
- break;
- }
-#if !CPU_LOAD_STORE_ARCH
- else
- {
- /* Add/subtract the new value into/from the target */
-
- if (op1->gtFlags & GTF_REG_VAL)
- {
- reg = op1->gtRegNum;
- goto INCDEC_REG;
- }
-
- /* Special case: inc/dec (up to P3, or for small code, or blended code outside loops) */
- if (!ovfl && (ival == 1 || ival == -1) &&
- !compiler->optAvoidIncDec(compiler->compCurBB->getBBWeight(compiler)))
- {
- noway_assert(oper == GT_ASG_SUB || oper == GT_ASG_ADD);
- if (oper == GT_ASG_SUB)
- ival = -ival;
-
- ins = (ival > 0) ? INS_inc : INS_dec;
- inst_TT(ins, op1);
- }
- else
- {
- inst_TT_IV(ins, op1, ival);
- }
-
- if ((op1->gtOper == GT_LCL_VAR) && (!ovfl || treeType == TYP_INT))
- {
- if (tree->gtSetFlags())
- genFlagsEqualToVar(tree, op1->gtLclVarCommon.gtLclNum);
- }
-
- break;
- }
-#endif // !CPU_LOAD_STORE_ARCH
- } // end switch (op1->gtOper)
-
- genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);
-
- genCodeForTreeSmpOpAsg_DONE_ASSG(tree, addrReg, reg, ovfl);
- return;
- } // end if (op2->IsIntCnsFitsInI32())
- } // end if (isArith)
-
- noway_assert(!varTypeIsGC(treeType) || ins == INS_sub || ins == INS_add);
-
- /* Is the target a register or local variable? */
-
- switch (op1->gtOper)
- {
- case GT_LCL_VAR:
-
- /* Does the target variable live in a register? */
-
- if (!genMarkLclVar(op1))
- break;
-
- __fallthrough;
-
- case GT_REG_VAR:
-
- /* Get hold of the target register */
-
- reg = op1->gtRegVar.gtRegNum;
-
- /* Make sure the target of the store is available */
-
- if (regSet.rsMaskUsed & genRegMask(reg))
- {
- regSet.rsSpillReg(reg);
- }
-
- /* Make the RHS addressable */
-
- addrReg = genMakeRvalueAddressable(op2, 0, RegSet::KEEP_REG, false);
-
- /* Compute the new value into the target register */
- CLANG_FORMAT_COMMENT_ANCHOR;
-
-#if CPU_HAS_BYTE_REGS
-
- // Fix 383833 X86 ILGEN
- regNumber reg2;
- if ((op2->gtFlags & GTF_REG_VAL) != 0)
- {
- reg2 = op2->gtRegNum;
- }
- else
- {
- reg2 = REG_STK;
- }
-
- // We can only generate a byte ADD,SUB,OR,AND operation when reg and reg2 are both BYTE registers.
- // When op2 is in memory, reg2 == REG_STK and we will need to force op2 into a register.
- //
- if (varTypeIsByte(treeType) &&
- (((genRegMask(reg) & RBM_BYTE_REGS) == 0) || ((genRegMask(reg2) & RBM_BYTE_REGS) == 0)))
- {
- // We will force op2 into a register (via sign/zero extending load)
- // for the cases where op2 is in memory and thus could have
- // an unmapped page just beyond its location
- //
- if ((op2->OperIsIndir() || (op2->gtOper == GT_CLS_VAR)) && varTypeIsSmall(op2->TypeGet()))
- {
- genCodeForTree(op2, 0);
- assert((op2->gtFlags & GTF_REG_VAL) != 0);
- }
-
- inst_RV_TT(ins, reg, op2, 0, EA_4BYTE, flags);
-
- bool canOmit = false;
-
- if (varTypeIsUnsigned(treeType))
- {
- // When op2 is a byte sized constant we can omit the zero extend instruction
- if ((op2->gtOper == GT_CNS_INT) && ((op2->gtIntCon.gtIconVal & 0xFF) == op2->gtIntCon.gtIconVal))
- {
- canOmit = true;
- }
- }
- else // treeType is signed
- {
- // When op2 is a positive 7-bit or smaller constant
- // we can omit the sign extension sequence.
- if ((op2->gtOper == GT_CNS_INT) && ((op2->gtIntCon.gtIconVal & 0x7F) == op2->gtIntCon.gtIconVal))
- {
- canOmit = true;
- }
- }
-
- if (!canOmit)
- {
- // If reg is a byte reg then we can use a movzx/movsx instruction
- //
- if ((genRegMask(reg) & RBM_BYTE_REGS) != 0)
- {
- instruction extendIns = ins_Move_Extend(treeType, true);
- inst_RV_RV(extendIns, reg, reg, treeType, emitTypeSize(treeType));
- }
- else // we can't encode a movzx/movsx instruction
- {
- if (varTypeIsUnsigned(treeType))
- {
- // otherwise, we must zero the upper 24 bits of 'reg'
- inst_RV_IV(INS_AND, reg, 0xFF, EA_4BYTE);
- }
- else // treeType is signed
- {
- // otherwise, we must sign extend the result in the non-byteable register 'reg'
- // We will shift the register left 24 bits, thus putting the sign-bit into the high bit
- // then we do an arithmetic shift back 24 bits which propagates the sign bit correctly.
- //
- inst_RV_SH(INS_SHIFT_LEFT_LOGICAL, EA_4BYTE, reg, 24);
- inst_RV_SH(INS_SHIFT_RIGHT_ARITHM, EA_4BYTE, reg, 24);
- }
- }
- }
- }
- else
-#endif // CPU_HAS_BYTE_REGS
- {
- inst_RV_TT(ins, reg, op2, 0, emitTypeSize(treeType), flags);
- }
-
- /* The zero flag is now equal to the register value */
-
- if (tree->gtSetFlags())
- genFlagsEqualToReg(tree, reg);
-
- /* Remember that we trashed the target */
-
- regTracker.rsTrackRegTrash(reg);
-
- /* Free up anything that was tied up by the RHS */
-
- genDoneAddressable(op2, addrReg, RegSet::KEEP_REG);
-
- genCodeForTreeSmpOpAsg_DONE_ASSG(tree, addrReg, reg, ovfl);
- return;
-
- default:
- break;
- } // end switch (op1->gtOper)
-
-#if !CPU_LOAD_STORE_ARCH
- /* Special case: "x ^= -1" is actually "not(x)" */
-
- if (oper == GT_ASG_XOR)
- {
- if (op2->gtOper == GT_CNS_INT && op2->gtIntCon.gtIconVal == -1)
- {
- addrReg = genMakeAddressable(op1, RBM_ALLINT, RegSet::KEEP_REG, true);
- inst_TT(INS_NOT, op1);
- genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);
-
- genCodeForTreeSmpOpAsg_DONE_ASSG(tree, addrReg, tree->gtRegNum, ovfl);
- return;
- }
- }
-#endif // !CPU_LOAD_STORE_ARCH
-
- /* Setup target mask for op2 (byte-regs for small operands) */
-
- unsigned needMask;
- needMask = (varTypeIsByte(treeType)) ? RBM_BYTE_REGS : RBM_ALLINT;
-
- /* Is the second operand a constant? */
-
- if (op2->IsIntCnsFitsInI32())
- {
- int ival = (int)op2->gtIntCon.gtIconVal;
-
- /* Make the target addressable */
- addrReg = genMakeAddressable(op1, needReg, RegSet::FREE_REG, true);
-
- inst_TT_IV(ins, op1, ival, 0, emitTypeSize(treeType), flags);
-
- genDoneAddressable(op1, addrReg, RegSet::FREE_REG);
-
- genCodeForTreeSmpOpAsg_DONE_ASSG(tree, addrReg, tree->gtRegNum, ovfl);
- return;
- }
-
- /* Is the value or the address to be computed first? */
-
- if (tree->gtFlags & GTF_REVERSE_OPS)
- {
- /* Compute the new value into a register */
-
- genComputeReg(op2, needMask, RegSet::EXACT_REG, RegSet::KEEP_REG);
-
- /* Make the target addressable for load/store */
- addrReg = genMakeAddressable2(op1, 0, RegSet::KEEP_REG, true, true);
- regSet.rsLockUsedReg(addrReg);
-
-#if !CPU_LOAD_STORE_ARCH
- // For CPU_LOAD_STORE_ARCH, we always load from memory then store to memory
- /* For small types with overflow check, we need to
- sign/zero extend the result, so we need it in a reg */
-
- if (ovfl && genTypeSize(treeType) < sizeof(int))
-#endif // !CPU_LOAD_STORE_ARCH
- {
- reg = regSet.rsPickReg();
- regSet.rsLockReg(genRegMask(reg));
-
- noway_assert(genIsValidReg(reg));
-
- /* Generate "ldr reg, [var]" */
-
- inst_RV_TT(ins_Load(op1->TypeGet()), reg, op1);
-
- if (op1->gtOper == GT_LCL_VAR)
- regTracker.rsTrackRegLclVar(reg, op1->gtLclVar.gtLclNum);
- else
- regTracker.rsTrackRegTrash(reg);
-
- /* Make sure the new value is in a register */
-
- genRecoverReg(op2, 0, RegSet::KEEP_REG);
-
- /* Compute the new value */
-
- inst_RV_RV(ins, reg, op2->gtRegNum, treeType, emitTypeSize(treeType), flags);
-
- if (ovfl)
- genCheckOverflow(tree);
-
- /* Move the new value back to the variable */
- /* Generate "str reg, [var]" */
-
- inst_TT_RV(ins_Store(op1->TypeGet()), op1, reg);
- regSet.rsUnlockReg(genRegMask(reg));
-
- if (op1->gtOper == GT_LCL_VAR)
- regTracker.rsTrackRegLclVar(reg, op1->gtLclVarCommon.gtLclNum);
- }
-#if !CPU_LOAD_STORE_ARCH
- else
- {
- /* Make sure the new value is in a register */
-
- genRecoverReg(op2, 0, RegSet::KEEP_REG);
-
- /* Add the new value into the target */
-
- inst_TT_RV(ins, op1, op2->gtRegNum);
- }
-#endif // !CPU_LOAD_STORE_ARCH
- /* Free up anything that was tied up on either side */
- regSet.rsUnlockUsedReg(addrReg);
- genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);
- genReleaseReg(op2);
- }
- else
- {
- /* Make the target addressable */
-
- addrReg = genMakeAddressable2(op1, RBM_ALLINT & ~op2->gtRsvdRegs, RegSet::KEEP_REG, true, true);
-
- /* Compute the new value into a register */
-
- genComputeReg(op2, needMask, RegSet::EXACT_REG, RegSet::KEEP_REG);
- regSet.rsLockUsedReg(genRegMask(op2->gtRegNum));
-
- /* Make sure the target is still addressable */
-
- addrReg = genKeepAddressable(op1, addrReg);
- regSet.rsLockUsedReg(addrReg);
-
-#if !CPU_LOAD_STORE_ARCH
- // For CPU_LOAD_STORE_ARCH, we always load from memory then store to memory
-
- /* For small types with overflow check, we need to
- sign/zero extend the result, so we need it in a reg */
-
- if (ovfl && genTypeSize(treeType) < sizeof(int))
-#endif // !CPU_LOAD_STORE_ARCH
- {
- reg = regSet.rsPickReg();
-
- inst_RV_TT(INS_mov, reg, op1);
-
- inst_RV_RV(ins, reg, op2->gtRegNum, treeType, emitTypeSize(treeType), flags);
- regTracker.rsTrackRegTrash(reg);
-
- if (ovfl)
- genCheckOverflow(tree);
-
- inst_TT_RV(ins_Store(op1->TypeGet()), op1, reg);
-
- if (op1->gtOper == GT_LCL_VAR)
- regTracker.rsTrackRegLclVar(reg, op1->gtLclVar.gtLclNum);
- }
-#if !CPU_LOAD_STORE_ARCH
- else
- {
- /* Add the new value into the target */
-
- inst_TT_RV(ins, op1, op2->gtRegNum);
- }
-#endif
-
- /* Free up anything that was tied up on either side */
- regSet.rsUnlockUsedReg(addrReg);
- genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);
-
- regSet.rsUnlockUsedReg(genRegMask(op2->gtRegNum));
- genReleaseReg(op2);
- }
-
- genCodeForTreeSmpOpAsg_DONE_ASSG(tree, addrReg, reg, ovfl);
-}
-
-/*****************************************************************************
- *
- * Generate code for GT_UMOD.
- */
-
-void CodeGen::genCodeForUnsignedMod(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg)
-{
- assert(tree->OperGet() == GT_UMOD);
-
- GenTreePtr op1 = tree->gtOp.gtOp1;
- GenTreePtr op2 = tree->gtOp.gtOp2;
- const var_types treeType = tree->TypeGet();
- regMaskTP needReg = destReg;
- regNumber reg;
-
- /* Is this a division by an integer constant? */
-
- noway_assert(op2);
- if (compiler->fgIsUnsignedModOptimizable(op2))
- {
- /* Generate the operand into some register */
-
- genCompIntoFreeReg(op1, needReg, RegSet::FREE_REG);
- noway_assert(op1->gtFlags & GTF_REG_VAL);
-
- reg = op1->gtRegNum;
-
- /* Generate the appropriate sequence */
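- // Unsigned modulo by a power of two reduces to a mask: reg &= (divisor - 1)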
- size_t ival = op2->gtIntCon.gtIconVal - 1;
- inst_RV_IV(INS_AND, reg, ival, emitActualTypeSize(treeType));
-
- /* The register is now trashed */
-
- regTracker.rsTrackRegTrash(reg);
-
- genCodeForTree_DONE(tree, reg);
- return;
- }
-
- genCodeForGeneralDivide(tree, destReg, bestReg);
-}
-
-/*****************************************************************************
- *
- * Generate code for GT_MOD.
- */
-
-void CodeGen::genCodeForSignedMod(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg)
-{
- assert(tree->OperGet() == GT_MOD);
-
- GenTreePtr op1 = tree->gtOp.gtOp1;
- GenTreePtr op2 = tree->gtOp.gtOp2;
- const var_types treeType = tree->TypeGet();
- regMaskTP needReg = destReg;
- regNumber reg;
-
- /* Is this a division by an integer constant? */
-
- noway_assert(op2);
- if (compiler->fgIsSignedModOptimizable(op2))
- {
- ssize_t ival = op2->gtIntCon.gtIconVal;
- BasicBlock* skip = genCreateTempLabel();
-
- /* Generate the operand into some register */
-
- genCompIntoFreeReg(op1, needReg, RegSet::FREE_REG);
- noway_assert(op1->gtFlags & GTF_REG_VAL);
-
- reg = op1->gtRegNum;
-
- /* Generate the appropriate sequence */
-
- inst_RV_IV(INS_AND, reg, (int)(ival - 1) | 0x80000000, EA_4BYTE, INS_FLAGS_SET);
-
- /* The register is now trashed */
-
- regTracker.rsTrackRegTrash(reg);
-
- /* Check and branch for a positive value */
- emitJumpKind jmpGEL = genJumpKindForOper(GT_GE, CK_LOGICAL);
- inst_JMP(jmpGEL, skip);
-
- /* Generate the rest of the sequence and we're done */
-
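- // For a negative dividend the dec / or / inc sequence below turns the masked (positive)
- // residue into the correct negative remainder, while mapping a zero residue back to zero.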
- genIncRegBy(reg, -1, NULL, treeType);
- ival = -ival;
- if ((treeType == TYP_LONG) && ((int)ival != ival))
- {
- regNumber immReg = regSet.rsGrabReg(RBM_ALLINT & ~genRegMask(reg));
- instGen_Set_Reg_To_Imm(EA_8BYTE, immReg, ival);
- inst_RV_RV(INS_OR, reg, immReg, TYP_LONG);
- }
- else
- {
- inst_RV_IV(INS_OR, reg, (int)ival, emitActualTypeSize(treeType));
- }
- genIncRegBy(reg, 1, NULL, treeType);
-
- /* Define the 'skip' label and we're done */
-
- genDefineTempLabel(skip);
-
- genCodeForTree_DONE(tree, reg);
- return;
- }
-
- genCodeForGeneralDivide(tree, destReg, bestReg);
-}
-
-/*****************************************************************************
- *
- * Generate code for GT_UDIV.
- */
-
-void CodeGen::genCodeForUnsignedDiv(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg)
-{
- assert(tree->OperGet() == GT_UDIV);
-
- GenTreePtr op1 = tree->gtOp.gtOp1;
- GenTreePtr op2 = tree->gtOp.gtOp2;
- const var_types treeType = tree->TypeGet();
- regMaskTP needReg = destReg;
- regNumber reg;
-
- /* Is this a division by an integer constant? */
-
- noway_assert(op2);
- if (compiler->fgIsUnsignedDivOptimizable(op2))
- {
- size_t ival = op2->gtIntCon.gtIconVal;
-
- /* Division by 1 must be handled elsewhere */
-
- noway_assert(ival != 1 || compiler->opts.MinOpts());
-
- /* Generate the operand into some register */
-
- genCompIntoFreeReg(op1, needReg, RegSet::FREE_REG);
- noway_assert(op1->gtFlags & GTF_REG_VAL);
-
- reg = op1->gtRegNum;
-
- /* Generate "shr reg, log2(value)" */
-
- inst_RV_SH(INS_SHIFT_RIGHT_LOGICAL, emitTypeSize(treeType), reg, genLog2(ival));
-
- /* The register is now trashed */
-
- regTracker.rsTrackRegTrash(reg);
-
- genCodeForTree_DONE(tree, reg);
- return;
- }
-
- genCodeForGeneralDivide(tree, destReg, bestReg);
-}
-
-/*****************************************************************************
- *
- * Generate code for GT_DIV.
- */
-
-void CodeGen::genCodeForSignedDiv(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg)
-{
- assert(tree->OperGet() == GT_DIV);
-
- GenTreePtr op1 = tree->gtOp.gtOp1;
- GenTreePtr op2 = tree->gtOp.gtOp2;
- const var_types treeType = tree->TypeGet();
- regMaskTP needReg = destReg;
- regNumber reg;
-
- /* Is this a division by an integer constant? */
-
- noway_assert(op2);
- if (compiler->fgIsSignedDivOptimizable(op2))
- {
- ssize_t ival_s = op2->gtIntConCommon.IconValue();
- assert(ival_s > 0); // Postcondition of compiler->fgIsSignedDivOptimizable...
- size_t ival = static_cast<size_t>(ival_s);
-
- /* Division by 1 must be handled elsewhere */
-
- noway_assert(ival != 1);
-
- BasicBlock* onNegDivisee = genCreateTempLabel();
-
- /* Generate the operand into some register */
-
- genCompIntoFreeReg(op1, needReg, RegSet::FREE_REG);
- noway_assert(op1->gtFlags & GTF_REG_VAL);
-
- reg = op1->gtRegNum;
-
- if (ival == 2)
- {
- /* Generate "sar reg, log2(value)" */
-
- inst_RV_SH(INS_SHIFT_RIGHT_ARITHM, emitTypeSize(treeType), reg, genLog2(ival), INS_FLAGS_SET);
-
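- // 'sar' rounds toward negative infinity; for a negative dividend, adding back the
- // shifted-out bit (now in the carry flag) corrects the quotient to round toward zero.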
- // Check and branch for a positive value, skipping the INS_ADDC instruction
- emitJumpKind jmpGEL = genJumpKindForOper(GT_GE, CK_LOGICAL);
- inst_JMP(jmpGEL, onNegDivisee);
-
- // Add the carry flag to 'reg'
- inst_RV_IV(INS_ADDC, reg, 0, emitActualTypeSize(treeType));
-
- /* Define the 'onNegDivisee' label and we're done */
-
- genDefineTempLabel(onNegDivisee);
-
- /* The register is now trashed */
-
- regTracker.rsTrackRegTrash(reg);
-
- /* The result is the same as the operand */
-
- reg = op1->gtRegNum;
- }
- else
- {
- /* Generate the following sequence */
- /*
- test reg, reg
- jns onNegDivisee
- add reg, ival-1
- onNegDivisee:
- sar reg, log2(ival)
- */
-
- instGen_Compare_Reg_To_Zero(emitTypeSize(treeType), reg);
-
- // Check and branch for a positive value, skipping the INS_add instruction
- emitJumpKind jmpGEL = genJumpKindForOper(GT_GE, CK_LOGICAL);
- inst_JMP(jmpGEL, onNegDivisee);
-
- inst_RV_IV(INS_add, reg, (int)ival - 1, emitActualTypeSize(treeType));
-
- /* Define the 'onNegDivisee' label and we're done */
-
- genDefineTempLabel(onNegDivisee);
-
- /* Generate "sar reg, log2(value)" */
-
- inst_RV_SH(INS_SHIFT_RIGHT_ARITHM, emitTypeSize(treeType), reg, genLog2(ival));
-
- /* The register is now trashed */
-
- regTracker.rsTrackRegTrash(reg);
-
- /* The result is the same as the operand */
-
- reg = op1->gtRegNum;
- }
-
- genCodeForTree_DONE(tree, reg);
- return;
- }
-
- genCodeForGeneralDivide(tree, destReg, bestReg);
-}
-
-/*****************************************************************************
- *
- * Generate code for a general divide. Handles the general case for GT_UMOD, GT_MOD, GT_UDIV, GT_DIV
- * (if op2 is not a power of 2 constant).
- */
-
-void CodeGen::genCodeForGeneralDivide(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg)
-{
- assert(tree->OperGet() == GT_UMOD || tree->OperGet() == GT_MOD || tree->OperGet() == GT_UDIV ||
- tree->OperGet() == GT_DIV);
-
- GenTreePtr op1 = tree->gtOp.gtOp1;
- GenTreePtr op2 = tree->gtOp.gtOp2;
- const var_types treeType = tree->TypeGet();
- regMaskTP needReg = destReg;
- regNumber reg;
- instruction ins;
- bool gotOp1;
- regMaskTP addrReg;
-
-#if USE_HELPERS_FOR_INT_DIV
- noway_assert(!"Unreachable: fgMorph should have transformed this into a JitHelper");
-#endif
-
-#if defined(_TARGET_XARCH_)
-
- /* Which operand are we supposed to evaluate first? */
-
- if (tree->gtFlags & GTF_REVERSE_OPS)
- {
- /* We'll evaluate 'op2' first */
-
- gotOp1 = false;
- destReg &= ~op1->gtRsvdRegs;
-
- /* Also if op1 is an enregistered LCL_VAR then exclude its register as well */
- if (op1->gtOper == GT_LCL_VAR)
- {
- unsigned varNum = op1->gtLclVarCommon.gtLclNum;
- noway_assert(varNum < compiler->lvaCount);
- LclVarDsc* varDsc = compiler->lvaTable + varNum;
- if (varDsc->lvRegister)
- {
- destReg &= ~genRegMask(varDsc->lvRegNum);
- }
- }
- }
- else
- {
- /* We'll evaluate 'op1' first */
-
- gotOp1 = true;
-
- regMaskTP op1Mask;
- if (RBM_EAX & op2->gtRsvdRegs)
- op1Mask = RBM_ALLINT & ~op2->gtRsvdRegs;
- else
- op1Mask = RBM_EAX; // EAX would be ideal
-
- /* Generate the dividend into EAX and hold on to it. freeOnly=true */
-
- genComputeReg(op1, op1Mask, RegSet::ANY_REG, RegSet::KEEP_REG, true);
- }
-
- /* We want to avoid using EAX or EDX for the second operand */
-
- destReg = regSet.rsMustExclude(destReg, RBM_EAX | RBM_EDX);
-
- /* Make the second operand addressable */
- op2 = genCodeForCommaTree(op2);
-
- /* Special case: if op2 is a local var we are done */
-
- if (op2->gtOper == GT_LCL_VAR || op2->gtOper == GT_LCL_FLD)
- {
- if ((op2->gtFlags & GTF_REG_VAL) == 0)
- addrReg = genMakeRvalueAddressable(op2, destReg, RegSet::KEEP_REG, false);
- else
- addrReg = 0;
- }
- else
- {
- genComputeReg(op2, destReg, RegSet::ANY_REG, RegSet::KEEP_REG);
-
- noway_assert(op2->gtFlags & GTF_REG_VAL);
- addrReg = genRegMask(op2->gtRegNum);
- }
-
- /* Make sure we have the dividend in EAX */
-
- if (gotOp1)
- {
- /* We've previously computed op1 into EAX */
-
- genRecoverReg(op1, RBM_EAX, RegSet::KEEP_REG);
- }
- else
- {
- /* Compute op1 into EAX and hold on to it */
-
- genComputeReg(op1, RBM_EAX, RegSet::EXACT_REG, RegSet::KEEP_REG, true);
- }
-
- noway_assert(op1->gtFlags & GTF_REG_VAL);
- noway_assert(op1->gtRegNum == REG_EAX);
-
- /* We can now safely (we think) grab EDX */
-
- regSet.rsGrabReg(RBM_EDX);
- regSet.rsLockReg(RBM_EDX);
-
- /* Convert the integer in EAX into a un/signed long in EDX:EAX */
-
- const genTreeOps oper = tree->OperGet();
-
- if (oper == GT_UMOD || oper == GT_UDIV)
- instGen_Set_Reg_To_Zero(EA_PTRSIZE, REG_EDX);
- else
- instGen(INS_cdq);
-
- /* Make sure the divisor is still addressable */
-
- addrReg = genKeepAddressable(op2, addrReg, RBM_EAX);
-
- /* Perform the division */
-
- if (oper == GT_UMOD || oper == GT_UDIV)
- inst_TT(INS_UNSIGNED_DIVIDE, op2);
- else
- inst_TT(INS_SIGNED_DIVIDE, op2);
-
- /* Free up anything tied up by the divisor's address */
-
- genDoneAddressable(op2, addrReg, RegSet::KEEP_REG);
-
- /* Unlock and free EDX */
-
- regSet.rsUnlockReg(RBM_EDX);
-
- /* Free up op1 (which is in EAX) as well */
-
- genReleaseReg(op1);
-
- /* Both EAX and EDX are now trashed */
-
- regTracker.rsTrackRegTrash(REG_EAX);
- regTracker.rsTrackRegTrash(REG_EDX);
-
- /* Figure out which register the result is in */
-
- reg = (oper == GT_DIV || oper == GT_UDIV) ? REG_EAX : REG_EDX;
-
- /* Don't forget to mark the first operand as using EAX and EDX */
-
- op1->gtRegNum = reg;
-
- genCodeForTree_DONE(tree, reg);
-
-#elif defined(_TARGET_ARM_)
-
- /* Which operand are we supposed to evaluate first? */
-
- if (tree->gtFlags & GTF_REVERSE_OPS)
- {
- /* We'll evaluate 'op2' first */
-
- gotOp1 = false;
- destReg &= ~op1->gtRsvdRegs;
-
- /* Also if op1 is an enregistered LCL_VAR then exclude its register as well */
- if (op1->gtOper == GT_LCL_VAR)
- {
- unsigned varNum = op1->gtLclVarCommon.gtLclNum;
- noway_assert(varNum < compiler->lvaCount);
- LclVarDsc* varDsc = compiler->lvaTable + varNum;
- if (varDsc->lvRegister)
- {
- destReg &= ~genRegMask(varDsc->lvRegNum);
- }
- }
- }
- else
- {
- /* We'll evaluate 'op1' first */
-
- gotOp1 = true;
- regMaskTP op1Mask = RBM_ALLINT & ~op2->gtRsvdRegs;
-
- /* Generate the dividend into a register and hold on to it. */
-
- genComputeReg(op1, op1Mask, RegSet::ANY_REG, RegSet::KEEP_REG, true);
- }
-
- /* Evaluate the second operand into a register and hold onto it. */
-
- genComputeReg(op2, destReg, RegSet::ANY_REG, RegSet::KEEP_REG);
-
- noway_assert(op2->gtFlags & GTF_REG_VAL);
- addrReg = genRegMask(op2->gtRegNum);
-
- if (gotOp1)
- {
- // Recover op1 if spilled
- genRecoverReg(op1, RBM_NONE, RegSet::KEEP_REG);
- }
- else
- {
- /* Compute op1 into any register and hold on to it */
- genComputeReg(op1, RBM_ALLINT, RegSet::ANY_REG, RegSet::KEEP_REG, true);
- }
- noway_assert(op1->gtFlags & GTF_REG_VAL);
-
- reg = regSet.rsPickReg(needReg, bestReg);
-
- // Perform the division
-
- const genTreeOps oper = tree->OperGet();
-
- if (oper == GT_UMOD || oper == GT_UDIV)
- ins = INS_udiv;
- else
- ins = INS_sdiv;
-
- getEmitter()->emitIns_R_R_R(ins, EA_4BYTE, reg, op1->gtRegNum, op2->gtRegNum);
-
- if (oper == GT_UMOD || oper == GT_MOD)
- {
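- // ARM has no modulo instruction: compute the remainder as op1 - (op1 / op2) * op2,
- // using the quotient already in 'reg'.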
- getEmitter()->emitIns_R_R_R(INS_mul, EA_4BYTE, reg, op2->gtRegNum, reg);
- getEmitter()->emitIns_R_R_R(INS_sub, EA_4BYTE, reg, op1->gtRegNum, reg);
- }
- /* Free up op1 and op2 */
- genReleaseReg(op1);
- genReleaseReg(op2);
-
- genCodeForTree_DONE(tree, reg);
-
-#else
-#error "Unknown _TARGET_"
-#endif
-}
-
-/*****************************************************************************
- *
- * Generate code for an assignment shift (x <op>= ). Handles GT_ASG_LSH, GT_ASG_RSH, GT_ASG_RSZ.
- */
-
-void CodeGen::genCodeForAsgShift(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg)
-{
- assert(tree->OperGet() == GT_ASG_LSH || tree->OperGet() == GT_ASG_RSH || tree->OperGet() == GT_ASG_RSZ);
-
- const genTreeOps oper = tree->OperGet();
- GenTreePtr op1 = tree->gtOp.gtOp1;
- GenTreePtr op2 = tree->gtOp.gtOp2;
- const var_types treeType = tree->TypeGet();
- insFlags flags = tree->gtSetFlags() ? INS_FLAGS_SET : INS_FLAGS_DONT_CARE;
- regMaskTP needReg = destReg;
- regNumber reg;
- instruction ins;
- regMaskTP addrReg;
-
- switch (oper)
- {
- case GT_ASG_LSH:
- ins = INS_SHIFT_LEFT_LOGICAL;
- break;
- case GT_ASG_RSH:
- ins = INS_SHIFT_RIGHT_ARITHM;
- break;
- case GT_ASG_RSZ:
- ins = INS_SHIFT_RIGHT_LOGICAL;
- break;
- default:
- unreached();
- }
-
- noway_assert(!varTypeIsGC(treeType));
- noway_assert(op2);
-
- /* Shifts by a constant amount are easier */
-
- if (op2->IsCnsIntOrI())
- {
- /* Make the target addressable */
-
- addrReg = genMakeAddressable(op1, needReg, RegSet::KEEP_REG, true);
-
- /* Are we shifting a register left by 1 bit? */
-
- if ((oper == GT_ASG_LSH) && (op2->gtIntCon.gtIconVal == 1) && (op1->gtFlags & GTF_REG_VAL))
- {
- /* The target lives in a register */
-
- reg = op1->gtRegNum;
-
- /* "add reg, reg" is cheaper than "shl reg, 1" */
-
- inst_RV_RV(INS_add, reg, reg, treeType, emitActualTypeSize(treeType), flags);
- }
- else
- {
-#if CPU_LOAD_STORE_ARCH
- if ((op1->gtFlags & GTF_REG_VAL) == 0)
- {
- regSet.rsLockUsedReg(addrReg);
-
- // Load op1 into a reg
-
- reg = regSet.rsPickReg(RBM_ALLINT);
-
- inst_RV_TT(INS_mov, reg, op1);
-
- // Issue the shift
-
- inst_RV_IV(ins, reg, (int)op2->gtIntCon.gtIconVal, emitActualTypeSize(treeType), flags);
- regTracker.rsTrackRegTrash(reg);
-
- /* Store the (sign/zero extended) result back to the stack location of the variable */
-
- inst_TT_RV(ins_Store(op1->TypeGet()), op1, reg);
-
- regSet.rsUnlockUsedReg(addrReg);
- }
- else
-#endif // CPU_LOAD_STORE_ARCH
- {
- /* Shift by the constant value */
-
- inst_TT_SH(ins, op1, (int)op2->gtIntCon.gtIconVal);
- }
- }
-
- /* If the target is a register, it has a new value */
-
- if (op1->gtFlags & GTF_REG_VAL)
- regTracker.rsTrackRegTrash(op1->gtRegNum);
-
- genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);
-
- /* The zero flag is now equal to the target value */
- /* X86: But only if the shift count is != 0 */
-
- if (op2->gtIntCon.gtIconVal != 0)
- {
- if (tree->gtSetFlags())
- {
- if (op1->gtOper == GT_LCL_VAR)
- {
- genFlagsEqualToVar(tree, op1->gtLclVarCommon.gtLclNum);
- }
- else if (op1->gtOper == GT_REG_VAR)
- {
- genFlagsEqualToReg(tree, op1->gtRegNum);
- }
- }
- }
- else
- {
- // It is possible for the shift count to equal 0 with valid
- // IL, and not be optimized away, in the case where the node
- // is of a small type. The sequence of instructions looks like
- // ldsfld, shr, stsfld and executed on a char field. This will
- // never happen with code produced by our compilers, because the
- // compilers will insert a conv.u2 before the stsfld (which will
- // lead us down a different codepath in the JIT and optimize away
- // the shift by zero). This case is not worth optimizing and we
- // will just make sure to generate correct code for it.
-
- genFlagsEqualToNone();
- }
- }
- else
- {
- regMaskTP op2Regs = RBM_NONE;
- if (REG_SHIFT != REG_NA)
- op2Regs = RBM_SHIFT;
-
- regMaskTP tempRegs;
-
- if (tree->gtFlags & GTF_REVERSE_OPS)
- {
- tempRegs = regSet.rsMustExclude(op2Regs, op1->gtRsvdRegs);
- genCodeForTree(op2, tempRegs);
- regSet.rsMarkRegUsed(op2);
-
- tempRegs = regSet.rsMustExclude(RBM_ALLINT, genRegMask(op2->gtRegNum));
- addrReg = genMakeAddressable(op1, tempRegs, RegSet::KEEP_REG, true);
-
- genRecoverReg(op2, op2Regs, RegSet::KEEP_REG);
- }
- else
- {
- /* Make the target addressable avoiding op2->RsvdRegs [and RBM_SHIFT] */
- regMaskTP excludeMask = op2->gtRsvdRegs;
- if (REG_SHIFT != REG_NA)
- excludeMask |= RBM_SHIFT;
-
- tempRegs = regSet.rsMustExclude(RBM_ALLINT, excludeMask);
- addrReg = genMakeAddressable(op1, tempRegs, RegSet::KEEP_REG, true);
-
- /* Load the shift count into the necessary register */
- genComputeReg(op2, op2Regs, RegSet::EXACT_REG, RegSet::KEEP_REG);
- }
-
- /* Make sure the address registers are still here */
- addrReg = genKeepAddressable(op1, addrReg, op2Regs);
-
-#ifdef _TARGET_XARCH_
- /* Perform the shift */
- inst_TT_CL(ins, op1);
-#else
- /* Perform the shift */
- noway_assert(op2->gtFlags & GTF_REG_VAL);
- op2Regs = genRegMask(op2->gtRegNum);
-
- regSet.rsLockUsedReg(addrReg | op2Regs);
- inst_TT_RV(ins, op1, op2->gtRegNum, 0, emitTypeSize(treeType), flags);
- regSet.rsUnlockUsedReg(addrReg | op2Regs);
-#endif
- /* Free the address registers */
- genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);
-
- /* If the value is in a register, it's now trash */
-
- if (op1->gtFlags & GTF_REG_VAL)
- regTracker.rsTrackRegTrash(op1->gtRegNum);
-
- /* Release the op2 [RBM_SHIFT] operand */
-
- genReleaseReg(op2);
- }
-
- genCodeForTreeSmpOpAsg_DONE_ASSG(tree, addrReg, /* unused for ovfl=false */ REG_NA, /* ovfl */ false);
-}
-
-/*****************************************************************************
- *
- * Generate code for a shift. Handles GT_LSH, GT_RSH, GT_RSZ.
- */
-
-void CodeGen::genCodeForShift(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg)
-{
- assert(tree->OperIsShift());
-
- const genTreeOps oper = tree->OperGet();
- GenTreePtr op1 = tree->gtOp.gtOp1;
- GenTreePtr op2 = tree->gtOp.gtOp2;
- const var_types treeType = tree->TypeGet();
- insFlags flags = tree->gtSetFlags() ? INS_FLAGS_SET : INS_FLAGS_DONT_CARE;
- regMaskTP needReg = destReg;
- regNumber reg;
- instruction ins;
-
- switch (oper)
- {
- case GT_LSH:
- ins = INS_SHIFT_LEFT_LOGICAL;
- break;
- case GT_RSH:
- ins = INS_SHIFT_RIGHT_ARITHM;
- break;
- case GT_RSZ:
- ins = INS_SHIFT_RIGHT_LOGICAL;
- break;
- default:
- unreached();
- }
-
- /* Is the shift count constant? */
- noway_assert(op2);
- if (op2->IsIntCnsFitsInI32())
- {
- // TODO: Check to see if we could generate a LEA instead!
-
- /* Compute the left operand into any free register */
-
- genCompIntoFreeReg(op1, needReg, RegSet::KEEP_REG);
-
- noway_assert(op1->gtFlags & GTF_REG_VAL);
- reg = op1->gtRegNum;
-
- /* Are we shifting left by 1 bit? (or 2 bits for fast code) */
-
- // On ARM, until proven otherwise by performance numbers, just do the shift.
- // It's no bigger than add (16 bits for low registers, 32 bits for high registers).
- // It's smaller than two "add reg, reg".
-
- CLANG_FORMAT_COMMENT_ANCHOR;
-
-#ifndef _TARGET_ARM_
- if (oper == GT_LSH)
- {
- emitAttr size = emitActualTypeSize(treeType);
- if (op2->gtIntConCommon.IconValue() == 1)
- {
- /* "add reg, reg" is smaller and faster than "shl reg, 1" */
- inst_RV_RV(INS_add, reg, reg, treeType, size, flags);
- }
- else if ((op2->gtIntConCommon.IconValue() == 2) && (compiler->compCodeOpt() == Compiler::FAST_CODE))
- {
- /* two "add reg, reg" instructions are faster than "shl reg, 2" */
- inst_RV_RV(INS_add, reg, reg, treeType);
- inst_RV_RV(INS_add, reg, reg, treeType, size, flags);
- }
- else
- goto DO_SHIFT_BY_CNS;
- }
- else
-#endif // _TARGET_ARM_
- {
-#ifndef _TARGET_ARM_
- DO_SHIFT_BY_CNS:
-#endif // _TARGET_ARM_
- // If we are shifting 'reg' by zero bits and do not need the flags to be set
- // then we can just skip emitting the instruction as 'reg' is already correct.
- //
- if ((op2->gtIntConCommon.IconValue() != 0) || tree->gtSetFlags())
- {
- /* Generate the appropriate shift instruction */
- inst_RV_SH(ins, emitTypeSize(treeType), reg, (int)op2->gtIntConCommon.IconValue(), flags);
- }
- }
- }
- else
- {
- /* Calculate a useful register mask for computing op1 */
- needReg = regSet.rsNarrowHint(regSet.rsRegMaskFree(), needReg);
- regMaskTP op2RegMask;
-#ifdef _TARGET_XARCH_
- op2RegMask = RBM_ECX;
-#else
- op2RegMask = RBM_NONE;
-#endif
- needReg = regSet.rsMustExclude(needReg, op2RegMask);
-
- regMaskTP tempRegs;
-
- /* Which operand are we supposed to evaluate first? */
- if (tree->gtFlags & GTF_REVERSE_OPS)
- {
- /* Load the shift count [into ECX on XARCH] */
- tempRegs = regSet.rsMustExclude(op2RegMask, op1->gtRsvdRegs);
- genComputeReg(op2, tempRegs, RegSet::EXACT_REG, RegSet::KEEP_REG, false);
-
- /* We must not target the register that is holding op2 */
- needReg = regSet.rsMustExclude(needReg, genRegMask(op2->gtRegNum));
-
- /* Now evaluate 'op1' into a free register */
- genComputeReg(op1, needReg, RegSet::ANY_REG, RegSet::KEEP_REG, true);
-
- /* Recover op2 into ECX */
- genRecoverReg(op2, op2RegMask, RegSet::KEEP_REG);
- }
- else
- {
- /* Compute op1 into a register, trying to avoid op2->rsvdRegs and ECX */
- tempRegs = regSet.rsMustExclude(needReg, op2->gtRsvdRegs);
- genComputeReg(op1, tempRegs, RegSet::ANY_REG, RegSet::KEEP_REG, true);
-
- /* Load the shift count [into ECX on XARCH] */
- genComputeReg(op2, op2RegMask, RegSet::EXACT_REG, RegSet::KEEP_REG, false);
- }
-
- noway_assert(op2->gtFlags & GTF_REG_VAL);
-#ifdef _TARGET_XARCH_
- noway_assert(genRegMask(op2->gtRegNum) == op2RegMask);
-#endif
- // Check for the case of op1 being spilled during the evaluation of op2
- if (op1->gtFlags & GTF_SPILLED)
- {
- // The register has been spilled -- reload it to any register except ECX
- regSet.rsLockUsedReg(op2RegMask);
- regSet.rsUnspillReg(op1, 0, RegSet::KEEP_REG);
- regSet.rsUnlockUsedReg(op2RegMask);
- }
-
- noway_assert(op1->gtFlags & GTF_REG_VAL);
- reg = op1->gtRegNum;
-
-#ifdef _TARGET_ARM_
- /* Perform the shift */
- getEmitter()->emitIns_R_R(ins, EA_4BYTE, reg, op2->gtRegNum, flags);
-#else
- /* Perform the shift */
- inst_RV_CL(ins, reg);
-#endif
- genReleaseReg(op2);
- }
-
- noway_assert(op1->gtFlags & GTF_REG_VAL);
- noway_assert(reg == op1->gtRegNum);
-
- /* The register is now trashed */
- genReleaseReg(op1);
- regTracker.rsTrackRegTrash(reg);
-
- genCodeForTree_DONE(tree, reg);
-}
-
-/*****************************************************************************
- *
- * Generate code for a top-level relational operator (not one that is part of a GT_JTRUE tree).
- * Handles GT_EQ, GT_NE, GT_LT, GT_LE, GT_GE, GT_GT.
- */
-
-void CodeGen::genCodeForRelop(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg)
-{
- assert(tree->OperGet() == GT_EQ || tree->OperGet() == GT_NE || tree->OperGet() == GT_LT ||
- tree->OperGet() == GT_LE || tree->OperGet() == GT_GE || tree->OperGet() == GT_GT);
-
- const genTreeOps oper = tree->OperGet();
- GenTreePtr op1 = tree->gtOp.gtOp1;
- const var_types treeType = tree->TypeGet();
- regMaskTP needReg = destReg;
- regNumber reg;
-
- // Longs and float comparisons are converted to "?:"
- noway_assert(!compiler->fgMorphRelopToQmark(op1));
-
- // Check if we can use the currently set flags. Else set them
-
- emitJumpKind jumpKind = genCondSetFlags(tree);
-
- // Grab a register to materialize the bool value into
-
- bestReg = regSet.rsRegMaskCanGrab() & RBM_BYTE_REGS;
-
- // Check that the predictor did the right job
- noway_assert(bestReg);
-
- // If needReg is in bestReg then use it
- if (needReg & bestReg)
- reg = regSet.rsGrabReg(needReg & bestReg);
- else
- reg = regSet.rsGrabReg(bestReg);
-
-#if defined(_TARGET_ARM_)
-
- // Generate:
- // jump-if-true L_true
- // mov reg, 0
- // jmp L_end
- // L_true:
- // mov reg, 1
- // L_end:
-
- BasicBlock* L_true;
- BasicBlock* L_end;
-
- L_true = genCreateTempLabel();
- L_end = genCreateTempLabel();
-
- inst_JMP(jumpKind, L_true);
- getEmitter()->emitIns_R_I(INS_mov, EA_4BYTE, reg, 0); // Executes when the cond is false
- inst_JMP(EJ_jmp, L_end);
- genDefineTempLabel(L_true);
- getEmitter()->emitIns_R_I(INS_mov, EA_4BYTE, reg, 1); // Executes when the cond is true
- genDefineTempLabel(L_end);
-
- regTracker.rsTrackRegTrash(reg);
-
-#elif defined(_TARGET_XARCH_)
- regMaskTP regs = genRegMask(reg);
- noway_assert(regs & RBM_BYTE_REGS);
-
- // Set (lower byte of) reg according to the flags
-
-    /* Look for the special case where we just want to transfer the carry bit */
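-    // In both of these cases "sbb reg, reg" leaves reg equal to -1 when the carry flag is
-    // set and 0 otherwise; negating it (for "jb") or adding 1 (for "jae") then materializes
-    // the boolean result without a branch.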
-
- if (jumpKind == EJ_jb)
- {
- inst_RV_RV(INS_SUBC, reg, reg);
- inst_RV(INS_NEG, reg, TYP_INT);
- regTracker.rsTrackRegTrash(reg);
- }
- else if (jumpKind == EJ_jae)
- {
- inst_RV_RV(INS_SUBC, reg, reg);
- genIncRegBy(reg, 1, tree, TYP_INT);
- regTracker.rsTrackRegTrash(reg);
- }
- else
- {
- inst_SET(jumpKind, reg);
-
- regTracker.rsTrackRegTrash(reg);
-
- if (treeType == TYP_INT)
- {
- // Set the higher bytes to 0
- inst_RV_RV(ins_Move_Extend(TYP_UBYTE, true), reg, reg, TYP_UBYTE, emitTypeSize(TYP_UBYTE));
- }
- else
- {
- noway_assert(treeType == TYP_BYTE);
- }
- }
-#else
- NYI("TARGET");
-#endif // _TARGET_XXX
-
- genCodeForTree_DONE(tree, reg);
-}
-
-//------------------------------------------------------------------------
-// genCodeForCopyObj: Generate code for a CopyObj node
-//
-// Arguments:
-// tree - The CopyObj node we are going to generate code for.
-// destReg - The register mask for register(s), if any, that will be defined.
-//
-// Return Value:
-// None
-
-void CodeGen::genCodeForCopyObj(GenTreePtr tree, regMaskTP destReg)
-{
- // If the value class doesn't have any fields that are GC refs or
- // the target isn't on the GC-heap, we can merge it with CPBLK.
-    // GC fields cannot be copied directly; instead we will
- // need to use a jit-helper for that.
- assert(tree->gtOper == GT_ASG);
- assert(tree->gtOp.gtOp1->gtOper == GT_OBJ);
-
- GenTreeObj* cpObjOp = tree->gtOp.gtOp1->AsObj();
- assert(cpObjOp->HasGCPtr());
-
-#ifdef _TARGET_ARM_
- if (cpObjOp->IsVolatile())
- {
- // Emit a memory barrier instruction before the CopyBlk
- instGen_MemoryBarrier();
- }
-#endif
- assert(tree->gtOp.gtOp2->OperIsIndir());
- GenTreePtr srcObj = tree->gtOp.gtOp2->AsIndir()->Addr();
- GenTreePtr dstObj = cpObjOp->Addr();
-
- noway_assert(dstObj->gtType == TYP_BYREF || dstObj->gtType == TYP_I_IMPL);
-
-#ifdef DEBUG
- CORINFO_CLASS_HANDLE clsHnd = (CORINFO_CLASS_HANDLE)cpObjOp->gtClass;
- size_t debugBlkSize = roundUp(compiler->info.compCompHnd->getClassSize(clsHnd), TARGET_POINTER_SIZE);
-
- // Since we round up, we are not handling the case where we have a non-pointer sized struct with GC pointers.
- // The EE currently does not allow this. Let's assert it just to be safe.
- noway_assert(compiler->info.compCompHnd->getClassSize(clsHnd) == debugBlkSize);
-#endif
-
- size_t blkSize = cpObjOp->gtSlots * TARGET_POINTER_SIZE;
- unsigned slots = cpObjOp->gtSlots;
- BYTE* gcPtrs = cpObjOp->gtGcPtrs;
- unsigned gcPtrCount = cpObjOp->gtGcPtrCount;
- assert(blkSize == cpObjOp->gtBlkSize);
-
- GenTreePtr treeFirst, treeSecond;
- regNumber regFirst, regSecond;
-
-    // Check in what order the object-ptrs have to be evaluated
-
- if (tree->gtFlags & GTF_REVERSE_OPS)
- {
- treeFirst = srcObj;
- treeSecond = dstObj;
-#if CPU_USES_BLOCK_MOVE
- regFirst = REG_ESI;
- regSecond = REG_EDI;
-#else
- regFirst = REG_ARG_1;
- regSecond = REG_ARG_0;
-#endif
- }
- else
- {
- treeFirst = dstObj;
- treeSecond = srcObj;
-#if CPU_USES_BLOCK_MOVE
- regFirst = REG_EDI;
- regSecond = REG_ESI;
-#else
- regFirst = REG_ARG_0;
- regSecond = REG_ARG_1;
-#endif
- }
-
- bool dstIsOnStack = (dstObj->gtOper == GT_ADDR && (dstObj->gtFlags & GTF_ADDR_ONSTACK));
- bool srcIsOnStack = (srcObj->gtOper == GT_ADDR && (srcObj->gtFlags & GTF_ADDR_ONSTACK));
- emitAttr srcType = (varTypeIsGC(srcObj) && !srcIsOnStack) ? EA_BYREF : EA_PTRSIZE;
- emitAttr dstType = (varTypeIsGC(dstObj) && !dstIsOnStack) ? EA_BYREF : EA_PTRSIZE;
-
-#if CPU_USES_BLOCK_MOVE
- // Materialize the trees in the order desired
-
- genComputeReg(treeFirst, genRegMask(regFirst), RegSet::EXACT_REG, RegSet::KEEP_REG, true);
- genComputeReg(treeSecond, genRegMask(regSecond), RegSet::EXACT_REG, RegSet::KEEP_REG, true);
- genRecoverReg(treeFirst, genRegMask(regFirst), RegSet::KEEP_REG);
-
- // Grab ECX because it will be trashed by the helper
- //
- regSet.rsGrabReg(RBM_ECX);
-
- while (blkSize >= TARGET_POINTER_SIZE)
- {
- if (*gcPtrs++ == TYPE_GC_NONE || dstIsOnStack)
- {
-            // Note that we can use movsd even if it is a GC pointer being transferred
- // because the value is not cached anywhere. If we did this in two moves,
- // we would have to make certain we passed the appropriate GC info on to
- // the emitter.
- instGen(INS_movsp);
- }
- else
- {
- // This helper will act like a MOVSD
- // -- inputs EDI and ESI are byrefs
- // -- including incrementing of ESI and EDI by 4
- // -- helper will trash ECX
- //
- regMaskTP argRegs = genRegMask(regFirst) | genRegMask(regSecond);
- regSet.rsLockUsedReg(argRegs);
- genEmitHelperCall(CORINFO_HELP_ASSIGN_BYREF,
- 0, // argSize
- EA_PTRSIZE); // retSize
- regSet.rsUnlockUsedReg(argRegs);
- }
-
- blkSize -= TARGET_POINTER_SIZE;
- }
-
- // "movsd/movsq" as well as CPX_BYREF_ASG modify all three registers
-
- regTracker.rsTrackRegTrash(REG_EDI);
- regTracker.rsTrackRegTrash(REG_ESI);
- regTracker.rsTrackRegTrash(REG_ECX);
-
- gcInfo.gcMarkRegSetNpt(RBM_ESI | RBM_EDI);
-
- /* The emitter won't record CORINFO_HELP_ASSIGN_BYREF in the GC tables as
-       it is an emitNoGChelper. However, we have to let the emitter know that
- the GC liveness has changed. We do this by creating a new label.
- */
-
- noway_assert(emitter::emitNoGChelper(CORINFO_HELP_ASSIGN_BYREF));
-
- genDefineTempLabel(&dummyBB);
-
-#else // !CPU_USES_BLOCK_MOVE
-
-#ifndef _TARGET_ARM_
-// Currently only the ARM implementation is provided
-#error "COPYBLK for non-ARM && non-CPU_USES_BLOCK_MOVE"
-#endif
-
- // Materialize the trees in the order desired
- bool helperUsed;
- regNumber regDst;
- regNumber regSrc;
- regNumber regTemp;
-
- if ((gcPtrCount > 0) && !dstIsOnStack)
- {
- genComputeReg(treeFirst, genRegMask(regFirst), RegSet::EXACT_REG, RegSet::KEEP_REG, true);
- genComputeReg(treeSecond, genRegMask(regSecond), RegSet::EXACT_REG, RegSet::KEEP_REG, true);
- genRecoverReg(treeFirst, genRegMask(regFirst), RegSet::KEEP_REG);
-
-        /* The helper is an Asm-routine that will trash R2, R3 and LR */
- {
- /* Spill any callee-saved registers which are being used */
- regMaskTP spillRegs = RBM_CALLEE_TRASH_NOGC & regSet.rsMaskUsed;
-
- if (spillRegs)
- {
- regSet.rsSpillRegs(spillRegs);
- }
- }
-
- // Grab R2 (aka REG_TMP_1) because it will be trashed by the helper
- // We will also use it as the temp register for our load/store sequences
- //
- assert(REG_R2 == REG_TMP_1);
- regTemp = regSet.rsGrabReg(RBM_R2);
- helperUsed = true;
- }
- else
- {
- genCompIntoFreeReg(treeFirst, (RBM_ALLINT & ~treeSecond->gtRsvdRegs), RegSet::KEEP_REG);
- genCompIntoFreeReg(treeSecond, RBM_ALLINT, RegSet::KEEP_REG);
- genRecoverReg(treeFirst, RBM_ALLINT, RegSet::KEEP_REG);
-
- // Grab any temp register to use for our load/store sequences
- //
- regTemp = regSet.rsGrabReg(RBM_ALLINT);
- helperUsed = false;
- }
- assert(dstObj->gtFlags & GTF_REG_VAL);
- assert(srcObj->gtFlags & GTF_REG_VAL);
-
- regDst = dstObj->gtRegNum;
- regSrc = srcObj->gtRegNum;
-
- assert(regDst != regTemp);
- assert(regSrc != regTemp);
-
- instruction loadIns = ins_Load(TYP_I_IMPL); // INS_ldr
- instruction storeIns = ins_Store(TYP_I_IMPL); // INS_str
-
- size_t offset = 0;
- while (blkSize >= TARGET_POINTER_SIZE)
- {
- CorInfoGCType gcType;
- CorInfoGCType gcTypeNext = TYPE_GC_NONE;
- var_types type = TYP_I_IMPL;
-
-#if FEATURE_WRITE_BARRIER
- gcType = (CorInfoGCType)(*gcPtrs++);
- if (blkSize > TARGET_POINTER_SIZE)
- gcTypeNext = (CorInfoGCType)(*gcPtrs);
-
- if (gcType == TYPE_GC_REF)
- type = TYP_REF;
- else if (gcType == TYPE_GC_BYREF)
- type = TYP_BYREF;
-
- if (helperUsed)
- {
- assert(regDst == REG_ARG_0);
- assert(regSrc == REG_ARG_1);
- assert(regTemp == REG_R2);
- }
-#else
- gcType = TYPE_GC_NONE;
-#endif // FEATURE_WRITE_BARRIER
-
- blkSize -= TARGET_POINTER_SIZE;
-
- emitAttr opSize = emitTypeSize(type);
-
- if (!helperUsed || (gcType == TYPE_GC_NONE))
- {
- getEmitter()->emitIns_R_R_I(loadIns, opSize, regTemp, regSrc, offset);
- getEmitter()->emitIns_R_R_I(storeIns, opSize, regTemp, regDst, offset);
- offset += TARGET_POINTER_SIZE;
-
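-            // Fold the accumulated offset into the base registers (and reset it to zero),
-            // either because the next slot is a GC ref that must go through the byref
-            // helper (which expects the raw addresses in the argument registers), or
-            // because the displacement has grown large.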
- if ((helperUsed && (gcTypeNext != TYPE_GC_NONE)) || ((offset >= 128) && (blkSize > 0)))
- {
- getEmitter()->emitIns_R_I(INS_add, srcType, regSrc, offset);
- getEmitter()->emitIns_R_I(INS_add, dstType, regDst, offset);
- offset = 0;
- }
- }
- else
- {
- assert(offset == 0);
-
- // The helper will act like this:
- // -- inputs R0 and R1 are byrefs
- // -- helper will perform copy from *R1 into *R0
- // -- helper will perform post increment of R0 and R1 by 4
- // -- helper will trash R2
- // -- helper will trash R3
- // -- calling the helper implicitly trashes LR
- //
- assert(helperUsed);
- regMaskTP argRegs = genRegMask(regFirst) | genRegMask(regSecond);
- regSet.rsLockUsedReg(argRegs);
- genEmitHelperCall(CORINFO_HELP_ASSIGN_BYREF,
- 0, // argSize
- EA_PTRSIZE); // retSize
-
- regSet.rsUnlockUsedReg(argRegs);
- regTracker.rsTrackRegMaskTrash(RBM_CALLEE_TRASH_NOGC);
- }
- }
-
- regTracker.rsTrackRegTrash(regDst);
- regTracker.rsTrackRegTrash(regSrc);
- regTracker.rsTrackRegTrash(regTemp);
-
- gcInfo.gcMarkRegSetNpt(genRegMask(regDst) | genRegMask(regSrc));
-
- /* The emitter won't record CORINFO_HELP_ASSIGN_BYREF in the GC tables as
-       it is an emitNoGChelper. However, we have to let the emitter know that
- the GC liveness has changed. We do this by creating a new label.
- */
-
- noway_assert(emitter::emitNoGChelper(CORINFO_HELP_ASSIGN_BYREF));
-
- genDefineTempLabel(&dummyBB);
-
-#endif // !CPU_USES_BLOCK_MOVE
-
- assert(blkSize == 0);
-
- genReleaseReg(dstObj);
- genReleaseReg(srcObj);
-
- genCodeForTree_DONE(tree, REG_NA);
-
-#ifdef _TARGET_ARM_
- if (cpObjOp->IsVolatile())
- {
- // Emit a memory barrier instruction after the CopyBlk
- instGen_MemoryBarrier();
- }
-#endif
-}
-
-//------------------------------------------------------------------------
-// genCodeForBlkOp: Generate code for a block copy or init operation
-//
-// Arguments:
-//    tree    - The block assignment
-//    destReg - The expected destination register
-//
-void CodeGen::genCodeForBlkOp(GenTreePtr tree, regMaskTP destReg)
-{
- genTreeOps oper = tree->OperGet();
- GenTreePtr dest = tree->gtOp.gtOp1;
- GenTreePtr src = tree->gtGetOp2();
- regMaskTP needReg = destReg;
- regMaskTP regs = regSet.rsMaskUsed;
- GenTreePtr opsPtr[3];
- regMaskTP regsPtr[3];
- GenTreePtr destPtr;
- GenTreePtr srcPtrOrVal;
-
- noway_assert(tree->OperIsBlkOp());
-
- bool isCopyBlk = false;
- bool isInitBlk = false;
- bool hasGCpointer = false;
- unsigned blockSize = dest->AsBlk()->gtBlkSize;
- GenTreePtr sizeNode = nullptr;
- bool sizeIsConst = true;
- if (dest->gtOper == GT_DYN_BLK)
- {
- sizeNode = dest->AsDynBlk()->gtDynamicSize;
- sizeIsConst = false;
- }
-
- if (tree->OperIsCopyBlkOp())
- {
- isCopyBlk = true;
- if (dest->gtOper == GT_OBJ)
- {
- if (dest->AsObj()->gtGcPtrCount != 0)
- {
- genCodeForCopyObj(tree, destReg);
- return;
- }
- }
- }
- else
- {
- isInitBlk = true;
- }
-
- // Ensure that we have an address in the CopyBlk case.
- if (isCopyBlk)
- {
- // TODO-1stClassStructs: Allow a lclVar here.
- assert(src->OperIsIndir());
- srcPtrOrVal = src->AsIndir()->Addr();
- }
- else
- {
- srcPtrOrVal = src;
- }
-
-#ifdef _TARGET_ARM_
- if (dest->AsBlk()->IsVolatile())
- {
- // Emit a memory barrier instruction before the InitBlk/CopyBlk
- instGen_MemoryBarrier();
- }
-#endif
- {
- destPtr = dest->AsBlk()->Addr();
- noway_assert(destPtr->TypeGet() == TYP_BYREF || varTypeIsIntegral(destPtr->TypeGet()));
- noway_assert(
- (isCopyBlk && (srcPtrOrVal->TypeGet() == TYP_BYREF || varTypeIsIntegral(srcPtrOrVal->TypeGet()))) ||
- (isInitBlk && varTypeIsIntegral(srcPtrOrVal->TypeGet())));
-
- noway_assert(destPtr && srcPtrOrVal);
-
-#if CPU_USES_BLOCK_MOVE
- regs = isInitBlk ? RBM_EAX : RBM_ESI; // What is the needReg for Val/Src
-
- /* Some special code for block moves/inits for constant sizes */
-
- //
- // Is this a fixed size COPYBLK?
- // or a fixed size INITBLK with a constant init value?
- //
- if ((sizeIsConst) && (isCopyBlk || (srcPtrOrVal->IsCnsIntOrI())))
- {
- size_t length = blockSize;
- size_t initVal = 0;
- instruction ins_P, ins_PR, ins_B;
-
- if (isInitBlk)
- {
- ins_P = INS_stosp;
- ins_PR = INS_r_stosp;
- ins_B = INS_stosb;
-
- /* Properly extend the init constant from a U1 to a U4 */
- initVal = 0xFF & ((unsigned)srcPtrOrVal->gtIntCon.gtIconVal);
-
- /* If it is a non-zero value we have to replicate */
- /* the byte value four times to form the DWORD */
- /* Then we change this new value into the tree-node */
-
- if (initVal)
- {
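-                    // e.g. an init value of 0xAB becomes 0xABABABAB here (and, for block
-                    // lengths greater than 4 on 64-bit targets, 0xABABABABABABABAB below)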
- initVal = initVal | (initVal << 8) | (initVal << 16) | (initVal << 24);
-#ifdef _TARGET_64BIT_
- if (length > 4)
- {
- initVal = initVal | (initVal << 32);
- srcPtrOrVal->gtType = TYP_LONG;
- }
- else
- {
- srcPtrOrVal->gtType = TYP_INT;
- }
-#endif // _TARGET_64BIT_
- }
- srcPtrOrVal->gtIntCon.gtIconVal = initVal;
- }
- else
- {
- ins_P = INS_movsp;
- ins_PR = INS_r_movsp;
- ins_B = INS_movsb;
- }
-
- // Determine if we will be using SSE2
- unsigned movqLenMin = 8;
- unsigned movqLenMax = 24;
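-            // movqLenMin..movqLenMax is the range of block sizes (in bytes) for which we
-            // will consider copying/initializing with 8-byte movq instructions; on 32-bit
-            // targets the bounds are adjusted below based on the block's weight and the
-            // optimization mode.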
-
- bool bWillUseSSE2 = false;
- bool bWillUseOnlySSE2 = false;
- bool bNeedEvaluateCnst = true; // If we only use SSE2, we will just load the constant there.
-
-#ifdef _TARGET_64BIT_
-
-// Until we get SSE2 instructions that move 16 bytes at a time instead of just 8
-// there is no point in wasting space on the bigger instructions
-
-#else // !_TARGET_64BIT_
-
- if (compiler->opts.compCanUseSSE2)
- {
- unsigned curBBweight = compiler->compCurBB->getBBWeight(compiler);
-
- /* Adjust for BB weight */
- if (curBBweight == BB_ZERO_WEIGHT)
- {
- // Don't bother with this optimization in
- // rarely run blocks
- movqLenMax = movqLenMin = 0;
- }
- else if (curBBweight < BB_UNITY_WEIGHT)
- {
- // Be less aggressive when we are inside a conditional
- movqLenMax = 16;
- }
- else if (curBBweight >= (BB_LOOP_WEIGHT * BB_UNITY_WEIGHT) / 2)
- {
- // Be more aggressive when we are inside a loop
- movqLenMax = 48;
- }
-
- if ((compiler->compCodeOpt() == Compiler::FAST_CODE) || isInitBlk)
- {
- // Be more aggressive when optimizing for speed
- // InitBlk uses fewer instructions
- movqLenMax += 16;
- }
-
- if (compiler->compCodeOpt() != Compiler::SMALL_CODE && length >= movqLenMin && length <= movqLenMax)
- {
- bWillUseSSE2 = true;
-
- if ((length % 8) == 0)
- {
- bWillUseOnlySSE2 = true;
- if (isInitBlk && (initVal == 0))
- {
- bNeedEvaluateCnst = false;
- noway_assert((srcPtrOrVal->OperGet() == GT_CNS_INT));
- }
- }
- }
- }
-
-#endif // !_TARGET_64BIT_
-
- const bool bWillTrashRegSrc = (isCopyBlk && !bWillUseOnlySSE2);
- /* Evaluate dest and src/val */
-
- if (tree->gtFlags & GTF_REVERSE_OPS)
- {
- if (bNeedEvaluateCnst)
- {
- genComputeReg(srcPtrOrVal, regs, RegSet::EXACT_REG, RegSet::KEEP_REG, bWillTrashRegSrc);
- }
- genComputeReg(destPtr, RBM_EDI, RegSet::EXACT_REG, RegSet::KEEP_REG, !bWillUseOnlySSE2);
- if (bNeedEvaluateCnst)
- {
- genRecoverReg(srcPtrOrVal, regs, RegSet::KEEP_REG);
- }
- }
- else
- {
- genComputeReg(destPtr, RBM_EDI, RegSet::EXACT_REG, RegSet::KEEP_REG, !bWillUseOnlySSE2);
- if (bNeedEvaluateCnst)
- {
- genComputeReg(srcPtrOrVal, regs, RegSet::EXACT_REG, RegSet::KEEP_REG, bWillTrashRegSrc);
- }
- genRecoverReg(destPtr, RBM_EDI, RegSet::KEEP_REG);
- }
-
- bool bTrashedESI = false;
- bool bTrashedEDI = false;
-
- if (bWillUseSSE2)
- {
- int blkDisp = 0;
- regNumber xmmReg = REG_XMM0;
-
- if (isInitBlk)
- {
- if (initVal)
- {
- getEmitter()->emitIns_R_R(INS_mov_i2xmm, EA_4BYTE, xmmReg, REG_EAX);
- getEmitter()->emitIns_R_R(INS_punpckldq, EA_4BYTE, xmmReg, xmmReg);
- }
- else
- {
- getEmitter()->emitIns_R_R(INS_xorps, EA_8BYTE, xmmReg, xmmReg);
- }
- }
-
- JITLOG_THIS(compiler, (LL_INFO100, "Using XMM instructions for %3d byte %s while compiling %s\n",
- length, isInitBlk ? "initblk" : "copyblk", compiler->info.compFullName));
-
- while (length > 7)
- {
- if (isInitBlk)
- {
- getEmitter()->emitIns_AR_R(INS_movq, EA_8BYTE, xmmReg, REG_EDI, blkDisp);
- }
- else
- {
- getEmitter()->emitIns_R_AR(INS_movq, EA_8BYTE, xmmReg, REG_ESI, blkDisp);
- getEmitter()->emitIns_AR_R(INS_movq, EA_8BYTE, xmmReg, REG_EDI, blkDisp);
- }
- blkDisp += 8;
- length -= 8;
- }
-
- if (length > 0)
- {
- noway_assert(bNeedEvaluateCnst);
- noway_assert(!bWillUseOnlySSE2);
-
- if (isCopyBlk)
- {
- inst_RV_IV(INS_add, REG_ESI, blkDisp, emitActualTypeSize(srcPtrOrVal->TypeGet()));
- bTrashedESI = true;
- }
-
- inst_RV_IV(INS_add, REG_EDI, blkDisp, emitActualTypeSize(destPtr->TypeGet()));
- bTrashedEDI = true;
-
- if (length >= REGSIZE_BYTES)
- {
- instGen(ins_P);
- length -= REGSIZE_BYTES;
- }
- }
- }
- else if (compiler->compCodeOpt() == Compiler::SMALL_CODE)
- {
- /* For small code, we can only use ins_DR to generate fast
- and small code. We also can't use "rep movsb" because
-                we may not be reading and writing the DWORD atomically */
-
- noway_assert(bNeedEvaluateCnst);
-
- goto USE_DR;
- }
- else if (length <= 4 * REGSIZE_BYTES)
- {
- noway_assert(bNeedEvaluateCnst);
-
- while (length >= REGSIZE_BYTES)
- {
- instGen(ins_P);
- length -= REGSIZE_BYTES;
- }
-
- bTrashedEDI = true;
- if (isCopyBlk)
- bTrashedESI = true;
- }
- else
- {
- USE_DR:
- noway_assert(bNeedEvaluateCnst);
-
- /* set ECX to length/REGSIZE_BYTES (in pointer-sized words) */
- genSetRegToIcon(REG_ECX, length / REGSIZE_BYTES, TYP_I_IMPL);
-
- length &= (REGSIZE_BYTES - 1);
-
- instGen(ins_PR);
-
- regTracker.rsTrackRegTrash(REG_ECX);
-
- bTrashedEDI = true;
- if (isCopyBlk)
- bTrashedESI = true;
- }
-
- /* Now take care of the remainder */
- CLANG_FORMAT_COMMENT_ANCHOR;
-
-#ifdef _TARGET_64BIT_
- if (length > 4)
- {
- noway_assert(bNeedEvaluateCnst);
- noway_assert(length < 8);
-
- instGen((isInitBlk) ? INS_stosd : INS_movsd);
- length -= 4;
-
- bTrashedEDI = true;
- if (isCopyBlk)
- bTrashedESI = true;
- }
-
-#endif // _TARGET_64BIT_
-
- if (length)
- {
- noway_assert(bNeedEvaluateCnst);
-
- while (length--)
- {
- instGen(ins_B);
- }
-
- bTrashedEDI = true;
- if (isCopyBlk)
- bTrashedESI = true;
- }
-
- noway_assert(bTrashedEDI == !bWillUseOnlySSE2);
- if (bTrashedEDI)
- regTracker.rsTrackRegTrash(REG_EDI);
- if (bTrashedESI)
- regTracker.rsTrackRegTrash(REG_ESI);
-            // else No need to trash EAX as it wasn't destroyed by the "rep stos"
-
- genReleaseReg(destPtr);
- if (bNeedEvaluateCnst)
- genReleaseReg(srcPtrOrVal);
- }
- else
- {
- //
- // This a variable-sized COPYBLK/INITBLK,
- // or a fixed size INITBLK with a variable init value,
- //
-
-            // In what order should the Dest, Val/Src, and Size be calculated?
-
- compiler->fgOrderBlockOps(tree, RBM_EDI, regs, RBM_ECX, opsPtr, regsPtr); // OUT arguments
-
- noway_assert((isInitBlk && (regs == RBM_EAX)) || (isCopyBlk && (regs == RBM_ESI)));
- genComputeReg(opsPtr[0], regsPtr[0], RegSet::EXACT_REG, RegSet::KEEP_REG, (regsPtr[0] != RBM_EAX));
- genComputeReg(opsPtr[1], regsPtr[1], RegSet::EXACT_REG, RegSet::KEEP_REG, (regsPtr[1] != RBM_EAX));
- if (opsPtr[2] != nullptr)
- {
- genComputeReg(opsPtr[2], regsPtr[2], RegSet::EXACT_REG, RegSet::KEEP_REG, (regsPtr[2] != RBM_EAX));
- }
- genRecoverReg(opsPtr[0], regsPtr[0], RegSet::KEEP_REG);
- genRecoverReg(opsPtr[1], regsPtr[1], RegSet::KEEP_REG);
-
- noway_assert((destPtr->gtFlags & GTF_REG_VAL) && // Dest
- (destPtr->gtRegNum == REG_EDI));
-
- noway_assert((srcPtrOrVal->gtFlags & GTF_REG_VAL) && // Val/Src
- (genRegMask(srcPtrOrVal->gtRegNum) == regs));
-
- if (sizeIsConst)
- {
- inst_RV_IV(INS_mov, REG_ECX, blockSize, EA_PTRSIZE);
- }
- else
- {
- noway_assert((sizeNode->gtFlags & GTF_REG_VAL) && // Size
- (sizeNode->gtRegNum == REG_ECX));
- }
-
- if (isInitBlk)
- instGen(INS_r_stosb);
- else
- instGen(INS_r_movsb);
-
- regTracker.rsTrackRegTrash(REG_EDI);
- regTracker.rsTrackRegTrash(REG_ECX);
-
- if (isCopyBlk)
- regTracker.rsTrackRegTrash(REG_ESI);
-            // else No need to trash EAX as it wasn't destroyed by the "rep stos"
-
- genReleaseReg(opsPtr[0]);
- genReleaseReg(opsPtr[1]);
- if (opsPtr[2] != nullptr)
- {
- genReleaseReg(opsPtr[2]);
- }
- }
-
-#else // !CPU_USES_BLOCK_MOVE
-
-#ifndef _TARGET_ARM_
-// Currently only the ARM implementation is provided
-#error "COPYBLK/INITBLK non-ARM && non-CPU_USES_BLOCK_MOVE"
-#endif
- //
- // Is this a fixed size COPYBLK?
- // or a fixed size INITBLK with a constant init value?
- //
- if (sizeIsConst && (isCopyBlk || (srcPtrOrVal->OperGet() == GT_CNS_INT)))
- {
- GenTreePtr dstOp = destPtr;
- GenTreePtr srcOp = srcPtrOrVal;
- unsigned length = blockSize;
- unsigned fullStoreCount = length / TARGET_POINTER_SIZE;
- unsigned initVal = 0;
- bool useLoop = false;
-
- if (isInitBlk)
- {
- /* Properly extend the init constant from a U1 to a U4 */
- initVal = 0xFF & ((unsigned)srcOp->gtIntCon.gtIconVal);
-
- /* If it is a non-zero value we have to replicate */
- /* the byte value four times to form the DWORD */
- /* Then we store this new value into the tree-node */
-
- if (initVal != 0)
- {
- initVal = initVal | (initVal << 8) | (initVal << 16) | (initVal << 24);
- srcPtrOrVal->gtIntCon.gtIconVal = initVal;
- }
- }
-
- // Will we be using a loop to implement this INITBLK/COPYBLK?
- if ((isCopyBlk && (fullStoreCount >= 8)) || (isInitBlk && (fullStoreCount >= 16)))
- {
- useLoop = true;
- }
-
- regMaskTP usedRegs;
- regNumber regDst;
- regNumber regSrc;
- regNumber regTemp;
-
- /* Evaluate dest and src/val */
-
- if (tree->gtFlags & GTF_REVERSE_OPS)
- {
- genComputeReg(srcOp, (needReg & ~dstOp->gtRsvdRegs), RegSet::ANY_REG, RegSet::KEEP_REG, useLoop);
- assert(srcOp->gtFlags & GTF_REG_VAL);
-
- genComputeReg(dstOp, needReg, RegSet::ANY_REG, RegSet::KEEP_REG, useLoop);
- assert(dstOp->gtFlags & GTF_REG_VAL);
- regDst = dstOp->gtRegNum;
-
- genRecoverReg(srcOp, needReg, RegSet::KEEP_REG);
- regSrc = srcOp->gtRegNum;
- }
- else
- {
- genComputeReg(dstOp, (needReg & ~srcOp->gtRsvdRegs), RegSet::ANY_REG, RegSet::KEEP_REG, useLoop);
- assert(dstOp->gtFlags & GTF_REG_VAL);
-
- genComputeReg(srcOp, needReg, RegSet::ANY_REG, RegSet::KEEP_REG, useLoop);
- assert(srcOp->gtFlags & GTF_REG_VAL);
- regSrc = srcOp->gtRegNum;
-
- genRecoverReg(dstOp, needReg, RegSet::KEEP_REG);
- regDst = dstOp->gtRegNum;
- }
- assert(dstOp->gtFlags & GTF_REG_VAL);
- assert(srcOp->gtFlags & GTF_REG_VAL);
-
- regDst = dstOp->gtRegNum;
- regSrc = srcOp->gtRegNum;
- usedRegs = (genRegMask(regSrc) | genRegMask(regDst));
- bool dstIsOnStack = (dstOp->gtOper == GT_ADDR && (dstOp->gtFlags & GTF_ADDR_ONSTACK));
- emitAttr dstType = (varTypeIsGC(dstOp) && !dstIsOnStack) ? EA_BYREF : EA_PTRSIZE;
- emitAttr srcType;
-
- if (isCopyBlk)
- {
-            // Prefer a low register, but avoid one of the ones we've already grabbed
- regTemp = regSet.rsGrabReg(regSet.rsNarrowHint(regSet.rsRegMaskCanGrab() & ~usedRegs, RBM_LOW_REGS));
- usedRegs |= genRegMask(regTemp);
- bool srcIsOnStack = (srcOp->gtOper == GT_ADDR && (srcOp->gtFlags & GTF_ADDR_ONSTACK));
- srcType = (varTypeIsGC(srcOp) && !srcIsOnStack) ? EA_BYREF : EA_PTRSIZE;
- }
- else
- {
- regTemp = REG_STK;
- srcType = EA_PTRSIZE;
- }
-
- instruction loadIns = ins_Load(TYP_I_IMPL); // INS_ldr
- instruction storeIns = ins_Store(TYP_I_IMPL); // INS_str
-
- int finalOffset;
-
- // Can we emit a small number of ldr/str instructions to implement this INITBLK/COPYBLK?
- if (!useLoop)
- {
- for (unsigned i = 0; i < fullStoreCount; i++)
- {
- if (isCopyBlk)
- {
- getEmitter()->emitIns_R_R_I(loadIns, EA_4BYTE, regTemp, regSrc, i * TARGET_POINTER_SIZE);
- getEmitter()->emitIns_R_R_I(storeIns, EA_4BYTE, regTemp, regDst, i * TARGET_POINTER_SIZE);
- gcInfo.gcMarkRegSetNpt(genRegMask(regTemp));
- regTracker.rsTrackRegTrash(regTemp);
- }
- else
- {
- getEmitter()->emitIns_R_R_I(storeIns, EA_4BYTE, regSrc, regDst, i * TARGET_POINTER_SIZE);
- }
- }
-
- finalOffset = fullStoreCount * TARGET_POINTER_SIZE;
- length -= finalOffset;
- }
- else // We will use a loop to implement this INITBLK/COPYBLK
- {
- unsigned pairStoreLoopCount = fullStoreCount / 2;
-
- // We need a second temp register for CopyBlk
- regNumber regTemp2 = REG_STK;
- if (isCopyBlk)
- {
- // Prefer a low register, but avoid one of the ones we've already grabbed
- regTemp2 =
- regSet.rsGrabReg(regSet.rsNarrowHint(regSet.rsRegMaskCanGrab() & ~usedRegs, RBM_LOW_REGS));
- usedRegs |= genRegMask(regTemp2);
- }
-
- // Pick and initialize the loop counter register
- regNumber regLoopIndex;
- regLoopIndex =
- regSet.rsGrabReg(regSet.rsNarrowHint(regSet.rsRegMaskCanGrab() & ~usedRegs, RBM_LOW_REGS));
- genSetRegToIcon(regLoopIndex, pairStoreLoopCount, TYP_INT);
-
- // Create and define the Basic Block for the loop top
- BasicBlock* loopTopBlock = genCreateTempLabel();
- genDefineTempLabel(loopTopBlock);
-
- // The loop body
- if (isCopyBlk)
- {
- getEmitter()->emitIns_R_R_I(loadIns, EA_4BYTE, regTemp, regSrc, 0);
- getEmitter()->emitIns_R_R_I(loadIns, EA_4BYTE, regTemp2, regSrc, TARGET_POINTER_SIZE);
- getEmitter()->emitIns_R_R_I(storeIns, EA_4BYTE, regTemp, regDst, 0);
- getEmitter()->emitIns_R_R_I(storeIns, EA_4BYTE, regTemp2, regDst, TARGET_POINTER_SIZE);
- getEmitter()->emitIns_R_I(INS_add, srcType, regSrc, 2 * TARGET_POINTER_SIZE);
- gcInfo.gcMarkRegSetNpt(genRegMask(regTemp));
- gcInfo.gcMarkRegSetNpt(genRegMask(regTemp2));
- regTracker.rsTrackRegTrash(regSrc);
- regTracker.rsTrackRegTrash(regTemp);
- regTracker.rsTrackRegTrash(regTemp2);
- }
- else // isInitBlk
- {
- getEmitter()->emitIns_R_R_I(storeIns, EA_4BYTE, regSrc, regDst, 0);
- getEmitter()->emitIns_R_R_I(storeIns, EA_4BYTE, regSrc, regDst, TARGET_POINTER_SIZE);
- }
-
- getEmitter()->emitIns_R_I(INS_add, dstType, regDst, 2 * TARGET_POINTER_SIZE);
- regTracker.rsTrackRegTrash(regDst);
- getEmitter()->emitIns_R_I(INS_sub, EA_4BYTE, regLoopIndex, 1, INS_FLAGS_SET);
- emitJumpKind jmpGTS = genJumpKindForOper(GT_GT, CK_SIGNED);
- inst_JMP(jmpGTS, loopTopBlock);
-
- regTracker.rsTrackRegIntCns(regLoopIndex, 0);
-
- length -= (pairStoreLoopCount * (2 * TARGET_POINTER_SIZE));
-
- if (length & TARGET_POINTER_SIZE)
- {
- if (isCopyBlk)
- {
- getEmitter()->emitIns_R_R_I(loadIns, EA_4BYTE, regTemp, regSrc, 0);
- getEmitter()->emitIns_R_R_I(storeIns, EA_4BYTE, regTemp, regDst, 0);
- }
- else
- {
- getEmitter()->emitIns_R_R_I(storeIns, EA_4BYTE, regSrc, regDst, 0);
- }
- finalOffset = TARGET_POINTER_SIZE;
- length -= TARGET_POINTER_SIZE;
- }
- else
- {
- finalOffset = 0;
- }
- }
-
- if (length & sizeof(short))
- {
- loadIns = ins_Load(TYP_USHORT); // INS_ldrh
- storeIns = ins_Store(TYP_USHORT); // INS_strh
-
- if (isCopyBlk)
- {
- getEmitter()->emitIns_R_R_I(loadIns, EA_2BYTE, regTemp, regSrc, finalOffset);
- getEmitter()->emitIns_R_R_I(storeIns, EA_2BYTE, regTemp, regDst, finalOffset);
- gcInfo.gcMarkRegSetNpt(genRegMask(regTemp));
- regTracker.rsTrackRegTrash(regTemp);
- }
- else
- {
- getEmitter()->emitIns_R_R_I(storeIns, EA_2BYTE, regSrc, regDst, finalOffset);
- }
- length -= sizeof(short);
- finalOffset += sizeof(short);
- }
-
- if (length & sizeof(char))
- {
- loadIns = ins_Load(TYP_UBYTE); // INS_ldrb
- storeIns = ins_Store(TYP_UBYTE); // INS_strb
-
- if (isCopyBlk)
- {
- getEmitter()->emitIns_R_R_I(loadIns, EA_1BYTE, regTemp, regSrc, finalOffset);
- getEmitter()->emitIns_R_R_I(storeIns, EA_1BYTE, regTemp, regDst, finalOffset);
- gcInfo.gcMarkRegSetNpt(genRegMask(regTemp));
- regTracker.rsTrackRegTrash(regTemp);
- }
- else
- {
- getEmitter()->emitIns_R_R_I(storeIns, EA_1BYTE, regSrc, regDst, finalOffset);
- }
- length -= sizeof(char);
- }
- assert(length == 0);
-
- genReleaseReg(dstOp);
- genReleaseReg(srcOp);
- }
- else
- {
- //
- // This a variable-sized COPYBLK/INITBLK,
- // or a fixed size INITBLK with a variable init value,
- //
-
-        // In what order should the Dest, Val/Src, and Size be calculated?
-
- compiler->fgOrderBlockOps(tree, RBM_ARG_0, RBM_ARG_1, RBM_ARG_2, opsPtr, regsPtr); // OUT arguments
-
- genComputeReg(opsPtr[0], regsPtr[0], RegSet::EXACT_REG, RegSet::KEEP_REG);
- genComputeReg(opsPtr[1], regsPtr[1], RegSet::EXACT_REG, RegSet::KEEP_REG);
- if (opsPtr[2] != nullptr)
- {
- genComputeReg(opsPtr[2], regsPtr[2], RegSet::EXACT_REG, RegSet::KEEP_REG);
- }
- genRecoverReg(opsPtr[0], regsPtr[0], RegSet::KEEP_REG);
- genRecoverReg(opsPtr[1], regsPtr[1], RegSet::KEEP_REG);
-
- noway_assert((destPtr->gtFlags & GTF_REG_VAL) && // Dest
- (destPtr->gtRegNum == REG_ARG_0));
-
- noway_assert((srcPtrOrVal->gtFlags & GTF_REG_VAL) && // Val/Src
- (srcPtrOrVal->gtRegNum == REG_ARG_1));
-
- if (sizeIsConst)
- {
- inst_RV_IV(INS_mov, REG_ARG_2, blockSize, EA_PTRSIZE);
- }
- else
- {
- noway_assert((sizeNode->gtFlags & GTF_REG_VAL) && // Size
- (sizeNode->gtRegNum == REG_ARG_2));
- }
-
- regSet.rsLockUsedReg(RBM_ARG_0 | RBM_ARG_1 | RBM_ARG_2);
-
- genEmitHelperCall(isCopyBlk ? CORINFO_HELP_MEMCPY
- /* GT_INITBLK */
- : CORINFO_HELP_MEMSET,
- 0, EA_UNKNOWN);
-
- regTracker.rsTrackRegMaskTrash(RBM_CALLEE_TRASH);
-
- regSet.rsUnlockUsedReg(RBM_ARG_0 | RBM_ARG_1 | RBM_ARG_2);
- genReleaseReg(opsPtr[0]);
- genReleaseReg(opsPtr[1]);
- if (opsPtr[2] != nullptr)
- {
- genReleaseReg(opsPtr[2]);
- }
- }
-
- if (isCopyBlk && dest->AsBlk()->IsVolatile())
- {
- // Emit a memory barrier instruction after the CopyBlk
- instGen_MemoryBarrier();
- }
-#endif // !CPU_USES_BLOCK_MOVE
- }
-}
-BasicBlock dummyBB;
-
-#ifdef _PREFAST_
-#pragma warning(push)
-#pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
-#endif
-void CodeGen::genCodeForTreeSmpOp(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg)
-{
- const genTreeOps oper = tree->OperGet();
- const var_types treeType = tree->TypeGet();
- GenTreePtr op1 = tree->gtOp.gtOp1;
- GenTreePtr op2 = tree->gtGetOp2();
- regNumber reg = DUMMY_INIT(REG_CORRUPT);
- regMaskTP regs = regSet.rsMaskUsed;
- regMaskTP needReg = destReg;
- insFlags flags = tree->gtSetFlags() ? INS_FLAGS_SET : INS_FLAGS_DONT_CARE;
- emitAttr size;
- instruction ins;
- regMaskTP addrReg;
- GenTreePtr opsPtr[3];
- regMaskTP regsPtr[3];
-
-#ifdef DEBUG
- addrReg = 0xDEADCAFE;
-#endif
-
- noway_assert(tree->OperKind() & GTK_SMPOP);
-
- switch (oper)
- {
- case GT_ASG:
- if (tree->OperIsBlkOp())
- {
- genCodeForBlkOp(tree, destReg);
- }
- else
- {
- genCodeForTreeSmpOpAsg(tree);
- }
- return;
-
- case GT_ASG_LSH:
- case GT_ASG_RSH:
- case GT_ASG_RSZ:
- genCodeForAsgShift(tree, destReg, bestReg);
- return;
-
- case GT_ASG_AND:
- case GT_ASG_OR:
- case GT_ASG_XOR:
- case GT_ASG_ADD:
- case GT_ASG_SUB:
- genCodeForTreeSmpBinArithLogAsgOp(tree, destReg, bestReg);
- return;
-
- case GT_CHS:
- addrReg = genMakeAddressable(op1, 0, RegSet::KEEP_REG, true);
-#ifdef _TARGET_XARCH_
- // Note that the specialCase here occurs when the treeType specifies a byte sized operation
- // and we decided to enregister the op1 LclVar in a non-byteable register (ESI or EDI)
- //
- bool specialCase;
- specialCase = false;
- if (op1->gtOper == GT_REG_VAR)
- {
- /* Get hold of the target register */
-
- reg = op1->gtRegVar.gtRegNum;
- if (varTypeIsByte(treeType) && !(genRegMask(reg) & RBM_BYTE_REGS))
- {
- regNumber byteReg = regSet.rsGrabReg(RBM_BYTE_REGS);
-
- inst_RV_RV(INS_mov, byteReg, reg);
- regTracker.rsTrackRegTrash(byteReg);
-
- inst_RV(INS_NEG, byteReg, treeType, emitTypeSize(treeType));
- var_types op1Type = op1->TypeGet();
- instruction wideningIns = ins_Move_Extend(op1Type, true);
- inst_RV_RV(wideningIns, reg, byteReg, op1Type, emitTypeSize(op1Type));
- regTracker.rsTrackRegTrash(reg);
- specialCase = true;
- }
- }
-
- if (!specialCase)
- {
- inst_TT(INS_NEG, op1, 0, 0, emitTypeSize(treeType));
- }
-#else // not _TARGET_XARCH_
- if (op1->gtFlags & GTF_REG_VAL)
- {
- inst_TT_IV(INS_NEG, op1, 0, 0, emitTypeSize(treeType), flags);
- }
- else
- {
- // Fix 388382 ARM JitStress WP7
- var_types op1Type = op1->TypeGet();
- regNumber reg = regSet.rsPickFreeReg();
- inst_RV_TT(ins_Load(op1Type), reg, op1, 0, emitTypeSize(op1Type));
- regTracker.rsTrackRegTrash(reg);
- inst_RV_IV(INS_NEG, reg, 0, emitTypeSize(treeType), flags);
- inst_TT_RV(ins_Store(op1Type), op1, reg, 0, emitTypeSize(op1Type));
- }
-#endif
- if (op1->gtFlags & GTF_REG_VAL)
- regTracker.rsTrackRegTrash(op1->gtRegNum);
- genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);
-
- genCodeForTreeSmpOpAsg_DONE_ASSG(tree, addrReg, tree->gtRegNum, /* ovfl */ false);
- return;
-
- case GT_AND:
- case GT_OR:
- case GT_XOR:
- case GT_ADD:
- case GT_SUB:
- case GT_MUL:
- genCodeForTreeSmpBinArithLogOp(tree, destReg, bestReg);
- return;
-
- case GT_UMOD:
- genCodeForUnsignedMod(tree, destReg, bestReg);
- return;
-
- case GT_MOD:
- genCodeForSignedMod(tree, destReg, bestReg);
- return;
-
- case GT_UDIV:
- genCodeForUnsignedDiv(tree, destReg, bestReg);
- return;
-
- case GT_DIV:
- genCodeForSignedDiv(tree, destReg, bestReg);
- return;
-
- case GT_LSH:
- case GT_RSH:
- case GT_RSZ:
- genCodeForShift(tree, destReg, bestReg);
- return;
-
- case GT_NEG:
- case GT_NOT:
-
- /* Generate the operand into some register */
-
- genCompIntoFreeReg(op1, needReg, RegSet::FREE_REG);
- noway_assert(op1->gtFlags & GTF_REG_VAL);
-
- reg = op1->gtRegNum;
-
- /* Negate/reverse the value in the register */
-
- inst_RV((oper == GT_NEG) ? INS_NEG : INS_NOT, reg, treeType);
-
- /* The register is now trashed */
-
- regTracker.rsTrackRegTrash(reg);
-
- genCodeForTree_DONE(tree, reg);
- return;
-
- case GT_IND:
- case GT_NULLCHECK: // At this point, explicit null checks are just like inds...
-
- /* Make sure the operand is addressable */
-
- addrReg = genMakeAddressable(tree, RBM_ALLINT, RegSet::KEEP_REG, true);
-
- genDoneAddressable(tree, addrReg, RegSet::KEEP_REG);
-
- /* Figure out the size of the value being loaded */
-
- size = EA_ATTR(genTypeSize(tree->gtType));
-
- /* Pick a register for the value */
-
- if (needReg == RBM_ALLINT && bestReg == 0)
- {
- /* Absent a better suggestion, pick a useless register */
-
- bestReg = regSet.rsExcludeHint(regSet.rsRegMaskFree(), ~regTracker.rsUselessRegs());
- }
-
- reg = regSet.rsPickReg(needReg, bestReg);
-
- if (op1->IsCnsIntOrI() && op1->IsIconHandle(GTF_ICON_TLS_HDL))
- {
- noway_assert(size == EA_PTRSIZE);
- getEmitter()->emitIns_R_C(ins_Load(TYP_I_IMPL), EA_PTRSIZE, reg, FLD_GLOBAL_FS,
- (int)op1->gtIntCon.gtIconVal);
- }
- else
- {
- /* Generate "mov reg, [addr]" or "movsx/movzx reg, [addr]" */
-
- inst_mov_RV_ST(reg, tree);
- }
-
-#ifdef _TARGET_ARM_
- if (tree->gtFlags & GTF_IND_VOLATILE)
- {
- // Emit a memory barrier instruction after the load
- instGen_MemoryBarrier();
- }
-#endif
-
- /* Note the new contents of the register we used */
-
- regTracker.rsTrackRegTrash(reg);
-
-#ifdef DEBUG
- /* Update the live set of register variables */
- if (compiler->opts.varNames)
- genUpdateLife(tree);
-#endif
-
- /* Now we can update the register pointer information */
-
- // genDoneAddressable(tree, addrReg, RegSet::KEEP_REG);
- gcInfo.gcMarkRegPtrVal(reg, treeType);
-
- genCodeForTree_DONE_LIFE(tree, reg);
- return;
-
- case GT_CAST:
-
- genCodeForNumericCast(tree, destReg, bestReg);
- return;
-
- case GT_JTRUE:
-
- /* Is this a test of a relational operator? */
-
- if (op1->OperIsCompare())
- {
- /* Generate the conditional jump */
-
- genCondJump(op1);
-
- genUpdateLife(tree);
- return;
- }
-
-#ifdef DEBUG
- compiler->gtDispTree(tree);
-#endif
- NO_WAY("ISSUE: can we ever have a jumpCC without a compare node?");
- break;
-
- case GT_SWITCH:
- genCodeForSwitch(tree);
- return;
-
- case GT_RETFILT:
- noway_assert(tree->gtType == TYP_VOID || op1 != 0);
- if (op1 == 0) // endfinally
- {
- reg = REG_NA;
-
-#ifdef _TARGET_XARCH_
- /* Return using a pop-jmp sequence. As the "try" block calls
- the finally with a jmp, this leaves the x86 call-ret stack
- balanced in the normal flow of path. */
-
- noway_assert(isFramePointerRequired());
- inst_RV(INS_pop_hide, REG_EAX, TYP_I_IMPL);
- inst_RV(INS_i_jmp, REG_EAX, TYP_I_IMPL);
-#elif defined(_TARGET_ARM_)
-// Nothing needed for ARM
-#else
- NYI("TARGET");
-#endif
- }
- else // endfilter
- {
- genComputeReg(op1, RBM_INTRET, RegSet::EXACT_REG, RegSet::FREE_REG);
- noway_assert(op1->gtFlags & GTF_REG_VAL);
- noway_assert(op1->gtRegNum == REG_INTRET);
- /* The return value has now been computed */
- reg = op1->gtRegNum;
-
- /* Return */
- instGen_Return(0);
- }
-
- genCodeForTree_DONE(tree, reg);
- return;
-
- case GT_RETURN:
-
- // TODO: this should be done AFTER we called exit mon so that
- // we are sure that we don't have to keep 'this' alive
-
- if (compiler->info.compCallUnmanaged && (compiler->compCurBB == compiler->genReturnBB))
- {
- /* either it's an "empty" statement or the return statement
- of a synchronized method
- */
-
- genPInvokeMethodEpilog();
- }
-
- /* Is there a return value and/or an exit statement? */
-
- if (op1)
- {
- if (op1->gtType == TYP_VOID)
- {
- // We're returning nothing, just generate the block (shared epilog calls).
- genCodeForTree(op1, 0);
- }
-#ifdef _TARGET_ARM_
- else if (op1->gtType == TYP_STRUCT)
- {
- if (op1->gtOper == GT_CALL)
- {
- // We have a return call() because we failed to tail call.
- // In any case, just generate the call and be done.
- assert(compiler->IsHfa(op1));
- genCodeForCall(op1, true);
- genMarkTreeInReg(op1, REG_FLOATRET);
- }
- else
- {
- assert(op1->gtOper == GT_LCL_VAR);
- assert(compiler->IsHfa(compiler->lvaGetStruct(op1->gtLclVarCommon.gtLclNum)));
- genLoadIntoFltRetRegs(op1);
- }
- }
- else if (op1->TypeGet() == TYP_FLOAT)
- {
- // This can only occur when we are returning a non-HFA struct
- // that is composed of a single float field and we performed
- // struct promotion and enregistered the float field.
- //
- genComputeReg(op1, 0, RegSet::ANY_REG, RegSet::FREE_REG);
- getEmitter()->emitIns_R_R(INS_vmov_f2i, EA_4BYTE, REG_INTRET, op1->gtRegNum);
- }
-#endif // _TARGET_ARM_
- else
- {
- // we can now go through this code for compiler->genReturnBB. I've regularized all the code.
-
- // noway_assert(compiler->compCurBB != compiler->genReturnBB);
-
- noway_assert(op1->gtType != TYP_VOID);
-
- /* Generate the return value into the return register */
-
- genComputeReg(op1, RBM_INTRET, RegSet::EXACT_REG, RegSet::FREE_REG);
-
- /* The result must now be in the return register */
-
- noway_assert(op1->gtFlags & GTF_REG_VAL);
- noway_assert(op1->gtRegNum == REG_INTRET);
- }
-
- /* The return value has now been computed */
-
- reg = op1->gtRegNum;
-
- genCodeForTree_DONE(tree, reg);
- }
-
-#ifdef PROFILING_SUPPORTED
- // The profiling hook does not trash registers, so it's safe to call after we emit the code for
- // the GT_RETURN tree.
-
- if (compiler->compCurBB == compiler->genReturnBB)
- {
- genProfilingLeaveCallback();
- }
-#endif
-#ifdef DEBUG
- if (compiler->opts.compStackCheckOnRet)
- {
- noway_assert(compiler->lvaReturnEspCheck != 0xCCCCCCCC &&
- compiler->lvaTable[compiler->lvaReturnEspCheck].lvDoNotEnregister &&
- compiler->lvaTable[compiler->lvaReturnEspCheck].lvOnFrame);
- getEmitter()->emitIns_S_R(INS_cmp, EA_PTRSIZE, REG_SPBASE, compiler->lvaReturnEspCheck, 0);
-
- BasicBlock* esp_check = genCreateTempLabel();
- emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED);
- inst_JMP(jmpEqual, esp_check);
- getEmitter()->emitIns(INS_BREAKPOINT);
- genDefineTempLabel(esp_check);
- }
-#endif
- return;
-
- case GT_COMMA:
-
- if (tree->gtFlags & GTF_REVERSE_OPS)
- {
- if (tree->gtType == TYP_VOID)
- {
- genEvalSideEffects(op2);
- genUpdateLife(op2);
- genEvalSideEffects(op1);
- genUpdateLife(tree);
- return;
- }
-
- // Generate op2
- genCodeForTree(op2, needReg);
- genUpdateLife(op2);
-
- noway_assert(op2->gtFlags & GTF_REG_VAL);
-
- regSet.rsMarkRegUsed(op2);
-
- // Do side effects of op1
- genEvalSideEffects(op1);
-
- // Recover op2 if spilled
- genRecoverReg(op2, RBM_NONE, RegSet::KEEP_REG);
-
- regSet.rsMarkRegFree(genRegMask(op2->gtRegNum));
-
- // set gc info if we need so
- gcInfo.gcMarkRegPtrVal(op2->gtRegNum, treeType);
-
- genUpdateLife(tree);
- genCodeForTree_DONE(tree, op2->gtRegNum);
-
- return;
- }
- else
- {
- noway_assert((tree->gtFlags & GTF_REVERSE_OPS) == 0);
-
- /* Generate side effects of the first operand */
-
- genEvalSideEffects(op1);
- genUpdateLife(op1);
-
- /* Is the value of the second operand used? */
-
- if (tree->gtType == TYP_VOID)
- {
- /* The right operand produces no result. The morpher is
- responsible for resetting the type of GT_COMMA nodes
- to TYP_VOID if op2 isn't meant to yield a result. */
-
- genEvalSideEffects(op2);
- genUpdateLife(tree);
- return;
- }
-
- /* Generate the second operand, i.e. the 'real' value */
-
- genCodeForTree(op2, needReg);
- noway_assert(op2->gtFlags & GTF_REG_VAL);
-
- /* The result of 'op2' is also the final result */
-
- reg = op2->gtRegNum;
-
- /* Remember whether we set the flags */
-
- tree->gtFlags |= (op2->gtFlags & GTF_ZSF_SET);
-
- genCodeForTree_DONE(tree, reg);
- return;
- }
-
- case GT_BOX:
- genCodeForTree(op1, needReg);
- noway_assert(op1->gtFlags & GTF_REG_VAL);
-
- /* The result of 'op1' is also the final result */
-
- reg = op1->gtRegNum;
-
- /* Remember whether we set the flags */
-
- tree->gtFlags |= (op1->gtFlags & GTF_ZSF_SET);
-
- genCodeForTree_DONE(tree, reg);
- return;
-
- case GT_QMARK:
-
- genCodeForQmark(tree, destReg, bestReg);
- return;
-
- case GT_NOP:
-
-#if OPT_BOOL_OPS
- if (op1 == NULL)
- return;
-#endif
-
- /* Generate the operand into some register */
-
- genCodeForTree(op1, needReg);
-
- /* The result is the same as the operand */
-
- reg = op1->gtRegNum;
-
- genCodeForTree_DONE(tree, reg);
- return;
-
- case GT_INTRINSIC:
-
- switch (tree->gtIntrinsic.gtIntrinsicId)
- {
- case CORINFO_INTRINSIC_Round:
- {
- noway_assert(tree->gtType == TYP_INT);
-
-#if FEATURE_STACK_FP_X87
- genCodeForTreeFlt(op1);
-
- /* Store the FP value into the temp */
- TempDsc* temp = compiler->tmpGetTemp(TYP_INT);
-
- FlatFPX87_MoveToTOS(&compCurFPState, op1->gtRegNum);
- FlatFPX87_Kill(&compCurFPState, op1->gtRegNum);
- inst_FS_ST(INS_fistp, EA_4BYTE, temp, 0);
-
- reg = regSet.rsPickReg(needReg, bestReg);
- regTracker.rsTrackRegTrash(reg);
-
- inst_RV_ST(INS_mov, reg, temp, 0, TYP_INT);
-
- compiler->tmpRlsTemp(temp);
-#else
- genCodeForTreeFloat(tree, needReg, bestReg);
- return;
-#endif
- }
- break;
-
- default:
- noway_assert(!"unexpected math intrinsic");
- }
-
- genCodeForTree_DONE(tree, reg);
- return;
-
- case GT_LCLHEAP:
-
- reg = genLclHeap(op1);
- genCodeForTree_DONE(tree, reg);
- return;
-
- case GT_EQ:
- case GT_NE:
- case GT_LT:
- case GT_LE:
- case GT_GE:
- case GT_GT:
- genCodeForRelop(tree, destReg, bestReg);
- return;
-
- case GT_ADDR:
-
- genCodeForTreeSmpOp_GT_ADDR(tree, destReg, bestReg);
- return;
-
-#ifdef _TARGET_XARCH_
- case GT_LOCKADD:
-
- // This is for a locked add operation. We know that the resulting value doesn't "go" anywhere.
- // For reference, op1 is the location. op2 is the addend or the value.
- if (op2->OperIsConst())
- {
- noway_assert(op2->TypeGet() == TYP_INT);
- ssize_t cns = op2->gtIntCon.gtIconVal;
-
- genComputeReg(op1, RBM_NONE, RegSet::ANY_REG, RegSet::KEEP_REG);
- switch (cns)
- {
- case 1:
- instGen(INS_lock);
- instEmit_RM(INS_inc, op1, op1, 0);
- break;
- case -1:
- instGen(INS_lock);
- instEmit_RM(INS_dec, op1, op1, 0);
- break;
- default:
- assert((int)cns == cns); // By test above for AMD64.
- instGen(INS_lock);
- inst_AT_IV(INS_add, EA_4BYTE, op1, (int)cns, 0);
- break;
- }
- genReleaseReg(op1);
- }
- else
- {
- // non constant addend means it needs to go into a register.
- ins = INS_add;
- goto LockBinOpCommon;
- }
-
- genFlagsEqualToNone(); // We didn't compute a result into a register.
- genUpdateLife(tree); // We didn't compute an operand into anything.
- return;
-
- case GT_XADD:
- ins = INS_xadd;
- goto LockBinOpCommon;
- case GT_XCHG:
- ins = INS_xchg;
- goto LockBinOpCommon;
- LockBinOpCommon:
- {
- // Compute the second operand into a register. xadd and xchg are r/m32, r32. So even if op2
- // is a constant, it needs to be in a register. This should be the output register if
- // possible.
- //
- // For reference, gtOp1 is the location. gtOp2 is the addend or the value.
-
- GenTreePtr location = op1;
- GenTreePtr value = op2;
-
- // Again, a friendly reminder. IL calling convention is left to right.
- if (tree->gtFlags & GTF_REVERSE_OPS)
- {
- // The atomic operations destroy this argument, so force it into a scratch register
- reg = regSet.rsPickFreeReg();
- genComputeReg(value, genRegMask(reg), RegSet::EXACT_REG, RegSet::KEEP_REG);
-
- // Must evaluate location into a register
- genCodeForTree(location, needReg, RBM_NONE);
- assert(location->gtFlags & GTF_REG_VAL);
- regSet.rsMarkRegUsed(location);
- regSet.rsLockUsedReg(genRegMask(location->gtRegNum));
- genRecoverReg(value, RBM_NONE, RegSet::KEEP_REG);
- regSet.rsUnlockUsedReg(genRegMask(location->gtRegNum));
-
- if (ins != INS_xchg)
- {
- // xchg implies the lock prefix, but xadd and add require it.
- instGen(INS_lock);
- }
- instEmit_RM_RV(ins, EA_4BYTE, location, reg, 0);
- genReleaseReg(value);
- regTracker.rsTrackRegTrash(reg);
- genReleaseReg(location);
- }
- else
- {
- regMaskTP addrReg;
- if (genMakeIndAddrMode(location, tree, false, /* not for LEA */
- needReg, RegSet::KEEP_REG, &addrReg))
- {
- genUpdateLife(location);
-
- reg = regSet.rsPickFreeReg();
- genComputeReg(value, genRegMask(reg), RegSet::EXACT_REG, RegSet::KEEP_REG);
- addrReg = genKeepAddressable(location, addrReg, genRegMask(reg));
-
- if (ins != INS_xchg)
- {
- // xchg implies the lock prefix, but xadd and add require it.
- instGen(INS_lock);
- }
-
- // instEmit_RM_RV(ins, EA_4BYTE, location, reg, 0);
- // inst_TT_RV(ins, location, reg);
- sched_AM(ins, EA_4BYTE, reg, false, location, 0);
-
- genReleaseReg(value);
- regTracker.rsTrackRegTrash(reg);
- genDoneAddressable(location, addrReg, RegSet::KEEP_REG);
- }
- else
- {
-                    // Must evaluate location into a register.
- genCodeForTree(location, needReg, RBM_NONE);
-                    assert(location->gtFlags & GTF_REG_VAL);
- regSet.rsMarkRegUsed(location);
-
- // xadd destroys this argument, so force it into a scratch register
- reg = regSet.rsPickFreeReg();
- genComputeReg(value, genRegMask(reg), RegSet::EXACT_REG, RegSet::KEEP_REG);
- regSet.rsLockUsedReg(genRegMask(value->gtRegNum));
- genRecoverReg(location, RBM_NONE, RegSet::KEEP_REG);
- regSet.rsUnlockUsedReg(genRegMask(value->gtRegNum));
-
- if (ins != INS_xchg)
- {
- // xchg implies the lock prefix, but xadd and add require it.
- instGen(INS_lock);
- }
-
- instEmit_RM_RV(ins, EA_4BYTE, location, reg, 0);
-
- genReleaseReg(value);
- regTracker.rsTrackRegTrash(reg);
- genReleaseReg(location);
- }
- }
-
- // The flags are equal to the target of the tree (i.e. the result of the add), not to the
- // result in the register. If tree is actually GT_IND->GT_ADDR->GT_LCL_VAR, we could use
- // that information to set the flags. Doesn't seem like there is a good reason for that.
- // Therefore, trash the flags.
- genFlagsEqualToNone();
-
- if (ins == INS_add)
- {
- // If the operator was add, then we were called from the GT_LOCKADD
- // case. In that case we don't use the result, so we don't need to
- // update anything.
- genUpdateLife(tree);
- }
- else
- {
- genCodeForTree_DONE(tree, reg);
- }
- }
- return;
-
-#else // !_TARGET_XARCH_
-
- case GT_LOCKADD:
- case GT_XADD:
- case GT_XCHG:
-
- NYI_ARM("LOCK instructions");
-#endif
-
- case GT_ARR_LENGTH:
- {
- // Make the corresponding ind(a + c) node, and do codegen for that.
- GenTreePtr addr = compiler->gtNewOperNode(GT_ADD, TYP_BYREF, tree->gtArrLen.ArrRef(),
- compiler->gtNewIconNode(tree->AsArrLen()->ArrLenOffset()));
- tree->SetOper(GT_IND);
- tree->gtFlags |= GTF_IND_ARR_LEN; // Record that this node represents an array length expression.
- assert(tree->TypeGet() == TYP_INT);
- tree->gtOp.gtOp1 = addr;
- genCodeForTree(tree, destReg, bestReg);
- return;
- }
-
- case GT_OBJ:
- // All GT_OBJ nodes must have been morphed prior to this.
- noway_assert(!"Should not see a GT_OBJ node during CodeGen.");
-
- default:
-#ifdef DEBUG
- compiler->gtDispTree(tree);
-#endif
- noway_assert(!"unexpected unary/binary operator");
- } // end switch (oper)
-
- unreached();
-}
-#ifdef _PREFAST_
-#pragma warning(pop) // End suppress PREFast warning about overly large function
-#endif
-
-regNumber CodeGen::genIntegerCast(GenTree* tree, regMaskTP needReg, regMaskTP bestReg)
-{
- instruction ins;
- emitAttr size;
- bool unsv;
- bool andv = false;
- regNumber reg;
- GenTreePtr op1 = tree->gtOp.gtOp1->gtEffectiveVal();
- var_types dstType = tree->CastToType();
- var_types srcType = op1->TypeGet();
-
- if (genTypeSize(srcType) < genTypeSize(dstType))
- {
- // Widening cast
-
- /* we need the source size */
-
- size = EA_ATTR(genTypeSize(srcType));
-
- noway_assert(size < EA_PTRSIZE);
-
- unsv = varTypeIsUnsigned(srcType);
- ins = ins_Move_Extend(srcType, op1->InReg());
-
- /*
- Special case: for a cast of byte to char we first
- have to expand the byte (w/ sign extension), then
- mask off the high bits.
- Use 'movsx' followed by 'and'
- */
- if (!unsv && varTypeIsUnsigned(dstType) && genTypeSize(dstType) < EA_4BYTE)
- {
- noway_assert(genTypeSize(dstType) == EA_2BYTE && size == EA_1BYTE);
- andv = true;
- }
- }
- else
- {
- // Narrowing cast, or sign-changing cast
-
- noway_assert(genTypeSize(srcType) >= genTypeSize(dstType));
-
- size = EA_ATTR(genTypeSize(dstType));
-
- unsv = varTypeIsUnsigned(dstType);
- ins = ins_Move_Extend(dstType, op1->InReg());
- }
-
- noway_assert(size < EA_PTRSIZE);
-
-    // Set bestReg to the same register as op1 if op1 is a regVar and is available
- if (op1->InReg())
- {
- regMaskTP op1RegMask = genRegMask(op1->gtRegNum);
- if ((((op1RegMask & bestReg) != 0) || (bestReg == 0)) && ((op1RegMask & regSet.rsRegMaskFree()) != 0))
- {
- bestReg = op1RegMask;
- }
- }
-
- /* Is the value sitting in a non-byte-addressable register? */
-
- if (op1->InReg() && (size == EA_1BYTE) && !isByteReg(op1->gtRegNum))
- {
- if (unsv)
- {
-            // for unsigned values we can AND, so it need not be a byte register
-
- reg = regSet.rsPickReg(needReg, bestReg);
-
- ins = INS_AND;
- }
- else
- {
- /* Move the value into a byte register */
-
- reg = regSet.rsGrabReg(RBM_BYTE_REGS);
- }
-
- if (reg != op1->gtRegNum)
- {
- /* Move the value into that register */
-
- regTracker.rsTrackRegCopy(reg, op1->gtRegNum);
- inst_RV_RV(INS_mov, reg, op1->gtRegNum, srcType);
-
- /* The value has a new home now */
-
- op1->gtRegNum = reg;
- }
- }
- else
- {
- /* Pick a register for the value (general case) */
-
- reg = regSet.rsPickReg(needReg, bestReg);
-
- // if we (might) need to set the flags and the value is in the same register
- // and we have an unsigned value then use AND instead of MOVZX
- if (tree->gtSetFlags() && unsv && op1->InReg() && (op1->gtRegNum == reg))
- {
-#ifdef _TARGET_X86_
- noway_assert(ins == INS_movzx);
-#endif
- ins = INS_AND;
- }
- }
-
- if (ins == INS_AND)
- {
- noway_assert(andv == false && unsv);
-
-        /* Generate "and reg, MASK" */
-
- insFlags flags = tree->gtSetFlags() ? INS_FLAGS_SET : INS_FLAGS_DONT_CARE;
- inst_RV_IV(INS_AND, reg, (size == EA_1BYTE) ? 0xFF : 0xFFFF, EA_4BYTE, flags);
-
- if (tree->gtSetFlags())
- genFlagsEqualToReg(tree, reg);
- }
- else
- {
-#ifdef _TARGET_XARCH_
- noway_assert(ins == INS_movsx || ins == INS_movzx);
-#endif
-
- /* Generate "movsx/movzx reg, [addr]" */
-
- inst_RV_ST(ins, size, reg, op1);
-
- /* Mask off high bits for cast from byte to char */
-
- if (andv)
- {
-#ifdef _TARGET_XARCH_
- noway_assert(genTypeSize(dstType) == 2 && ins == INS_movsx);
-#endif
- insFlags flags = tree->gtSetFlags() ? INS_FLAGS_SET : INS_FLAGS_DONT_CARE;
- inst_RV_IV(INS_AND, reg, 0xFFFF, EA_4BYTE, flags);
-
- if (tree->gtSetFlags())
- genFlagsEqualToReg(tree, reg);
- }
- }
-
- regTracker.rsTrackRegTrash(reg);
- return reg;
-}
-
-void CodeGen::genCodeForNumericCast(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg)
-{
- GenTreePtr op1 = tree->gtOp.gtOp1;
- var_types dstType = tree->CastToType();
- var_types baseType = TYP_INT;
- regNumber reg = DUMMY_INIT(REG_CORRUPT);
- regMaskTP needReg = destReg;
- regMaskTP addrReg;
- emitAttr size;
- BOOL unsv;
-
-    /*
-     * Constant casts should have been folded earlier.
-     * If the constant is not finite, don't bother.
-     * We don't do this optimization for debug code or when optimizations are disabled.
-     */
-
- noway_assert((op1->gtOper != GT_CNS_INT && op1->gtOper != GT_CNS_LNG && op1->gtOper != GT_CNS_DBL) ||
- tree->gtOverflow() || (op1->gtOper == GT_CNS_DBL && !_finite(op1->gtDblCon.gtDconVal)) ||
- !compiler->opts.OptEnabled(CLFLG_CONSTANTFOLD));
-
- noway_assert(dstType != TYP_VOID);
-
- /* What type are we casting from? */
-
- switch (op1->TypeGet())
- {
- case TYP_LONG:
-
- /* Special case: the long is generated via the mod of long
- with an int. This is really an int and need not be
- converted to a reg pair. NOTE: the flag only indicates
-               that this is a cast to TYP_INT, it hasn't actually
- verified the second operand of the MOD! */
-
- if (((op1->gtOper == GT_MOD) || (op1->gtOper == GT_UMOD)) && (op1->gtFlags & GTF_MOD_INT_RESULT))
- {
-
- /* Verify that the op2 of the mod node is
- 1) An integer tree, or
- 2) A long constant that is small enough to fit in an integer
- */
-
- GenTreePtr modop2 = op1->gtOp.gtOp2;
- if ((genActualType(modop2->gtType) == TYP_INT) ||
- ((modop2->gtOper == GT_CNS_LNG) && (modop2->gtLngCon.gtLconVal == (int)modop2->gtLngCon.gtLconVal)))
- {
- genCodeForTree(op1, destReg, bestReg);
-
-#ifdef _TARGET_64BIT_
- reg = op1->gtRegNum;
-#else // _TARGET_64BIT_
- reg = genRegPairLo(op1->gtRegPair);
-#endif //_TARGET_64BIT_
-
- genCodeForTree_DONE(tree, reg);
- return;
- }
- }
-
- /* Make the operand addressable. When gtOverflow() is true,
- hold on to the addrReg as we will need it to access the higher dword */
-
- op1 = genCodeForCommaTree(op1); // Strip off any commas (necessary, since we seem to generate code for op1
- // twice!)
- // See, e.g., the TYP_INT case below...
-
- addrReg = genMakeAddressable2(op1, 0, tree->gtOverflow() ? RegSet::KEEP_REG : RegSet::FREE_REG, false);
-
- /* Load the lower half of the value into some register */
-
- if (op1->gtFlags & GTF_REG_VAL)
- {
- /* Can we simply use the low part of the value? */
- reg = genRegPairLo(op1->gtRegPair);
-
- if (tree->gtOverflow())
- goto REG_OK;
-
- regMaskTP loMask;
- loMask = genRegMask(reg);
- if (loMask & regSet.rsRegMaskFree())
- bestReg = loMask;
- }
-
- // for cast overflow we need to preserve addrReg for testing the hiDword
- // so we lock it to prevent regSet.rsPickReg from picking it.
- if (tree->gtOverflow())
- regSet.rsLockUsedReg(addrReg);
-
- reg = regSet.rsPickReg(needReg, bestReg);
-
- if (tree->gtOverflow())
- regSet.rsUnlockUsedReg(addrReg);
-
- noway_assert(genStillAddressable(op1));
-
- REG_OK:
- if (((op1->gtFlags & GTF_REG_VAL) == 0) || (reg != genRegPairLo(op1->gtRegPair)))
- {
- /* Generate "mov reg, [addr-mode]" */
- inst_RV_TT(ins_Load(TYP_INT), reg, op1);
- }
-
- /* conv.ovf.i8i4, or conv.ovf.u8u4 */
-
- if (tree->gtOverflow())
- {
- regNumber hiReg = (op1->gtFlags & GTF_REG_VAL) ? genRegPairHi(op1->gtRegPair) : REG_NA;
-
- emitJumpKind jmpNotEqual = genJumpKindForOper(GT_NE, CK_SIGNED);
- emitJumpKind jmpLTS = genJumpKindForOper(GT_LT, CK_SIGNED);
-
- switch (dstType)
- {
- case TYP_INT:
- // conv.ovf.i8.i4
- /* Generate the following sequence
-
- test loDWord, loDWord // set flags
- jl neg
- pos: test hiDWord, hiDWord // set flags
- jne ovf
- jmp done
- neg: cmp hiDWord, 0xFFFFFFFF
- jne ovf
- done:
-
- */
-
- instGen_Compare_Reg_To_Zero(EA_4BYTE, reg);
- if (tree->gtFlags & GTF_UNSIGNED) // conv.ovf.u8.i4 (i4 > 0 and upper bits 0)
- {
- genJumpToThrowHlpBlk(jmpLTS, SCK_OVERFLOW);
- goto UPPER_BITS_ZERO;
- }
-
-#if CPU_LOAD_STORE_ARCH
- // This is tricky.
- // We will generate code like
- // if (...)
- // {
- // ...
- // }
- // else
- // {
- // ...
- // }
-                    // We load the tree op1 into regs when we generate code for the if clause.
-                    // When we generate the else clause, we see the tree is already loaded into a reg, and
-                    // start using it directly.
-                    // However, when the code runs, we may execute the else clause without going through
-                    // the if clause.
- //
- genCodeForTree(op1, 0);
-#endif
-
- BasicBlock* neg;
- BasicBlock* done;
-
- neg = genCreateTempLabel();
- done = genCreateTempLabel();
-
- // Is the loDWord positive or negative
- inst_JMP(jmpLTS, neg);
-
- // If loDWord is positive, hiDWord should be 0 (sign extended loDWord)
-
- if (hiReg < REG_STK)
- {
- instGen_Compare_Reg_To_Zero(EA_4BYTE, hiReg);
- }
- else
- {
- inst_TT_IV(INS_cmp, op1, 0x00000000, 4);
- }
-
- genJumpToThrowHlpBlk(jmpNotEqual, SCK_OVERFLOW);
- inst_JMP(EJ_jmp, done);
-
- // If loDWord is negative, hiDWord should be -1 (sign extended loDWord)
-
- genDefineTempLabel(neg);
-
- if (hiReg < REG_STK)
- {
- inst_RV_IV(INS_cmp, hiReg, 0xFFFFFFFFL, EA_4BYTE);
- }
- else
- {
- inst_TT_IV(INS_cmp, op1, 0xFFFFFFFFL, 4);
- }
- genJumpToThrowHlpBlk(jmpNotEqual, SCK_OVERFLOW);
-
- // Done
-
- genDefineTempLabel(done);
-
- break;
-
- case TYP_UINT: // conv.ovf.u8u4
- UPPER_BITS_ZERO:
- // Just check that the upper DWord is 0
-
- if (hiReg < REG_STK)
- {
- instGen_Compare_Reg_To_Zero(EA_4BYTE, hiReg); // set flags
- }
- else
- {
- inst_TT_IV(INS_cmp, op1, 0, 4);
- }
-
- genJumpToThrowHlpBlk(jmpNotEqual, SCK_OVERFLOW);
- break;
-
- default:
- noway_assert(!"Unexpected dstType");
- break;
- }
-
- genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);
- }
-
- regTracker.rsTrackRegTrash(reg);
- genDoneAddressable(op1, addrReg, RegSet::FREE_REG);
-
- genCodeForTree_DONE(tree, reg);
- return;
-
- case TYP_BOOL:
- case TYP_BYTE:
- case TYP_SHORT:
- case TYP_CHAR:
- case TYP_UBYTE:
- break;
-
- case TYP_UINT:
- case TYP_INT:
- break;
-
-#if FEATURE_STACK_FP_X87
- case TYP_FLOAT:
- NO_WAY("OPCAST from TYP_FLOAT should have been converted into a helper call");
- break;
-
- case TYP_DOUBLE:
- if (compiler->opts.compCanUseSSE2)
- {
- // do the SSE2 based cast inline
- // getting the fp operand
-
- regMaskTP addrRegInt = 0;
- regMaskTP addrRegFlt = 0;
-
- // make the operand addressable
- // We don't want to collapse constant doubles into floats, as the SSE2 instruction
- // operates on doubles. Note that these (casts from constant doubles) usually get
-                // folded, but we don't do it for some cases (infinities, etc.). So essentially this
- // shouldn't affect performance or size at all. We're fixing this for #336067
- op1 = genMakeAddressableStackFP(op1, &addrRegInt, &addrRegFlt, false);
- if (!addrRegFlt && !op1->IsRegVar())
- {
- // we have the address
-
- inst_RV_TT(INS_movsdsse2, REG_XMM0, op1, 0, EA_8BYTE);
- genDoneAddressableStackFP(op1, addrRegInt, addrRegFlt, RegSet::KEEP_REG);
- genUpdateLife(op1);
-
- reg = regSet.rsPickReg(needReg);
- getEmitter()->emitIns_R_R(INS_cvttsd2si, EA_8BYTE, reg, REG_XMM0);
-
- regTracker.rsTrackRegTrash(reg);
- genCodeForTree_DONE(tree, reg);
- }
- else
- {
- // we will need to use a temp to get it into the xmm reg
- var_types typeTemp = op1->TypeGet();
- TempDsc* temp = compiler->tmpGetTemp(typeTemp);
-
- size = EA_ATTR(genTypeSize(typeTemp));
-
- if (addrRegFlt)
- {
- // On the fp stack; Take reg to top of stack
-
- FlatFPX87_MoveToTOS(&compCurFPState, op1->gtRegNum);
- }
- else
- {
- // op1->IsRegVar()
- // pick a register
- reg = regSet.PickRegFloat();
- if (!op1->IsRegVarDeath())
- {
- // Load it on the fp stack
- genLoadStackFP(op1, reg);
- }
- else
- {
- // if it's dying, genLoadStackFP just renames it and then we move reg to TOS
- genLoadStackFP(op1, reg);
- FlatFPX87_MoveToTOS(&compCurFPState, reg);
- }
- }
-
- // pop it off the fp stack
- compCurFPState.Pop();
-
- getEmitter()->emitIns_S(INS_fstp, size, temp->tdTempNum(), 0);
- // pick a reg
- reg = regSet.rsPickReg(needReg);
-
- inst_RV_ST(INS_movsdsse2, REG_XMM0, temp, 0, TYP_DOUBLE, EA_8BYTE);
- getEmitter()->emitIns_R_R(INS_cvttsd2si, EA_8BYTE, reg, REG_XMM0);
-
- // done..release the temp
- compiler->tmpRlsTemp(temp);
-
- // the reg is now trashed
- regTracker.rsTrackRegTrash(reg);
- genDoneAddressableStackFP(op1, addrRegInt, addrRegFlt, RegSet::KEEP_REG);
- genUpdateLife(op1);
- genCodeForTree_DONE(tree, reg);
- }
- }
-#else
- case TYP_FLOAT:
- case TYP_DOUBLE:
- genCodeForTreeFloat(tree, needReg, bestReg);
-#endif // FEATURE_STACK_FP_X87
- return;
-
- default:
- noway_assert(!"unexpected cast type");
- }
-
- if (tree->gtOverflow())
- {
- /* Compute op1 into a register, and free the register */
-
- genComputeReg(op1, destReg, RegSet::ANY_REG, RegSet::FREE_REG);
- reg = op1->gtRegNum;
-
- /* Do we need to compare the value, or just check masks */
-
- ssize_t typeMin = DUMMY_INIT(~0), typeMax = DUMMY_INIT(0);
- ssize_t typeMask;
-
- switch (dstType)
- {
- case TYP_BYTE:
- typeMask = ssize_t((int)0xFFFFFF80);
- typeMin = SCHAR_MIN;
- typeMax = SCHAR_MAX;
- unsv = (tree->gtFlags & GTF_UNSIGNED);
- break;
- case TYP_SHORT:
- typeMask = ssize_t((int)0xFFFF8000);
- typeMin = SHRT_MIN;
- typeMax = SHRT_MAX;
- unsv = (tree->gtFlags & GTF_UNSIGNED);
- break;
- case TYP_INT:
- typeMask = ssize_t((int)0x80000000L);
-#ifdef _TARGET_64BIT_
- unsv = (tree->gtFlags & GTF_UNSIGNED);
- typeMin = INT_MIN;
- typeMax = INT_MAX;
-#else // _TARGET_64BIT_
- noway_assert((tree->gtFlags & GTF_UNSIGNED) != 0);
- unsv = true;
-#endif // _TARGET_64BIT_
- break;
- case TYP_UBYTE:
- unsv = true;
- typeMask = ssize_t((int)0xFFFFFF00L);
- break;
- case TYP_CHAR:
- unsv = true;
- typeMask = ssize_t((int)0xFFFF0000L);
- break;
- case TYP_UINT:
- unsv = true;
-#ifdef _TARGET_64BIT_
- typeMask = 0xFFFFFFFF00000000LL;
-#else // _TARGET_64BIT_
- typeMask = 0x80000000L;
- noway_assert((tree->gtFlags & GTF_UNSIGNED) == 0);
-#endif // _TARGET_64BIT_
- break;
- default:
- NO_WAY("Unknown type");
- return;
- }
-
- // If we just have to check a mask.
- // This must be conv.ovf.u4u1, conv.ovf.u4u2, conv.ovf.u4i4,
- // or conv.i4u4
-
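-        // For example (illustrative only), for conv.ovf.u4.u1 the mask check below emits
-        //     test reg, 0xFFFFFF00
-        //     jne  <overflow throw block>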
- if (unsv)
- {
- inst_RV_IV(INS_TEST, reg, typeMask, emitActualTypeSize(baseType));
- emitJumpKind jmpNotEqual = genJumpKindForOper(GT_NE, CK_SIGNED);
- genJumpToThrowHlpBlk(jmpNotEqual, SCK_OVERFLOW);
- }
- else
- {
- // Check the value is in range.
- // This must be conv.ovf.i4i1, etc.
-
- // Compare with the MAX
-
- noway_assert(typeMin != DUMMY_INIT(~0) && typeMax != DUMMY_INIT(0));
-
- inst_RV_IV(INS_cmp, reg, typeMax, emitActualTypeSize(baseType));
- emitJumpKind jmpGTS = genJumpKindForOper(GT_GT, CK_SIGNED);
- genJumpToThrowHlpBlk(jmpGTS, SCK_OVERFLOW);
-
- // Compare with the MIN
-
- inst_RV_IV(INS_cmp, reg, typeMin, emitActualTypeSize(baseType));
- emitJumpKind jmpLTS = genJumpKindForOper(GT_LT, CK_SIGNED);
- genJumpToThrowHlpBlk(jmpLTS, SCK_OVERFLOW);
- }
-
- genCodeForTree_DONE(tree, reg);
- return;
- }
-
- /* Make the operand addressable */
-
- addrReg = genMakeAddressable(op1, needReg, RegSet::FREE_REG, true);
-
- reg = genIntegerCast(tree, needReg, bestReg);
-
- genDoneAddressable(op1, addrReg, RegSet::FREE_REG);
-
- genCodeForTree_DONE(tree, reg);
-}
-
-/*****************************************************************************
- *
- * Generate code for a leaf node of type GT_ADDR
- */
-
-void CodeGen::genCodeForTreeSmpOp_GT_ADDR(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg)
-{
- genTreeOps oper = tree->OperGet();
- const var_types treeType = tree->TypeGet();
- GenTreePtr op1;
- regNumber reg;
- regMaskTP needReg = destReg;
- regMaskTP addrReg;
-
-#ifdef DEBUG
- reg = (regNumber)0xFEEFFAAF; // to detect uninitialized use
- addrReg = 0xDEADCAFE;
-#endif
-
-    // We should get here for ldloca, ldarga, ldsflda, ldelema,
- // or ldflda.
- if (oper == GT_ARR_ELEM)
- {
- op1 = tree;
- }
- else
- {
- op1 = tree->gtOp.gtOp1;
- }
-
- // (tree=op1, needReg=0, keepReg=RegSet::FREE_REG, smallOK=true)
- if (oper == GT_ARR_ELEM)
- {
- // To get the address of the array element,
- // we first call genMakeAddrArrElem to make the element addressable.
- // (That is, for example, we first emit code to calculate EBX, and EAX.)
- // And then use lea to obtain the address.
- // (That is, for example, we then emit
- // lea EBX, bword ptr [EBX+4*EAX+36]
- // to obtain the address of the array element.)
- addrReg = genMakeAddrArrElem(op1, tree, RBM_NONE, RegSet::FREE_REG);
- }
- else
- {
- addrReg = genMakeAddressable(op1, 0, RegSet::FREE_REG, true);
- }
-
- noway_assert(treeType == TYP_BYREF || treeType == TYP_I_IMPL);
-
- // We want to reuse one of the scratch registers that were used
- // in forming the address mode as the target register for the lea.
- // If bestReg is unset or if it is set to one of the registers used to
- // form the address (i.e. addrReg), we calculate the scratch register
- // to use as the target register for the LEA
-
- bestReg = regSet.rsUseIfZero(bestReg, addrReg);
- bestReg = regSet.rsNarrowHint(bestReg, addrReg);
-
- /* Even if addrReg is regSet.rsRegMaskCanGrab(), regSet.rsPickReg() won't spill
- it since keepReg==false.
- If addrReg can't be grabbed, regSet.rsPickReg() won't touch it anyway.
- So this is guaranteed not to spill addrReg */
-
- reg = regSet.rsPickReg(needReg, bestReg);
-
-    // Slight workaround: force the inst routine to think that
-    // the value being loaded is an int (since that is what
-    // LEA will return); otherwise it would try to allocate
- // two registers for a long etc.
- noway_assert(treeType == TYP_I_IMPL || treeType == TYP_BYREF);
- op1->gtType = treeType;
-
- inst_RV_TT(INS_lea, reg, op1, 0, (treeType == TYP_BYREF) ? EA_BYREF : EA_PTRSIZE);
-
- // The Lea instruction above better not have tried to put the
- // 'value' pointed to by 'op1' in a register, LEA will not work.
- noway_assert(!(op1->gtFlags & GTF_REG_VAL));
-
- genDoneAddressable(op1, addrReg, RegSet::FREE_REG);
- // gcInfo.gcMarkRegSetNpt(genRegMask(reg));
- noway_assert((gcInfo.gcRegGCrefSetCur & genRegMask(reg)) == 0);
-
- regTracker.rsTrackRegTrash(reg); // reg does have foldable value in it
- gcInfo.gcMarkRegPtrVal(reg, treeType);
-
- genCodeForTree_DONE(tree, reg);
-}
-
-#ifdef _TARGET_ARM_
-
-/*****************************************************************************
- *
- * Move (load/store) between float ret regs and struct promoted variable.
- *
- * varDsc - The struct variable to be loaded from or stored into.
- * isLoadIntoFlt - Perform a load operation if "true" or store if "false."
- *
- */
-void CodeGen::genLdStFltRetRegsPromotedVar(LclVarDsc* varDsc, bool isLoadIntoFlt)
-{
- regNumber curReg = REG_FLOATRET;
-
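-    // curReg walks the float return registers starting at REG_FLOATRET, advancing by one
-    // register per float field and by two registers per double field.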
- unsigned lclLast = varDsc->lvFieldLclStart + varDsc->lvFieldCnt - 1;
- for (unsigned lclNum = varDsc->lvFieldLclStart; lclNum <= lclLast; ++lclNum)
- {
- LclVarDsc* varDscFld = &compiler->lvaTable[lclNum];
-
- // Is the struct field promoted and sitting in a register?
- if (varDscFld->lvRegister)
- {
- // Move from the struct field into curReg if load
- // else move into struct field from curReg if store
- regNumber srcReg = (isLoadIntoFlt) ? varDscFld->lvRegNum : curReg;
- regNumber dstReg = (isLoadIntoFlt) ? curReg : varDscFld->lvRegNum;
- if (srcReg != dstReg)
- {
- inst_RV_RV(ins_Copy(varDscFld->TypeGet()), dstReg, srcReg, varDscFld->TypeGet());
- regTracker.rsTrackRegCopy(dstReg, srcReg);
- }
- }
- else
- {
- // This field is in memory, do a move between the field and float registers.
- emitAttr size = (varDscFld->TypeGet() == TYP_DOUBLE) ? EA_8BYTE : EA_4BYTE;
- if (isLoadIntoFlt)
- {
- getEmitter()->emitIns_R_S(ins_Load(varDscFld->TypeGet()), size, curReg, lclNum, 0);
- regTracker.rsTrackRegTrash(curReg);
- }
- else
- {
- getEmitter()->emitIns_S_R(ins_Store(varDscFld->TypeGet()), size, curReg, lclNum, 0);
- }
- }
-
- // Advance the current reg.
- curReg = (varDscFld->TypeGet() == TYP_DOUBLE) ? REG_NEXT(REG_NEXT(curReg)) : REG_NEXT(curReg);
- }
-}
-
-void CodeGen::genLoadIntoFltRetRegs(GenTreePtr tree)
-{
- assert(tree->TypeGet() == TYP_STRUCT);
- assert(tree->gtOper == GT_LCL_VAR);
- LclVarDsc* varDsc = compiler->lvaTable + tree->gtLclVarCommon.gtLclNum;
- int slots = varDsc->lvSize() / REGSIZE_BYTES;
- if (varDsc->lvPromoted)
- {
- genLdStFltRetRegsPromotedVar(varDsc, true);
- }
- else
- {
- if (slots <= 2)
- {
- // Use the load float/double instruction.
- inst_RV_TT(ins_Load((slots == 1) ? TYP_FLOAT : TYP_DOUBLE), REG_FLOATRET, tree, 0,
- (slots == 1) ? EA_4BYTE : EA_8BYTE);
- }
- else
- {
- // Use the load store multiple instruction.
- regNumber reg = regSet.rsPickReg(RBM_ALLINT);
- inst_RV_TT(INS_lea, reg, tree, 0, EA_PTRSIZE);
- regTracker.rsTrackRegTrash(reg);
- getEmitter()->emitIns_R_R_I(INS_vldm, EA_4BYTE, REG_FLOATRET, reg, slots * REGSIZE_BYTES);
- }
- }
- genMarkTreeInReg(tree, REG_FLOATRET);
-}
-
-void CodeGen::genStoreFromFltRetRegs(GenTreePtr tree)
-{
- assert(tree->TypeGet() == TYP_STRUCT);
- assert(tree->OperGet() == GT_ASG);
-
- // LHS should be lcl var or fld.
- GenTreePtr op1 = tree->gtOp.gtOp1;
-
- // TODO: We had a bug where op1 was a GT_IND, the result of morphing a GT_BOX, and not properly
- // handling multiple levels of inlined functions that return HFA on the right-hand-side.
- // So, make the op1 check a noway_assert (that exists in non-debug builds) so we'll fall
- // back to MinOpts with no inlining, if we don't have what we expect. We don't want to
- // do the full IsHfa() check in non-debug, since that involves VM calls, so leave that
- // as a regular assert().
- noway_assert((op1->gtOper == GT_LCL_VAR) || (op1->gtOper == GT_LCL_FLD));
- unsigned varNum = op1->gtLclVarCommon.gtLclNum;
- assert(compiler->IsHfa(compiler->lvaGetStruct(varNum)));
-
- // The RHS should be a call.
- GenTreePtr op2 = tree->gtOp.gtOp2;
- assert(op2->gtOper == GT_CALL);
-
- // Generate code for call and copy the return registers into the local.
- regMaskTP retMask = genCodeForCall(op2, true);
-
-    // Ret mask should be contiguously set from s0 up to s3, or from d0 up to d3.
- CLANG_FORMAT_COMMENT_ANCHOR;
-
-#ifdef DEBUG
- regMaskTP mask = ((retMask >> REG_FLOATRET) + 1);
- assert((mask & (mask - 1)) == 0);
- assert(mask <= (1 << MAX_HFA_RET_SLOTS));
- assert((retMask & (((regMaskTP)RBM_FLOATRET) - 1)) == 0);
-#endif
-
- int slots = genCountBits(retMask & RBM_ALLFLOAT);
-
- LclVarDsc* varDsc = &compiler->lvaTable[varNum];
-
- if (varDsc->lvPromoted)
- {
- genLdStFltRetRegsPromotedVar(varDsc, false);
- }
- else
- {
- if (slots <= 2)
- {
- inst_TT_RV(ins_Store((slots == 1) ? TYP_FLOAT : TYP_DOUBLE), op1, REG_FLOATRET, 0,
- (slots == 1) ? EA_4BYTE : EA_8BYTE);
- }
- else
- {
- regNumber reg = regSet.rsPickReg(RBM_ALLINT);
- inst_RV_TT(INS_lea, reg, op1, 0, EA_PTRSIZE);
- regTracker.rsTrackRegTrash(reg);
- getEmitter()->emitIns_R_R_I(INS_vstm, EA_4BYTE, REG_FLOATRET, reg, slots * REGSIZE_BYTES);
- }
- }
-}
-
-#endif // _TARGET_ARM_
-
-/*****************************************************************************
- *
- * Generate code for a GT_ASG tree
- */
-
-#ifdef _PREFAST_
-#pragma warning(push)
-#pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
-#endif
-void CodeGen::genCodeForTreeSmpOpAsg(GenTreePtr tree)
-{
- noway_assert(tree->gtOper == GT_ASG);
-
- GenTreePtr op1 = tree->gtOp.gtOp1;
- GenTreePtr op2 = tree->gtOp.gtOp2;
- regMaskTP needReg = RBM_ALLINT;
- regMaskTP bestReg = RBM_CORRUPT;
- regMaskTP addrReg = DUMMY_INIT(RBM_CORRUPT);
- bool ovfl = false; // Do we need an overflow check
- bool volat = false; // Is this a volatile store
- regMaskTP regGC;
- instruction ins;
-#ifdef DEBUGGING_SUPPORT
- unsigned lclVarNum = compiler->lvaCount;
- unsigned lclILoffs = DUMMY_INIT(0);
-#endif
-
-#ifdef _TARGET_ARM_
- if (tree->gtType == TYP_STRUCT)
- {
-        // We use copy block to assign structs; however, to receive HFAs in registers
-        // from a CALL, we use assignment: var = (hfa) call();
- assert(compiler->IsHfa(tree));
- genStoreFromFltRetRegs(tree);
- return;
- }
-#endif
-
-#ifdef DEBUG
- if (varTypeIsFloating(op1) != varTypeIsFloating(op2))
- {
- if (varTypeIsFloating(op1))
- assert(!"Bad IL: Illegal assignment of integer into float!");
- else
- assert(!"Bad IL: Illegal assignment of float into integer!");
- }
-#endif
-
- if ((tree->gtFlags & GTF_REVERSE_OPS) == 0)
- {
- op1 = genCodeForCommaTree(op1); // Strip away any comma expressions.
- }
-
- /* Is the target a register or local variable? */
- switch (op1->gtOper)
- {
- unsigned varNum;
- LclVarDsc* varDsc;
-
- case GT_LCL_VAR:
- varNum = op1->gtLclVarCommon.gtLclNum;
- noway_assert(varNum < compiler->lvaCount);
- varDsc = compiler->lvaTable + varNum;
-
-#ifdef DEBUGGING_SUPPORT
- /* For non-debuggable code, every definition of a lcl-var has
- * to be checked to see if we need to open a new scope for it.
- * Remember the local var info to call siCheckVarScope
- * AFTER code generation of the assignment.
- */
- if (compiler->opts.compScopeInfo && !compiler->opts.compDbgCode && (compiler->info.compVarScopesCount > 0))
- {
- lclVarNum = varNum;
- lclILoffs = op1->gtLclVar.gtLclILoffs;
- }
-#endif
-
- /* Check against dead store ? (with min opts we may have dead stores) */
-
- noway_assert(!varDsc->lvTracked || compiler->opts.MinOpts() || !(op1->gtFlags & GTF_VAR_DEATH));
-
- /* Does this variable live in a register? */
-
- if (genMarkLclVar(op1))
- goto REG_VAR2;
-
- break;
-
- REG_VAR2:
-
- /* Get hold of the target register */
-
- regNumber op1Reg;
-
- op1Reg = op1->gtRegVar.gtRegNum;
-
-#ifdef DEBUG
- /* Compute the RHS (hopefully) into the variable's register.
- For debuggable code, op1Reg may already be part of regSet.rsMaskVars,
- as variables are kept alive everywhere. So we have to be
- careful if we want to compute the value directly into
- the variable's register. */
-
- bool needToUpdateRegSetCheckLevel;
- needToUpdateRegSetCheckLevel = false;
-#endif
-
- // We should only be accessing lvVarIndex if varDsc is tracked.
- assert(varDsc->lvTracked);
-
- if (VarSetOps::IsMember(compiler, genUpdateLiveSetForward(op2), varDsc->lvVarIndex))
- {
- noway_assert(compiler->opts.compDbgCode);
-
- /* The predictor might expect us to generate op2 directly
- into the var's register. However, since the variable is
- already alive, first kill it and its register. */
-
- if (rpCanAsgOperWithoutReg(op2, true))
- {
- genUpdateLife(VarSetOps::RemoveElem(compiler, compiler->compCurLife, varDsc->lvVarIndex));
- needReg = regSet.rsNarrowHint(needReg, genRegMask(op1Reg));
-#ifdef DEBUG
- needToUpdateRegSetCheckLevel = true;
-#endif
- }
- }
- else
- {
- needReg = regSet.rsNarrowHint(needReg, genRegMask(op1Reg));
- }
-
-#ifdef DEBUG
-
- /* Special cases: op2 is a GT_CNS_INT */
-
- if (op2->gtOper == GT_CNS_INT && !(op1->gtFlags & GTF_VAR_DEATH))
- {
- /* Save the old life status */
-
- VarSetOps::Assign(compiler, genTempOldLife, compiler->compCurLife);
- VarSetOps::AddElemD(compiler, compiler->compCurLife, varDsc->lvVarIndex);
-
- /* Set a flag to avoid printing the message
- and remember that life was changed. */
-
- genTempLiveChg = false;
- }
-#endif
-
-#ifdef DEBUG
- if (needToUpdateRegSetCheckLevel)
- compiler->compRegSetCheckLevel++;
-#endif
- genCodeForTree(op2, needReg, genRegMask(op1Reg));
-#ifdef DEBUG
- if (needToUpdateRegSetCheckLevel)
- compiler->compRegSetCheckLevel--;
- noway_assert(compiler->compRegSetCheckLevel >= 0);
-#endif
- noway_assert(op2->gtFlags & GTF_REG_VAL);
-
- /* Make sure the value ends up in the right place ... */
-
- if (op2->gtRegNum != op1Reg)
- {
- /* Make sure the target of the store is available */
-
- if (regSet.rsMaskUsed & genRegMask(op1Reg))
- regSet.rsSpillReg(op1Reg);
-
-#ifdef _TARGET_ARM_
- if (op1->TypeGet() == TYP_FLOAT)
- {
- // This can only occur when we are returning a non-HFA struct
- // that is composed of a single float field.
- //
- inst_RV_RV(INS_vmov_i2f, op1Reg, op2->gtRegNum, op1->TypeGet());
- }
- else
-#endif // _TARGET_ARM_
- {
- inst_RV_RV(INS_mov, op1Reg, op2->gtRegNum, op1->TypeGet());
- }
-
- /* The value has been transferred to 'op1Reg' */
-
- regTracker.rsTrackRegCopy(op1Reg, op2->gtRegNum);
-
- if ((genRegMask(op2->gtRegNum) & regSet.rsMaskUsed) == 0)
- gcInfo.gcMarkRegSetNpt(genRegMask(op2->gtRegNum));
-
- gcInfo.gcMarkRegPtrVal(op1Reg, tree->TypeGet());
- }
- else
- {
- // First we need to remove it from the original reg set mask (or else trigger an
- // assert when we add it to the other reg set mask).
- gcInfo.gcMarkRegSetNpt(genRegMask(op1Reg));
- gcInfo.gcMarkRegPtrVal(op1Reg, tree->TypeGet());
-
- // The emitter has logic that tracks the GCness of registers and asserts if you
- // try to do bad things to a GC pointer (like lose its GCness).
-
-                // An explicit cast of a GC pointer to an int (which is legal if the
-                // pointer is pinned) is encoded as an assignment of a GC source
-                // to an integer variable. Unfortunately, if the source was the last
- // use, and the source register gets reused by the destination, no
- // code gets emitted (That is where we are at right now). The emitter
- // thinks the register is a GC pointer (it did not see the cast).
- // This causes asserts, as well as bad GC info since we will continue
- // to report the register as a GC pointer even if we do arithmetic
- // with it. So force the emitter to see the change in the type
- // of variable by placing a label.
- // We only have to do this check at this point because in the
- // CAST morphing, we create a temp and assignment whenever we
- // have a cast that loses its GCness.
-
- if (varTypeGCtype(op2->TypeGet()) != varTypeGCtype(op1->TypeGet()))
- {
- void* label = getEmitter()->emitAddLabel(gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur,
- gcInfo.gcRegByrefSetCur);
- }
- }
-
- addrReg = 0;
-
- genCodeForTreeSmpOpAsg_DONE_ASSG(tree, addrReg, op1Reg, ovfl);
- goto LExit;
-
- case GT_LCL_FLD:
-
- // We only use GT_LCL_FLD for lvDoNotEnregister vars, so we don't have
- // to worry about it being enregistered.
- noway_assert(compiler->lvaTable[op1->gtLclFld.gtLclNum].lvRegister == 0);
- break;
-
- case GT_CLS_VAR:
-
- __fallthrough;
-
- case GT_IND:
- case GT_NULLCHECK:
-
- assert((op1->OperGet() == GT_CLS_VAR) || (op1->OperGet() == GT_IND));
-
- if (op1->gtFlags & GTF_IND_VOLATILE)
- {
- volat = true;
- }
-
- break;
-
- default:
- break;
- }
-
- /* Is the value being assigned a simple one? */
-
- noway_assert(op2);
- switch (op2->gtOper)
- {
- case GT_LCL_VAR:
-
- if (!genMarkLclVar(op2))
- goto SMALL_ASG;
-
- __fallthrough;
-
- case GT_REG_VAR:
-
- /* Is the target a byte/short/char value? */
-
- if (varTypeIsSmall(op1->TypeGet()))
- goto SMALL_ASG;
-
- if (tree->gtFlags & GTF_REVERSE_OPS)
- goto SMALL_ASG;
-
- /* Make the target addressable */
-
- op1 = genCodeForCommaTree(op1); // Strip away comma expressions.
-
- addrReg = genMakeAddressable(op1, needReg, RegSet::KEEP_REG, true);
-
- /* Does the write barrier helper do the assignment? */
-
- regGC = WriteBarrier(op1, op2, addrReg);
-
- // Was assignment done by the WriteBarrier
- if (regGC == RBM_NONE)
- {
-#ifdef _TARGET_ARM_
- if (volat)
- {
- // Emit a memory barrier instruction before the store
- instGen_MemoryBarrier();
- }
-#endif
-
- /* Move the value into the target */
-
- inst_TT_RV(ins_Store(op1->TypeGet()), op1, op2->gtRegVar.gtRegNum);
-
- // This is done in WriteBarrier when (regGC != RBM_NONE)
-
- /* Free up anything that was tied up by the LHS */
- genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);
- }
-
- /* Free up the RHS */
- genUpdateLife(op2);
-
- /* Remember that we've also touched the op2 register */
-
- addrReg |= genRegMask(op2->gtRegVar.gtRegNum);
- break;
-
- case GT_CNS_INT:
-
- ssize_t ival;
- ival = op2->gtIntCon.gtIconVal;
- emitAttr size;
- size = emitTypeSize(tree->TypeGet());
-
- ins = ins_Store(op1->TypeGet());
-
- // If we are storing a constant into a local variable
-            // we extend the size of the store here;
-            // this normally takes place in CodeGen::inst_TT_IV on x86.
- //
- if ((op1->gtOper == GT_LCL_VAR) && (size < EA_4BYTE))
- {
- unsigned varNum = op1->gtLclVarCommon.gtLclNum;
- LclVarDsc* varDsc = compiler->lvaTable + varNum;
-
- // Fix the immediate by sign extending if needed
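-                // (Illustrative example: storing the constant 0xFF into a signed byte local becomes
-                //  0xFF | 0xFFFFFF00 == 0xFFFFFFFF, i.e. -1 as a 32-bit immediate.)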
- if (!varTypeIsUnsigned(varDsc->TypeGet()))
- {
- if (size == EA_1BYTE)
- {
- if ((ival & 0x7f) != ival)
- ival = ival | 0xffffff00;
- }
- else
- {
- assert(size == EA_2BYTE);
- if ((ival & 0x7fff) != ival)
- ival = ival | 0xffff0000;
- }
- }
-
- // A local stack slot is at least 4 bytes in size, regardless of
- // what the local var is typed as, so auto-promote it here
- // unless it is a field of a promoted struct
- if (!varDsc->lvIsStructField)
- {
- size = EA_SET_SIZE(size, EA_4BYTE);
- ins = ins_Store(TYP_INT);
- }
- }
-
- /* Make the target addressable */
-
- addrReg = genMakeAddressable(op1, needReg, RegSet::KEEP_REG, true);
-
-#ifdef _TARGET_ARM_
- if (volat)
- {
- // Emit a memory barrier instruction before the store
- instGen_MemoryBarrier();
- }
-#endif
-
- /* Move the value into the target */
-
- noway_assert(op1->gtOper != GT_REG_VAR);
- if (compiler->opts.compReloc && op2->IsIconHandle())
- {
- /* The constant is actually a handle that may need relocation
- applied to it. genComputeReg will do the right thing (see
- code in genCodeForTreeConst), so we'll just call it to load
- the constant into a register. */
-
- genComputeReg(op2, needReg & ~addrReg, RegSet::ANY_REG, RegSet::KEEP_REG);
- addrReg = genKeepAddressable(op1, addrReg, genRegMask(op2->gtRegNum));
- noway_assert(op2->gtFlags & GTF_REG_VAL);
- inst_TT_RV(ins, op1, op2->gtRegNum);
- genReleaseReg(op2);
- }
- else
- {
- regSet.rsLockUsedReg(addrReg);
-
-#if REDUNDANT_LOAD
- bool copyIconFromReg = true;
- regNumber iconReg = REG_NA;
-
-#ifdef _TARGET_ARM_
- // Only if the constant can't be encoded in a small instruction,
- // look for another register to copy the value from. (Assumes
- // target is a small register.)
- if ((op1->gtFlags & GTF_REG_VAL) && !isRegPairType(tree->gtType) &&
- arm_Valid_Imm_For_Small_Mov(op1->gtRegNum, ival, INS_FLAGS_DONT_CARE))
- {
- copyIconFromReg = false;
- }
-#endif // _TARGET_ARM_
-
- if (copyIconFromReg)
- {
- iconReg = regTracker.rsIconIsInReg(ival);
- if (iconReg == REG_NA)
- copyIconFromReg = false;
- }
-
- if (copyIconFromReg && (isByteReg(iconReg) || (genTypeSize(tree->TypeGet()) == EA_PTRSIZE) ||
- (genTypeSize(tree->TypeGet()) == EA_4BYTE)))
- {
- /* Move the value into the target */
-
- inst_TT_RV(ins, op1, iconReg, 0, size);
- }
- else
-#endif // REDUNDANT_LOAD
- {
- inst_TT_IV(ins, op1, ival, 0, size);
- }
-
- regSet.rsUnlockUsedReg(addrReg);
- }
-
- /* Free up anything that was tied up by the LHS */
-
- genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);
- break;
-
- default:
-
- SMALL_ASG:
-
- bool isWriteBarrier = false;
- regMaskTP needRegOp1 = RBM_ALLINT;
- RegSet::ExactReg mustReg = RegSet::ANY_REG; // set to RegSet::EXACT_REG for op1 and NOGC helpers
-
- /* Is the LHS more complex than the RHS? */
-
- if (tree->gtFlags & GTF_REVERSE_OPS)
- {
- /* Is the target a byte/short/char value? */
-
- if (varTypeIsSmall(op1->TypeGet()))
- {
- noway_assert(op1->gtOper != GT_LCL_VAR || (op1->gtFlags & GTF_VAR_CAST) ||
- // TODO: Why does this have to be true?
- compiler->lvaTable[op1->gtLclVarCommon.gtLclNum].lvIsStructField ||
- compiler->lvaTable[op1->gtLclVarCommon.gtLclNum].lvNormalizeOnLoad());
-
- if (op2->gtOper == GT_CAST && !op2->gtOverflow())
- {
- /* Special case: cast to small type */
-
- if (op2->CastToType() >= op1->gtType)
- {
- /* Make sure the cast operand is not > int */
-
- if (op2->CastFromType() <= TYP_INT)
- {
- /* Cast via a non-smaller type */
-
- op2 = op2->gtCast.CastOp();
- }
- }
- }
-
- if (op2->gtOper == GT_AND && op2->gtOp.gtOp2->gtOper == GT_CNS_INT)
- {
- unsigned mask;
- switch (op1->gtType)
- {
- case TYP_BYTE:
- mask = 0x000000FF;
- break;
- case TYP_SHORT:
- mask = 0x0000FFFF;
- break;
- case TYP_CHAR:
- mask = 0x0000FFFF;
- break;
- default:
- goto SIMPLE_SMALL;
- }
-
- if (unsigned(op2->gtOp.gtOp2->gtIntCon.gtIconVal) == mask)
- {
- /* Redundant AND */
-
- op2 = op2->gtOp.gtOp1;
- }
- }
-
- /* Must get the new value into a byte register */
-
- SIMPLE_SMALL:
- if (varTypeIsByte(op1->TypeGet()))
- genComputeReg(op2, RBM_BYTE_REGS, RegSet::EXACT_REG, RegSet::KEEP_REG);
- else
- goto NOT_SMALL;
- }
- else
- {
- NOT_SMALL:
- /* Generate the RHS into a register */
-
- isWriteBarrier = gcInfo.gcIsWriteBarrierAsgNode(tree);
- if (isWriteBarrier)
- {
-#if NOGC_WRITE_BARRIERS
- // Exclude the REG_WRITE_BARRIER from op2's needReg mask
- needReg = Target::exclude_WriteBarrierReg(needReg);
- mustReg = RegSet::EXACT_REG;
-#else // !NOGC_WRITE_BARRIERS
- // This code should be generic across architectures.
-
- // For the standard JIT Helper calls
- // op1 goes into REG_ARG_0 and
- // op2 goes into REG_ARG_1
- //
- needRegOp1 = RBM_ARG_0;
- needReg = RBM_ARG_1;
-#endif // !NOGC_WRITE_BARRIERS
- }
- genComputeReg(op2, needReg, mustReg, RegSet::KEEP_REG);
- }
-
- noway_assert(op2->gtFlags & GTF_REG_VAL);
-
- /* Make the target addressable */
-
- op1 = genCodeForCommaTree(op1); // Strip off any comma expressions.
- addrReg = genMakeAddressable(op1, needRegOp1, RegSet::KEEP_REG, true);
-
- /* Make sure the RHS register hasn't been spilled;
- keep the register marked as "used", otherwise
- we might get the pointer lifetimes wrong.
- */
-
- if (varTypeIsByte(op1->TypeGet()))
- needReg = regSet.rsNarrowHint(RBM_BYTE_REGS, needReg);
-
- genRecoverReg(op2, needReg, RegSet::KEEP_REG);
- noway_assert(op2->gtFlags & GTF_REG_VAL);
-
-                /* Temporarily lock the RHS register (it is already marked as used) */
-
- regSet.rsLockUsedReg(genRegMask(op2->gtRegNum));
-
- /* Make sure the LHS is still addressable */
-
- addrReg = genKeepAddressable(op1, addrReg);
-
-                /* We can now unlock the RHS register (it stays marked as used) */
-
- regSet.rsUnlockUsedReg(genRegMask(op2->gtRegNum));
-
- /* Does the write barrier helper do the assignment? */
-
- regGC = WriteBarrier(op1, op2, addrReg);
-
- if (regGC != 0)
- {
- // Yes, assignment done by the WriteBarrier
- noway_assert(isWriteBarrier);
- }
- else
- {
-#ifdef _TARGET_ARM_
- if (volat)
- {
- // Emit a memory barrier instruction before the store
- instGen_MemoryBarrier();
- }
-#endif
-
- /* Move the value into the target */
-
- inst_TT_RV(ins_Store(op1->TypeGet()), op1, op2->gtRegNum);
- }
-
-#ifdef DEBUG
- /* Update the current liveness info */
- if (compiler->opts.varNames)
- genUpdateLife(tree);
-#endif
-
- // If op2 register is still in use, free it. (Might not be in use, if
- // a full-call write barrier was done, and the register was a caller-saved
- // register.)
- regMaskTP op2RM = genRegMask(op2->gtRegNum);
- if (op2RM & regSet.rsMaskUsed)
- regSet.rsMarkRegFree(genRegMask(op2->gtRegNum));
-
- // This is done in WriteBarrier when (regGC != 0)
- if (regGC == 0)
- {
- /* Free up anything that was tied up by the LHS */
- genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);
- }
- }
- else
- {
- /* Make the target addressable */
-
- isWriteBarrier = gcInfo.gcIsWriteBarrierAsgNode(tree);
-
- if (isWriteBarrier)
- {
-#if NOGC_WRITE_BARRIERS
- /* Try to avoid RBM_TMP_0 */
- needRegOp1 = regSet.rsNarrowHint(needRegOp1, ~RBM_TMP_0);
- mustReg = RegSet::EXACT_REG; // For op2
-#else // !NOGC_WRITE_BARRIERS
- // This code should be generic across architectures.
-
- // For the standard JIT Helper calls
- // op1 goes into REG_ARG_0 and
- // op2 goes into REG_ARG_1
- //
- needRegOp1 = RBM_ARG_0;
- needReg = RBM_ARG_1;
- mustReg = RegSet::EXACT_REG; // For op2
-#endif // !NOGC_WRITE_BARRIERS
- }
-
- needRegOp1 = regSet.rsNarrowHint(needRegOp1, ~op2->gtRsvdRegs);
-
- op1 = genCodeForCommaTree(op1); // Strip away any comma expression.
-
- addrReg = genMakeAddressable(op1, needRegOp1, RegSet::KEEP_REG, true);
-
-#if CPU_HAS_BYTE_REGS
- /* Is the target a byte value? */
- if (varTypeIsByte(op1->TypeGet()))
- {
- /* Must get the new value into a byte register */
- needReg = regSet.rsNarrowHint(RBM_BYTE_REGS, needReg);
- mustReg = RegSet::EXACT_REG;
-
- if (op2->gtType >= op1->gtType)
- op2->gtFlags |= GTF_SMALL_OK;
- }
-#endif
-
-#if NOGC_WRITE_BARRIERS
- /* For WriteBarrier we can't use REG_WRITE_BARRIER */
- if (isWriteBarrier)
- needReg = Target::exclude_WriteBarrierReg(needReg);
-
- /* Also avoid using the previously computed addrReg(s) */
- bestReg = regSet.rsNarrowHint(needReg, ~addrReg);
-
- /* If we have a reg available to grab then use bestReg */
- if (bestReg & regSet.rsRegMaskCanGrab())
- needReg = bestReg;
-
- mustReg = RegSet::EXACT_REG;
-#endif
-
- /* Generate the RHS into a register */
- genComputeReg(op2, needReg, mustReg, RegSet::KEEP_REG);
- noway_assert(op2->gtFlags & GTF_REG_VAL);
-
- /* Make sure the target is still addressable */
- addrReg = genKeepAddressable(op1, addrReg, genRegMask(op2->gtRegNum));
- noway_assert(op2->gtFlags & GTF_REG_VAL);
-
- /* Does the write barrier helper do the assignment? */
-
- regGC = WriteBarrier(op1, op2, addrReg);
-
- if (regGC != 0)
- {
- // Yes, assignment done by the WriteBarrier
- noway_assert(isWriteBarrier);
- }
- else
- {
- assert(!isWriteBarrier);
-
-#ifdef _TARGET_ARM_
- if (volat)
- {
- // Emit a memory barrier instruction before the store
- instGen_MemoryBarrier();
- }
-#endif
-
- /* Move the value into the target */
-
- inst_TT_RV(ins_Store(op1->TypeGet()), op1, op2->gtRegNum);
- }
-
- /* The new value is no longer needed */
-
- genReleaseReg(op2);
-
-#ifdef DEBUG
- /* Update the current liveness info */
- if (compiler->opts.varNames)
- genUpdateLife(tree);
-#endif
-
- // This is done in WriteBarrier when (regGC != 0)
- if (regGC == 0)
- {
- /* Free up anything that was tied up by the LHS */
- genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);
- }
- }
-
- addrReg = RBM_NONE;
- break;
- }
-
- noway_assert(addrReg != DUMMY_INIT(RBM_CORRUPT));
- genCodeForTreeSmpOpAsg_DONE_ASSG(tree, addrReg, REG_NA, ovfl);
-
-LExit:
-#ifdef DEBUGGING_SUPPORT
- /* For non-debuggable code, every definition of a lcl-var has
- * to be checked to see if we need to open a new scope for it.
- */
- if (lclVarNum < compiler->lvaCount)
- siCheckVarScope(lclVarNum, lclILoffs);
-#endif
-}
-#ifdef _PREFAST_
-#pragma warning(pop)
-#endif
-
-/*****************************************************************************
- *
- * Generate code to complete the assignment operation
- */
-
-void CodeGen::genCodeForTreeSmpOpAsg_DONE_ASSG(GenTreePtr tree, regMaskTP addrReg, regNumber reg, bool ovfl)
-{
- const var_types treeType = tree->TypeGet();
- GenTreePtr op1 = tree->gtOp.gtOp1;
- GenTreePtr op2 = tree->gtOp.gtOp2;
- noway_assert(op2);
-
- if (op1->gtOper == GT_LCL_VAR || op1->gtOper == GT_REG_VAR)
- genUpdateLife(op1);
- genUpdateLife(tree);
-
-#if REDUNDANT_LOAD
-
- if (op1->gtOper == GT_LCL_VAR)
- regTracker.rsTrashLcl(op1->gtLclVarCommon.gtLclNum);
-
- /* Have we just assigned a value that is in a register? */
-
- if ((op2->gtFlags & GTF_REG_VAL) && tree->gtOper == GT_ASG)
- {
- regTracker.rsTrackRegAssign(op1, op2);
- }
-
-#endif
-
- noway_assert(addrReg != 0xDEADCAFE);
-
- gcInfo.gcMarkRegSetNpt(addrReg);
-
- if (ovfl)
- {
- noway_assert(tree->gtOper == GT_ASG_ADD || tree->gtOper == GT_ASG_SUB);
-
- /* If GTF_REG_VAL is not set, and it is a small type, then
- we must have loaded it up from memory, done the increment,
- checked for overflow, and then stored it back to memory */
-
- bool ovfCheckDone = (genTypeSize(op1->TypeGet()) < sizeof(int)) && !(op1->gtFlags & GTF_REG_VAL);
-
- if (!ovfCheckDone)
- {
- // For small sizes, reg should be set as we sign/zero extend it.
-
- noway_assert(genIsValidReg(reg) || genTypeSize(treeType) == sizeof(int));
-
- /* Currently we don't morph x=x+y into x+=y in try blocks
- * if we need overflow check, as x+y may throw an exception.
- * We can do it if x is not live on entry to the catch block.
- */
- noway_assert(!compiler->compCurBB->hasTryIndex());
-
- genCheckOverflow(tree);
- }
- }
-}
-
-/*****************************************************************************
- *
- * Generate code for a special op tree
- */
-
-void CodeGen::genCodeForTreeSpecialOp(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg)
-{
- genTreeOps oper = tree->OperGet();
- regNumber reg = DUMMY_INIT(REG_CORRUPT);
- regMaskTP regs = regSet.rsMaskUsed;
-
- noway_assert((tree->OperKind() & (GTK_CONST | GTK_LEAF | GTK_SMPOP)) == 0);
-
- switch (oper)
- {
- case GT_CALL:
- regs = genCodeForCall(tree, true);
-
- /* If the result is in a register, make sure it ends up in the right place */
-
- if (regs != RBM_NONE)
- {
- genMarkTreeInReg(tree, genRegNumFromMask(regs));
- }
-
- genUpdateLife(tree);
- return;
-
- case GT_FIELD:
- NO_WAY("should not see this operator in this phase");
- break;
-
- case GT_ARR_BOUNDS_CHECK:
- {
-#ifdef FEATURE_ENABLE_NO_RANGE_CHECKS
- // MUST NEVER CHECK-IN WITH THIS ENABLED.
- // This is just for convenience in doing performance investigations and requires x86ret builds
- if (!JitConfig.JitNoRngChk())
-#endif
- genRangeCheck(tree);
- }
- return;
-
- case GT_ARR_ELEM:
- genCodeForTreeSmpOp_GT_ADDR(tree, destReg, bestReg);
- return;
-
- case GT_CMPXCHG:
- {
-#if defined(_TARGET_XARCH_)
- // cmpxchg does not have an [r/m32], imm32 encoding, so we need a register for the value operand
-
- // Since this is a "call", evaluate the operands from right to left. Don't worry about spilling
- // right now, just get the trees evaluated.
-
-            // As a friendly reminder: IL args are evaluated left to right.
-
- GenTreePtr location = tree->gtCmpXchg.gtOpLocation; // arg1
- GenTreePtr value = tree->gtCmpXchg.gtOpValue; // arg2
- GenTreePtr comparand = tree->gtCmpXchg.gtOpComparand; // arg3
- regMaskTP addrReg;
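-            // At a high level (illustrative only) the sequence emitted below is
-            //     mov  eax, <comparand>
-            //     lock cmpxchg [<location>], <value reg>
-            // with the original memory value left in EAX as the result.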
-
- bool isAddr = genMakeIndAddrMode(location, tree, false, /* not for LEA */
- RBM_ALLINT, RegSet::KEEP_REG, &addrReg);
-
- if (!isAddr)
- {
- genCodeForTree(location, RBM_NONE, RBM_NONE);
-                assert(location->gtFlags & GTF_REG_VAL);
- addrReg = genRegMask(location->gtRegNum);
- regSet.rsMarkRegUsed(location);
- }
-
- // We must have a reg for the Value, but it doesn't really matter which register.
-
-            // Try to avoid EAX and the address register if possible.
- genComputeReg(value, regSet.rsNarrowHint(RBM_ALLINT, RBM_EAX | addrReg), RegSet::ANY_REG, RegSet::KEEP_REG);
-
-#ifdef DEBUG
- // cmpxchg uses EAX as an implicit operand to hold the comparand
- // We're going to destroy EAX in this operation, so we better not be keeping
- // anything important in it.
- if (RBM_EAX & regSet.rsMaskVars)
- {
- // We have a variable enregistered in EAX. Make sure it goes dead in this tree.
- for (unsigned varNum = 0; varNum < compiler->lvaCount; ++varNum)
- {
- const LclVarDsc& varDesc = compiler->lvaTable[varNum];
- if (!varDesc.lvIsRegCandidate())
- continue;
- if (!varDesc.lvRegister)
- continue;
- if (isFloatRegType(varDesc.lvType))
- continue;
- if (varDesc.lvRegNum != REG_EAX)
- continue;
- // We may need to check lvOtherReg.
-
- // If the variable isn't going dead during this tree, we've just trashed a local with
- // cmpxchg.
- noway_assert(genContainsVarDeath(value->gtNext, comparand->gtNext, varNum));
-
- break;
- }
- }
-#endif
- genComputeReg(comparand, RBM_EAX, RegSet::EXACT_REG, RegSet::KEEP_REG);
-
- // By this point we've evaluated everything. However the odds are that we've spilled something by
- // now. Let's recover all the registers and force them to stay.
-
- // Well, we just computed comparand, so it's still in EAX.
- noway_assert(comparand->gtRegNum == REG_EAX);
- regSet.rsLockUsedReg(RBM_EAX);
-
- // Stick it anywhere other than EAX.
- genRecoverReg(value, ~RBM_EAX, RegSet::KEEP_REG);
- reg = value->gtRegNum;
- noway_assert(reg != REG_EAX);
- regSet.rsLockUsedReg(genRegMask(reg));
-
- if (isAddr)
- {
- addrReg = genKeepAddressable(/*location*/ tree, addrReg, 0 /*avoidMask*/);
- }
- else
- {
- genRecoverReg(location, ~(RBM_EAX | genRegMask(reg)), RegSet::KEEP_REG);
- }
-
- regSet.rsUnlockUsedReg(genRegMask(reg));
- regSet.rsUnlockUsedReg(RBM_EAX);
-
- instGen(INS_lock);
- if (isAddr)
- {
- sched_AM(INS_cmpxchg, EA_4BYTE, reg, false, location, 0);
- genDoneAddressable(location, addrReg, RegSet::KEEP_REG);
- }
- else
- {
- instEmit_RM_RV(INS_cmpxchg, EA_4BYTE, location, reg, 0);
- genReleaseReg(location);
- }
-
- genReleaseReg(value);
- genReleaseReg(comparand);
-
- // EAX and the value register are both trashed at this point.
- regTracker.rsTrackRegTrash(REG_EAX);
- regTracker.rsTrackRegTrash(reg);
-
- reg = REG_EAX;
-
- genFlagsEqualToNone();
- break;
-#else // not defined(_TARGET_XARCH_)
- NYI("GT_CMPXCHG codegen");
- break;
-#endif
- }
-
- default:
-#ifdef DEBUG
- compiler->gtDispTree(tree);
-#endif
- noway_assert(!"unexpected operator");
- NO_WAY("unexpected operator");
- }
-
- noway_assert(reg != DUMMY_INIT(REG_CORRUPT));
- genCodeForTree_DONE(tree, reg);
-}
-
-/*****************************************************************************
- *
- * Generate code for the given tree. tree->gtRegNum will be set to the
- * register where the tree lives.
- *
- * If 'destReg' is non-zero, we'll do our best to compute the value into a
- * register that is in that register set.
- * Use genComputeReg() if you need the tree in a specific register.
- * Use genCompIntoFreeReg() if the register needs to be written to. Otherwise,
- * the register can only be used for read, but not for write.
- * Use genMakeAddressable() if you only need the tree to be accessible
- * using a complex addressing mode, and do not necessarily need the tree
- * materialized in a register.
- *
- * The GCness of the register will be properly set in gcInfo.gcRegGCrefSetCur/gcInfo.gcRegByrefSetCur.
- *
- * The register will not be marked as used. Use regSet.rsMarkRegUsed() if the
- * register will not be consumed right away and could possibly be spilled.
- */
-
-void CodeGen::genCodeForTree(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg)
-{
-#if 0
- if (compiler->verbose)
- {
- printf("Generating code for tree ");
- Compiler::printTreeID(tree);
- printf(" destReg = 0x%x bestReg = 0x%x\n", destReg, bestReg);
- }
- genStressRegs(tree);
-#endif
-
- noway_assert(tree);
- noway_assert(tree->gtOper != GT_STMT);
- assert(tree->IsNodeProperlySized());
-
-    // When assigning to an enregistered local variable we receive
-    // a hint that we should target the register that is used to
-    // hold the enregistered local variable.
-    // When receiving this hint both the destReg and bestReg masks are set
-    // to the register that is used by the enregistered local variable.
-    //
-    // However, it is possible for a different local variable that targets
-    // the same register to become alive (and later die)
-    // as we descend the expression tree.
-    //
-    // To handle such cases we remove any registers that are alive from
-    // both the destReg and bestReg masks.
- //
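-    // (For example, if destReg and bestReg both name a register that some other local
-    //  variable occupies within this tree, both masks lose that bit and may become
-    //  RBM_NONE, which is treated as "any register" below.)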
- regMaskTP liveMask = genLiveMask(tree);
-
- // This removes any registers used to hold enregistered locals
- // from the destReg and bestReg masks.
- // After this either mask could become 0
- //
- destReg &= ~liveMask;
- bestReg &= ~liveMask;
-
- /* 'destReg' of 0 really means 'any' */
-
- destReg = regSet.rsUseIfZero(destReg, RBM_ALL(tree->TypeGet()));
-
- if (destReg != RBM_ALL(tree->TypeGet()))
- bestReg = regSet.rsUseIfZero(bestReg, destReg);
-
- // Long, float, and double have their own codegen functions
- switch (tree->TypeGet())
- {
-
- case TYP_LONG:
-#if !CPU_HAS_FP_SUPPORT
- case TYP_DOUBLE:
-#endif
- genCodeForTreeLng(tree, destReg, /*avoidReg*/ RBM_NONE);
- return;
-
-#if CPU_HAS_FP_SUPPORT
- case TYP_FLOAT:
- case TYP_DOUBLE:
-
- // For comma nodes, we'll get back here for the last node in the comma list.
- if (tree->gtOper != GT_COMMA)
- {
- genCodeForTreeFlt(tree, RBM_ALLFLOAT, RBM_ALLFLOAT & (destReg | bestReg));
- return;
- }
- break;
-#endif
-
-#ifdef DEBUG
- case TYP_UINT:
- case TYP_ULONG:
- noway_assert(!"These types are only used as markers in GT_CAST nodes");
- break;
-#endif
-
- default:
- break;
- }
-
- /* Is the value already in a register? */
-
- if (tree->gtFlags & GTF_REG_VAL)
- {
- genCodeForTree_REG_VAR1(tree);
- return;
- }
-
- /* We better not have a spilled value here */
-
- noway_assert((tree->gtFlags & GTF_SPILLED) == 0);
-
- /* Figure out what kind of a node we have */
-
- unsigned kind = tree->OperKind();
-
- if (kind & GTK_CONST)
- {
- /* Handle constant nodes */
-
- genCodeForTreeConst(tree, destReg, bestReg);
- }
- else if (kind & GTK_LEAF)
- {
- /* Handle leaf nodes */
-
- genCodeForTreeLeaf(tree, destReg, bestReg);
- }
- else if (kind & GTK_SMPOP)
- {
- /* Handle 'simple' unary/binary operators */
-
- genCodeForTreeSmpOp(tree, destReg, bestReg);
- }
- else
- {
- /* Handle special operators */
-
- genCodeForTreeSpecialOp(tree, destReg, bestReg);
- }
-}
-
-/*****************************************************************************
- *
- * Generate code for all the basic blocks in the function.
- */
-
-#ifdef _PREFAST_
-#pragma warning(push)
-#pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
-#endif
-void CodeGen::genCodeForBBlist()
-{
- unsigned varNum;
- LclVarDsc* varDsc;
-
- unsigned savedStkLvl;
-
-#ifdef DEBUG
- genInterruptibleUsed = true;
- unsigned stmtNum = 0;
- unsigned totalCostEx = 0;
- unsigned totalCostSz = 0;
-
- // You have to be careful if you create basic blocks from now on
- compiler->fgSafeBasicBlockCreation = false;
-
-    // This stress mode is not compatible with fully interruptible GC
- if (genInterruptible && compiler->opts.compStackCheckOnCall)
- {
- compiler->opts.compStackCheckOnCall = false;
- }
-
-    // This stress mode is not compatible with fully interruptible GC
- if (genInterruptible && compiler->opts.compStackCheckOnRet)
- {
- compiler->opts.compStackCheckOnRet = false;
- }
-#endif
-
-    // Prepare the blocks for exception handling codegen: mark the blocks that need labels.
- genPrepForEHCodegen();
-
- assert(!compiler->fgFirstBBScratch ||
- compiler->fgFirstBB == compiler->fgFirstBBScratch); // compiler->fgFirstBBScratch has to be first.
-
- /* Initialize the spill tracking logic */
-
- regSet.rsSpillBeg();
-
-#ifdef DEBUGGING_SUPPORT
- /* Initialize the line# tracking logic */
-
- if (compiler->opts.compScopeInfo)
- {
- siInit();
- }
-#endif
-
-#ifdef _TARGET_X86_
- if (compiler->compTailCallUsed)
- {
- noway_assert(isFramePointerUsed());
- regSet.rsSetRegsModified(RBM_INT_CALLEE_SAVED & ~RBM_FPBASE);
- }
-#endif
-
- if (compiler->opts.compDbgEnC)
- {
- noway_assert(isFramePointerUsed());
- regSet.rsSetRegsModified(RBM_INT_CALLEE_SAVED & ~RBM_FPBASE);
- }
-
- /* If we have any pinvoke calls, we might potentially trash everything */
-
- if (compiler->info.compCallUnmanaged)
- {
- noway_assert(isFramePointerUsed()); // Setup of Pinvoke frame currently requires an EBP style frame
- regSet.rsSetRegsModified(RBM_INT_CALLEE_SAVED & ~RBM_FPBASE);
- }
-
- /* Initialize the pointer tracking code */
-
- gcInfo.gcRegPtrSetInit();
- gcInfo.gcVarPtrSetInit();
-
- /* If any arguments live in registers, mark those regs as such */
-
- for (varNum = 0, varDsc = compiler->lvaTable; varNum < compiler->lvaCount; varNum++, varDsc++)
- {
- /* Is this variable a parameter assigned to a register? */
-
- if (!varDsc->lvIsParam || !varDsc->lvRegister)
- continue;
-
- /* Is the argument live on entry to the method? */
-
- if (!VarSetOps::IsMember(compiler, compiler->fgFirstBB->bbLiveIn, varDsc->lvVarIndex))
- continue;
-
-#if CPU_HAS_FP_SUPPORT
- /* Is this a floating-point argument? */
-
- if (varDsc->IsFloatRegType())
- continue;
-
- noway_assert(!varTypeIsFloating(varDsc->TypeGet()));
-#endif
-
- /* Mark the register as holding the variable */
-
- if (isRegPairType(varDsc->lvType))
- {
- regTracker.rsTrackRegLclVarLng(varDsc->lvRegNum, varNum, true);
-
- if (varDsc->lvOtherReg != REG_STK)
- regTracker.rsTrackRegLclVarLng(varDsc->lvOtherReg, varNum, false);
- }
- else
- {
- regTracker.rsTrackRegLclVar(varDsc->lvRegNum, varNum);
- }
- }
-
- unsigned finallyNesting = 0;
-
- // Make sure a set is allocated for compiler->compCurLife (in the long case), so we can set it to empty without
- // allocation at the start of each basic block.
- VarSetOps::AssignNoCopy(compiler, compiler->compCurLife, VarSetOps::MakeEmpty(compiler));
-
- /*-------------------------------------------------------------------------
- *
- * Walk the basic blocks and generate code for each one
- *
- */
-
- BasicBlock* block;
- BasicBlock* lblk; /* previous block */
-
- for (lblk = NULL, block = compiler->fgFirstBB; block != NULL; lblk = block, block = block->bbNext)
- {
-#ifdef DEBUG
- if (compiler->verbose)
- {
- printf("\n=============== Generating ");
- block->dspBlockHeader(compiler, true, true);
- compiler->fgDispBBLiveness(block);
- }
-#endif // DEBUG
-
- VARSET_TP VARSET_INIT_NOCOPY(liveSet, VarSetOps::UninitVal());
-
- regMaskTP gcrefRegs = 0;
- regMaskTP byrefRegs = 0;
-
- /* Does any other block jump to this point ? */
-
- if (block->bbFlags & BBF_JMP_TARGET)
- {
- /* Someone may jump here, so trash all regs */
-
- regTracker.rsTrackRegClr();
-
- genFlagsEqualToNone();
- }
- else
- {
- /* No jump, but pointers always need to get trashed for proper GC tracking */
-
- regTracker.rsTrackRegClrPtr();
- }
-
- /* No registers are used or locked on entry to a basic block */
-
- regSet.rsMaskUsed = RBM_NONE;
- regSet.rsMaskMult = RBM_NONE;
- regSet.rsMaskLock = RBM_NONE;
-
-        // If we need to reserve registers so that they are not used
-        // by CodeGen in this BasicBlock, we do so here.
-        // On ARM, when we have large frame offsets for locals, we
-        // will have RBM_R10 in the regSet.rsMaskResvd set.
-        // Additionally, if a LocAlloc or alloca is used, RBM_R9 is in
-        // the regSet.rsMaskResvd set, and we lock these registers here.
- //
- if (regSet.rsMaskResvd != RBM_NONE)
- {
- regSet.rsLockReg(regSet.rsMaskResvd);
- regSet.rsSetRegsModified(regSet.rsMaskResvd);
- }
-
- /* Figure out which registers hold variables on entry to this block */
-
- regMaskTP specialUseMask = regSet.rsMaskResvd;
-
- specialUseMask |= doubleAlignOrFramePointerUsed() ? RBM_SPBASE | RBM_FPBASE : RBM_SPBASE;
- regSet.ClearMaskVars();
- VarSetOps::ClearD(compiler, compiler->compCurLife);
- VarSetOps::Assign(compiler, liveSet, block->bbLiveIn);
-
-#if FEATURE_STACK_FP_X87
- VarSetOps::AssignNoCopy(compiler, genFPregVars,
- VarSetOps::Intersection(compiler, liveSet, compiler->optAllFPregVars));
- genFPregCnt = VarSetOps::Count(compiler, genFPregVars);
- genFPdeadRegCnt = 0;
-#endif
- gcInfo.gcResetForBB();
-
- genUpdateLife(liveSet); // This updates regSet.rsMaskVars with bits from any enregistered LclVars
-#if FEATURE_STACK_FP_X87
- VarSetOps::IntersectionD(compiler, liveSet, compiler->optAllNonFPvars);
-#endif
-
- // We should never enregister variables in any of the specialUseMask registers
- noway_assert((specialUseMask & regSet.rsMaskVars) == 0);
-
- VARSET_ITER_INIT(compiler, iter, liveSet, varIndex);
- while (iter.NextElem(compiler, &varIndex))
- {
- varNum = compiler->lvaTrackedToVarNum[varIndex];
- varDsc = compiler->lvaTable + varNum;
- assert(varDsc->lvTracked);
-            /* Ignore the variable if it's not in a reg */
-
- if (!varDsc->lvRegister)
- continue;
- if (isFloatRegType(varDsc->lvType))
- continue;
-
- /* Get hold of the index and the bitmask for the variable */
- regNumber regNum = varDsc->lvRegNum;
- regMaskTP regMask = genRegMask(regNum);
-
- regSet.AddMaskVars(regMask);
-
- if (varDsc->lvType == TYP_REF)
- gcrefRegs |= regMask;
- else if (varDsc->lvType == TYP_BYREF)
- byrefRegs |= regMask;
-
- /* Mark the register holding the variable as such */
-
- if (varTypeIsMultiReg(varDsc))
- {
- regTracker.rsTrackRegLclVarLng(regNum, varNum, true);
- if (varDsc->lvOtherReg != REG_STK)
- {
- regTracker.rsTrackRegLclVarLng(varDsc->lvOtherReg, varNum, false);
- regMask |= genRegMask(varDsc->lvOtherReg);
- }
- }
- else
- {
- regTracker.rsTrackRegLclVar(regNum, varNum);
- }
- }
-
- gcInfo.gcPtrArgCnt = 0;
-
-#if FEATURE_STACK_FP_X87
-
- regSet.rsMaskUsedFloat = regSet.rsMaskRegVarFloat = regSet.rsMaskLockedFloat = RBM_NONE;
-
- memset(regSet.genUsedRegsFloat, 0, sizeof(regSet.genUsedRegsFloat));
- memset(regSet.genRegVarsFloat, 0, sizeof(regSet.genRegVarsFloat));
-
- // Setup fp state on block entry
- genSetupStateStackFP(block);
-
-#ifdef DEBUG
- if (compiler->verbose)
- {
- JitDumpFPState();
- }
-#endif // DEBUG
-#endif // FEATURE_STACK_FP_X87
-
- /* Make sure we keep track of what pointers are live */
-
- noway_assert((gcrefRegs & byrefRegs) == 0); // Something can't be both a gcref and a byref
- gcInfo.gcRegGCrefSetCur = gcrefRegs;
- gcInfo.gcRegByrefSetCur = byrefRegs;
-
- /* Blocks with handlerGetsXcptnObj()==true use GT_CATCH_ARG to
- represent the exception object (TYP_REF).
- We mark REG_EXCEPTION_OBJECT as holding a GC object on entry
-           to the block; it will be the first thing evaluated
- (thanks to GTF_ORDER_SIDEEFF).
- */
-
- if (handlerGetsXcptnObj(block->bbCatchTyp))
- {
- GenTreePtr firstStmt = block->FirstNonPhiDef();
- if (firstStmt != NULL)
- {
- GenTreePtr firstTree = firstStmt->gtStmt.gtStmtExpr;
- if (compiler->gtHasCatchArg(firstTree))
- {
- gcInfo.gcRegGCrefSetCur |= RBM_EXCEPTION_OBJECT;
- }
- }
- }
-
- /* Start a new code output block */
- CLANG_FORMAT_COMMENT_ANCHOR;
-
-#if FEATURE_EH_FUNCLETS
-#if defined(_TARGET_ARM_)
- // If this block is the target of a finally return, we need to add a preceding NOP, in the same EH region,
- // so the unwinder doesn't get confused by our "movw lr, xxx; movt lr, xxx; b Lyyy" calling convention that
- // calls the funclet during non-exceptional control flow.
- if (block->bbFlags & BBF_FINALLY_TARGET)
- {
- assert(block->bbFlags & BBF_JMP_TARGET);
-
-#ifdef DEBUG
- if (compiler->verbose)
- {
- printf("\nEmitting finally target NOP predecessor for BB%02u\n", block->bbNum);
- }
-#endif
- // Create a label that we'll use for computing the start of an EH region, if this block is
- // at the beginning of such a region. If we used the existing bbEmitCookie as is for
- // determining the EH regions, then this NOP would end up outside of the region, if this
- // block starts an EH region. If we pointed the existing bbEmitCookie here, then the NOP
- // would be executed, which we would prefer not to do.
-
- block->bbUnwindNopEmitCookie =
- getEmitter()->emitAddLabel(gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur);
-
- instGen(INS_nop);
- }
-#endif // defined(_TARGET_ARM_)
-
- genUpdateCurrentFunclet(block);
-#endif // FEATURE_EH_FUNCLETS
-
-#ifdef _TARGET_XARCH_
- if (genAlignLoops && block->bbFlags & BBF_LOOP_HEAD)
- {
- getEmitter()->emitLoopAlign();
- }
-#endif
-
-#ifdef DEBUG
- if (compiler->opts.dspCode)
- printf("\n L_M%03u_BB%02u:\n", Compiler::s_compMethodsCount, block->bbNum);
-#endif
-
- block->bbEmitCookie = NULL;
-
- if (block->bbFlags & (BBF_JMP_TARGET | BBF_HAS_LABEL))
- {
- /* Mark a label and update the current set of live GC refs */
-
- block->bbEmitCookie =
- getEmitter()->emitAddLabel(gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur,
-#if FEATURE_EH_FUNCLETS && defined(_TARGET_ARM_)
- /*isFinally*/ block->bbFlags & BBF_FINALLY_TARGET
-#else
- FALSE
-#endif
- );
- }
-
- if (block == compiler->fgFirstColdBlock)
- {
-#ifdef DEBUG
- if (compiler->verbose)
- {
- printf("\nThis is the start of the cold region of the method\n");
- }
-#endif
- // We should never have a block that falls through into the Cold section
- noway_assert(!lblk->bbFallsThrough());
-
- // We require the block that starts the Cold section to have a label
- noway_assert(block->bbEmitCookie);
- getEmitter()->emitSetFirstColdIGCookie(block->bbEmitCookie);
- }
-
- /* Both stacks are always empty on entry to a basic block */
-
- genStackLevel = 0;
-#if FEATURE_STACK_FP_X87
- genResetFPstkLevel();
-#endif // FEATURE_STACK_FP_X87
-
-#if !FEATURE_FIXED_OUT_ARGS
- /* Check for inserted throw blocks and adjust genStackLevel */
-
- if (!isFramePointerUsed() && compiler->fgIsThrowHlpBlk(block))
- {
- noway_assert(block->bbFlags & BBF_JMP_TARGET);
-
- genStackLevel = compiler->fgThrowHlpBlkStkLevel(block) * sizeof(int);
-
- if (genStackLevel)
- {
-#ifdef _TARGET_X86_
- getEmitter()->emitMarkStackLvl(genStackLevel);
- inst_RV_IV(INS_add, REG_SPBASE, genStackLevel, EA_PTRSIZE);
- genStackLevel = 0;
-#else // _TARGET_X86_
- NYI("Need emitMarkStackLvl()");
-#endif // _TARGET_X86_
- }
- }
-#endif // !FEATURE_FIXED_OUT_ARGS
-
- savedStkLvl = genStackLevel;
-
- /* Tell everyone which basic block we're working on */
-
- compiler->compCurBB = block;
-
-#ifdef DEBUGGING_SUPPORT
- siBeginBlock(block);
-
- // BBF_INTERNAL blocks don't correspond to any single IL instruction.
- if (compiler->opts.compDbgInfo && (block->bbFlags & BBF_INTERNAL) && block != compiler->fgFirstBB)
- genIPmappingAdd((IL_OFFSETX)ICorDebugInfo::NO_MAPPING, true);
-
- bool firstMapping = true;
-#endif // DEBUGGING_SUPPORT
-
- /*---------------------------------------------------------------------
- *
- * Generate code for each statement-tree in the block
- *
- */
- CLANG_FORMAT_COMMENT_ANCHOR;
-
-#if FEATURE_EH_FUNCLETS
- if (block->bbFlags & BBF_FUNCLET_BEG)
- {
- genReserveFuncletProlog(block);
- }
-#endif // FEATURE_EH_FUNCLETS
-
- for (GenTreePtr stmt = block->FirstNonPhiDef(); stmt; stmt = stmt->gtNext)
- {
- noway_assert(stmt->gtOper == GT_STMT);
-
-#if defined(DEBUGGING_SUPPORT)
-
- /* Do we have a new IL-offset ? */
-
- if (stmt->gtStmt.gtStmtILoffsx != BAD_IL_OFFSET)
- {
- /* Create and append a new IP-mapping entry */
-                genIPmappingAdd(stmt->gtStmt.gtStmtILoffsx, firstMapping);
- firstMapping = false;
- }
-
-#endif // DEBUGGING_SUPPORT
-
-#ifdef DEBUG
- if (stmt->gtStmt.gtStmtLastILoffs != BAD_IL_OFFSET)
- {
- noway_assert(stmt->gtStmt.gtStmtLastILoffs <= compiler->info.compILCodeSize);
- if (compiler->opts.dspCode && compiler->opts.dspInstrs)
- {
- while (genCurDispOffset <= stmt->gtStmt.gtStmtLastILoffs)
- {
- genCurDispOffset += dumpSingleInstr(compiler->info.compCode, genCurDispOffset, "> ");
- }
- }
- }
-#endif // DEBUG
-
- /* Get hold of the statement tree */
- GenTreePtr tree = stmt->gtStmt.gtStmtExpr;
-
-#ifdef DEBUG
- stmtNum++;
- if (compiler->verbose)
- {
- printf("\nGenerating BB%02u, stmt %u\t\t", block->bbNum, stmtNum);
- printf("Holding variables: ");
- dspRegMask(regSet.rsMaskVars);
- printf("\n\n");
- compiler->gtDispTree(compiler->opts.compDbgInfo ? stmt : tree);
- printf("\n");
-#if FEATURE_STACK_FP_X87
- JitDumpFPState();
-#endif
-
- printf("Execution Order:\n");
- for (GenTreePtr treeNode = stmt->gtStmt.gtStmtList; treeNode != NULL; treeNode = treeNode->gtNext)
- {
- compiler->gtDispTree(treeNode, 0, NULL, true);
- }
- printf("\n");
- }
- totalCostEx += (stmt->gtCostEx * block->getBBWeight(compiler));
- totalCostSz += stmt->gtCostSz;
-#endif // DEBUG
-
- compiler->compCurStmt = stmt;
-
- compiler->compCurLifeTree = NULL;
- switch (tree->gtOper)
- {
- case GT_CALL:
-                    // Managed Retval under a managed debugger - we need to make sure that the returned
-                    // ref-type is reported as alive, even though it is not used within the caller, for the
-                    // managed debugger's sake. So consider the return value of the method as used when
-                    // generating debuggable code.
- genCodeForCall(tree, compiler->opts.MinOpts() || compiler->opts.compDbgCode);
- genUpdateLife(tree);
- gcInfo.gcMarkRegSetNpt(RBM_INTRET);
- break;
-
- case GT_IND:
- case GT_NULLCHECK:
-
- // Just do the side effects
- genEvalSideEffects(tree);
- break;
-
- default:
- /* Generate code for the tree */
-
- genCodeForTree(tree, 0);
- break;
- }
-
- regSet.rsSpillChk();
-
- /* The value of the tree isn't used, unless it's a return stmt */
-
- if (tree->gtOper != GT_RETURN)
- gcInfo.gcMarkRegPtrVal(tree);
-
-#if FEATURE_STACK_FP_X87
- genEndOfStatement();
-#endif
-
-#ifdef DEBUG
- /* Make sure we didn't bungle pointer register tracking */
-
- regMaskTP ptrRegs = (gcInfo.gcRegGCrefSetCur | gcInfo.gcRegByrefSetCur);
- regMaskTP nonVarPtrRegs = ptrRegs & ~regSet.rsMaskVars;
-
- // If return is a GC-type, clear it. Note that if a common
- // epilog is generated (compiler->genReturnBB) it has a void return
- // even though we might return a ref. We can't use the compRetType
- // as the determiner because something we are tracking as a byref
-            // might be used as a return value of an int function (which is legal)
- if (tree->gtOper == GT_RETURN && (varTypeIsGC(compiler->info.compRetType) ||
- (tree->gtOp.gtOp1 != 0 && varTypeIsGC(tree->gtOp.gtOp1->TypeGet()))))
- {
- nonVarPtrRegs &= ~RBM_INTRET;
- }
-
- // When profiling, the first statement in a catch block will be the
- // harmless "inc" instruction (does not interfere with the exception
- // object).
-
- if ((compiler->opts.eeFlags & CORJIT_FLG_BBINSTR) && (stmt == block->bbTreeList) &&
- (block->bbCatchTyp && handlerGetsXcptnObj(block->bbCatchTyp)))
- {
- nonVarPtrRegs &= ~RBM_EXCEPTION_OBJECT;
- }
-
- if (nonVarPtrRegs)
- {
- printf("Regset after tree=");
- Compiler::printTreeID(tree);
- printf(" BB%02u gcr=", block->bbNum);
- printRegMaskInt(gcInfo.gcRegGCrefSetCur & ~regSet.rsMaskVars);
- compiler->getEmitter()->emitDispRegSet(gcInfo.gcRegGCrefSetCur & ~regSet.rsMaskVars);
- printf(", byr=");
- printRegMaskInt(gcInfo.gcRegByrefSetCur & ~regSet.rsMaskVars);
- compiler->getEmitter()->emitDispRegSet(gcInfo.gcRegByrefSetCur & ~regSet.rsMaskVars);
- printf(", regVars=");
- printRegMaskInt(regSet.rsMaskVars);
- compiler->getEmitter()->emitDispRegSet(regSet.rsMaskVars);
- printf("\n");
- }
-
- noway_assert(nonVarPtrRegs == 0);
-#endif // DEBUG
-
- noway_assert(stmt->gtOper == GT_STMT);
-
-#ifdef DEBUGGING_SUPPORT
- genEnsureCodeEmitted(stmt->gtStmt.gtStmtILoffsx);
-#endif
-
- } //-------- END-FOR each statement-tree of the current block ---------
-
-#ifdef DEBUGGING_SUPPORT
-
- if (compiler->opts.compScopeInfo && (compiler->info.compVarScopesCount > 0))
- {
- siEndBlock(block);
-
- /* Is this the last block, and are there any open scopes left ? */
-
- bool isLastBlockProcessed = (block->bbNext == NULL);
- if (block->isBBCallAlwaysPair())
- {
- isLastBlockProcessed = (block->bbNext->bbNext == NULL);
- }
-
- if (isLastBlockProcessed && siOpenScopeList.scNext)
- {
- /* This assert no longer holds, because we may insert a throw
- block to demarcate the end of a try or finally region when they
- are at the end of the method. It would be nice if we could fix
- our code so that this throw block will no longer be necessary. */
-
- // noway_assert(block->bbCodeOffsEnd != compiler->info.compILCodeSize);
-
- siCloseAllOpenScopes();
- }
- }
-
-#endif // DEBUGGING_SUPPORT
-
- genStackLevel -= savedStkLvl;
-
- gcInfo.gcMarkRegSetNpt(gcrefRegs | byrefRegs);
-
- if (!VarSetOps::Equal(compiler, compiler->compCurLife, block->bbLiveOut))
- compiler->genChangeLife(block->bbLiveOut DEBUGARG(NULL));
-
- /* Both stacks should always be empty on exit from a basic block */
-
- noway_assert(genStackLevel == 0);
-#if FEATURE_STACK_FP_X87
- noway_assert(genGetFPstkLevel() == 0);
-
- // Do the FPState matching that may have to be done
- genCodeForEndBlockTransitionStackFP(block);
-#endif
-
- noway_assert(genFullPtrRegMap == false || gcInfo.gcPtrArgCnt == 0);
-
- /* Do we need to generate a jump or return? */
-
- switch (block->bbJumpKind)
- {
- case BBJ_ALWAYS:
- inst_JMP(EJ_jmp, block->bbJumpDest);
- break;
-
- case BBJ_RETURN:
- genExitCode(block);
- break;
-
- case BBJ_THROW:
- // If we have a throw at the end of a function or funclet, we need to emit another instruction
- // afterwards to help the OS unwinder determine the correct context during unwind.
- // We insert an unexecuted breakpoint instruction in several situations
- // following a throw instruction:
- // 1. If the throw is the last instruction of the function or funclet. This helps
- // the OS unwinder determine the correct context during an unwind from the
- // thrown exception.
-                //    2. If this is the last block of the hot section.
- // 3. If the subsequent block is a special throw block.
- if ((block->bbNext == NULL)
-#if FEATURE_EH_FUNCLETS
- || (block->bbNext->bbFlags & BBF_FUNCLET_BEG)
-#endif // FEATURE_EH_FUNCLETS
- || (!isFramePointerUsed() && compiler->fgIsThrowHlpBlk(block->bbNext)) ||
- block->bbNext == compiler->fgFirstColdBlock)
- {
- instGen(INS_BREAKPOINT); // This should never get executed
- }
-
- break;
-
- case BBJ_CALLFINALLY:
-
-#if defined(_TARGET_X86_)
-
- /* If we are about to invoke a finally locally from a try block,
- we have to set the hidden slot corresponding to the finally's
- nesting level. When invoked in response to an exception, the
- EE usually does it.
-
- We must have : BBJ_CALLFINALLY followed by a BBJ_ALWAYS.
-
- This code depends on this order not being messed up.
- We will emit :
- mov [ebp-(n+1)],0
- mov [ebp- n ],0xFC
- push &step
- jmp finallyBlock
-
- step: mov [ebp- n ],0
- jmp leaveTarget
- leaveTarget:
- */
-
- noway_assert(isFramePointerUsed());
-
- // Get the nesting level which contains the finally
- compiler->fgGetNestingLevel(block, &finallyNesting);
-
- // The last slot is reserved for ICodeManager::FixContext(ppEndRegion)
- unsigned filterEndOffsetSlotOffs;
- filterEndOffsetSlotOffs =
- (unsigned)(compiler->lvaLclSize(compiler->lvaShadowSPslotsVar) - (sizeof(void*)));
-
- unsigned curNestingSlotOffs;
- curNestingSlotOffs = (unsigned)(filterEndOffsetSlotOffs - ((finallyNesting + 1) * sizeof(void*)));
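-                // Illustrative example (assumed values, not taken from this method): with 4-byte
-                // pointers, a 16-byte shadow-SP area and finallyNesting == 0, filterEndOffsetSlotOffs
-                // is 12 and curNestingSlotOffs is 8, i.e. the slot just below the reserved
-                // FixContext slot.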
-
- // Zero out the slot for the next nesting level
- instGen_Store_Imm_Into_Lcl(TYP_I_IMPL, EA_PTRSIZE, 0, compiler->lvaShadowSPslotsVar,
- curNestingSlotOffs - sizeof(void*));
-
- instGen_Store_Imm_Into_Lcl(TYP_I_IMPL, EA_PTRSIZE, LCL_FINALLY_MARK, compiler->lvaShadowSPslotsVar,
- curNestingSlotOffs);
-
- // Now push the address of where the finally funclet should
- // return to directly.
- if (!(block->bbFlags & BBF_RETLESS_CALL))
- {
- assert(block->isBBCallAlwaysPair());
- getEmitter()->emitIns_J(INS_push_hide, block->bbNext->bbJumpDest);
- }
- else
- {
- // EE expects a DWORD, so we give him 0
- inst_IV(INS_push_hide, 0);
- }
-
- // Jump to the finally BB
- inst_JMP(EJ_jmp, block->bbJumpDest);
-
-#elif defined(_TARGET_ARM_)
-
- // Now set REG_LR to the address of where the finally funclet should
- // return to directly.
-
- BasicBlock* bbFinallyRet;
- bbFinallyRet = NULL;
-
- // We don't have retless calls, since we use the BBJ_ALWAYS to point at a NOP pad where
- // we would have otherwise created retless calls.
- assert(block->isBBCallAlwaysPair());
-
- assert(block->bbNext != NULL);
- assert(block->bbNext->bbJumpKind == BBJ_ALWAYS);
- assert(block->bbNext->bbJumpDest != NULL);
- assert(block->bbNext->bbJumpDest->bbFlags & BBF_FINALLY_TARGET);
-
- bbFinallyRet = block->bbNext->bbJumpDest;
- bbFinallyRet->bbFlags |= BBF_JMP_TARGET;
-
-#if 0
-                    // We don't know the address of the finally funclet yet. But adr requires that the offset
-                    // to the finally funclet from the current IP be within 4095 bytes. So this code is disabled
-                    // for now.
- getEmitter()->emitIns_J_R (INS_adr,
- EA_4BYTE,
- bbFinallyRet,
- REG_LR);
-#else // 0
- // Load the address where the finally funclet should return into LR.
- // The funclet prolog/epilog will do "push {lr}" / "pop {pc}" to do
- // the return.
- getEmitter()->emitIns_R_L(INS_movw, EA_4BYTE_DSP_RELOC, bbFinallyRet, REG_LR);
- getEmitter()->emitIns_R_L(INS_movt, EA_4BYTE_DSP_RELOC, bbFinallyRet, REG_LR);
- regTracker.rsTrackRegTrash(REG_LR);
-#endif // 0
-
- // Jump to the finally BB
- inst_JMP(EJ_jmp, block->bbJumpDest);
-#else
- NYI("TARGET");
-#endif
-
- // The BBJ_ALWAYS is used because the BBJ_CALLFINALLY can't point to the
- // jump target using bbJumpDest - that is already used to point
- // to the finally block. So just skip past the BBJ_ALWAYS unless the
- // block is RETLESS.
- if (!(block->bbFlags & BBF_RETLESS_CALL))
- {
- assert(block->isBBCallAlwaysPair());
-
- lblk = block;
- block = block->bbNext;
- }
- break;
-
-#ifdef _TARGET_ARM_
-
- case BBJ_EHCATCHRET:
- // set r0 to the address the VM should return to after the catch
- getEmitter()->emitIns_R_L(INS_movw, EA_4BYTE_DSP_RELOC, block->bbJumpDest, REG_R0);
- getEmitter()->emitIns_R_L(INS_movt, EA_4BYTE_DSP_RELOC, block->bbJumpDest, REG_R0);
- regTracker.rsTrackRegTrash(REG_R0);
-
- __fallthrough;
-
- case BBJ_EHFINALLYRET:
- case BBJ_EHFILTERRET:
- genReserveFuncletEpilog(block);
- break;
-
-#else // _TARGET_ARM_
-
- case BBJ_EHFINALLYRET:
- case BBJ_EHFILTERRET:
- case BBJ_EHCATCHRET:
- break;
-
-#endif // _TARGET_ARM_
-
- case BBJ_NONE:
- case BBJ_COND:
- case BBJ_SWITCH:
- break;
-
- default:
- noway_assert(!"Unexpected bbJumpKind");
- break;
- }
-
-#ifdef DEBUG
- compiler->compCurBB = 0;
-#endif
-
- } //------------------ END-FOR each block of the method -------------------
-
- /* Nothing is live at this point */
- genUpdateLife(VarSetOps::MakeEmpty(compiler));
-
- /* Finalize the spill tracking logic */
-
- regSet.rsSpillEnd();
-
- /* Finalize the temp tracking logic */
-
- compiler->tmpEnd();
-
-#ifdef DEBUG
- if (compiler->verbose)
- {
- printf("\n# ");
- printf("totalCostEx = %6d, totalCostSz = %5d ", totalCostEx, totalCostSz);
- printf("%s\n", compiler->info.compFullName);
- }
-#endif
-}
-#ifdef _PREFAST_
-#pragma warning(pop)
-#endif
-
-/*****************************************************************************
- *
- * Generate code for a long operation.
- * needReg is a recommendation of which registers to use for the tree.
- * For partially enregistered longs, the tree will be marked as GTF_REG_VAL
- * without loading the stack part into a register. Note that only leaf
- * nodes (or if gtEffectiveVal() == leaf node) may be marked as partially
- * enregistered so that we can know the memory location of the other half.
- */
-
-#ifdef _PREFAST_
-#pragma warning(push)
-#pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
-#endif
-void CodeGen::genCodeForTreeLng(GenTreePtr tree, regMaskTP needReg, regMaskTP avoidReg)
-{
- genTreeOps oper;
- unsigned kind;
-
- regPairNo regPair = DUMMY_INIT(REG_PAIR_CORRUPT);
- regMaskTP addrReg;
- regNumber regLo;
- regNumber regHi;
-
- noway_assert(tree);
- noway_assert(tree->gtOper != GT_STMT);
- noway_assert(genActualType(tree->gtType) == TYP_LONG);
-
- /* Figure out what kind of a node we have */
-
- oper = tree->OperGet();
- kind = tree->OperKind();
-
- if (tree->gtFlags & GTF_REG_VAL)
- {
- REG_VAR_LONG:
- regPair = tree->gtRegPair;
-
- gcInfo.gcMarkRegSetNpt(genRegPairMask(regPair));
-
- goto DONE;
- }
-
- /* Is this a constant node? */
-
- if (kind & GTK_CONST)
- {
- __int64 lval;
-
- /* Pick a register pair for the value */
-
- regPair = regSet.rsPickRegPair(needReg);
-
- /* Load the value into the registers */
- CLANG_FORMAT_COMMENT_ANCHOR;
-
-#if !CPU_HAS_FP_SUPPORT
- if (oper == GT_CNS_DBL)
- {
- noway_assert(sizeof(__int64) == sizeof(double));
-
- noway_assert(sizeof(tree->gtLngCon.gtLconVal) == sizeof(tree->gtDblCon.gtDconVal));
-
- lval = *(__int64*)(&tree->gtDblCon.gtDconVal);
- }
- else
-#endif
- {
- noway_assert(oper == GT_CNS_LNG);
-
- lval = tree->gtLngCon.gtLconVal;
- }
-
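-        // The 64-bit constant is materialized as two 32-bit halves; e.g. (illustrative value, not
-        // from the source) 0x1122334455667788 puts 0x55667788 in the low register and 0x11223344
-        // in the high register.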
- genSetRegToIcon(genRegPairLo(regPair), int(lval));
- genSetRegToIcon(genRegPairHi(regPair), int(lval >> 32));
- goto DONE;
- }
-
- /* Is this a leaf node? */
-
- if (kind & GTK_LEAF)
- {
- switch (oper)
- {
- case GT_LCL_VAR:
-
-#if REDUNDANT_LOAD
-
- /* This case has to consider the case in which an int64 LCL_VAR
- * may both be enregistered and also have a cached copy of itself
- * in a different set of registers.
- * We want to return the registers that have the most in common
- * with the needReg mask
- */
-
- /* Does the var have a copy of itself in the cached registers?
- * And are these cached registers both free?
- * If so use these registers if they match any needReg.
- */
-
- regPair = regTracker.rsLclIsInRegPair(tree->gtLclVarCommon.gtLclNum);
-
- if ((regPair != REG_PAIR_NONE) && ((regSet.rsRegMaskFree() & needReg) == needReg) &&
- ((genRegPairMask(regPair) & needReg) != RBM_NONE))
- {
- goto DONE;
- }
-
- /* Does the variable live in a register?
- * If so use these registers.
- */
- if (genMarkLclVar(tree))
- goto REG_VAR_LONG;
-
- /* If tree is not an enregistered variable then
-                 * be sure to use any cached register that contains
- * a copy of this local variable
- */
- if (regPair != REG_PAIR_NONE)
- {
- goto DONE;
- }
-#endif
- goto MEM_LEAF;
-
- case GT_LCL_FLD:
-
- // We only use GT_LCL_FLD for lvDoNotEnregister vars, so we don't have
- // to worry about it being enregistered.
- noway_assert(compiler->lvaTable[tree->gtLclFld.gtLclNum].lvRegister == 0);
- goto MEM_LEAF;
-
- case GT_CLS_VAR:
- MEM_LEAF:
-
- /* Pick a register pair for the value */
-
- regPair = regSet.rsPickRegPair(needReg);
-
- /* Load the value into the registers */
-
- instruction loadIns;
-
- loadIns = ins_Load(TYP_INT); // INS_ldr
- regLo = genRegPairLo(regPair);
- regHi = genRegPairHi(regPair);
-
-#if CPU_LOAD_STORE_ARCH
- {
- regNumber regAddr = regSet.rsGrabReg(RBM_ALLINT);
- inst_RV_TT(INS_lea, regAddr, tree, 0);
- regTracker.rsTrackRegTrash(regAddr);
-
- if (regLo != regAddr)
- {
- // assert(regLo != regAddr); // forced by if statement
- getEmitter()->emitIns_R_R_I(loadIns, EA_4BYTE, regLo, regAddr, 0);
- getEmitter()->emitIns_R_R_I(loadIns, EA_4BYTE, regHi, regAddr, 4);
- }
- else
- {
- // assert(regHi != regAddr); // implied by regpair property and the if statement
- getEmitter()->emitIns_R_R_I(loadIns, EA_4BYTE, regHi, regAddr, 4);
- getEmitter()->emitIns_R_R_I(loadIns, EA_4BYTE, regLo, regAddr, 0);
- }
- }
-#else
- inst_RV_TT(loadIns, regLo, tree, 0);
- inst_RV_TT(loadIns, regHi, tree, 4);
-#endif
-
-#ifdef _TARGET_ARM_
- if ((oper == GT_CLS_VAR) && (tree->gtFlags & GTF_IND_VOLATILE))
- {
- // Emit a memory barrier instruction after the load
- instGen_MemoryBarrier();
- }
-#endif
-
- regTracker.rsTrackRegTrash(regLo);
- regTracker.rsTrackRegTrash(regHi);
-
- goto DONE;
-
- default:
-#ifdef DEBUG
- compiler->gtDispTree(tree);
-#endif
- noway_assert(!"unexpected leaf");
- }
- }
-
- /* Is it a 'simple' unary/binary operator? */
-
- if (kind & GTK_SMPOP)
- {
- instruction insLo;
- instruction insHi;
- bool doLo;
- bool doHi;
- bool setCarry = false;
- int helper;
-
- GenTreePtr op1 = tree->gtOp.gtOp1;
- GenTreePtr op2 = tree->gtGetOp2();
-
- switch (oper)
- {
- case GT_ASG:
- {
-#ifdef DEBUGGING_SUPPORT
- unsigned lclVarNum = compiler->lvaCount;
- unsigned lclVarILoffs = DUMMY_INIT(0);
-#endif
-
- /* Is the target a local ? */
-
- if (op1->gtOper == GT_LCL_VAR)
- {
- unsigned varNum = op1->gtLclVarCommon.gtLclNum;
- LclVarDsc* varDsc;
-
- noway_assert(varNum < compiler->lvaCount);
- varDsc = compiler->lvaTable + varNum;
-
- // No dead stores, (with min opts we may have dead stores)
- noway_assert(!varDsc->lvTracked || compiler->opts.MinOpts() || !(op1->gtFlags & GTF_VAR_DEATH));
-
-#ifdef DEBUGGING_SUPPORT
- /* For non-debuggable code, every definition of a lcl-var has
- * to be checked to see if we need to open a new scope for it.
- * Remember the local var info to call siCheckVarScope
- * AFTER codegen of the assignment.
- */
- if (compiler->opts.compScopeInfo && !compiler->opts.compDbgCode &&
- (compiler->info.compVarScopesCount > 0))
- {
- lclVarNum = varNum;
- lclVarILoffs = op1->gtLclVar.gtLclILoffs;
- }
-#endif
-
- /* Has the variable been assigned to a register (pair) ? */
-
- if (genMarkLclVar(op1))
- {
- noway_assert(op1->gtFlags & GTF_REG_VAL);
- regPair = op1->gtRegPair;
- regLo = genRegPairLo(regPair);
- regHi = genRegPairHi(regPair);
- noway_assert(regLo != regHi);
-
- /* Is the value being assigned a constant? */
-
- if (op2->gtOper == GT_CNS_LNG)
- {
- /* Move the value into the target */
-
- genMakeRegPairAvailable(regPair);
-
- instruction ins;
- if (regLo == REG_STK)
- {
- ins = ins_Store(TYP_INT);
- }
- else
- {
-                                // Always store the stack half first (that way, if the store needs to
-                                // grab a register, it cannot clobber regLo)
- if (regHi == REG_STK)
- {
- inst_TT_IV(ins_Store(TYP_INT), op1, (int)(op2->gtLngCon.gtLconVal >> 32), 4);
- }
- ins = INS_mov;
- }
- inst_TT_IV(ins, op1, (int)(op2->gtLngCon.gtLconVal), 0);
-
- // The REG_STK case has already been handled
- if (regHi != REG_STK)
- {
- ins = INS_mov;
- inst_TT_IV(ins, op1, (int)(op2->gtLngCon.gtLconVal >> 32), 4);
- }
-
- goto DONE_ASSG_REGS;
- }
-
- /* Compute the RHS into desired register pair */
-
- if (regHi != REG_STK)
- {
- genComputeRegPair(op2, regPair, avoidReg, RegSet::KEEP_REG);
- noway_assert(op2->gtFlags & GTF_REG_VAL);
- noway_assert(op2->gtRegPair == regPair);
- }
- else
- {
- regPairNo curPair;
- regNumber curLo;
- regNumber curHi;
-
- genComputeRegPair(op2, REG_PAIR_NONE, avoidReg, RegSet::KEEP_REG);
-
- noway_assert(op2->gtFlags & GTF_REG_VAL);
-
- curPair = op2->gtRegPair;
- curLo = genRegPairLo(curPair);
- curHi = genRegPairHi(curPair);
-
- /* move high first, target is on stack */
- inst_TT_RV(ins_Store(TYP_INT), op1, curHi, 4);
-
- if (regLo != curLo)
- {
- if ((regSet.rsMaskUsed & genRegMask(regLo)) && (regLo != curHi))
- regSet.rsSpillReg(regLo);
- inst_RV_RV(INS_mov, regLo, curLo, TYP_LONG);
- regTracker.rsTrackRegCopy(regLo, curLo);
- }
- }
-
- genReleaseRegPair(op2);
- goto DONE_ASSG_REGS;
- }
- }
-
- /* Is the value being assigned a constant? */
-
- if (op2->gtOper == GT_CNS_LNG)
- {
- /* Make the target addressable */
-
- addrReg = genMakeAddressable(op1, needReg, RegSet::KEEP_REG);
-
- /* Move the value into the target */
-
- inst_TT_IV(ins_Store(TYP_INT), op1, (int)(op2->gtLngCon.gtLconVal), 0);
- inst_TT_IV(ins_Store(TYP_INT), op1, (int)(op2->gtLngCon.gtLconVal >> 32), 4);
-
- genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);
-
- goto LAsgExit;
- }
-
-#if 0
-            /* Catch a case where we can avoid generating op reg, mem. Better pairing
- * from
- * mov regHi, mem
- * op regHi, reg
- *
- * To avoid problems with order of evaluation, only do this if op2 is
- * a non-enregistered local variable
- */
-
- if (GenTree::OperIsCommutative(oper) &&
- op1->gtOper == GT_LCL_VAR &&
- op2->gtOper == GT_LCL_VAR)
- {
- regPair = regTracker.rsLclIsInRegPair(op2->gtLclVarCommon.gtLclNum);
-
- /* Is op2 a non-enregistered local variable? */
- if (regPair == REG_PAIR_NONE)
- {
- regPair = regTracker.rsLclIsInRegPair(op1->gtLclVarCommon.gtLclNum);
-
- /* Is op1 an enregistered local variable? */
- if (regPair != REG_PAIR_NONE)
- {
- /* Swap the operands */
- GenTreePtr op = op1;
- op1 = op2;
- op2 = op;
- }
- }
- }
-#endif
-
- /* Eliminate worthless assignment "lcl = lcl" */
-
- if (op2->gtOper == GT_LCL_VAR && op1->gtOper == GT_LCL_VAR &&
- op2->gtLclVarCommon.gtLclNum == op1->gtLclVarCommon.gtLclNum)
- {
- genUpdateLife(op2);
- goto LAsgExit;
- }
-
- if (op2->gtOper == GT_CAST && TYP_ULONG == op2->CastToType() && op2->CastFromType() <= TYP_INT &&
- // op1,op2 need to be materialized in the correct order.
- (tree->gtFlags & GTF_REVERSE_OPS))
- {
- /* Generate the small RHS into a register pair */
-
- GenTreePtr smallOpr = op2->gtOp.gtOp1;
-
- genComputeReg(smallOpr, 0, RegSet::ANY_REG, RegSet::KEEP_REG);
-
- /* Make the target addressable */
-
- addrReg = genMakeAddressable(op1, 0, RegSet::KEEP_REG, true);
-
- /* Make sure everything is still addressable */
-
- genRecoverReg(smallOpr, 0, RegSet::KEEP_REG);
- noway_assert(smallOpr->gtFlags & GTF_REG_VAL);
- regHi = smallOpr->gtRegNum;
- addrReg = genKeepAddressable(op1, addrReg, genRegMask(regHi));
-
- // conv.ovf.u8 could overflow if the original number was negative
- if (op2->gtOverflow())
- {
- noway_assert((op2->gtFlags & GTF_UNSIGNED) ==
- 0); // conv.ovf.u8.un should be bashed to conv.u8.un
- instGen_Compare_Reg_To_Zero(EA_4BYTE, regHi); // set flags
- emitJumpKind jmpLTS = genJumpKindForOper(GT_LT, CK_SIGNED);
- genJumpToThrowHlpBlk(jmpLTS, SCK_OVERFLOW);
- }
-
- /* Move the value into the target */
-
- inst_TT_RV(ins_Store(TYP_INT), op1, regHi, 0);
- inst_TT_IV(ins_Store(TYP_INT), op1, 0, 4); // Store 0 in hi-word
-
- /* Free up anything that was tied up by either side */
-
- genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);
- genReleaseReg(smallOpr);
-
-#if REDUNDANT_LOAD
- if (op1->gtOper == GT_LCL_VAR)
- {
- /* clear this local from reg table */
- regTracker.rsTrashLclLong(op1->gtLclVarCommon.gtLclNum);
-
- /* mark RHS registers as containing the local var */
- regTracker.rsTrackRegLclVarLng(regHi, op1->gtLclVarCommon.gtLclNum, true);
- }
-#endif
- goto LAsgExit;
- }
-
- /* Is the LHS more complex than the RHS? */
-
- if (tree->gtFlags & GTF_REVERSE_OPS)
- {
- /* Generate the RHS into a register pair */
-
- genComputeRegPair(op2, REG_PAIR_NONE, avoidReg | op1->gtUsedRegs, RegSet::KEEP_REG);
- noway_assert(op2->gtFlags & GTF_REG_VAL);
-
- /* Make the target addressable */
- op1 = genCodeForCommaTree(op1);
- addrReg = genMakeAddressable(op1, 0, RegSet::KEEP_REG);
-
- /* Make sure the RHS register hasn't been spilled */
-
- genRecoverRegPair(op2, REG_PAIR_NONE, RegSet::KEEP_REG);
- }
- else
- {
- /* Make the target addressable */
-
- op1 = genCodeForCommaTree(op1);
- addrReg = genMakeAddressable(op1, RBM_ALLINT & ~op2->gtRsvdRegs, RegSet::KEEP_REG, true);
-
- /* Generate the RHS into a register pair */
-
- genComputeRegPair(op2, REG_PAIR_NONE, avoidReg, RegSet::KEEP_REG, false);
- }
-
- /* Lock 'op2' and make sure 'op1' is still addressable */
-
- noway_assert(op2->gtFlags & GTF_REG_VAL);
- regPair = op2->gtRegPair;
-
- addrReg = genKeepAddressable(op1, addrReg, genRegPairMask(regPair));
-
- /* Move the value into the target */
-
- inst_TT_RV(ins_Store(TYP_INT), op1, genRegPairLo(regPair), 0);
- inst_TT_RV(ins_Store(TYP_INT), op1, genRegPairHi(regPair), 4);
-
- /* Free up anything that was tied up by either side */
-
- genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);
- genReleaseRegPair(op2);
-
- DONE_ASSG_REGS:
-
-#if REDUNDANT_LOAD
-
- if (op1->gtOper == GT_LCL_VAR)
- {
- /* Clear this local from reg table */
-
- regTracker.rsTrashLclLong(op1->gtLclVarCommon.gtLclNum);
-
- if ((op2->gtFlags & GTF_REG_VAL) &&
- /* constant has precedence over local */
- // rsRegValues[op2->gtRegNum].rvdKind != RV_INT_CNS &&
- tree->gtOper == GT_ASG)
- {
- regNumber regNo;
-
- /* mark RHS registers as containing the local var */
-
- regNo = genRegPairLo(op2->gtRegPair);
- if (regNo != REG_STK)
- regTracker.rsTrackRegLclVarLng(regNo, op1->gtLclVarCommon.gtLclNum, true);
-
- regNo = genRegPairHi(op2->gtRegPair);
- if (regNo != REG_STK)
- {
- /* For partially enregistered longs, we might have
- stomped on op2's hiReg */
- if (!(op1->gtFlags & GTF_REG_VAL) || regNo != genRegPairLo(op1->gtRegPair))
- {
- regTracker.rsTrackRegLclVarLng(regNo, op1->gtLclVarCommon.gtLclNum, false);
- }
- }
- }
- }
-#endif
-
- LAsgExit:
-
- genUpdateLife(op1);
- genUpdateLife(tree);
-
-#ifdef DEBUGGING_SUPPORT
- /* For non-debuggable code, every definition of a lcl-var has
- * to be checked to see if we need to open a new scope for it.
- */
- if (lclVarNum < compiler->lvaCount)
- siCheckVarScope(lclVarNum, lclVarILoffs);
-#endif
- }
- return;
-
- case GT_SUB:
- insLo = INS_sub;
- insHi = INS_SUBC;
- setCarry = true;
- goto BINOP_OVF;
- case GT_ADD:
- insLo = INS_add;
- insHi = INS_ADDC;
- setCarry = true;
- goto BINOP_OVF;
-
- bool ovfl;
-
- BINOP_OVF:
- ovfl = tree->gtOverflow();
- goto _BINOP;
-
- case GT_AND:
- insLo = insHi = INS_AND;
- goto BINOP;
- case GT_OR:
- insLo = insHi = INS_OR;
- goto BINOP;
- case GT_XOR:
- insLo = insHi = INS_XOR;
- goto BINOP;
-
- BINOP:
- ovfl = false;
- goto _BINOP;
-
- _BINOP:
-
- /* The following makes an assumption about gtSetEvalOrder(this) */
-
- noway_assert((tree->gtFlags & GTF_REVERSE_OPS) == 0);
-
- /* Special case: check for "(long(intval) << 32) | longval" */
-
- if (oper == GT_OR && op1->gtOper == GT_LSH)
- {
- GenTreePtr lshLHS = op1->gtOp.gtOp1;
- GenTreePtr lshRHS = op1->gtOp.gtOp2;
-
- if (lshLHS->gtOper == GT_CAST && lshRHS->gtOper == GT_CNS_INT && lshRHS->gtIntCon.gtIconVal == 32 &&
- genTypeSize(TYP_INT) == genTypeSize(lshLHS->CastFromType()))
- {
-
- /* Throw away the cast of the shift operand. */
-
- op1 = lshLHS->gtCast.CastOp();
-
- /* Special case: check op2 for "ulong(intval)" */
- if ((op2->gtOper == GT_CAST) && (op2->CastToType() == TYP_ULONG) &&
- genTypeSize(TYP_INT) == genTypeSize(op2->CastFromType()))
- {
- /* Throw away the cast of the second operand. */
-
- op2 = op2->gtCast.CastOp();
- goto SIMPLE_OR_LONG;
- }
- /* Special case: check op2 for "long(intval) & 0xFFFFFFFF" */
- else if (op2->gtOper == GT_AND)
- {
- GenTreePtr andLHS;
- andLHS = op2->gtOp.gtOp1;
- GenTreePtr andRHS;
- andRHS = op2->gtOp.gtOp2;
-
- if (andLHS->gtOper == GT_CAST && andRHS->gtOper == GT_CNS_LNG &&
- andRHS->gtLngCon.gtLconVal == 0x00000000FFFFFFFF &&
- genTypeSize(TYP_INT) == genTypeSize(andLHS->CastFromType()))
- {
- /* Throw away the cast of the second operand. */
-
- op2 = andLHS->gtCast.CastOp();
-
- SIMPLE_OR_LONG:
- // Load the high DWORD, ie. op1
-
- genCodeForTree(op1, needReg & ~op2->gtRsvdRegs);
-
- noway_assert(op1->gtFlags & GTF_REG_VAL);
- regHi = op1->gtRegNum;
- regSet.rsMarkRegUsed(op1);
-
- // Load the low DWORD, ie. op2
-
- genCodeForTree(op2, needReg & ~genRegMask(regHi));
-
- noway_assert(op2->gtFlags & GTF_REG_VAL);
- regLo = op2->gtRegNum;
-
- /* Make sure regHi is still around. Also, force
- regLo to be excluded in case regLo==regHi */
-
- genRecoverReg(op1, ~genRegMask(regLo), RegSet::FREE_REG);
- regHi = op1->gtRegNum;
-
- regPair = gen2regs2pair(regLo, regHi);
- goto DONE;
- }
- }
-
- /* Generate the following sequence:
- Prepare op1 (discarding shift)
- Compute op2 into some regpair
- OR regpairhi, op1
- */
-
- /* First, make op1 addressable */
-
- /* tempReg must avoid both needReg, op2->RsvdRegs and regSet.rsMaskResvd.
-
- It appears incorrect to exclude needReg as we are not ensuring that the reg pair into
- which the long value is computed is from needReg. But at this point the safest fix is
- to exclude regSet.rsMaskResvd.
-
- Note that needReg could be the set of free registers (excluding reserved ones). If we don't
- exclude regSet.rsMaskResvd, the expression below will have the effect of trying to choose a
-                   reg from the reserved set, which is bound to fail. To prevent that we avoid regSet.rsMaskResvd.
- */
- regMaskTP tempReg = RBM_ALLINT & ~needReg & ~op2->gtRsvdRegs & ~avoidReg & ~regSet.rsMaskResvd;
-
- addrReg = genMakeAddressable(op1, tempReg, RegSet::KEEP_REG);
-
- genCompIntoFreeRegPair(op2, avoidReg, RegSet::KEEP_REG);
-
- noway_assert(op2->gtFlags & GTF_REG_VAL);
- regPair = op2->gtRegPair;
- regHi = genRegPairHi(regPair);
-
- /* The operand might have interfered with the address */
-
- addrReg = genKeepAddressable(op1, addrReg, genRegPairMask(regPair));
-
- /* Now compute the result */
-
- inst_RV_TT(insHi, regHi, op1, 0);
-
- regTracker.rsTrackRegTrash(regHi);
-
- /* Free up anything that was tied up by the LHS */
-
- genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);
-
- /* The result is where the second operand is sitting */
-
- genRecoverRegPair(op2, REG_PAIR_NONE, RegSet::FREE_REG);
-
- regPair = op2->gtRegPair;
- goto DONE;
- }
- }
-
- /* Special case: check for "longval | (long(intval) << 32)" */
-
- if (oper == GT_OR && op2->gtOper == GT_LSH)
- {
- GenTreePtr lshLHS = op2->gtOp.gtOp1;
- GenTreePtr lshRHS = op2->gtOp.gtOp2;
-
- if (lshLHS->gtOper == GT_CAST && lshRHS->gtOper == GT_CNS_INT && lshRHS->gtIntCon.gtIconVal == 32 &&
- genTypeSize(TYP_INT) == genTypeSize(lshLHS->CastFromType()))
-
- {
- /* We throw away the cast of the shift operand. */
-
- op2 = lshLHS->gtCast.CastOp();
-
- /* Special case: check op1 for "long(intval) & 0xFFFFFFFF" */
-
- if (op1->gtOper == GT_AND)
- {
- GenTreePtr andLHS = op1->gtOp.gtOp1;
- GenTreePtr andRHS = op1->gtOp.gtOp2;
-
- if (andLHS->gtOper == GT_CAST && andRHS->gtOper == GT_CNS_LNG &&
- andRHS->gtLngCon.gtLconVal == 0x00000000FFFFFFFF &&
- genTypeSize(TYP_INT) == genTypeSize(andLHS->CastFromType()))
- {
- /* Throw away the cast of the first operand. */
-
- op1 = andLHS->gtCast.CastOp();
-
- // Load the low DWORD, ie. op1
-
- genCodeForTree(op1, needReg & ~op2->gtRsvdRegs);
-
- noway_assert(op1->gtFlags & GTF_REG_VAL);
- regLo = op1->gtRegNum;
- regSet.rsMarkRegUsed(op1);
-
- // Load the high DWORD, ie. op2
-
- genCodeForTree(op2, needReg & ~genRegMask(regLo));
-
- noway_assert(op2->gtFlags & GTF_REG_VAL);
- regHi = op2->gtRegNum;
-
- /* Make sure regLo is still around. Also, force
- regHi to be excluded in case regLo==regHi */
-
- genRecoverReg(op1, ~genRegMask(regHi), RegSet::FREE_REG);
- regLo = op1->gtRegNum;
-
- regPair = gen2regs2pair(regLo, regHi);
- goto DONE;
- }
- }
-
- /* Generate the following sequence:
- Compute op1 into some regpair
- Make op2 (ignoring shift) addressable
- OR regPairHi, op2
- */
-
- // First, generate the first operand into some register
-
- genCompIntoFreeRegPair(op1, avoidReg | op2->gtRsvdRegs, RegSet::KEEP_REG);
- noway_assert(op1->gtFlags & GTF_REG_VAL);
-
- /* Make the second operand addressable */
-
- addrReg = genMakeAddressable(op2, needReg, RegSet::KEEP_REG);
-
- /* Make sure the result is in a free register pair */
-
- genRecoverRegPair(op1, REG_PAIR_NONE, RegSet::KEEP_REG);
- regPair = op1->gtRegPair;
- regHi = genRegPairHi(regPair);
-
- /* The operand might have interfered with the address */
-
- addrReg = genKeepAddressable(op2, addrReg, genRegPairMask(regPair));
-
- /* Compute the new value */
-
- inst_RV_TT(insHi, regHi, op2, 0);
-
- /* The value in the high register has been trashed */
-
- regTracker.rsTrackRegTrash(regHi);
-
- goto DONE_OR;
- }
- }
-
- /* Generate the first operand into registers */
-
- if ((genCountBits(needReg) == 2) && ((regSet.rsRegMaskFree() & needReg) == needReg) &&
- ((op2->gtRsvdRegs & needReg) == RBM_NONE) && (!(tree->gtFlags & GTF_ASG)))
- {
- regPair = regSet.rsPickRegPair(needReg);
- genComputeRegPair(op1, regPair, avoidReg | op2->gtRsvdRegs, RegSet::KEEP_REG);
- }
- else
- {
- genCompIntoFreeRegPair(op1, avoidReg | op2->gtRsvdRegs, RegSet::KEEP_REG);
- }
- noway_assert(op1->gtFlags & GTF_REG_VAL);
- regMaskTP op1Mask;
- regPair = op1->gtRegPair;
- op1Mask = genRegPairMask(regPair);
-
- /* Make the second operand addressable */
- regMaskTP needReg2;
- needReg2 = regSet.rsNarrowHint(needReg, ~op1Mask);
- addrReg = genMakeAddressable(op2, needReg2, RegSet::KEEP_REG);
-
- // TODO: If 'op1' got spilled and 'op2' happens to be
- // TODO: in a register, and we have add/mul/and/or/xor,
- // TODO: reverse the operands since we can perform the
- // TODO: operation directly with the spill temp, e.g.
- // TODO: 'add regHi, [temp]'.
-
- /* Make sure the result is in a free register pair */
-
- genRecoverRegPair(op1, REG_PAIR_NONE, RegSet::KEEP_REG);
- regPair = op1->gtRegPair;
- op1Mask = genRegPairMask(regPair);
-
- regLo = genRegPairLo(regPair);
- regHi = genRegPairHi(regPair);
-
- /* Make sure that we don't spill regLo/regHi below */
- regSet.rsLockUsedReg(op1Mask);
-
- /* The operand might have interfered with the address */
-
- addrReg = genKeepAddressable(op2, addrReg);
-
- /* The value in the register pair is about to be trashed */
-
- regTracker.rsTrackRegTrash(regLo);
- regTracker.rsTrackRegTrash(regHi);
-
- /* Compute the new value */
-
- doLo = true;
- doHi = true;
-
- if (op2->gtOper == GT_CNS_LNG)
- {
- __int64 icon = op2->gtLngCon.gtLconVal;
-
- /* Check for "(op1 AND -1)" and "(op1 [X]OR 0)" */
-
- switch (oper)
- {
- case GT_AND:
- if ((int)(icon) == -1)
- doLo = false;
- if ((int)(icon >> 32) == -1)
- doHi = false;
-
- if (!(icon & I64(0x00000000FFFFFFFF)))
- {
- genSetRegToIcon(regLo, 0);
- doLo = false;
- }
-
- if (!(icon & I64(0xFFFFFFFF00000000)))
- {
-                                /* Just to always set low first */
-
- if (doLo)
- {
- inst_RV_TT(insLo, regLo, op2, 0);
- doLo = false;
- }
- genSetRegToIcon(regHi, 0);
- doHi = false;
- }
-
- break;
-
- case GT_OR:
- case GT_XOR:
- if (!(icon & I64(0x00000000FFFFFFFF)))
- doLo = false;
- if (!(icon & I64(0xFFFFFFFF00000000)))
- doHi = false;
- break;
- default:
- break;
- }
- }
-
- // Fix 383813 X86/ARM ILGEN
- // Fix 383793 ARM ILGEN
- // Fix 383911 ARM ILGEN
- regMaskTP newMask;
- newMask = addrReg & ~op1Mask;
- regSet.rsLockUsedReg(newMask);
-
- if (doLo)
- {
- insFlags flagsLo = setCarry ? INS_FLAGS_SET : INS_FLAGS_DONT_CARE;
- inst_RV_TT(insLo, regLo, op2, 0, EA_4BYTE, flagsLo);
- }
- if (doHi)
- {
- insFlags flagsHi = ovfl ? INS_FLAGS_SET : INS_FLAGS_DONT_CARE;
- inst_RV_TT(insHi, regHi, op2, 4, EA_4BYTE, flagsHi);
- }
-
- regSet.rsUnlockUsedReg(newMask);
- regSet.rsUnlockUsedReg(op1Mask);
-
- DONE_OR:
-
- /* Free up anything that was tied up by the LHS */
-
- genDoneAddressable(op2, addrReg, RegSet::KEEP_REG);
-
- /* The result is where the first operand is sitting */
-
- genRecoverRegPair(op1, REG_PAIR_NONE, RegSet::FREE_REG);
-
- regPair = op1->gtRegPair;
-
- if (ovfl)
- genCheckOverflow(tree);
-
- goto DONE;
-
- case GT_UMOD:
-
- regPair = genCodeForLongModInt(tree, needReg);
- goto DONE;
-
- case GT_MUL:
-
- /* Special case: both operands promoted from int */
-
- assert(tree->gtIsValid64RsltMul());
-
- /* Change to an integer multiply temporarily */
-
- tree->gtType = TYP_INT;
-
- noway_assert(op1->gtOper == GT_CAST && op2->gtOper == GT_CAST);
- tree->gtOp.gtOp1 = op1->gtCast.CastOp();
- tree->gtOp.gtOp2 = op2->gtCast.CastOp();
-
- assert(tree->gtFlags & GTF_MUL_64RSLT);
-
-#if defined(_TARGET_X86_)
- // imul on x86 requires EDX:EAX
- genComputeReg(tree, (RBM_EAX | RBM_EDX), RegSet::EXACT_REG, RegSet::FREE_REG);
- noway_assert(tree->gtFlags & GTF_REG_VAL);
- noway_assert(tree->gtRegNum == REG_EAX); // Also REG_EDX is setup with hi 32-bits
-#elif defined(_TARGET_ARM_)
- genComputeReg(tree, needReg, RegSet::ANY_REG, RegSet::FREE_REG);
- noway_assert(tree->gtFlags & GTF_REG_VAL);
-#else
- assert(!"Unsupported target for 64-bit multiply codegen");
-#endif
-
- /* Restore gtType, op1 and op2 from the change above */
-
- tree->gtType = TYP_LONG;
- tree->gtOp.gtOp1 = op1;
- tree->gtOp.gtOp2 = op2;
-
-#if defined(_TARGET_X86_)
- /* The result is now in EDX:EAX */
- regPair = REG_PAIR_EAXEDX;
-#elif defined(_TARGET_ARM_)
- regPair = tree->gtRegPair;
-#endif
- goto DONE;
-
- case GT_LSH:
- helper = CORINFO_HELP_LLSH;
- goto SHIFT;
- case GT_RSH:
- helper = CORINFO_HELP_LRSH;
- goto SHIFT;
- case GT_RSZ:
- helper = CORINFO_HELP_LRSZ;
- goto SHIFT;
-
- SHIFT:
-
- noway_assert(op1->gtType == TYP_LONG);
- noway_assert(genActualType(op2->gtType) == TYP_INT);
-
- /* Is the second operand a constant? */
-
- if (op2->gtOper == GT_CNS_INT)
- {
- unsigned int count = op2->gtIntCon.gtIconVal;
-
- /* Compute the left operand into a free register pair */
-
- genCompIntoFreeRegPair(op1, avoidReg | op2->gtRsvdRegs, RegSet::FREE_REG);
- noway_assert(op1->gtFlags & GTF_REG_VAL);
-
- regPair = op1->gtRegPair;
- regLo = genRegPairLo(regPair);
- regHi = genRegPairHi(regPair);
-
- /* Assume the value in the register pair is trashed. In some cases, though,
- a register might be set to zero, and we can use that information to improve
- some code generation.
- */
-
- regTracker.rsTrackRegTrash(regLo);
- regTracker.rsTrackRegTrash(regHi);
-
- /* Generate the appropriate shift instructions */
-
- switch (oper)
- {
- case GT_LSH:
- if (count == 0)
- {
- // regHi, regLo are correct
- }
- else if (count < 32)
- {
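-                                // count < 32: the high word gets its own bits shifted left plus the top
-                                // 'count' bits of the low word, then the low word is shifted left.
-                                // Illustrative example (not from the source): hi:lo = 0x0:0x80000000
-                                // shifted left by 1 becomes hi:lo = 0x1:0x0.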
-#if defined(_TARGET_XARCH_)
- inst_RV_RV_IV(INS_shld, EA_4BYTE, regHi, regLo, count);
-#elif defined(_TARGET_ARM_)
- inst_RV_SH(INS_SHIFT_LEFT_LOGICAL, EA_4BYTE, regHi, count);
- getEmitter()->emitIns_R_R_R_I(INS_OR, EA_4BYTE, regHi, regHi, regLo, 32 - count,
- INS_FLAGS_DONT_CARE, INS_OPTS_LSR);
-#else // _TARGET_*
- NYI("INS_shld");
-#endif // _TARGET_*
- inst_RV_SH(INS_SHIFT_LEFT_LOGICAL, EA_4BYTE, regLo, count);
- }
- else // count >= 32
- {
- assert(count >= 32);
- if (count < 64)
- {
-#if defined(_TARGET_ARM_)
- if (count == 32)
- {
- // mov low dword into high dword (i.e. shift left by 32-bits)
- inst_RV_RV(INS_mov, regHi, regLo);
- }
- else
- {
- assert(count > 32 && count < 64);
- getEmitter()->emitIns_R_R_I(INS_SHIFT_LEFT_LOGICAL, EA_4BYTE, regHi, regLo,
- count - 32);
- }
-#else // _TARGET_*
- // mov low dword into high dword (i.e. shift left by 32-bits)
- inst_RV_RV(INS_mov, regHi, regLo);
- if (count > 32)
- {
- // Shift high dword left by count - 32
- inst_RV_SH(INS_SHIFT_LEFT_LOGICAL, EA_4BYTE, regHi, count - 32);
- }
-#endif // _TARGET_*
- }
- else // count >= 64
- {
- assert(count >= 64);
- genSetRegToIcon(regHi, 0);
- }
- genSetRegToIcon(regLo, 0);
- }
- break;
-
- case GT_RSH:
- if (count == 0)
- {
- // regHi, regLo are correct
- }
- else if (count < 32)
- {
-#if defined(_TARGET_XARCH_)
- inst_RV_RV_IV(INS_shrd, EA_4BYTE, regLo, regHi, count);
-#elif defined(_TARGET_ARM_)
- inst_RV_SH(INS_SHIFT_RIGHT_LOGICAL, EA_4BYTE, regLo, count);
- getEmitter()->emitIns_R_R_R_I(INS_OR, EA_4BYTE, regLo, regLo, regHi, 32 - count,
- INS_FLAGS_DONT_CARE, INS_OPTS_LSL);
-#else // _TARGET_*
- NYI("INS_shrd");
-#endif // _TARGET_*
- inst_RV_SH(INS_SHIFT_RIGHT_ARITHM, EA_4BYTE, regHi, count);
- }
- else // count >= 32
- {
- assert(count >= 32);
- if (count < 64)
- {
-#if defined(_TARGET_ARM_)
- if (count == 32)
- {
- // mov high dword into low dword (i.e. shift right by 32-bits)
- inst_RV_RV(INS_mov, regLo, regHi);
- }
- else
- {
- assert(count > 32 && count < 64);
- getEmitter()->emitIns_R_R_I(INS_SHIFT_RIGHT_ARITHM, EA_4BYTE, regLo, regHi,
- count - 32);
- }
-#else // _TARGET_*
- // mov high dword into low dword (i.e. shift right by 32-bits)
- inst_RV_RV(INS_mov, regLo, regHi);
- if (count > 32)
- {
- // Shift low dword right by count - 32
- inst_RV_SH(INS_SHIFT_RIGHT_ARITHM, EA_4BYTE, regLo, count - 32);
- }
-#endif // _TARGET_*
- }
-
- // Propagate sign bit in high dword
- inst_RV_SH(INS_SHIFT_RIGHT_ARITHM, EA_4BYTE, regHi, 31);
-
- if (count >= 64)
- {
- // Propagate the sign from the high dword
- inst_RV_RV(INS_mov, regLo, regHi, TYP_INT);
- }
- }
- break;
-
- case GT_RSZ:
- if (count == 0)
- {
- // regHi, regLo are correct
- }
- else if (count < 32)
- {
-#if defined(_TARGET_XARCH_)
- inst_RV_RV_IV(INS_shrd, EA_4BYTE, regLo, regHi, count);
-#elif defined(_TARGET_ARM_)
- inst_RV_SH(INS_SHIFT_RIGHT_LOGICAL, EA_4BYTE, regLo, count);
- getEmitter()->emitIns_R_R_R_I(INS_OR, EA_4BYTE, regLo, regLo, regHi, 32 - count,
- INS_FLAGS_DONT_CARE, INS_OPTS_LSL);
-#else // _TARGET_*
- NYI("INS_shrd");
-#endif // _TARGET_*
- inst_RV_SH(INS_SHIFT_RIGHT_LOGICAL, EA_4BYTE, regHi, count);
- }
- else // count >= 32
- {
- assert(count >= 32);
- if (count < 64)
- {
-#if defined(_TARGET_ARM_)
- if (count == 32)
- {
- // mov high dword into low dword (i.e. shift right by 32-bits)
- inst_RV_RV(INS_mov, regLo, regHi);
- }
- else
- {
- assert(count > 32 && count < 64);
- getEmitter()->emitIns_R_R_I(INS_SHIFT_RIGHT_LOGICAL, EA_4BYTE, regLo, regHi,
- count - 32);
- }
-#else // _TARGET_*
- // mov high dword into low dword (i.e. shift right by 32-bits)
- inst_RV_RV(INS_mov, regLo, regHi);
- if (count > 32)
- {
- // Shift low dword right by count - 32
- inst_RV_SH(INS_SHIFT_RIGHT_LOGICAL, EA_4BYTE, regLo, count - 32);
- }
-#endif // _TARGET_*
- }
- else // count >= 64
- {
- assert(count >= 64);
- genSetRegToIcon(regLo, 0);
- }
- genSetRegToIcon(regHi, 0);
- }
- break;
-
- default:
- noway_assert(!"Illegal oper for long shift");
- break;
- }
-
- goto DONE_SHF;
- }
-
- /* Which operand are we supposed to compute first? */
-
- assert((RBM_SHIFT_LNG & RBM_LNGARG_0) == 0);
-
- if (tree->gtFlags & GTF_REVERSE_OPS)
- {
- /* The second operand can't be a constant */
-
- noway_assert(op2->gtOper != GT_CNS_INT);
-
-                    /* Load the shift count, hopefully into RBM_SHIFT_LNG */
- RegSet::ExactReg exactReg;
- if ((RBM_SHIFT_LNG & op1->gtRsvdRegs) == 0)
- exactReg = RegSet::EXACT_REG;
- else
- exactReg = RegSet::ANY_REG;
- genComputeReg(op2, RBM_SHIFT_LNG, exactReg, RegSet::KEEP_REG);
-
- /* Compute the left operand into REG_LNGARG_0 */
-
- genComputeRegPair(op1, REG_LNGARG_0, avoidReg, RegSet::KEEP_REG, false);
- noway_assert(op1->gtFlags & GTF_REG_VAL);
-
- /* Lock op1 so that it doesn't get trashed */
-
- regSet.rsLockUsedReg(RBM_LNGARG_0);
-
- /* Make sure the shift count wasn't displaced */
-
- genRecoverReg(op2, RBM_SHIFT_LNG, RegSet::KEEP_REG);
-
- /* Lock op2 */
-
- regSet.rsLockUsedReg(RBM_SHIFT_LNG);
- }
- else
- {
- /* Compute the left operand into REG_LNGARG_0 */
-
- genComputeRegPair(op1, REG_LNGARG_0, avoidReg, RegSet::KEEP_REG, false);
- noway_assert(op1->gtFlags & GTF_REG_VAL);
-
-                    /* Compute the shift count into RBM_SHIFT_LNG */
-
- genComputeReg(op2, RBM_SHIFT_LNG, RegSet::EXACT_REG, RegSet::KEEP_REG);
-
- /* Lock op2 */
-
- regSet.rsLockUsedReg(RBM_SHIFT_LNG);
-
- /* Make sure the value hasn't been displaced */
-
- genRecoverRegPair(op1, REG_LNGARG_0, RegSet::KEEP_REG);
-
- /* Lock op1 so that it doesn't get trashed */
-
- regSet.rsLockUsedReg(RBM_LNGARG_0);
- }
-
-#ifndef _TARGET_X86_
- /* The generic helper is a C-routine and so it follows the full ABI */
- {
- /* Spill any callee-saved registers which are being used */
- regMaskTP spillRegs = RBM_CALLEE_TRASH & regSet.rsMaskUsed;
-
- /* But do not spill our argument registers. */
- spillRegs &= ~(RBM_LNGARG_0 | RBM_SHIFT_LNG);
-
- if (spillRegs)
- {
- regSet.rsSpillRegs(spillRegs);
- }
- }
-#endif // !_TARGET_X86_
-
- /* Perform the shift by calling a helper function */
-
- noway_assert(op1->gtRegPair == REG_LNGARG_0);
- noway_assert(op2->gtRegNum == REG_SHIFT_LNG);
- noway_assert((regSet.rsMaskLock & (RBM_LNGARG_0 | RBM_SHIFT_LNG)) == (RBM_LNGARG_0 | RBM_SHIFT_LNG));
-
- genEmitHelperCall(helper,
- 0, // argSize
- EA_8BYTE); // retSize
-
-#ifdef _TARGET_X86_
- /* The value in the register pair is trashed */
-
- regTracker.rsTrackRegTrash(genRegPairLo(REG_LNGARG_0));
- regTracker.rsTrackRegTrash(genRegPairHi(REG_LNGARG_0));
-#else // _TARGET_X86_
- /* The generic helper is a C-routine and so it follows the full ABI */
- regTracker.rsTrackRegMaskTrash(RBM_CALLEE_TRASH);
-#endif // _TARGET_X86_
-
- /* Release both operands */
-
- regSet.rsUnlockUsedReg(RBM_LNGARG_0 | RBM_SHIFT_LNG);
- genReleaseRegPair(op1);
- genReleaseReg(op2);
-
- DONE_SHF:
-
- noway_assert(op1->gtFlags & GTF_REG_VAL);
- regPair = op1->gtRegPair;
- goto DONE;
-
- case GT_NEG:
- case GT_NOT:
-
- /* Generate the operand into some register pair */
-
- genCompIntoFreeRegPair(op1, avoidReg, RegSet::FREE_REG);
- noway_assert(op1->gtFlags & GTF_REG_VAL);
-
- regPair = op1->gtRegPair;
-
- /* Figure out which registers the value is in */
-
- regLo = genRegPairLo(regPair);
- regHi = genRegPairHi(regPair);
-
- /* The value in the register pair is about to be trashed */
-
- regTracker.rsTrackRegTrash(regLo);
- regTracker.rsTrackRegTrash(regHi);
-
- /* Unary "neg": negate the value in the register pair */
- if (oper == GT_NEG)
- {
-#ifdef _TARGET_ARM_
-
- // ARM doesn't have an opcode that sets the carry bit like
- // x86, so we can't use neg/addc/neg. Instead we use subtract
- // with carry. Too bad this uses an extra register.
-
- // Lock regLo and regHi so we don't pick them, and then pick
- // a third register to be our 0.
- regMaskTP regPairMask = genRegMask(regLo) | genRegMask(regHi);
- regSet.rsLockReg(regPairMask);
- regMaskTP regBest = RBM_ALLINT & ~avoidReg;
- regNumber regZero = genGetRegSetToIcon(0, regBest);
- regSet.rsUnlockReg(regPairMask);
-
- inst_RV_IV(INS_rsb, regLo, 0, EA_4BYTE, INS_FLAGS_SET);
- getEmitter()->emitIns_R_R_R_I(INS_sbc, EA_4BYTE, regHi, regZero, regHi, 0);
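-                    // Illustrative arithmetic (assumed value, not from the source): negating
-                    // 0x0000000000000001 gives lo = 0 - 1 = 0xFFFFFFFF (with borrow) and
-                    // hi = 0 - 0 - 1 = 0xFFFFFFFF, i.e. the 64-bit value -1.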
-
-#elif defined(_TARGET_XARCH_)
-
- inst_RV(INS_NEG, regLo, TYP_LONG);
- inst_RV_IV(INS_ADDC, regHi, 0, emitActualTypeSize(TYP_LONG));
- inst_RV(INS_NEG, regHi, TYP_LONG);
-#else
- NYI("GT_NEG on TYP_LONG");
-#endif
- }
- else
- {
- /* Unary "not": flip all the bits in the register pair */
-
- inst_RV(INS_NOT, regLo, TYP_LONG);
- inst_RV(INS_NOT, regHi, TYP_LONG);
- }
-
- goto DONE;
-
-#if LONG_ASG_OPS
-
- case GT_ASG_OR:
- insLo = insHi = INS_OR;
- goto ASG_OPR;
- case GT_ASG_XOR:
- insLo = insHi = INS_XOR;
- goto ASG_OPR;
- case GT_ASG_AND:
- insLo = insHi = INS_AND;
- goto ASG_OPR;
- case GT_ASG_SUB:
- insLo = INS_sub;
- insHi = INS_SUBC;
- goto ASG_OPR;
- case GT_ASG_ADD:
- insLo = INS_add;
- insHi = INS_ADDC;
- goto ASG_OPR;
-
- ASG_OPR:
-
- if (op2->gtOper == GT_CNS_LNG)
- {
- __int64 lval = op2->gtLngCon.gtLconVal;
-
- /* Make the target addressable */
-
- addrReg = genMakeAddressable(op1, needReg, RegSet::FREE_REG);
-
- /* Optimize some special cases */
-
- doLo = doHi = true;
-
- /* Check for "(op1 AND -1)" and "(op1 [X]OR 0)" */
-
- switch (oper)
- {
- case GT_ASG_AND:
- if ((int)(lval) == -1)
- doLo = false;
- if ((int)(lval >> 32) == -1)
- doHi = false;
- break;
-
- case GT_ASG_OR:
- case GT_ASG_XOR:
- if (!(lval & 0x00000000FFFFFFFF))
- doLo = false;
- if (!(lval & 0xFFFFFFFF00000000))
- doHi = false;
- break;
- }
-
- if (doLo)
- inst_TT_IV(insLo, op1, (int)(lval), 0);
- if (doHi)
- inst_TT_IV(insHi, op1, (int)(lval >> 32), 4);
-
- bool isArith = (oper == GT_ASG_ADD || oper == GT_ASG_SUB);
- if (doLo || doHi)
- tree->gtFlags |= GTF_ZSF_SET;
-
- genDoneAddressable(op1, addrReg, RegSet::FREE_REG);
- goto DONE_ASSG_REGS;
- }
-
- /* TODO: allow non-const long assignment operators */
-
- noway_assert(!"non-const long asgop NYI");
-
-#endif // LONG_ASG_OPS
-
- case GT_IND:
- case GT_NULLCHECK:
- {
- regMaskTP tmpMask;
- int hiFirst;
-
- regMaskTP availMask = RBM_ALLINT & ~needReg;
-
- /* Make sure the operand is addressable */
-
- addrReg = genMakeAddressable(tree, availMask, RegSet::FREE_REG);
-
- GenTreePtr addr = oper == GT_IND ? op1 : tree;
-
- /* Pick a register for the value */
-
- regPair = regSet.rsPickRegPair(needReg);
- tmpMask = genRegPairMask(regPair);
-
- /* Is there any overlap between the register pair and the address? */
-
- hiFirst = FALSE;
-
- if (tmpMask & addrReg)
- {
- /* Does one or both of the target registers overlap? */
-
- if ((tmpMask & addrReg) != tmpMask)
- {
- /* Only one register overlaps */
-
- noway_assert(genMaxOneBit(tmpMask & addrReg) == TRUE);
-
- /* If the low register overlaps, load the upper half first */
-
- if (addrReg & genRegMask(genRegPairLo(regPair)))
- hiFirst = TRUE;
- }
- else
- {
- regMaskTP regFree;
-
- /* The register completely overlaps with the address */
-
- noway_assert(genMaxOneBit(tmpMask & addrReg) == FALSE);
-
- /* Can we pick another pair easily? */
-
- regFree = regSet.rsRegMaskFree() & ~addrReg;
- if (needReg)
- regFree &= needReg;
-
- /* More than one free register available? */
-
- if (regFree && !genMaxOneBit(regFree))
- {
- regPair = regSet.rsPickRegPair(regFree);
- tmpMask = genRegPairMask(regPair);
- }
- else
- {
- // printf("Overlap: needReg = %08X\n", needReg);
-
- // Reg-prediction won't allow this
- noway_assert((regSet.rsMaskVars & addrReg) == 0);
-
- // Grab one fresh reg, and use any one of addrReg
-
- if (regFree) // Try to follow 'needReg'
- regLo = regSet.rsGrabReg(regFree);
- else // Pick any reg besides addrReg
- regLo = regSet.rsGrabReg(RBM_ALLINT & ~addrReg);
-
- unsigned regBit = 0x1;
- regNumber regNo;
-
- for (regNo = REG_INT_FIRST; regNo <= REG_INT_LAST; regNo = REG_NEXT(regNo), regBit <<= 1)
- {
- // Found one of addrReg. Use it.
- if (regBit & addrReg)
- break;
- }
- noway_assert(genIsValidReg(regNo)); // Should have found regNo
-
- regPair = gen2regs2pair(regLo, regNo);
- tmpMask = genRegPairMask(regPair);
- }
- }
- }
-
- /* Make sure the value is still addressable */
-
- noway_assert(genStillAddressable(tree));
-
- /* Figure out which registers the value is in */
-
- regLo = genRegPairLo(regPair);
- regHi = genRegPairHi(regPair);
-
- /* The value in the register pair is about to be trashed */
-
- regTracker.rsTrackRegTrash(regLo);
- regTracker.rsTrackRegTrash(regHi);
-
- /* Load the target registers from where the value is */
-
- if (hiFirst)
- {
- inst_RV_AT(ins_Load(TYP_INT), EA_4BYTE, TYP_INT, regHi, addr, 4);
- regSet.rsLockReg(genRegMask(regHi));
- inst_RV_AT(ins_Load(TYP_INT), EA_4BYTE, TYP_INT, regLo, addr, 0);
- regSet.rsUnlockReg(genRegMask(regHi));
- }
- else
- {
- inst_RV_AT(ins_Load(TYP_INT), EA_4BYTE, TYP_INT, regLo, addr, 0);
- regSet.rsLockReg(genRegMask(regLo));
- inst_RV_AT(ins_Load(TYP_INT), EA_4BYTE, TYP_INT, regHi, addr, 4);
- regSet.rsUnlockReg(genRegMask(regLo));
- }
-
-#ifdef _TARGET_ARM_
- if (tree->gtFlags & GTF_IND_VOLATILE)
- {
- // Emit a memory barrier instruction after the load
- instGen_MemoryBarrier();
- }
-#endif
-
- genUpdateLife(tree);
- genDoneAddressable(tree, addrReg, RegSet::FREE_REG);
- }
- goto DONE;
-
- case GT_CAST:
-
- /* What are we casting from? */
-
- switch (op1->gtType)
- {
- case TYP_BOOL:
- case TYP_BYTE:
- case TYP_CHAR:
- case TYP_SHORT:
- case TYP_INT:
- case TYP_UBYTE:
- case TYP_BYREF:
- {
- regMaskTP hiRegMask;
- regMaskTP loRegMask;
-
- // For an unsigned cast we don't need to sign-extend the 32 bit value
- if (tree->gtFlags & GTF_UNSIGNED)
- {
-                        // Does needReg have exactly two bits on, and thus
-                        // specify the exact register pair that we want to use?
- if (!genMaxOneBit(needReg))
- {
- regPair = regSet.rsFindRegPairNo(needReg);
- if (needReg != genRegPairMask(regPair))
- goto ANY_FREE_REG_UNSIGNED;
- loRegMask = genRegMask(genRegPairLo(regPair));
- if ((loRegMask & regSet.rsRegMaskCanGrab()) == 0)
- goto ANY_FREE_REG_UNSIGNED;
- hiRegMask = genRegMask(genRegPairHi(regPair));
- }
- else
- {
- ANY_FREE_REG_UNSIGNED:
- loRegMask = needReg;
- hiRegMask = needReg;
- }
-
- genComputeReg(op1, loRegMask, RegSet::ANY_REG, RegSet::KEEP_REG);
- noway_assert(op1->gtFlags & GTF_REG_VAL);
-
- regLo = op1->gtRegNum;
- loRegMask = genRegMask(regLo);
- regSet.rsLockUsedReg(loRegMask);
- regHi = regSet.rsPickReg(hiRegMask);
- regSet.rsUnlockUsedReg(loRegMask);
-
- regPair = gen2regs2pair(regLo, regHi);
-
- // Move 0 to the higher word of the ULong
- genSetRegToIcon(regHi, 0, TYP_INT);
-
- /* We can now free up the operand */
- genReleaseReg(op1);
-
- goto DONE;
- }
-#ifdef _TARGET_XARCH_
- /* Cast of 'int' to 'long' --> Use cdq if EAX,EDX are available
- and we need the result to be in those registers.
- cdq is smaller so we use it for SMALL_CODE
- */
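-                    /* For reference (encoding sizes are illustrative, taken from the x86
-                       manuals rather than from this code), the two sequences compute the
-                       same sign extension:
-
-                           cdq              ; 1 byte  : EDX <- EAX >> 31 (all 0s or all 1s)
-
-                           mov  edx, eax    ; 2 bytes
-                           sar  edx, 31     ; 3 bytes
-
-                       which is why cdq is preferred whenever EDX:EAX is an acceptable
-                       destination for the result.
-                    */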
-
- if ((needReg & (RBM_EAX | RBM_EDX)) == (RBM_EAX | RBM_EDX) &&
- (regSet.rsRegMaskFree() & RBM_EDX))
- {
- genCodeForTree(op1, RBM_EAX);
- regSet.rsMarkRegUsed(op1);
-
- /* If we have to spill EDX, might as well use the faster
- sar as the spill will increase code size anyway */
-
- if (op1->gtRegNum != REG_EAX || !(regSet.rsRegMaskFree() & RBM_EDX))
- {
- hiRegMask = regSet.rsRegMaskFree();
- goto USE_SAR_FOR_CAST;
- }
-
- regSet.rsGrabReg(RBM_EDX);
- regTracker.rsTrackRegTrash(REG_EDX);
-
- /* Convert the int in EAX into a long in EDX:EAX */
-
- instGen(INS_cdq);
-
- /* The result is in EDX:EAX */
-
- regPair = REG_PAIR_EAXEDX;
- }
- else
-#endif
- {
- /* use the sar instruction to sign-extend a 32-bit integer */
-
-                            // Does needReg have exactly two bits on, and thus
-                            // specify the exact register pair that we want to use?
- if (!genMaxOneBit(needReg))
- {
- regPair = regSet.rsFindRegPairNo(needReg);
- if ((regPair == REG_PAIR_NONE) || (needReg != genRegPairMask(regPair)))
- goto ANY_FREE_REG_SIGNED;
- loRegMask = genRegMask(genRegPairLo(regPair));
- if ((loRegMask & regSet.rsRegMaskCanGrab()) == 0)
- goto ANY_FREE_REG_SIGNED;
- hiRegMask = genRegMask(genRegPairHi(regPair));
- }
- else
- {
- ANY_FREE_REG_SIGNED:
- loRegMask = needReg;
- hiRegMask = RBM_NONE;
- }
-
- genComputeReg(op1, loRegMask, RegSet::ANY_REG, RegSet::KEEP_REG);
-#ifdef _TARGET_XARCH_
- USE_SAR_FOR_CAST:
-#endif
- noway_assert(op1->gtFlags & GTF_REG_VAL);
-
- regLo = op1->gtRegNum;
- loRegMask = genRegMask(regLo);
- regSet.rsLockUsedReg(loRegMask);
- regHi = regSet.rsPickReg(hiRegMask);
- regSet.rsUnlockUsedReg(loRegMask);
-
- regPair = gen2regs2pair(regLo, regHi);
-
-#ifdef _TARGET_ARM_
- /* Copy the lo32 bits from regLo to regHi and sign-extend it */
- // Use one instruction instead of two
- getEmitter()->emitIns_R_R_I(INS_SHIFT_RIGHT_ARITHM, EA_4BYTE, regHi, regLo, 31);
-#else
- /* Copy the lo32 bits from regLo to regHi and sign-extend it */
- inst_RV_RV(INS_mov, regHi, regLo, TYP_INT);
- inst_RV_SH(INS_SHIFT_RIGHT_ARITHM, EA_4BYTE, regHi, 31);
-#endif
-
- /* The value in the upper register is trashed */
-
- regTracker.rsTrackRegTrash(regHi);
- }
-
- /* We can now free up the operand */
- genReleaseReg(op1);
-
- // conv.ovf.u8 could overflow if the original number was negative
- if (tree->gtOverflow() && TYP_ULONG == tree->CastToType())
- {
- regNumber hiReg = genRegPairHi(regPair);
- instGen_Compare_Reg_To_Zero(EA_4BYTE, hiReg); // set flags
- emitJumpKind jmpLTS = genJumpKindForOper(GT_LT, CK_SIGNED);
- genJumpToThrowHlpBlk(jmpLTS, SCK_OVERFLOW);
- }
- }
- goto DONE;
-
- case TYP_FLOAT:
- case TYP_DOUBLE:
-
-#if 0
- /* Load the FP value onto the coprocessor stack */
-
- genCodeForTreeFlt(op1);
-
- /* Allocate a temp for the long value */
-
- temp = compiler->tmpGetTemp(TYP_LONG);
-
- /* Store the FP value into the temp */
-
- inst_FS_ST(INS_fistpl, sizeof(int), temp, 0);
- genFPstkLevel--;
-
- /* Pick a register pair for the value */
-
- regPair = regSet.rsPickRegPair(needReg);
-
- /* Figure out which registers the value is in */
-
- regLo = genRegPairLo(regPair);
- regHi = genRegPairHi(regPair);
-
- /* The value in the register pair is about to be trashed */
-
- regTracker.rsTrackRegTrash(regLo);
- regTracker.rsTrackRegTrash(regHi);
-
- /* Load the converted value into the registers */
-
- inst_RV_ST(INS_mov, EA_4BYTE, regLo, temp, 0);
- inst_RV_ST(INS_mov, EA_4BYTE, regHi, temp, 4);
-
- /* We no longer need the temp */
-
- compiler->tmpRlsTemp(temp);
- goto DONE;
-#else
- NO_WAY("Cast from TYP_FLOAT or TYP_DOUBLE supposed to be done via a helper call");
- break;
-#endif
- case TYP_LONG:
- case TYP_ULONG:
- {
- noway_assert(tree->gtOverflow()); // conv.ovf.u8 or conv.ovf.i8
-
- genComputeRegPair(op1, REG_PAIR_NONE, RBM_ALLINT & ~needReg, RegSet::FREE_REG);
- regPair = op1->gtRegPair;
-
-                // Do we need to set the sign-flag, or can we check whether it is
-                // already set, and skip this "test" if so?
-
- if (op1->gtFlags & GTF_REG_VAL)
- {
- regNumber hiReg = genRegPairHi(op1->gtRegPair);
- noway_assert(hiReg != REG_STK);
- instGen_Compare_Reg_To_Zero(EA_4BYTE, hiReg); // set flags
- }
- else
- {
- inst_TT_IV(INS_cmp, op1, 0, sizeof(int));
- }
-
- emitJumpKind jmpLTS = genJumpKindForOper(GT_LT, CK_SIGNED);
- genJumpToThrowHlpBlk(jmpLTS, SCK_OVERFLOW);
- }
- goto DONE;
-
- default:
-#ifdef DEBUG
- compiler->gtDispTree(tree);
-#endif
- NO_WAY("unexpected cast to long");
- }
- break;
-
- case GT_RETURN:
-
- /* TODO:
- * This code is cloned from the regular processing of GT_RETURN values. We have to remember to
- * call genPInvokeMethodEpilog anywhere that we have a GT_RETURN statement. We should really
- * generate trees for the PInvoke prolog and epilog so we can remove these special cases.
- */
-
- // TODO: this should be done AFTER we called exit mon so that
- // we are sure that we don't have to keep 'this' alive
-
- if (compiler->info.compCallUnmanaged && (compiler->compCurBB == compiler->genReturnBB))
- {
- /* either it's an "empty" statement or the return statement
- of a synchronized method
- */
-
- genPInvokeMethodEpilog();
- }
-
-#if CPU_LONG_USES_REGPAIR
- /* There must be a long return value */
-
- noway_assert(op1);
-
- /* Evaluate the return value into EDX:EAX */
-
- genEvalIntoFreeRegPair(op1, REG_LNGRET, avoidReg);
-
- noway_assert(op1->gtFlags & GTF_REG_VAL);
- noway_assert(op1->gtRegPair == REG_LNGRET);
-
-#else
- NYI("64-bit return");
-#endif
-
-#ifdef PROFILING_SUPPORTED
- // The profiling hook does not trash registers, so it's safe to call after we emit the code for
- // the GT_RETURN tree.
-
- if (compiler->compCurBB == compiler->genReturnBB)
- {
- genProfilingLeaveCallback();
- }
-#endif
- return;
-
- case GT_QMARK:
- noway_assert(!"inliner-generated ?: for longs NYI");
- NO_WAY("inliner-generated ?: for longs NYI");
- break;
-
- case GT_COMMA:
-
- if (tree->gtFlags & GTF_REVERSE_OPS)
- {
- // Generate op2
- genCodeForTreeLng(op2, needReg, avoidReg);
- genUpdateLife(op2);
-
- noway_assert(op2->gtFlags & GTF_REG_VAL);
-
- regSet.rsMarkRegPairUsed(op2);
-
- // Do side effects of op1
- genEvalSideEffects(op1);
-
- // Recover op2 if spilled
- genRecoverRegPair(op2, REG_PAIR_NONE, RegSet::KEEP_REG);
-
- genReleaseRegPair(op2);
-
- genUpdateLife(tree);
-
- regPair = op2->gtRegPair;
- }
- else
- {
- noway_assert((tree->gtFlags & GTF_REVERSE_OPS) == 0);
-
- /* Generate side effects of the first operand */
-
- genEvalSideEffects(op1);
- genUpdateLife(op1);
-
- /* Is the value of the second operand used? */
-
- if (tree->gtType == TYP_VOID)
- {
- /* The right operand produces no result */
-
- genEvalSideEffects(op2);
- genUpdateLife(tree);
- return;
- }
-
- /* Generate the second operand, i.e. the 'real' value */
-
- genCodeForTreeLng(op2, needReg, avoidReg);
-
- /* The result of 'op2' is also the final result */
-
- regPair = op2->gtRegPair;
- }
-
- goto DONE;
-
- case GT_BOX:
- {
- /* Generate the operand, i.e. the 'real' value */
-
- genCodeForTreeLng(op1, needReg, avoidReg);
-
- /* The result of 'op1' is also the final result */
-
- regPair = op1->gtRegPair;
- }
-
- goto DONE;
-
- case GT_NOP:
- if (op1 == NULL)
- return;
-
- genCodeForTreeLng(op1, needReg, avoidReg);
- regPair = op1->gtRegPair;
- goto DONE;
-
- default:
- break;
- }
-
-#ifdef DEBUG
- compiler->gtDispTree(tree);
-#endif
- noway_assert(!"unexpected 64-bit operator");
- }
-
- /* See what kind of a special operator we have here */
-
- switch (oper)
- {
- regMaskTP retMask;
- case GT_CALL:
- retMask = genCodeForCall(tree, true);
- if (retMask == RBM_NONE)
- regPair = REG_PAIR_NONE;
- else
- regPair = regSet.rsFindRegPairNo(retMask);
- break;
-
- default:
-#ifdef DEBUG
- compiler->gtDispTree(tree);
-#endif
- NO_WAY("unexpected long operator");
- }
-
-DONE:
-
- genUpdateLife(tree);
-
- /* Here we've computed the value of 'tree' into 'regPair' */
-
- noway_assert(regPair != DUMMY_INIT(REG_PAIR_CORRUPT));
-
- genMarkTreeInRegPair(tree, regPair);
-}
-#ifdef _PREFAST_
-#pragma warning(pop)
-#endif
-
-/*****************************************************************************
- *
- * Generate code for a mod of a long by an int.
- */
-
-regPairNo CodeGen::genCodeForLongModInt(GenTreePtr tree, regMaskTP needReg)
-{
-#ifdef _TARGET_X86_
-
- regPairNo regPair;
- regMaskTP addrReg;
-
- genTreeOps oper = tree->OperGet();
- GenTreePtr op1 = tree->gtOp.gtOp1;
- GenTreePtr op2 = tree->gtOp.gtOp2;
-
- /* Codegen only for Unsigned MOD */
- noway_assert(oper == GT_UMOD);
-
- /* op2 must be a long constant in the range 2 to 0x3fffffff */
-
- noway_assert((op2->gtOper == GT_CNS_LNG) && (op2->gtLngCon.gtLconVal >= 2) &&
- (op2->gtLngCon.gtLconVal <= 0x3fffffff));
- int val = (int)op2->gtLngCon.gtLconVal;
-
- op2->ChangeOperConst(GT_CNS_INT); // it's effectively an integer constant
-
- op2->gtType = TYP_INT;
- op2->gtIntCon.gtIconVal = val;
-
- /* Which operand are we supposed to compute first? */
-
- if (tree->gtFlags & GTF_REVERSE_OPS)
- {
- /* Compute the second operand into a scratch register, other
- than EAX or EDX */
-
- needReg = regSet.rsMustExclude(needReg, RBM_PAIR_TMP);
-
- /* Special case: if op2 is a local var we are done */
-
- if (op2->gtOper == GT_LCL_VAR || op2->gtOper == GT_LCL_FLD || op2->gtOper == GT_CLS_VAR)
- {
- addrReg = genMakeRvalueAddressable(op2, needReg, RegSet::KEEP_REG, false);
- }
- else
- {
- genComputeReg(op2, needReg, RegSet::ANY_REG, RegSet::KEEP_REG);
-
- noway_assert(op2->gtFlags & GTF_REG_VAL);
- addrReg = genRegMask(op2->gtRegNum);
- }
-
- /* Compute the first operand into EAX:EDX */
-
- genComputeRegPair(op1, REG_PAIR_TMP, RBM_NONE, RegSet::KEEP_REG, true);
- noway_assert(op1->gtFlags & GTF_REG_VAL);
- noway_assert(op1->gtRegPair == REG_PAIR_TMP);
-
- /* And recover the second argument while locking the first one */
-
- addrReg = genKeepAddressable(op2, addrReg, RBM_PAIR_TMP);
- }
- else
- {
- /* Compute the first operand into EAX:EDX */
-
- genComputeRegPair(op1, REG_PAIR_EAXEDX, RBM_NONE, RegSet::KEEP_REG, true);
- noway_assert(op1->gtFlags & GTF_REG_VAL);
- noway_assert(op1->gtRegPair == REG_PAIR_TMP);
-
- /* Compute the second operand into a scratch register, other
- than EAX or EDX */
-
- needReg = regSet.rsMustExclude(needReg, RBM_PAIR_TMP);
-
- /* Special case: if op2 is a local var we are done */
-
- if (op2->gtOper == GT_LCL_VAR || op2->gtOper == GT_LCL_FLD || op2->gtOper == GT_CLS_VAR)
- {
- addrReg = genMakeRvalueAddressable(op2, needReg, RegSet::KEEP_REG, false);
- }
- else
- {
- genComputeReg(op2, needReg, RegSet::ANY_REG, RegSet::KEEP_REG);
-
- noway_assert(op2->gtFlags & GTF_REG_VAL);
- addrReg = genRegMask(op2->gtRegNum);
- }
-
- /* Recover the first argument */
-
- genRecoverRegPair(op1, REG_PAIR_EAXEDX, RegSet::KEEP_REG);
-
- /* And recover the second argument while locking the first one */
-
- addrReg = genKeepAddressable(op2, addrReg, RBM_PAIR_TMP);
- }
-
- /* At this point, EAX:EDX contains the 64bit dividend and op2->gtRegNum
- contains the 32bit divisor. We want to generate the following code:
-
- ==========================
- Unsigned (GT_UMOD)
-
- cmp edx, op2->gtRegNum
- jb lab_no_overflow
-
- mov temp, eax
- mov eax, edx
- xor edx, edx
-        div op2->gtRegNum
- mov eax, temp
-
- lab_no_overflow:
-        div op2->gtRegNum
- ==========================
- This works because (a * 2^32 + b) % c = ((a % c) * 2^32 + b) % c
- */
-
- BasicBlock* lab_no_overflow = genCreateTempLabel();
-
- // grab a temporary register other than eax, edx, and op2->gtRegNum
-
- regNumber tempReg = regSet.rsGrabReg(RBM_ALLINT & ~(RBM_PAIR_TMP | genRegMask(op2->gtRegNum)));
-
- // EAX and tempReg will be trashed by the mov instructions. Doing
- // this early won't hurt, and might prevent confusion in genSetRegToIcon.
-
- regTracker.rsTrackRegTrash(REG_PAIR_TMP_LO);
- regTracker.rsTrackRegTrash(tempReg);
-
- inst_RV_RV(INS_cmp, REG_PAIR_TMP_HI, op2->gtRegNum);
- inst_JMP(EJ_jb, lab_no_overflow);
-
- inst_RV_RV(INS_mov, tempReg, REG_PAIR_TMP_LO, TYP_INT);
- inst_RV_RV(INS_mov, REG_PAIR_TMP_LO, REG_PAIR_TMP_HI, TYP_INT);
- genSetRegToIcon(REG_PAIR_TMP_HI, 0, TYP_INT);
- inst_TT(INS_UNSIGNED_DIVIDE, op2);
- inst_RV_RV(INS_mov, REG_PAIR_TMP_LO, tempReg, TYP_INT);
-
- // Jump point for no overflow divide
-
- genDefineTempLabel(lab_no_overflow);
-
- // Issue the divide instruction
-
- inst_TT(INS_UNSIGNED_DIVIDE, op2);
-
- /* EAX, EDX, tempReg and op2->gtRegNum are now trashed */
-
- regTracker.rsTrackRegTrash(REG_PAIR_TMP_LO);
- regTracker.rsTrackRegTrash(REG_PAIR_TMP_HI);
- regTracker.rsTrackRegTrash(tempReg);
- regTracker.rsTrackRegTrash(op2->gtRegNum);
-
- if (tree->gtFlags & GTF_MOD_INT_RESULT)
- {
- /* We don't need to normalize the result, because the caller wants
- an int (in edx) */
-
- regPair = REG_PAIR_TMP_REVERSE;
- }
- else
- {
- /* The result is now in EDX, we now have to normalize it, i.e. we have
- to issue:
- mov eax, edx; xor edx, edx (for UMOD)
- */
-
- inst_RV_RV(INS_mov, REG_PAIR_TMP_LO, REG_PAIR_TMP_HI, TYP_INT);
-
- genSetRegToIcon(REG_PAIR_TMP_HI, 0, TYP_INT);
-
- regPair = REG_PAIR_TMP;
- }
-
- genReleaseRegPair(op1);
- genDoneAddressable(op2, addrReg, RegSet::KEEP_REG);
-
- return regPair;
-
-#else // !_TARGET_X86_
-
- NYI("codegen for LongModInt");
-
- return REG_PAIR_NONE;
-
-#endif // !_TARGET_X86_
-}
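-
-// Illustrative sketch only: a hypothetical helper (not part of the JIT) that mirrors the
-// identity used above in plain C++. Reducing the high half first guarantees EDX < divisor,
-// so the second 64-by-32 divide cannot fault with a quotient that overflows 32 bits.
-#if 0
-static unsigned sketchULongModUInt(unsigned long long dividend, unsigned divisor)
-{
-    unsigned hi = (unsigned)(dividend >> 32);
-    unsigned lo = (unsigned)(dividend & 0xffffffff);
-
-    // (hi * 2^32 + lo) % c == ((hi % c) * 2^32 + lo) % c
-    unsigned long long folded = (((unsigned long long)(hi % divisor)) << 32) | lo;
-    return (unsigned)(folded % divisor);
-}
-#endif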
-
-// Given a tree, return the number of registers that are currently
-// used to hold integer enregistered local variables.
-// Note that an enregistered TYP_LONG can take 1 or 2 registers.
-unsigned CodeGen::genRegCountForLiveIntEnregVars(GenTreePtr tree)
-{
- unsigned regCount = 0;
-
- VARSET_ITER_INIT(compiler, iter, compiler->compCurLife, varNum);
- while (iter.NextElem(compiler, &varNum))
- {
- unsigned lclNum = compiler->lvaTrackedToVarNum[varNum];
- LclVarDsc* varDsc = &compiler->lvaTable[lclNum];
-
- if (varDsc->lvRegister && !varTypeIsFloating(varDsc->TypeGet()))
- {
- ++regCount;
-
- if (varTypeIsLong(varDsc->TypeGet()))
- {
- // For enregistered LONG/ULONG, the lower half should always be in a register.
- noway_assert(varDsc->lvRegNum != REG_STK);
-
-            // If the LONG/ULONG is NOT partially enregistered, then the higher half should be in a register as
- // well.
- if (varDsc->lvOtherReg != REG_STK)
- {
- ++regCount;
- }
- }
- }
- }
-
- return regCount;
-}
-
-/*****************************************************************************/
-/*****************************************************************************/
-#if CPU_HAS_FP_SUPPORT
-/*****************************************************************************
- *
- * Generate code for a floating-point operation.
- */
-
-void CodeGen::genCodeForTreeFlt(GenTreePtr tree,
- regMaskTP needReg, /* = RBM_ALLFLOAT */
- regMaskTP bestReg) /* = RBM_NONE */
-{
- genCodeForTreeFloat(tree, needReg, bestReg);
-
- if (tree->OperGet() == GT_RETURN)
- {
- // Make sure to get ALL THE EPILOG CODE
-
- // TODO: this should be done AFTER we called exit mon so that
- // we are sure that we don't have to keep 'this' alive
-
- if (compiler->info.compCallUnmanaged && (compiler->compCurBB == compiler->genReturnBB))
- {
- /* either it's an "empty" statement or the return statement
- of a synchronized method
- */
-
- genPInvokeMethodEpilog();
- }
-
-#ifdef PROFILING_SUPPORTED
- // The profiling hook does not trash registers, so it's safe to call after we emit the code for
- // the GT_RETURN tree.
-
- if (compiler->compCurBB == compiler->genReturnBB)
- {
- genProfilingLeaveCallback();
- }
-#endif
- }
-}
-
-/*****************************************************************************/
-#endif // CPU_HAS_FP_SUPPORT
-
-/*****************************************************************************
- *
- * Generate a table switch - the switch value (0-based) is in register 'reg'.
- */
-
-void CodeGen::genTableSwitch(regNumber reg, unsigned jumpCnt, BasicBlock** jumpTab)
-{
- unsigned jmpTabBase;
-
- if (jumpCnt == 1)
- {
- // In debug code, we don't optimize away the trivial switch statements. So we can get here with a
- // BBJ_SWITCH with only a default case. Therefore, don't generate the switch table.
- noway_assert(compiler->opts.MinOpts() || compiler->opts.compDbgCode);
- inst_JMP(EJ_jmp, jumpTab[0]);
- return;
- }
-
- noway_assert(jumpCnt >= 2);
-
- /* Is the number of cases right for a test and jump switch? */
-
- const bool fFirstCaseFollows = (compiler->compCurBB->bbNext == jumpTab[0]);
- const bool fDefaultFollows = (compiler->compCurBB->bbNext == jumpTab[jumpCnt - 1]);
- const bool fHaveScratchReg = ((regSet.rsRegMaskFree() & genRegMask(reg)) != 0);
-
- unsigned minSwitchTabJumpCnt = 2; // table is better than just 2 cmp/jcc
-
- // This means really just a single cmp/jcc (aka a simple if/else)
- if (fFirstCaseFollows || fDefaultFollows)
- minSwitchTabJumpCnt++;
-
-#ifdef _TARGET_ARM_
- // On the ARM for small switch tables we will
- // generate a sequence of compare and branch instructions
- // because the code to load the base of the switch
- // table is huge and hideous due to the relocation... :(
- //
- minSwitchTabJumpCnt++;
- if (fHaveScratchReg)
- minSwitchTabJumpCnt++;
-
-#endif // _TARGET_ARM_
-
- if (jumpCnt < minSwitchTabJumpCnt)
- {
- /* Does the first case label follow? */
- emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED);
-
- if (fFirstCaseFollows)
- {
- /* Check for the default case */
- inst_RV_IV(INS_cmp, reg, jumpCnt - 1, EA_4BYTE);
- emitJumpKind jmpGEU = genJumpKindForOper(GT_GE, CK_UNSIGNED);
- inst_JMP(jmpGEU, jumpTab[jumpCnt - 1]);
-
- /* No need to jump to the first case */
-
- jumpCnt -= 2;
- jumpTab += 1;
-
-            /* Generate a series of "sub reg, 1 ; je label" */
-
- // Make sure that we can trash the register so
- // that we can generate a series of compares and jumps
- //
- if ((jumpCnt > 0) && !fHaveScratchReg)
- {
- regNumber tmpReg = regSet.rsGrabReg(RBM_ALLINT);
- inst_RV_RV(INS_mov, tmpReg, reg);
- regTracker.rsTrackRegTrash(tmpReg);
- reg = tmpReg;
- }
-
- while (jumpCnt > 0)
- {
- inst_RV_IV(INS_sub, reg, 1, EA_4BYTE, INS_FLAGS_SET);
- inst_JMP(jmpEqual, *jumpTab++);
- jumpCnt--;
- }
- }
- else
- {
- /* Check for case0 first */
- instGen_Compare_Reg_To_Zero(EA_4BYTE, reg); // set flags
- inst_JMP(jmpEqual, *jumpTab);
-
- /* No need to jump to the first case or the default */
-
- jumpCnt -= 2;
- jumpTab += 1;
-
-            /* Generate a series of "sub reg, 1 ; je label" */
-
- // Make sure that we can trash the register so
- // that we can generate a series of compares and jumps
- //
- if ((jumpCnt > 0) && !fHaveScratchReg)
- {
- regNumber tmpReg = regSet.rsGrabReg(RBM_ALLINT);
- inst_RV_RV(INS_mov, tmpReg, reg);
- regTracker.rsTrackRegTrash(tmpReg);
- reg = tmpReg;
- }
-
- while (jumpCnt > 0)
- {
- inst_RV_IV(INS_sub, reg, 1, EA_4BYTE, INS_FLAGS_SET);
- inst_JMP(jmpEqual, *jumpTab++);
- jumpCnt--;
- }
-
- if (!fDefaultFollows)
- {
- inst_JMP(EJ_jmp, *jumpTab);
- }
- }
-
- if ((fFirstCaseFollows || fDefaultFollows) &&
- compiler->fgInDifferentRegions(compiler->compCurBB, compiler->compCurBB->bbNext))
- {
- inst_JMP(EJ_jmp, compiler->compCurBB->bbNext);
- }
-
- return;
- }
-
- /* First take care of the default case */
-
- inst_RV_IV(INS_cmp, reg, jumpCnt - 1, EA_4BYTE);
- emitJumpKind jmpGEU = genJumpKindForOper(GT_GE, CK_UNSIGNED);
- inst_JMP(jmpGEU, jumpTab[jumpCnt - 1]);
-
- /* Generate the jump table contents */
-
- jmpTabBase = getEmitter()->emitBBTableDataGenBeg(jumpCnt - 1, false);
-
-#ifdef DEBUG
- if (compiler->opts.dspCode)
- printf("\n J_M%03u_DS%02u LABEL DWORD\n", Compiler::s_compMethodsCount, jmpTabBase);
-#endif
-
- for (unsigned index = 0; index < jumpCnt - 1; index++)
- {
- BasicBlock* target = jumpTab[index];
-
- noway_assert(target->bbFlags & BBF_JMP_TARGET);
-
-#ifdef DEBUG
- if (compiler->opts.dspCode)
- printf(" DD L_M%03u_BB%02u\n", Compiler::s_compMethodsCount, target->bbNum);
-#endif
-
- getEmitter()->emitDataGenData(index, target);
- }
-
- getEmitter()->emitDataGenEnd();
-
-#ifdef _TARGET_ARM_
- // We need to load the address of the table into a register.
- // The data section might get placed a long distance away, so we
- // can't safely do a PC-relative ADR. :(
- // Pick any register except the index register.
- //
- regNumber regTabBase = regSet.rsGrabReg(RBM_ALLINT & ~genRegMask(reg));
- getEmitter()->emitIns_R_D(INS_movw, EA_HANDLE_CNS_RELOC, jmpTabBase, regTabBase);
- getEmitter()->emitIns_R_D(INS_movt, EA_HANDLE_CNS_RELOC, jmpTabBase, regTabBase);
- regTracker.rsTrackRegTrash(regTabBase);
-
-    // LDR PC, [regTableBase + reg * 4] (encoded as LDR PC, [regTableBase, reg, LSL 2])
- getEmitter()->emitIns_R_ARX(INS_ldr, EA_PTRSIZE, REG_PC, regTabBase, reg, TARGET_POINTER_SIZE, 0);
-
-#else // !_TARGET_ARM_
-
- getEmitter()->emitIns_IJ(EA_4BYTE_DSP_RELOC, reg, jmpTabBase);
-
-#endif
-}
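-
-// Illustrative sketch only: a hypothetical C++ equivalent of the dispatch emitted above
-// (all names are invented for illustration). The unsigned bounds check routes out-of-range
-// values to the default target; in-range values jump indirectly through the data-section table.
-#if 0
-static void sketchTableSwitch(unsigned value, void (*const* caseHandlers)(), unsigned caseCnt, void (*defaultHandler)())
-{
-    if (value >= caseCnt) // "cmp reg, jumpCnt - 1 ; jae default"
-    {
-        defaultHandler();
-        return;
-    }
-    caseHandlers[value](); // "jmp [jmpTabBase + reg * sizeof(void*)]"
-}
-#endif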
-
-/*****************************************************************************
- *
- * Generate code for a switch statement.
- */
-
-void CodeGen::genCodeForSwitch(GenTreePtr tree)
-{
- unsigned jumpCnt;
- BasicBlock** jumpTab;
-
- GenTreePtr oper;
- regNumber reg;
-
- noway_assert(tree->gtOper == GT_SWITCH);
- oper = tree->gtOp.gtOp1;
- noway_assert(genActualTypeIsIntOrI(oper->gtType));
-
- /* Get hold of the jump table */
-
- noway_assert(compiler->compCurBB->bbJumpKind == BBJ_SWITCH);
-
- jumpCnt = compiler->compCurBB->bbJumpSwt->bbsCount;
- jumpTab = compiler->compCurBB->bbJumpSwt->bbsDstTab;
-
- /* Compute the switch value into some register */
-
- genCodeForTree(oper, 0);
-
- /* Get hold of the register the value is in */
-
- noway_assert(oper->gtFlags & GTF_REG_VAL);
- reg = oper->gtRegNum;
-
-#if FEATURE_STACK_FP_X87
- if (!compCurFPState.IsEmpty())
- {
- return genTableSwitchStackFP(reg, jumpCnt, jumpTab);
- }
- else
-#endif // FEATURE_STACK_FP_X87
- {
- return genTableSwitch(reg, jumpCnt, jumpTab);
- }
-}
-
-/*****************************************************************************/
-/*****************************************************************************
- * Emit a call to a helper function.
- */
-
-// inline
-void CodeGen::genEmitHelperCall(unsigned helper, int argSize, emitAttr retSize)
-{
- // Can we call the helper function directly
-
- void *addr = NULL, **pAddr = NULL;
-
-#if defined(_TARGET_ARM_) && defined(DEBUG) && defined(PROFILING_SUPPORTED)
- // Don't ask VM if it hasn't requested ELT hooks
- if (!compiler->compProfilerHookNeeded && compiler->opts.compJitELTHookEnabled &&
- (helper == CORINFO_HELP_PROF_FCN_ENTER || helper == CORINFO_HELP_PROF_FCN_LEAVE ||
- helper == CORINFO_HELP_PROF_FCN_TAILCALL))
- {
- addr = compiler->compProfilerMethHnd;
- }
- else
-#endif
- {
- addr = compiler->compGetHelperFtn((CorInfoHelpFunc)helper, (void**)&pAddr);
- }
-
-#ifdef _TARGET_ARM_
- if (!addr || !arm_Valid_Imm_For_BL((ssize_t)addr))
- {
- // Load the address into a register and call through a register
- regNumber indCallReg =
- regSet.rsGrabReg(RBM_ALLINT); // Grab an available register to use for the CALL indirection
- if (addr)
- {
- instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, indCallReg, (ssize_t)addr);
- }
- else
- {
- getEmitter()->emitIns_R_AI(INS_ldr, EA_PTR_DSP_RELOC, indCallReg, (ssize_t)pAddr);
- regTracker.rsTrackRegTrash(indCallReg);
- }
-
- getEmitter()->emitIns_Call(emitter::EC_INDIR_R, compiler->eeFindHelper(helper),
- INDEBUG_LDISASM_COMMA(nullptr) NULL, // addr
- argSize, retSize, gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur,
- gcInfo.gcRegByrefSetCur,
- BAD_IL_OFFSET, // ilOffset
- indCallReg, // ireg
- REG_NA, 0, 0, // xreg, xmul, disp
- false, // isJump
- emitter::emitNoGChelper(helper),
- (CorInfoHelpFunc)helper == CORINFO_HELP_PROF_FCN_LEAVE);
- }
- else
- {
- getEmitter()->emitIns_Call(emitter::EC_FUNC_TOKEN, compiler->eeFindHelper(helper),
- INDEBUG_LDISASM_COMMA(nullptr) addr, argSize, retSize, gcInfo.gcVarPtrSetCur,
- gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur, BAD_IL_OFFSET, REG_NA, REG_NA, 0,
- 0, /* ilOffset, ireg, xreg, xmul, disp */
- false, /* isJump */
- emitter::emitNoGChelper(helper),
- (CorInfoHelpFunc)helper == CORINFO_HELP_PROF_FCN_LEAVE);
- }
-#else
-
- {
- emitter::EmitCallType callType = emitter::EC_FUNC_TOKEN;
-
- if (!addr)
- {
- callType = emitter::EC_FUNC_TOKEN_INDIR;
- addr = pAddr;
- }
-
- getEmitter()->emitIns_Call(callType, compiler->eeFindHelper(helper), INDEBUG_LDISASM_COMMA(nullptr) addr,
- argSize, retSize, gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur,
- gcInfo.gcRegByrefSetCur, BAD_IL_OFFSET, REG_NA, REG_NA, 0,
- 0, /* ilOffset, ireg, xreg, xmul, disp */
- false, /* isJump */
- emitter::emitNoGChelper(helper));
- }
-#endif
-
- regTracker.rsTrashRegSet(RBM_CALLEE_TRASH);
- regTracker.rsTrashRegsForGCInterruptability();
-}
-
-/*****************************************************************************
- *
- * Push the given registers.
- * This function does not check if the register is marked as used, etc.
- */
-
-regMaskTP CodeGen::genPushRegs(regMaskTP regs, regMaskTP* byrefRegs, regMaskTP* noRefRegs)
-{
- *byrefRegs = RBM_NONE;
- *noRefRegs = RBM_NONE;
-
- // noway_assert((regs & regSet.rsRegMaskFree()) == regs); // Don't care. Caller is responsible for all this
-
- if (regs == RBM_NONE)
- return RBM_NONE;
-
-#if FEATURE_FIXED_OUT_ARGS
-
- NYI("Don't call genPushRegs with real regs!");
- return RBM_NONE;
-
-#else // FEATURE_FIXED_OUT_ARGS
-
- noway_assert(genTypeStSz(TYP_REF) == genTypeStSz(TYP_I_IMPL));
- noway_assert(genTypeStSz(TYP_BYREF) == genTypeStSz(TYP_I_IMPL));
-
- regMaskTP pushedRegs = regs;
-
- for (regNumber reg = REG_INT_FIRST; regs != RBM_NONE; reg = REG_NEXT(reg))
- {
- regMaskTP regBit = regMaskTP(1) << reg;
-
- if ((regBit & regs) == RBM_NONE)
- continue;
-
- var_types type;
- if (regBit & gcInfo.gcRegGCrefSetCur)
- {
- type = TYP_REF;
- }
- else if (regBit & gcInfo.gcRegByrefSetCur)
- {
- *byrefRegs |= regBit;
- type = TYP_BYREF;
- }
- else if (noRefRegs != NULL)
- {
- *noRefRegs |= regBit;
- type = TYP_I_IMPL;
- }
- else
- {
- continue;
- }
-
- inst_RV(INS_push, reg, type);
-
- genSinglePush();
- gcInfo.gcMarkRegSetNpt(regBit);
-
- regs &= ~regBit;
- }
-
- return pushedRegs;
-
-#endif // FEATURE_FIXED_OUT_ARGS
-}
-
-/*****************************************************************************
- *
- * Pop the registers pushed by genPushRegs()
- */
-
-void CodeGen::genPopRegs(regMaskTP regs, regMaskTP byrefRegs, regMaskTP noRefRegs)
-{
- if (regs == RBM_NONE)
- return;
-
-#if FEATURE_FIXED_OUT_ARGS
-
- NYI("Don't call genPopRegs with real regs!");
-
-#else // FEATURE_FIXED_OUT_ARGS
-
- noway_assert((regs & byrefRegs) == byrefRegs);
- noway_assert((regs & noRefRegs) == noRefRegs);
- // noway_assert((regs & regSet.rsRegMaskFree()) == regs); // Don't care. Caller is responsible for all this
- noway_assert((regs & (gcInfo.gcRegGCrefSetCur | gcInfo.gcRegByrefSetCur)) == RBM_NONE);
-
- noway_assert(genTypeStSz(TYP_REF) == genTypeStSz(TYP_INT));
- noway_assert(genTypeStSz(TYP_BYREF) == genTypeStSz(TYP_INT));
-
-    // Walk the registers in the reverse order from genPushRegs()
- for (regNumber reg = REG_INT_LAST; regs != RBM_NONE; reg = REG_PREV(reg))
- {
- regMaskTP regBit = regMaskTP(1) << reg;
-
- if ((regBit & regs) == RBM_NONE)
- continue;
-
- var_types type;
- if (regBit & byrefRegs)
- {
- type = TYP_BYREF;
- }
- else if (regBit & noRefRegs)
- {
- type = TYP_INT;
- }
- else
- {
- type = TYP_REF;
- }
-
- inst_RV(INS_pop, reg, type);
- genSinglePop();
-
- if (type != TYP_INT)
- gcInfo.gcMarkRegPtrVal(reg, type);
-
- regs &= ~regBit;
- }
-
-#endif // FEATURE_FIXED_OUT_ARGS
-}
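-
-// Illustrative usage sketch only: a hypothetical call site (not from these sources) showing
-// how the masks produced by genPushRegs feed the matching genPopRegs, so that the popped
-// registers are re-marked with the correct byref / non-GC classification.
-#if 0
-void CodeGen::sketchPushPopRoundTrip(regMaskTP regsToSave)
-{
-    regMaskTP byrefRegs;
-    regMaskTP noRefRegs;
-
-    regMaskTP pushed = genPushRegs(regsToSave, &byrefRegs, &noRefRegs);
-    // ... emit code that would otherwise lose the GC-ness of these registers ...
-    genPopRegs(pushed, byrefRegs, noRefRegs);
-}
-#endif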
-
-/*****************************************************************************
- *
- * Push the given argument list, right to left; returns the total amount of
- * stuff pushed.
- */
-
-#if !FEATURE_FIXED_OUT_ARGS
-#ifdef _PREFAST_
-#pragma warning(push)
-#pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
-#endif
-size_t CodeGen::genPushArgList(GenTreePtr call)
-{
- GenTreeArgList* regArgs = call->gtCall.gtCallLateArgs;
- size_t size = 0;
- regMaskTP addrReg;
-
- GenTreeArgList* args;
- // Create a local, artificial GenTreeArgList that includes the gtCallObjp, if that exists, as first argument,
- // so we can iterate over this argument list more uniformly.
- // Need to provide a temporary non-null first argument here: if we use this, we'll replace it.
- GenTreeArgList firstForObjp(/*temp dummy arg*/ call, call->gtCall.gtCallArgs);
- if (call->gtCall.gtCallObjp == NULL)
- {
- args = call->gtCall.gtCallArgs;
- }
- else
- {
- firstForObjp.Current() = call->gtCall.gtCallObjp;
- args = &firstForObjp;
- }
-
- GenTreePtr curr;
- var_types type;
- size_t opsz;
-
- for (; args; args = args->Rest())
- {
- addrReg = DUMMY_INIT(RBM_CORRUPT); // to detect uninitialized use
-
- /* Get hold of the next argument value */
- curr = args->Current();
-
- if (curr->IsArgPlaceHolderNode())
- {
- assert(curr->gtFlags & GTF_LATE_ARG);
-
- addrReg = 0;
- continue;
- }
-
-        // If we have a comma expression, evaluate all but the last operand, then deal with the last.
- if (!(curr->gtFlags & GTF_LATE_ARG))
- curr = genCodeForCommaTree(curr);
-
- /* See what type of a value we're passing */
- type = curr->TypeGet();
-
- opsz = genTypeSize(genActualType(type));
-
- switch (type)
- {
- case TYP_BOOL:
- case TYP_BYTE:
- case TYP_SHORT:
- case TYP_CHAR:
- case TYP_UBYTE:
-
- /* Don't want to push a small value, make it a full word */
-
- genCodeForTree(curr, 0);
-
- __fallthrough; // now the value should be in a register ...
-
- case TYP_INT:
- case TYP_REF:
- case TYP_BYREF:
-#if !CPU_HAS_FP_SUPPORT
- case TYP_FLOAT:
-#endif
-
- if (curr->gtFlags & GTF_LATE_ARG)
- {
- assert(curr->gtOper == GT_ASG);
- /* one more argument will be passed in a register */
- noway_assert(intRegState.rsCurRegArgNum < MAX_REG_ARG);
-
- /* arg is passed in the register, nothing on the stack */
-
- opsz = 0;
- }
-
- /* Is this value a handle? */
-
- if (curr->gtOper == GT_CNS_INT && curr->IsIconHandle())
- {
- /* Emit a fixup for the push instruction */
-
- inst_IV_handle(INS_push, curr->gtIntCon.gtIconVal);
- genSinglePush();
-
- addrReg = 0;
- break;
- }
-
- /* Is the value a constant? */
-
- if (curr->gtOper == GT_CNS_INT)
- {
-
-#if REDUNDANT_LOAD
- regNumber reg = regTracker.rsIconIsInReg(curr->gtIntCon.gtIconVal);
-
- if (reg != REG_NA)
- {
- inst_RV(INS_push, reg, TYP_INT);
- }
- else
-#endif
- {
- inst_IV(INS_push, curr->gtIntCon.gtIconVal);
- }
-
- /* If the type is TYP_REF, then this must be a "null". So we can
- treat it as a TYP_INT as we don't need to report it as a GC ptr */
-
- noway_assert(curr->TypeGet() == TYP_INT ||
- (varTypeIsGC(curr->TypeGet()) && curr->gtIntCon.gtIconVal == 0));
-
- genSinglePush();
-
- addrReg = 0;
- break;
- }
-
- if (curr->gtFlags & GTF_LATE_ARG)
- {
- /* This must be a register arg temp assignment */
-
- noway_assert(curr->gtOper == GT_ASG);
-
- /* Evaluate it to the temp */
-
- genCodeForTree(curr, 0);
-
- /* Increment the current argument register counter */
-
- intRegState.rsCurRegArgNum++;
-
- addrReg = 0;
- }
- else
- {
- /* This is a 32-bit integer non-register argument */
-
- addrReg = genMakeRvalueAddressable(curr, 0, RegSet::KEEP_REG, false);
- inst_TT(INS_push, curr);
- genSinglePush();
- genDoneAddressable(curr, addrReg, RegSet::KEEP_REG);
- }
- break;
-
- case TYP_LONG:
-#if !CPU_HAS_FP_SUPPORT
- case TYP_DOUBLE:
-#endif
-
- /* Is the value a constant? */
-
- if (curr->gtOper == GT_CNS_LNG)
- {
- inst_IV(INS_push, (int)(curr->gtLngCon.gtLconVal >> 32));
- genSinglePush();
- inst_IV(INS_push, (int)(curr->gtLngCon.gtLconVal));
- genSinglePush();
-
- addrReg = 0;
- }
- else
- {
- addrReg = genMakeAddressable(curr, 0, RegSet::FREE_REG);
-
- inst_TT(INS_push, curr, sizeof(int));
- genSinglePush();
- inst_TT(INS_push, curr);
- genSinglePush();
- }
- break;
-
-#if CPU_HAS_FP_SUPPORT
- case TYP_FLOAT:
- case TYP_DOUBLE:
-#endif
-#if FEATURE_STACK_FP_X87
- addrReg = genPushArgumentStackFP(curr);
-#else
- NYI("FP codegen");
- addrReg = 0;
-#endif
- break;
-
- case TYP_VOID:
-
- /* Is this a nothing node, deferred register argument? */
-
- if (curr->gtFlags & GTF_LATE_ARG)
- {
- GenTree* arg = curr;
- if (arg->gtOper == GT_COMMA)
- {
- while (arg->gtOper == GT_COMMA)
- {
- GenTreePtr op1 = arg->gtOp.gtOp1;
- genEvalSideEffects(op1);
- genUpdateLife(op1);
- arg = arg->gtOp.gtOp2;
- }
- if (!arg->IsNothingNode())
- {
- genEvalSideEffects(arg);
- genUpdateLife(arg);
- }
- }
-
- /* increment the register count and continue with the next argument */
-
- intRegState.rsCurRegArgNum++;
-
- noway_assert(opsz == 0);
-
- addrReg = 0;
- break;
- }
-
- __fallthrough;
-
- case TYP_STRUCT:
- {
- GenTree* arg = curr;
- while (arg->gtOper == GT_COMMA)
- {
- GenTreePtr op1 = arg->gtOp.gtOp1;
- genEvalSideEffects(op1);
- genUpdateLife(op1);
- arg = arg->gtOp.gtOp2;
- }
-
- noway_assert(arg->gtOper == GT_OBJ || arg->gtOper == GT_MKREFANY || arg->gtOper == GT_IND);
- noway_assert((arg->gtFlags & GTF_REVERSE_OPS) == 0);
- noway_assert(addrReg == DUMMY_INIT(RBM_CORRUPT));
-
- if (arg->gtOper == GT_MKREFANY)
- {
- GenTreePtr op1 = arg->gtOp.gtOp1;
- GenTreePtr op2 = arg->gtOp.gtOp2;
-
- addrReg = genMakeAddressable(op1, RBM_NONE, RegSet::KEEP_REG);
-
- /* Is this value a handle? */
- if (op2->gtOper == GT_CNS_INT && op2->IsIconHandle())
- {
- /* Emit a fixup for the push instruction */
-
- inst_IV_handle(INS_push, op2->gtIntCon.gtIconVal);
- genSinglePush();
- }
- else
- {
- regMaskTP addrReg2 = genMakeRvalueAddressable(op2, 0, RegSet::KEEP_REG, false);
- inst_TT(INS_push, op2);
- genSinglePush();
- genDoneAddressable(op2, addrReg2, RegSet::KEEP_REG);
- }
- addrReg = genKeepAddressable(op1, addrReg);
- inst_TT(INS_push, op1);
- genSinglePush();
- genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);
-
- opsz = 2 * TARGET_POINTER_SIZE;
- }
- else
- {
- noway_assert(arg->gtOper == GT_OBJ);
-
- if (arg->gtObj.gtOp1->gtOper == GT_ADDR && arg->gtObj.gtOp1->gtOp.gtOp1->gtOper == GT_LCL_VAR)
- {
- GenTreePtr structLocalTree = arg->gtObj.gtOp1->gtOp.gtOp1;
- unsigned structLclNum = structLocalTree->gtLclVarCommon.gtLclNum;
- LclVarDsc* varDsc = &compiler->lvaTable[structLclNum];
-
- // As much as we would like this to be a noway_assert, we can't because
-                            // there are some weird casts out there, and backwards compatibility
- // dictates we do *NOT* start rejecting them now. lvaGetPromotion and
- // lvPromoted in general currently do not require the local to be
- // TYP_STRUCT, so this assert is really more about how we wish the world
-                            // was than some JIT invariant.
- assert((structLocalTree->TypeGet() == TYP_STRUCT) || compiler->compUnsafeCastUsed);
-
- Compiler::lvaPromotionType promotionType = compiler->lvaGetPromotionType(varDsc);
-
- if (varDsc->lvPromoted &&
- promotionType ==
- Compiler::PROMOTION_TYPE_INDEPENDENT) // Otherwise it is guaranteed to live on stack.
- {
- assert(!varDsc->lvAddrExposed); // Compiler::PROMOTION_TYPE_INDEPENDENT ==> not exposed.
-
- addrReg = 0;
-
- // Get the number of BYTES to copy to the stack
- opsz = roundUp(compiler->info.compCompHnd->getClassSize(arg->gtObj.gtClass), sizeof(void*));
- size_t bytesToBeCopied = opsz;
-
- // postponedFields is true if we have any postponed fields
- // Any field that does not start on a 4-byte boundary is a postponed field
- // Such a field is required to be a short or a byte
- //
- // postponedRegKind records the kind of scratch register we will
- // need to process the postponed fields
- // RBM_NONE means that we don't need a register
- //
- // expectedAlignedOffset records the aligned offset that
- // has to exist for a push to cover the postponed fields.
- // Since all promoted structs have the tightly packed property
- // we are guaranteed that we will have such a push
- //
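-                                // Worked example (illustrative field layout, not taken from these
-                                // sources): a promoted struct with an int at offset 0, a short at
-                                // offset 4 and a short at offset 6 is pushed highest offset first.
-                                // The short at offset 6 is not stack-aligned, so it is postponed and
-                                // expectedAlignedOffset becomes 4; the 4-byte push for the aligned
-                                // short at offset 4 covers bytes 4..7, and the postponed short is
-                                // stored into [esp+6] by the forward loop further below.
-                                //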
- bool postponedFields = false;
- regMaskTP postponedRegKind = RBM_NONE;
- size_t expectedAlignedOffset = UINT_MAX;
-
- VARSET_TP* deadVarBits = NULL;
- compiler->GetPromotedStructDeathVars()->Lookup(structLocalTree, &deadVarBits);
-
- // Reverse loop, starts pushing from the end of the struct (i.e. the highest field offset)
- //
- for (int varNum = varDsc->lvFieldLclStart + varDsc->lvFieldCnt - 1;
- varNum >= (int)varDsc->lvFieldLclStart; varNum--)
- {
- LclVarDsc* fieldVarDsc = compiler->lvaTable + varNum;
-#ifdef DEBUG
- if (fieldVarDsc->lvExactSize == 2 * sizeof(unsigned))
- {
- noway_assert(fieldVarDsc->lvFldOffset % (2 * sizeof(unsigned)) == 0);
- noway_assert(fieldVarDsc->lvFldOffset + (2 * sizeof(unsigned)) == bytesToBeCopied);
- }
-#endif
- // Whenever we see a stack-aligned fieldVarDsc then we use 4-byte push instruction(s)
- // For packed structs we will go back and store the unaligned bytes and shorts
- // in the next loop
- //
- if (fieldVarDsc->lvStackAligned())
- {
- if (fieldVarDsc->lvExactSize != 2 * sizeof(unsigned) &&
- fieldVarDsc->lvFldOffset + sizeof(void*) != bytesToBeCopied)
- {
-                                            // Might need 4 bytes of padding for fields other than LONG and DOUBLE.
-                                            // Just push some junk (i.e., EAX) on the stack.
- inst_RV(INS_push, REG_EAX, TYP_INT);
- genSinglePush();
-
- bytesToBeCopied -= sizeof(void*);
- }
-
- // If we have an expectedAlignedOffset make sure that this push instruction
- // is what we expect to cover the postponedFields
- //
- if (expectedAlignedOffset != UINT_MAX)
- {
- // This push must be for a small field
- noway_assert(fieldVarDsc->lvExactSize < 4);
- // The fldOffset for this push should be equal to the expectedAlignedOffset
- noway_assert(fieldVarDsc->lvFldOffset == expectedAlignedOffset);
- expectedAlignedOffset = UINT_MAX;
- }
-
- // Push the "upper half" of LONG var first
-
- if (isRegPairType(fieldVarDsc->lvType))
- {
- if (fieldVarDsc->lvOtherReg != REG_STK)
- {
- inst_RV(INS_push, fieldVarDsc->lvOtherReg, TYP_INT);
- genSinglePush();
-
- // Prepare the set of vars to be cleared from gcref/gcbyref set
- // in case they become dead after genUpdateLife.
- // genDoneAddressable() will remove dead gc vars by calling
- // gcInfo.gcMarkRegSetNpt.
- // Although it is not addrReg, we just borrow the name here.
- addrReg |= genRegMask(fieldVarDsc->lvOtherReg);
- }
- else
- {
- getEmitter()->emitIns_S(INS_push, EA_4BYTE, varNum, sizeof(void*));
- genSinglePush();
- }
-
- bytesToBeCopied -= sizeof(void*);
- }
-
- // Push the "upper half" of DOUBLE var if it is not enregistered.
-
- if (fieldVarDsc->lvType == TYP_DOUBLE)
- {
- if (!fieldVarDsc->lvRegister)
- {
- getEmitter()->emitIns_S(INS_push, EA_4BYTE, varNum, sizeof(void*));
- genSinglePush();
- }
-
- bytesToBeCopied -= sizeof(void*);
- }
-
- //
- // Push the field local.
- //
-
- if (fieldVarDsc->lvRegister)
- {
- if (!varTypeIsFloating(genActualType(fieldVarDsc->TypeGet())))
- {
- inst_RV(INS_push, fieldVarDsc->lvRegNum,
- genActualType(fieldVarDsc->TypeGet()));
- genSinglePush();
-
- // Prepare the set of vars to be cleared from gcref/gcbyref set
- // in case they become dead after genUpdateLife.
- // genDoneAddressable() will remove dead gc vars by calling
- // gcInfo.gcMarkRegSetNpt.
- // Although it is not addrReg, we just borrow the name here.
- addrReg |= genRegMask(fieldVarDsc->lvRegNum);
- }
- else
- {
- // Must be TYP_FLOAT or TYP_DOUBLE
- noway_assert(fieldVarDsc->lvRegNum != REG_FPNONE);
-
- noway_assert(fieldVarDsc->lvExactSize == sizeof(unsigned) ||
- fieldVarDsc->lvExactSize == 2 * sizeof(unsigned));
-
- inst_RV_IV(INS_sub, REG_SPBASE, fieldVarDsc->lvExactSize, EA_PTRSIZE);
-
- genSinglePush();
- if (fieldVarDsc->lvExactSize == 2 * sizeof(unsigned))
- {
- genSinglePush();
- }
-
-#if FEATURE_STACK_FP_X87
- GenTree* fieldTree = new (compiler, GT_REG_VAR)
- GenTreeLclVar(fieldVarDsc->lvType, varNum, BAD_IL_OFFSET);
- fieldTree->gtOper = GT_REG_VAR;
- fieldTree->gtRegNum = fieldVarDsc->lvRegNum;
- fieldTree->gtRegVar.gtRegNum = fieldVarDsc->lvRegNum;
- if ((arg->gtFlags & GTF_VAR_DEATH) != 0)
- {
- if (fieldVarDsc->lvTracked &&
- (deadVarBits == NULL ||
- VarSetOps::IsMember(compiler, *deadVarBits,
- fieldVarDsc->lvVarIndex)))
- {
- fieldTree->gtFlags |= GTF_VAR_DEATH;
- }
- }
- genCodeForTreeStackFP_Leaf(fieldTree);
-
- // Take reg to top of stack
-
- FlatFPX87_MoveToTOS(&compCurFPState, fieldTree->gtRegNum);
-
- // Pop it off to stack
- compCurFPState.Pop();
-
- getEmitter()->emitIns_AR_R(INS_fstp, EA_ATTR(fieldVarDsc->lvExactSize),
- REG_NA, REG_SPBASE, 0);
-#else
- NYI_FLAT_FP_X87("FP codegen");
-#endif
- }
- }
- else
- {
- getEmitter()->emitIns_S(INS_push,
- (fieldVarDsc->TypeGet() == TYP_REF) ? EA_GCREF
- : EA_4BYTE,
- varNum, 0);
- genSinglePush();
- }
-
- bytesToBeCopied -= sizeof(void*);
- }
- else // not stack aligned
- {
- noway_assert(fieldVarDsc->lvExactSize < 4);
-
- // We will need to use a store byte or store word
- // to set this unaligned location
- postponedFields = true;
-
- if (expectedAlignedOffset != UINT_MAX)
- {
- // This should never change until it is set back to UINT_MAX by an aligned
- // offset
- noway_assert(expectedAlignedOffset ==
- roundUp(fieldVarDsc->lvFldOffset, sizeof(void*)) - sizeof(void*));
- }
-
- expectedAlignedOffset =
- roundUp(fieldVarDsc->lvFldOffset, sizeof(void*)) - sizeof(void*);
-
- noway_assert(expectedAlignedOffset < bytesToBeCopied);
-
- if (fieldVarDsc->lvRegister)
- {
- // Do we need to use a byte-able register?
- if (fieldVarDsc->lvExactSize == 1)
- {
-                                        // Did we enregister fieldVarDsc in a non byte-able register?
- if ((genRegMask(fieldVarDsc->lvRegNum) & RBM_BYTE_REGS) == 0)
- {
- // then we will need to grab a byte-able register
- postponedRegKind = RBM_BYTE_REGS;
- }
- }
- }
- else // not enregistered
- {
- if (fieldVarDsc->lvExactSize == 1)
- {
- // We will need to grab a byte-able register
- postponedRegKind = RBM_BYTE_REGS;
- }
- else
- {
- // We will need to grab any scratch register
- if (postponedRegKind != RBM_BYTE_REGS)
- postponedRegKind = RBM_ALLINT;
- }
- }
- }
- }
-
- // Now we've pushed all of the aligned fields.
- //
- // We should have pushed bytes equal to the entire struct
- noway_assert(bytesToBeCopied == 0);
-
- // We should have seen a push that covers every postponed field
- noway_assert(expectedAlignedOffset == UINT_MAX);
-
- // Did we have any postponed fields?
- if (postponedFields)
- {
- regNumber regNum = REG_STK; // means no register
-
- // If we needed a scratch register then grab it here
-
- if (postponedRegKind != RBM_NONE)
- regNum = regSet.rsGrabReg(postponedRegKind);
-
- // Forward loop, starts from the lowest field offset
- //
- for (unsigned varNum = varDsc->lvFieldLclStart;
- varNum < varDsc->lvFieldLclStart + varDsc->lvFieldCnt; varNum++)
- {
- LclVarDsc* fieldVarDsc = compiler->lvaTable + varNum;
-
- // All stack aligned fields have already been pushed
- if (fieldVarDsc->lvStackAligned())
- continue;
-
- // We have a postponed field
-
- // It must be a byte or a short
- noway_assert(fieldVarDsc->lvExactSize < 4);
-
- // Is the field enregistered?
- if (fieldVarDsc->lvRegister)
- {
- // Frequently we can just use that register
- regNumber tmpRegNum = fieldVarDsc->lvRegNum;
-
- // Do we need to use a byte-able register?
- if (fieldVarDsc->lvExactSize == 1)
- {
- // Did we enregister the field in a non byte-able register?
- if ((genRegMask(tmpRegNum) & RBM_BYTE_REGS) == 0)
- {
- // then we will need to use the byte-able register 'regNum'
- noway_assert((genRegMask(regNum) & RBM_BYTE_REGS) != 0);
-
- // Copy the register that contains fieldVarDsc into 'regNum'
- getEmitter()->emitIns_R_R(INS_mov, EA_4BYTE, regNum,
- fieldVarDsc->lvRegNum);
- regTracker.rsTrackRegLclVar(regNum, varNum);
-
- // tmpRegNum is the register that we will extract the byte value from
- tmpRegNum = regNum;
- }
- noway_assert((genRegMask(tmpRegNum) & RBM_BYTE_REGS) != 0);
- }
-
- getEmitter()->emitIns_AR_R(ins_Store(fieldVarDsc->TypeGet()),
- (emitAttr)fieldVarDsc->lvExactSize, tmpRegNum,
- REG_SPBASE, fieldVarDsc->lvFldOffset);
- }
- else // not enregistered
- {
-                                    // We will copy the non-enregistered fieldVar into our scratch register 'regNum'
-
- noway_assert(regNum != REG_STK);
- getEmitter()->emitIns_R_S(ins_Load(fieldVarDsc->TypeGet()),
- (emitAttr)fieldVarDsc->lvExactSize, regNum, varNum,
- 0);
-
- regTracker.rsTrackRegLclVar(regNum, varNum);
-
- // Store the value (byte or short) into the stack
-
- getEmitter()->emitIns_AR_R(ins_Store(fieldVarDsc->TypeGet()),
- (emitAttr)fieldVarDsc->lvExactSize, regNum,
- REG_SPBASE, fieldVarDsc->lvFldOffset);
- }
- }
- }
- genUpdateLife(structLocalTree);
-
- break;
- }
- }
-
- genCodeForTree(arg->gtObj.gtOp1, 0);
- noway_assert(arg->gtObj.gtOp1->gtFlags & GTF_REG_VAL);
- regNumber reg = arg->gtObj.gtOp1->gtRegNum;
-            // Get the number of BYTES to copy to the stack
- opsz = roundUp(compiler->info.compCompHnd->getClassSize(arg->gtObj.gtClass), sizeof(void*));
- unsigned slots = (unsigned)(opsz / sizeof(void*));
-
- BYTE* gcLayout = new (compiler, CMK_Codegen) BYTE[slots];
-
- compiler->info.compCompHnd->getClassGClayout(arg->gtObj.gtClass, gcLayout);
-
- BOOL bNoneGC = TRUE;
- for (int i = slots - 1; i >= 0; --i)
- {
- if (gcLayout[i] != TYPE_GC_NONE)
- {
- bNoneGC = FALSE;
- break;
- }
- }
-
-            /* passing large structures using movq instead of pushes does not increase code size very much */
- unsigned movqLenMin = 8;
- unsigned movqLenMax = 64;
- unsigned curBBweight = compiler->compCurBB->getBBWeight(compiler);
-
- if ((compiler->compCodeOpt() == Compiler::SMALL_CODE) || (curBBweight == BB_ZERO_WEIGHT))
- {
- // Don't bother with this optimization in
- // rarely run blocks or when optimizing for size
- movqLenMax = movqLenMin = 0;
- }
- else if (compiler->compCodeOpt() == Compiler::FAST_CODE)
- {
- // Be more aggressive when optimizing for speed
- movqLenMax *= 2;
- }
-
- /* Adjust for BB weight */
- if (curBBweight >= (BB_LOOP_WEIGHT * BB_UNITY_WEIGHT) / 2)
- {
- // Be more aggressive when we are inside a loop
- movqLenMax *= 2;
- }
-
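-                // Worked example (illustrative numbers only): with the 8..64 byte defaults,
-                // a 40-byte GC-free struct compiled FAST_CODE inside a loop is copied with
-                // movq (the cap doubles twice, to 256 bytes), while the same struct under
-                // SMALL_CODE falls back to pushes because both bounds are forced to 0.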
- if (compiler->opts.compCanUseSSE2 && bNoneGC && (opsz >= movqLenMin) && (opsz <= movqLenMax))
- {
- JITLOG_THIS(compiler, (LL_INFO10000,
- "Using XMM instructions to pass %3d byte valuetype while compiling %s\n",
- opsz, compiler->info.compFullName));
-
- int stkDisp = (int)(unsigned)opsz;
- int curDisp = 0;
- regNumber xmmReg = REG_XMM0;
-
- if (opsz & 0x4)
- {
- stkDisp -= sizeof(void*);
- getEmitter()->emitIns_AR_R(INS_push, EA_4BYTE, REG_NA, reg, stkDisp);
- genSinglePush();
- }
-
- inst_RV_IV(INS_sub, REG_SPBASE, stkDisp, EA_PTRSIZE);
- genStackLevel += stkDisp;
-
- while (curDisp < stkDisp)
- {
- getEmitter()->emitIns_R_AR(INS_movq, EA_8BYTE, xmmReg, reg, curDisp);
- getEmitter()->emitIns_AR_R(INS_movq, EA_8BYTE, xmmReg, REG_SPBASE, curDisp);
- curDisp += 2 * sizeof(void*);
- }
- noway_assert(curDisp == stkDisp);
- }
- else
- {
- for (int i = slots - 1; i >= 0; --i)
- {
- emitAttr fieldSize;
- if (gcLayout[i] == TYPE_GC_NONE)
- fieldSize = EA_4BYTE;
- else if (gcLayout[i] == TYPE_GC_REF)
- fieldSize = EA_GCREF;
- else
- {
- noway_assert(gcLayout[i] == TYPE_GC_BYREF);
- fieldSize = EA_BYREF;
- }
- getEmitter()->emitIns_AR_R(INS_push, fieldSize, REG_NA, reg, i * sizeof(void*));
- genSinglePush();
- }
- }
- gcInfo.gcMarkRegSetNpt(genRegMask(reg)); // Kill the pointer in op1
- }
-
- addrReg = 0;
- break;
- }
-
- default:
- noway_assert(!"unhandled/unexpected arg type");
- NO_WAY("unhandled/unexpected arg type");
- }
-
- /* Update the current set of live variables */
-
- genUpdateLife(curr);
-
- /* Update the current set of register pointers */
-
- noway_assert(addrReg != DUMMY_INIT(RBM_CORRUPT));
- genDoneAddressable(curr, addrReg, RegSet::FREE_REG);
-
- /* Remember how much stuff we've pushed on the stack */
-
- size += opsz;
-
- /* Update the current argument stack offset */
-
- /* Continue with the next argument, if any more are present */
-
- } // while args
-
- /* Move the deferred arguments to registers */
-
- for (args = regArgs; args; args = args->Rest())
- {
- curr = args->Current();
-
-        assert(!curr->IsArgPlaceHolderNode()); // No placeholder nodes are in the late args
-
- fgArgTabEntryPtr curArgTabEntry = compiler->gtArgEntryByNode(call, curr);
- assert(curArgTabEntry);
- regNumber regNum = curArgTabEntry->regNum;
-
- noway_assert(isRegParamType(curr->TypeGet()));
- noway_assert(curr->gtType != TYP_VOID);
-
- /* Evaluate the argument to a register [pair] */
-
- if (genTypeSize(genActualType(curr->TypeGet())) == sizeof(int))
- {
-            /* Check if this is the guess area for the resolve interface call
-             * Pass a size of EA_OFFSET */
- if (curr->gtOper == GT_CLS_VAR && compiler->eeGetJitDataOffs(curr->gtClsVar.gtClsVarHnd) >= 0)
- {
- getEmitter()->emitIns_R_C(ins_Load(TYP_INT), EA_OFFSET, regNum, curr->gtClsVar.gtClsVarHnd, 0);
- regTracker.rsTrackRegTrash(regNum);
-
- /* The value is now in the appropriate register */
-
- genMarkTreeInReg(curr, regNum);
- }
- else
- {
- genComputeReg(curr, genRegMask(regNum), RegSet::EXACT_REG, RegSet::FREE_REG, false);
- }
-
- noway_assert(curr->gtRegNum == regNum);
-
- /* If the register is already marked as used, it will become
- multi-used. However, since it is a callee-trashed register,
- we will have to spill it before the call anyway. So do it now */
-
- if (regSet.rsMaskUsed & genRegMask(regNum))
- {
- noway_assert(genRegMask(regNum) & RBM_CALLEE_TRASH);
- regSet.rsSpillReg(regNum);
- }
-
- /* Mark the register as 'used' */
-
- regSet.rsMarkRegUsed(curr);
- }
- else
- {
- noway_assert(!"UNDONE: Passing a TYP_STRUCT in register arguments");
- }
- }
-
- /* If any of the previously loaded arguments were spilled - reload them */
-
- for (args = regArgs; args; args = args->Rest())
- {
- curr = args->Current();
- assert(curr);
-
- if (curr->gtFlags & GTF_SPILLED)
- {
- if (isRegPairType(curr->gtType))
- {
- regSet.rsUnspillRegPair(curr, genRegPairMask(curr->gtRegPair), RegSet::KEEP_REG);
- }
- else
- {
- regSet.rsUnspillReg(curr, genRegMask(curr->gtRegNum), RegSet::KEEP_REG);
- }
- }
- }
-
- /* Return the total size pushed */
-
- return size;
-}
-#ifdef _PREFAST_
-#pragma warning(pop)
-#endif
-
-#else // FEATURE_FIXED_OUT_ARGS
-
-//
-// ARM and AMD64 use this method to pass the stack-based args
-//
-// returns size pushed (always zero)
-size_t CodeGen::genPushArgList(GenTreePtr call)
-{
-
- GenTreeArgList* lateArgs = call->gtCall.gtCallLateArgs;
- GenTreePtr curr;
- var_types type;
- int argSize;
-
- GenTreeArgList* args;
- // Create a local, artificial GenTreeArgList that includes the gtCallObjp, if that exists, as first argument,
- // so we can iterate over this argument list more uniformly.
- // Need to provide a temporary non-null first argument here: if we use this, we'll replace it.
- GenTreeArgList objpArgList(/*temp dummy arg*/ call, call->gtCall.gtCallArgs);
- if (call->gtCall.gtCallObjp == NULL)
- {
- args = call->gtCall.gtCallArgs;
- }
- else
- {
- objpArgList.Current() = call->gtCall.gtCallObjp;
- args = &objpArgList;
- }
-
- for (; args; args = args->Rest())
- {
- /* Get hold of the next argument value */
- curr = args->Current();
-
- fgArgTabEntryPtr curArgTabEntry = compiler->gtArgEntryByNode(call, curr);
- assert(curArgTabEntry);
- regNumber regNum = curArgTabEntry->regNum;
- int argOffset = curArgTabEntry->slotNum * TARGET_POINTER_SIZE;
-
- /* See what type of a value we're passing */
- type = curr->TypeGet();
-
- if ((type == TYP_STRUCT) && (curr->gtOper == GT_ASG))
- {
- type = TYP_VOID;
- }
-
- // This holds the set of registers corresponding to enregistered promoted struct field variables
- // that go dead after this use of the variable in the argument list.
- regMaskTP deadFieldVarRegs = RBM_NONE;
-
- argSize = TARGET_POINTER_SIZE; // The default size for an arg is one pointer-sized item
-
- if (curr->IsArgPlaceHolderNode())
- {
- assert(curr->gtFlags & GTF_LATE_ARG);
- goto DEFERRED;
- }
-
- if (varTypeIsSmall(type))
- {
-            // Normalize 'type'; it represents the item that we will be storing in the Outgoing Args
- type = TYP_I_IMPL;
- }
-
- switch (type)
- {
-
- case TYP_DOUBLE:
- case TYP_LONG:
-
-#if defined(_TARGET_ARM_)
-
- argSize = (TARGET_POINTER_SIZE * 2);
-
- /* Is the value a constant? */
-
- if (curr->gtOper == GT_CNS_LNG)
- {
- assert((curr->gtFlags & GTF_LATE_ARG) == 0);
-
- int hiVal = (int)(curr->gtLngCon.gtLconVal >> 32);
- int loVal = (int)(curr->gtLngCon.gtLconVal & 0xffffffff);
-
- instGen_Store_Imm_Into_Lcl(TYP_INT, EA_4BYTE, loVal, compiler->lvaOutgoingArgSpaceVar, argOffset);
-
- instGen_Store_Imm_Into_Lcl(TYP_INT, EA_4BYTE, hiVal, compiler->lvaOutgoingArgSpaceVar,
- argOffset + 4);
-
- break;
- }
- else
- {
- genCodeForTree(curr, 0);
-
- if (curr->gtFlags & GTF_LATE_ARG)
- {
- // The arg was assigned into a temp and
- // will be moved to the correct register or slot later
-
- argSize = 0; // nothing is passed on the stack
- }
- else
- {
- // The arg is passed in the outgoing argument area of the stack frame
- //
- assert(curr->gtOper != GT_ASG); // GTF_LATE_ARG should be set if this is the case
- assert(curr->gtFlags & GTF_REG_VAL); // should be enregistered after genCodeForTree(curr, 0)
-
- if (type == TYP_LONG)
- {
- regNumber regLo = genRegPairLo(curr->gtRegPair);
- regNumber regHi = genRegPairHi(curr->gtRegPair);
-
- assert(regLo != REG_STK);
- inst_SA_RV(ins_Store(TYP_INT), argOffset, regLo, TYP_INT);
- if (regHi == REG_STK)
- {
- regHi = regSet.rsPickFreeReg();
- inst_RV_TT(ins_Load(TYP_INT), regHi, curr, 4);
- regTracker.rsTrackRegTrash(regHi);
- }
- inst_SA_RV(ins_Store(TYP_INT), argOffset + 4, regHi, TYP_INT);
- }
- else // (type == TYP_DOUBLE)
- {
- inst_SA_RV(ins_Store(type), argOffset, curr->gtRegNum, type);
- }
- }
- }
- break;
-
-#elif defined(_TARGET_64BIT_)
- __fallthrough;
-#else
-#error "Unknown target for passing TYP_LONG argument using FIXED_ARGS"
-#endif
-
- case TYP_REF:
- case TYP_BYREF:
-
- case TYP_FLOAT:
- case TYP_INT:
- /* Is the value a constant? */
-
- if (curr->gtOper == GT_CNS_INT)
- {
- assert(!(curr->gtFlags & GTF_LATE_ARG));
-
-#if REDUNDANT_LOAD
- regNumber reg = regTracker.rsIconIsInReg(curr->gtIntCon.gtIconVal);
-
- if (reg != REG_NA)
- {
- inst_SA_RV(ins_Store(type), argOffset, reg, type);
- }
- else
-#endif
- {
- bool needReloc = compiler->opts.compReloc && curr->IsIconHandle();
- emitAttr attr = needReloc ? EA_HANDLE_CNS_RELOC : emitTypeSize(type);
- instGen_Store_Imm_Into_Lcl(type, attr, curr->gtIntCon.gtIconVal,
- compiler->lvaOutgoingArgSpaceVar, argOffset);
- }
- break;
- }
-
- /* This is passed as a pointer-sized integer argument */
-
- genCodeForTree(curr, 0);
-
- // The arg has been evaluated now, but will be put in a register or pushed on the stack later.
- if (curr->gtFlags & GTF_LATE_ARG)
- {
-#ifdef _TARGET_ARM_
- argSize = 0; // nothing is passed on the stack
-#endif
- }
- else
- {
- // The arg is passed in the outgoing argument area of the stack frame
-
- assert(curr->gtOper != GT_ASG); // GTF_LATE_ARG should be set if this is the case
- assert(curr->gtFlags & GTF_REG_VAL); // should be enregistered after genCodeForTree(curr, 0)
- inst_SA_RV(ins_Store(type), argOffset, curr->gtRegNum, type);
-
- if ((genRegMask(curr->gtRegNum) & regSet.rsMaskUsed) == 0)
- gcInfo.gcMarkRegSetNpt(genRegMask(curr->gtRegNum));
- }
- break;
-
- case TYP_VOID:
- /* Is this a nothing node, deferred register argument? */
-
- if (curr->gtFlags & GTF_LATE_ARG)
- {
- /* Handle side-effects */
- DEFERRED:
- if (curr->OperIsCopyBlkOp() || curr->OperGet() == GT_COMMA)
- {
-#ifdef _TARGET_ARM_
- {
- GenTreePtr curArgNode = curArgTabEntry->node;
- var_types curRegArgType = curArgNode->gtType;
- assert(curRegArgType != TYP_UNDEF);
-
- if (curRegArgType == TYP_STRUCT)
- {
- // If the RHS of the COPYBLK is a promoted struct local, then the use of that
- // is an implicit use of all its field vars. If these are last uses, remember that,
-                            // so we can later update the GC info.
- if (curr->OperIsCopyBlkOp())
- deadFieldVarRegs |= genFindDeadFieldRegs(curr);
- }
- }
-#endif // _TARGET_ARM_
-
- genCodeForTree(curr, 0);
- }
- else
- {
- assert(curr->IsArgPlaceHolderNode() || curr->IsNothingNode());
- }
-
-#if defined(_TARGET_ARM_)
- argSize = curArgTabEntry->numSlots * TARGET_POINTER_SIZE;
-#endif
- }
- else
- {
- for (GenTree* arg = curr; arg->gtOper == GT_COMMA; arg = arg->gtOp.gtOp2)
- {
- GenTreePtr op1 = arg->gtOp.gtOp1;
-
- genEvalSideEffects(op1);
- genUpdateLife(op1);
- }
- }
- break;
-
-#ifdef _TARGET_ARM_
-
- case TYP_STRUCT:
- {
- GenTree* arg = curr;
- while (arg->gtOper == GT_COMMA)
- {
- GenTreePtr op1 = arg->gtOp.gtOp1;
- genEvalSideEffects(op1);
- genUpdateLife(op1);
- arg = arg->gtOp.gtOp2;
- }
- noway_assert((arg->OperGet() == GT_OBJ) || (arg->OperGet() == GT_MKREFANY));
-
- CORINFO_CLASS_HANDLE clsHnd;
- unsigned argAlign;
- unsigned slots;
- BYTE* gcLayout = NULL;
-
- // If the struct being passed is a OBJ of a local struct variable that is promoted (in the
- // INDEPENDENT fashion, which doesn't require writes to be written through to the variable's
- // home stack loc) "promotedStructLocalVarDesc" will be set to point to the local variable
- // table entry for the promoted struct local. As we fill slots with the contents of a
- // promoted struct, "bytesOfNextSlotOfCurPromotedStruct" will be the number of filled bytes
- // that indicate another filled slot, and "nextPromotedStructFieldVar" will be the local
- // variable number of the next field variable to be copied.
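-                    // Illustrative example (assuming a promoted ARM struct { int a; int b; long c; }):
-                    // slot 0 is filled from field 'a', slot 1 from 'b', and the 8-byte field 'c' then
-                    // fills two slots at once, which genFillSlotFromPromotedStruct reports back through
-                    // its 'filledExtraSlot' return value so the loop below can skip a slot.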
- LclVarDsc* promotedStructLocalVarDesc = NULL;
- GenTreePtr structLocalTree = NULL;
- unsigned bytesOfNextSlotOfCurPromotedStruct = TARGET_POINTER_SIZE; // Size of slot.
- unsigned nextPromotedStructFieldVar = BAD_VAR_NUM;
- unsigned promotedStructOffsetOfFirstStackSlot = 0;
- unsigned argOffsetOfFirstStackSlot = UINT32_MAX; // Indicates uninitialized.
-
- if (arg->OperGet() == GT_OBJ)
- {
- clsHnd = arg->gtObj.gtClass;
- unsigned originalSize = compiler->info.compCompHnd->getClassSize(clsHnd);
- argAlign =
- roundUp(compiler->info.compCompHnd->getClassAlignmentRequirement(clsHnd), TARGET_POINTER_SIZE);
- argSize = (unsigned)(roundUp(originalSize, TARGET_POINTER_SIZE));
-
- slots = (unsigned)(argSize / TARGET_POINTER_SIZE);
-
- gcLayout = new (compiler, CMK_Codegen) BYTE[slots];
-
- compiler->info.compCompHnd->getClassGClayout(clsHnd, gcLayout);
-
- // Are we loading a promoted struct local var?
- if (arg->gtObj.gtOp1->gtOper == GT_ADDR && arg->gtObj.gtOp1->gtOp.gtOp1->gtOper == GT_LCL_VAR)
- {
- structLocalTree = arg->gtObj.gtOp1->gtOp.gtOp1;
- unsigned structLclNum = structLocalTree->gtLclVarCommon.gtLclNum;
- LclVarDsc* varDsc = &compiler->lvaTable[structLclNum];
-
- // As much as we would like this to be a noway_assert, we can't because
-                    // there are some weird casts out there, and backwards compatibility
- // dictates we do *NOT* start rejecting them now. lvaGetPromotion and
- // lvPromoted in general currently do not require the local to be
- // TYP_STRUCT, so this assert is really more about how we wish the world
-                    // was than about some JIT invariant.
- assert((structLocalTree->TypeGet() == TYP_STRUCT) || compiler->compUnsafeCastUsed);
-
- Compiler::lvaPromotionType promotionType = compiler->lvaGetPromotionType(varDsc);
-
- if (varDsc->lvPromoted &&
- promotionType == Compiler::PROMOTION_TYPE_INDEPENDENT) // Otherwise it is guaranteed to live
- // on stack.
- {
- assert(!varDsc->lvAddrExposed); // Compiler::PROMOTION_TYPE_INDEPENDENT ==> not exposed.
- promotedStructLocalVarDesc = varDsc;
- nextPromotedStructFieldVar = promotedStructLocalVarDesc->lvFieldLclStart;
- }
- }
- }
- else
- {
- noway_assert(arg->OperGet() == GT_MKREFANY);
-
- clsHnd = NULL;
- argAlign = TARGET_POINTER_SIZE;
- argSize = 2 * TARGET_POINTER_SIZE;
- slots = 2;
- }
-
- // Any TYP_STRUCT argument that is passed in registers must be moved over to the LateArg list
- noway_assert(regNum == REG_STK);
-
- // This code passes a TYP_STRUCT by value using the outgoing arg space var
- //
- if (arg->OperGet() == GT_OBJ)
- {
- regNumber regSrc = REG_STK;
- regNumber regTmp = REG_STK; // This will get set below if the obj is not of a promoted struct local.
- int cStackSlots = 0;
-
- if (promotedStructLocalVarDesc == NULL)
- {
- genComputeReg(arg->gtObj.gtOp1, 0, RegSet::ANY_REG, RegSet::KEEP_REG);
- noway_assert(arg->gtObj.gtOp1->gtFlags & GTF_REG_VAL);
- regSrc = arg->gtObj.gtOp1->gtRegNum;
- }
-
-                    // The number of bytes to add to "argOffset" to get the arg offset of the current slot.
- int extraArgOffset = 0;
-
- for (unsigned i = 0; i < slots; i++)
- {
- emitAttr fieldSize;
- if (gcLayout[i] == TYPE_GC_NONE)
- fieldSize = EA_PTRSIZE;
- else if (gcLayout[i] == TYPE_GC_REF)
- fieldSize = EA_GCREF;
- else
- {
- noway_assert(gcLayout[i] == TYPE_GC_BYREF);
- fieldSize = EA_BYREF;
- }
-
- // Pass the argument using the lvaOutgoingArgSpaceVar
-
- if (promotedStructLocalVarDesc != NULL)
- {
- if (argOffsetOfFirstStackSlot == UINT32_MAX)
- argOffsetOfFirstStackSlot = argOffset;
-
- regNumber maxRegArg = regNumber(MAX_REG_ARG);
- bool filledExtraSlot = genFillSlotFromPromotedStruct(
- arg, curArgTabEntry, promotedStructLocalVarDesc, fieldSize, &nextPromotedStructFieldVar,
- &bytesOfNextSlotOfCurPromotedStruct,
- /*pCurRegNum*/ &maxRegArg,
- /*argOffset*/ argOffset + extraArgOffset,
- /*fieldOffsetOfFirstStackSlot*/ promotedStructOffsetOfFirstStackSlot,
- argOffsetOfFirstStackSlot, &deadFieldVarRegs, &regTmp);
- extraArgOffset += TARGET_POINTER_SIZE;
- // If we filled an extra slot with an 8-byte value, skip a slot.
- if (filledExtraSlot)
- {
- i++;
- cStackSlots++;
- extraArgOffset += TARGET_POINTER_SIZE;
- }
- }
- else
- {
- if (regTmp == REG_STK)
- {
- regTmp = regSet.rsPickFreeReg();
- }
-
- getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), fieldSize, regTmp, regSrc,
- i * TARGET_POINTER_SIZE);
-
- getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), fieldSize, regTmp,
- compiler->lvaOutgoingArgSpaceVar,
- argOffset + cStackSlots * TARGET_POINTER_SIZE);
- regTracker.rsTrackRegTrash(regTmp);
- }
- cStackSlots++;
- }
-
- if (promotedStructLocalVarDesc == NULL)
- {
- regSet.rsMarkRegFree(genRegMask(regSrc));
- }
- if (structLocalTree != NULL)
- genUpdateLife(structLocalTree);
- }
- else
- {
- assert(arg->OperGet() == GT_MKREFANY);
- PushMkRefAnyArg(arg, curArgTabEntry, RBM_ALLINT);
- argSize = (curArgTabEntry->numSlots * TARGET_POINTER_SIZE);
- }
- }
- break;
-#endif // _TARGET_ARM_
-
- default:
- assert(!"unhandled/unexpected arg type");
- NO_WAY("unhandled/unexpected arg type");
- }
-
- /* Update the current set of live variables */
-
- genUpdateLife(curr);
-
- // Now, if some copied field locals were enregistered, and they're now dead, update the set of
- // register holding gc pointers.
- if (deadFieldVarRegs != 0)
- gcInfo.gcMarkRegSetNpt(deadFieldVarRegs);
-
- /* Update the current argument stack offset */
-
- argOffset += argSize;
-
- /* Continue with the next argument, if any more are present */
- } // while (args)
-
- if (lateArgs)
- {
- SetupLateArgs(call);
- }
-
- /* Return the total size pushed */
-
- return 0;
-}
-
-#ifdef _TARGET_ARM_
-bool CodeGen::genFillSlotFromPromotedStruct(GenTreePtr arg,
- fgArgTabEntryPtr curArgTabEntry,
- LclVarDsc* promotedStructLocalVarDesc,
- emitAttr fieldSize,
- unsigned* pNextPromotedStructFieldVar,
- unsigned* pBytesOfNextSlotOfCurPromotedStruct,
- regNumber* pCurRegNum,
- int argOffset,
- int fieldOffsetOfFirstStackSlot,
- int argOffsetOfFirstStackSlot,
- regMaskTP* deadFieldVarRegs,
- regNumber* pRegTmp)
-{
- unsigned nextPromotedStructFieldVar = *pNextPromotedStructFieldVar;
- unsigned limitPromotedStructFieldVar =
- promotedStructLocalVarDesc->lvFieldLclStart + promotedStructLocalVarDesc->lvFieldCnt;
- unsigned bytesOfNextSlotOfCurPromotedStruct = *pBytesOfNextSlotOfCurPromotedStruct;
-
- regNumber curRegNum = *pCurRegNum;
- regNumber regTmp = *pRegTmp;
- bool filledExtraSlot = false;
-
- if (nextPromotedStructFieldVar == limitPromotedStructFieldVar)
- {
- // We've already finished; just return.
- // We can reach this because the calling loop computes a # of slots based on the size of the struct.
- // If the struct has padding at the end because of alignment (say, long/int), then we'll get a call for
- // the fourth slot, even though we've copied all the fields.
- return false;
- }
-
- LclVarDsc* fieldVarDsc = &compiler->lvaTable[nextPromotedStructFieldVar];
-
- // Does this field fill an entire slot, and does it go at the start of the slot?
- // If so, things are easier...
-
- bool oneFieldFillsSlotFromStart =
- (fieldVarDsc->lvFldOffset < bytesOfNextSlotOfCurPromotedStruct) // The field should start in the current slot...
- && ((fieldVarDsc->lvFldOffset % 4) == 0) // at the start of the slot, and...
- && (nextPromotedStructFieldVar + 1 ==
- limitPromotedStructFieldVar // next field, if there is one, goes in the next slot.
- || compiler->lvaTable[nextPromotedStructFieldVar + 1].lvFldOffset >= bytesOfNextSlotOfCurPromotedStruct);
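-
-    // Illustrative example: for a promoted struct { int a; short b; short c; }, field 'a' fills
-    // slot 0 from its start (the easy path below), while 'b' and 'c' share slot 1 and must be
-    // assembled a field at a time (the 'else' path further down).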
-
- // Compute the proper size.
- if (fieldSize == EA_4BYTE) // Not a GC ref or byref.
- {
- switch (fieldVarDsc->lvExactSize)
- {
- case 1:
- fieldSize = EA_1BYTE;
- break;
- case 2:
- fieldSize = EA_2BYTE;
- break;
- case 8:
- // An 8-byte field will be at an 8-byte-aligned offset unless explicit layout has been used,
- // in which case we should not have promoted the struct variable.
- noway_assert((fieldVarDsc->lvFldOffset % 8) == 0);
-
- // If the current reg number is not aligned, align it, and return to the calling loop, which will
- // consider that a filled slot and move on to the next argument register.
- if (curRegNum != MAX_REG_ARG && ((curRegNum % 2) != 0))
- {
- // We must update the slot target, however!
- bytesOfNextSlotOfCurPromotedStruct += 4;
- *pBytesOfNextSlotOfCurPromotedStruct = bytesOfNextSlotOfCurPromotedStruct;
- return false;
- }
- // Dest is an aligned pair of arg regs, if the struct type demands it.
- noway_assert((curRegNum % 2) == 0);
- // We leave the fieldSize as EA_4BYTE; but we must do 2 reg moves.
- break;
- default:
- assert(fieldVarDsc->lvExactSize == 4);
- break;
- }
- }
- else
- {
- // If the gc layout said it's a GC ref or byref, then the field size must be 4.
- noway_assert(fieldVarDsc->lvExactSize == 4);
- }
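-
-    // Illustrative example of the 8-byte alignment handling above (assuming ARM argument registers
-    // r0-r3): if the next free argument register is r1 and the field is a TYP_LONG, we return
-    // without copying so the caller advances to r2, and the value is then passed in the aligned
-    // pair r2:r3.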
-
- // We may need the type of the field to influence instruction selection.
- // If we have a TYP_LONG we can use TYP_I_IMPL and we do two loads/stores
- // If the fieldVarDsc is enregistered float we must use the field's exact type
- // however if it is in memory we can use an integer type TYP_I_IMPL
- //
- var_types fieldTypeForInstr = var_types(fieldVarDsc->lvType);
- if ((fieldVarDsc->lvType == TYP_LONG) || (!fieldVarDsc->lvRegister && varTypeIsFloating(fieldTypeForInstr)))
- {
- fieldTypeForInstr = TYP_I_IMPL;
- }
-
- // If we have a HFA, then it is a much simpler deal -- HFAs are completely enregistered.
- if (curArgTabEntry->isHfaRegArg)
- {
- assert(oneFieldFillsSlotFromStart);
-
- // Is the field variable promoted?
- if (fieldVarDsc->lvRegister)
- {
- // Move the field var living in register to dst, if they are different registers.
- regNumber srcReg = fieldVarDsc->lvRegNum;
- regNumber dstReg = curRegNum;
- if (srcReg != dstReg)
- {
- inst_RV_RV(ins_Copy(fieldVarDsc->TypeGet()), dstReg, srcReg, fieldVarDsc->TypeGet());
- assert(genIsValidFloatReg(dstReg)); // we don't use register tracking for FP
- }
- }
- else
- {
- // Move the field var living in stack to dst.
- getEmitter()->emitIns_R_S(ins_Load(fieldVarDsc->TypeGet()),
- fieldVarDsc->TypeGet() == TYP_DOUBLE ? EA_8BYTE : EA_4BYTE, curRegNum,
- nextPromotedStructFieldVar, 0);
- assert(genIsValidFloatReg(curRegNum)); // we don't use register tracking for FP
- }
-
- // Mark the arg as used and using reg val.
- genMarkTreeInReg(arg, curRegNum);
- regSet.SetUsedRegFloat(arg, true);
-
- // Advance for double.
- if (fieldVarDsc->TypeGet() == TYP_DOUBLE)
- {
- bytesOfNextSlotOfCurPromotedStruct += 4;
- curRegNum = REG_NEXT(curRegNum);
- arg->gtRegNum = curRegNum;
- regSet.SetUsedRegFloat(arg, true);
- filledExtraSlot = true;
- }
- arg->gtRegNum = curArgTabEntry->regNum;
-
- // Advance.
- bytesOfNextSlotOfCurPromotedStruct += 4;
- nextPromotedStructFieldVar++;
- }
- else
- {
- if (oneFieldFillsSlotFromStart)
- {
- // If we write to the stack, offset in outgoing args at which we'll write.
- int fieldArgOffset = argOffsetOfFirstStackSlot + fieldVarDsc->lvFldOffset - fieldOffsetOfFirstStackSlot;
- assert(fieldArgOffset >= 0);
-
- // Is the source a register or memory?
- if (fieldVarDsc->lvRegister)
- {
- if (fieldTypeForInstr == TYP_DOUBLE)
- {
- fieldSize = EA_8BYTE;
- }
-
- // Are we writing to a register or to the stack?
- if (curRegNum != MAX_REG_ARG)
- {
- // Source is register and Dest is register.
-
- instruction insCopy = INS_mov;
-
- if (varTypeIsFloating(fieldTypeForInstr))
- {
- if (fieldTypeForInstr == TYP_FLOAT)
- {
- insCopy = INS_vmov_f2i;
- }
- else
- {
- assert(fieldTypeForInstr == TYP_DOUBLE);
- insCopy = INS_vmov_d2i;
- }
- }
-
- // If the value being copied is a TYP_LONG (8 bytes), it may be in two registers. Record the second
-                        // register (which may become a tmp register, if it's held in the argument register that the first
- // register to be copied will overwrite).
- regNumber otherRegNum = REG_STK;
- if (fieldVarDsc->lvType == TYP_LONG)
- {
- otherRegNum = fieldVarDsc->lvOtherReg;
- // Are we about to overwrite?
- if (otherRegNum == curRegNum)
- {
- if (regTmp == REG_STK)
- {
- regTmp = regSet.rsPickFreeReg();
- }
- // Copy the second register to the temp reg.
- getEmitter()->emitIns_R_R(INS_mov, fieldSize, regTmp, otherRegNum);
- regTracker.rsTrackRegCopy(regTmp, otherRegNum);
- otherRegNum = regTmp;
- }
- }
-
- if (fieldVarDsc->lvType == TYP_DOUBLE)
- {
- assert(curRegNum <= REG_R2);
- getEmitter()->emitIns_R_R_R(insCopy, fieldSize, curRegNum, genRegArgNext(curRegNum),
- fieldVarDsc->lvRegNum);
- regTracker.rsTrackRegTrash(curRegNum);
- regTracker.rsTrackRegTrash(genRegArgNext(curRegNum));
- }
- else
- {
- // Now do the first register.
- // It might be the case that it's already in the desired register; if so do nothing.
- if (curRegNum != fieldVarDsc->lvRegNum)
- {
- getEmitter()->emitIns_R_R(insCopy, fieldSize, curRegNum, fieldVarDsc->lvRegNum);
- regTracker.rsTrackRegCopy(curRegNum, fieldVarDsc->lvRegNum);
- }
- }
-
- // In either case, mark the arg register as used.
- regSet.rsMarkArgRegUsedByPromotedFieldArg(arg, curRegNum, EA_IS_GCREF(fieldSize));
-
- // Is there a second half of the value?
- if (fieldVarDsc->lvExactSize == 8)
- {
- curRegNum = genRegArgNext(curRegNum);
- // The second dest reg must also be an argument register.
- noway_assert(curRegNum < MAX_REG_ARG);
-
- // Now, if it's an 8-byte TYP_LONG, we have to do the second 4 bytes.
- if (fieldVarDsc->lvType == TYP_LONG)
- {
- // Copy the second register into the next argument register
-
-                                        // If it's a register variable for a TYP_LONG value, then otherRegNum should now
-                                        // hold the second register, or it may be REG_STK, indicating the high half is on the stack.
- if (otherRegNum == REG_STK)
- {
-                                            // Apparently when we partially enregister, we allocate stack space for the full
-                                            // 8 bytes and enregister the low half; hence the final TARGET_POINTER_SIZE offset
-                                            // parameter, which selects the high half.
- getEmitter()->emitIns_R_S(ins_Load(fieldTypeForInstr), fieldSize, curRegNum,
- nextPromotedStructFieldVar, TARGET_POINTER_SIZE);
- regTracker.rsTrackRegTrash(curRegNum);
- }
- else
- {
- // The other half is in a register.
- // Again, it might be the case that it's already in the desired register; if so do
- // nothing.
- if (curRegNum != otherRegNum)
- {
- getEmitter()->emitIns_R_R(INS_mov, fieldSize, curRegNum, otherRegNum);
- regTracker.rsTrackRegCopy(curRegNum, otherRegNum);
- }
- }
- }
-
- // Also mark the 2nd arg register as used.
- regSet.rsMarkArgRegUsedByPromotedFieldArg(arg, curRegNum, false);
- // Record the fact that we filled in an extra register slot
- filledExtraSlot = true;
- }
- }
- else
- {
- // Source is register and Dest is memory (OutgoingArgSpace).
-
- // Now write the srcReg into the right location in the outgoing argument list.
- getEmitter()->emitIns_S_R(ins_Store(fieldTypeForInstr), fieldSize, fieldVarDsc->lvRegNum,
- compiler->lvaOutgoingArgSpaceVar, fieldArgOffset);
-
- if (fieldVarDsc->lvExactSize == 8)
- {
- // Now, if it's an 8-byte TYP_LONG, we have to do the second 4 bytes.
- if (fieldVarDsc->lvType == TYP_LONG)
- {
- if (fieldVarDsc->lvOtherReg == REG_STK)
- {
- // Source is stack.
- if (regTmp == REG_STK)
- {
- regTmp = regSet.rsPickFreeReg();
- }
-                                        // Apparently if we partially enregister, we allocate stack space for the full
-                                        // 8 bytes and enregister the low half; hence the final TARGET_POINTER_SIZE offset
-                                        // parameter, which selects the high half.
- getEmitter()->emitIns_R_S(ins_Load(fieldTypeForInstr), fieldSize, regTmp,
- nextPromotedStructFieldVar, TARGET_POINTER_SIZE);
- regTracker.rsTrackRegTrash(regTmp);
- getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), fieldSize, regTmp,
- compiler->lvaOutgoingArgSpaceVar,
- fieldArgOffset + TARGET_POINTER_SIZE);
- }
- else
- {
- getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), fieldSize, fieldVarDsc->lvOtherReg,
- compiler->lvaOutgoingArgSpaceVar,
- fieldArgOffset + TARGET_POINTER_SIZE);
- }
- }
- // Record the fact that we filled in an extra register slot
- filledExtraSlot = true;
- }
- }
- assert(fieldVarDsc->lvTracked); // Must be tracked, since it's enregistered...
- // If the fieldVar becomes dead, then declare the register not to contain a pointer value.
- if (arg->gtFlags & GTF_VAR_DEATH)
- {
- *deadFieldVarRegs |= genRegMask(fieldVarDsc->lvRegNum);
- // We don't bother with the second reg of a register pair, since if it has one,
- // it obviously doesn't hold a pointer.
- }
- }
- else
- {
- // Source is in memory.
-
- if (curRegNum != MAX_REG_ARG)
- {
- // Dest is reg.
- getEmitter()->emitIns_R_S(ins_Load(fieldTypeForInstr), fieldSize, curRegNum,
- nextPromotedStructFieldVar, 0);
- regTracker.rsTrackRegTrash(curRegNum);
-
- regSet.rsMarkArgRegUsedByPromotedFieldArg(arg, curRegNum, EA_IS_GCREF(fieldSize));
-
- if (fieldVarDsc->lvExactSize == 8)
- {
- noway_assert(fieldSize == EA_4BYTE);
- curRegNum = genRegArgNext(curRegNum);
- noway_assert(curRegNum < MAX_REG_ARG); // Because of 8-byte alignment.
- getEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL), fieldSize, curRegNum,
- nextPromotedStructFieldVar, TARGET_POINTER_SIZE);
- regTracker.rsTrackRegTrash(curRegNum);
- regSet.rsMarkArgRegUsedByPromotedFieldArg(arg, curRegNum, EA_IS_GCREF(fieldSize));
- // Record the fact that we filled in an extra stack slot
- filledExtraSlot = true;
- }
- }
- else
- {
- // Dest is stack.
- if (regTmp == REG_STK)
- {
- regTmp = regSet.rsPickFreeReg();
- }
- getEmitter()->emitIns_R_S(ins_Load(fieldTypeForInstr), fieldSize, regTmp,
- nextPromotedStructFieldVar, 0);
-
- // Now write regTmp into the right location in the outgoing argument list.
- getEmitter()->emitIns_S_R(ins_Store(fieldTypeForInstr), fieldSize, regTmp,
- compiler->lvaOutgoingArgSpaceVar, fieldArgOffset);
- // We overwrote "regTmp", so erase any previous value we recorded that it contained.
- regTracker.rsTrackRegTrash(regTmp);
-
- if (fieldVarDsc->lvExactSize == 8)
- {
- getEmitter()->emitIns_R_S(ins_Load(fieldTypeForInstr), fieldSize, regTmp,
- nextPromotedStructFieldVar, TARGET_POINTER_SIZE);
-
- getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), fieldSize, regTmp,
- compiler->lvaOutgoingArgSpaceVar,
- fieldArgOffset + TARGET_POINTER_SIZE);
- // Record the fact that we filled in an extra stack slot
- filledExtraSlot = true;
- }
- }
- }
-
- // Bump up the following if we filled in an extra slot
- if (filledExtraSlot)
- bytesOfNextSlotOfCurPromotedStruct += 4;
-
- // Go to the next field.
- nextPromotedStructFieldVar++;
- if (nextPromotedStructFieldVar == limitPromotedStructFieldVar)
- {
- fieldVarDsc = NULL;
- }
- else
- {
- // The next field should have the same parent variable, and we should have put the field vars in order
- // sorted by offset.
- assert(fieldVarDsc->lvIsStructField && compiler->lvaTable[nextPromotedStructFieldVar].lvIsStructField &&
- fieldVarDsc->lvParentLcl == compiler->lvaTable[nextPromotedStructFieldVar].lvParentLcl &&
- fieldVarDsc->lvFldOffset < compiler->lvaTable[nextPromotedStructFieldVar].lvFldOffset);
- fieldVarDsc = &compiler->lvaTable[nextPromotedStructFieldVar];
- }
- bytesOfNextSlotOfCurPromotedStruct += 4;
- }
- else // oneFieldFillsSlotFromStart == false
- {
- // The current slot should contain more than one field.
- // We'll construct a word in memory for the slot, then load it into a register.
- // (Note that it *may* be possible for the fldOffset to be greater than the largest offset in the current
- // slot, in which case we'll just skip this loop altogether.)
- while (fieldVarDsc != NULL && fieldVarDsc->lvFldOffset < bytesOfNextSlotOfCurPromotedStruct)
- {
- // If it doesn't fill a slot, it can't overflow the slot (again, because we only promote structs
- // whose fields have their natural alignment, and alignment == size on ARM).
- noway_assert(fieldVarDsc->lvFldOffset + fieldVarDsc->lvExactSize <= bytesOfNextSlotOfCurPromotedStruct);
-
- // If the argument goes to the stack, the offset in the outgoing arg area for the argument.
- int fieldArgOffset = argOffsetOfFirstStackSlot + fieldVarDsc->lvFldOffset - fieldOffsetOfFirstStackSlot;
- noway_assert(argOffset == INT32_MAX ||
- (argOffset <= fieldArgOffset && fieldArgOffset < argOffset + TARGET_POINTER_SIZE));
-
- if (fieldVarDsc->lvRegister)
- {
- if (curRegNum != MAX_REG_ARG)
- {
- noway_assert(compiler->lvaPromotedStructAssemblyScratchVar != BAD_VAR_NUM);
-
- getEmitter()->emitIns_S_R(ins_Store(fieldTypeForInstr), fieldSize, fieldVarDsc->lvRegNum,
- compiler->lvaPromotedStructAssemblyScratchVar,
- fieldVarDsc->lvFldOffset % 4);
- }
- else
- {
- // Dest is stack; write directly.
- getEmitter()->emitIns_S_R(ins_Store(fieldTypeForInstr), fieldSize, fieldVarDsc->lvRegNum,
- compiler->lvaOutgoingArgSpaceVar, fieldArgOffset);
- }
- }
- else
- {
- // Source is in memory.
-
- // Make sure we have a temporary register to use...
- if (regTmp == REG_STK)
- {
- regTmp = regSet.rsPickFreeReg();
- }
- getEmitter()->emitIns_R_S(ins_Load(fieldTypeForInstr), fieldSize, regTmp,
- nextPromotedStructFieldVar, 0);
- regTracker.rsTrackRegTrash(regTmp);
-
- if (curRegNum != MAX_REG_ARG)
- {
- noway_assert(compiler->lvaPromotedStructAssemblyScratchVar != BAD_VAR_NUM);
-
- getEmitter()->emitIns_S_R(ins_Store(fieldTypeForInstr), fieldSize, regTmp,
- compiler->lvaPromotedStructAssemblyScratchVar,
- fieldVarDsc->lvFldOffset % 4);
- }
- else
- {
- getEmitter()->emitIns_S_R(ins_Store(fieldTypeForInstr), fieldSize, regTmp,
- compiler->lvaOutgoingArgSpaceVar, fieldArgOffset);
- }
- }
- // Go to the next field.
- nextPromotedStructFieldVar++;
- if (nextPromotedStructFieldVar == limitPromotedStructFieldVar)
- {
- fieldVarDsc = NULL;
- }
- else
- {
- // The next field should have the same parent variable, and we should have put the field vars in
- // order sorted by offset.
- noway_assert(fieldVarDsc->lvIsStructField &&
- compiler->lvaTable[nextPromotedStructFieldVar].lvIsStructField &&
- fieldVarDsc->lvParentLcl ==
- compiler->lvaTable[nextPromotedStructFieldVar].lvParentLcl &&
- fieldVarDsc->lvFldOffset < compiler->lvaTable[nextPromotedStructFieldVar].lvFldOffset);
- fieldVarDsc = &compiler->lvaTable[nextPromotedStructFieldVar];
- }
- }
-                // Now, if we were assembling the slot in the promoted-struct assembly scratch variable in
-                // order to write it to an argument register, load it into that register now.
- if (curRegNum != MAX_REG_ARG)
- {
- noway_assert(compiler->lvaPromotedStructAssemblyScratchVar != BAD_VAR_NUM);
-
- getEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL), EA_4BYTE, curRegNum,
- compiler->lvaPromotedStructAssemblyScratchVar, 0);
- regTracker.rsTrackRegTrash(curRegNum);
- regSet.rsMarkArgRegUsedByPromotedFieldArg(arg, curRegNum, EA_IS_GCREF(fieldSize));
- }
- // We've finished a slot; set the goal of the next slot.
- bytesOfNextSlotOfCurPromotedStruct += 4;
- }
- }
-
- // Write back the updates.
- *pNextPromotedStructFieldVar = nextPromotedStructFieldVar;
- *pBytesOfNextSlotOfCurPromotedStruct = bytesOfNextSlotOfCurPromotedStruct;
- *pCurRegNum = curRegNum;
- *pRegTmp = regTmp;
-
- return filledExtraSlot;
-}
-#endif // _TARGET_ARM_
-
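-// genFindDeadFieldRegs: if the RHS of the copy-block node is a promoted struct local whose
-// enregistered field vars die at this use, returns the mask of registers those fields occupy so the
-// caller can remove them from the GC register sets; otherwise returns an empty mask.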
-regMaskTP CodeGen::genFindDeadFieldRegs(GenTreePtr cpBlk)
-{
- noway_assert(cpBlk->OperIsCopyBlkOp()); // Precondition.
- GenTreePtr rhs = cpBlk->gtOp.gtOp1;
- regMaskTP res = 0;
- if (rhs->OperIsIndir())
- {
- GenTree* addr = rhs->AsIndir()->Addr();
- if (addr->gtOper == GT_ADDR)
- {
- rhs = addr->gtOp.gtOp1;
- }
- }
- if (rhs->OperGet() == GT_LCL_VAR)
- {
- LclVarDsc* rhsDsc = &compiler->lvaTable[rhs->gtLclVarCommon.gtLclNum];
- if (rhsDsc->lvPromoted)
- {
- // It is promoted; iterate over its field vars.
- unsigned fieldVarNum = rhsDsc->lvFieldLclStart;
- for (unsigned i = 0; i < rhsDsc->lvFieldCnt; i++, fieldVarNum++)
- {
- LclVarDsc* fieldVarDsc = &compiler->lvaTable[fieldVarNum];
- // Did the variable go dead, and is it enregistered?
- if (fieldVarDsc->lvRegister && (rhs->gtFlags & GTF_VAR_DEATH))
- {
- // Add the register number to the set of registers holding field vars that are going dead.
- res |= genRegMask(fieldVarDsc->lvRegNum);
- }
- }
- }
- }
- return res;
-}
-
-void CodeGen::SetupLateArgs(GenTreePtr call)
-{
- GenTreeArgList* lateArgs;
- GenTreePtr curr;
-
- /* Generate the code to move the late arguments into registers */
-
- for (lateArgs = call->gtCall.gtCallLateArgs; lateArgs; lateArgs = lateArgs->Rest())
- {
- curr = lateArgs->Current();
- assert(curr);
-
- fgArgTabEntryPtr curArgTabEntry = compiler->gtArgEntryByNode(call, curr);
- assert(curArgTabEntry);
- regNumber regNum = curArgTabEntry->regNum;
- unsigned argOffset = curArgTabEntry->slotNum * TARGET_POINTER_SIZE;
-
- assert(isRegParamType(curr->TypeGet()));
- assert(curr->gtType != TYP_VOID);
-
- /* If the register is already marked as used, it will become
- multi-used. However, since it is a callee-trashed register,
- we will have to spill it before the call anyway. So do it now */
-
- {
- // Remember which registers hold pointers. We will spill
- // them, but the code that follows will fetch reg vars from
-            // the registers, so we need that GC info.
- // Also regSet.rsSpillReg doesn't like to spill enregistered
- // variables, but if this is their last use that is *exactly*
- // what we need to do, so we have to temporarily pretend
- // they are no longer live.
- // You might ask why are they in regSet.rsMaskUsed and regSet.rsMaskVars
- // when their last use is about to occur?
- // It is because this is the second operand to be evaluated
- // of some parent binary op, and the first operand is
-            // live across this tree, and the register allocator thought it could re-use the
-            // variable's register (like a GT_REG_VAR). This probably
- // is caused by RegAlloc assuming the first operand would
- // evaluate into another register.
- regMaskTP rsTemp = regSet.rsMaskVars & regSet.rsMaskUsed & RBM_CALLEE_TRASH;
- regMaskTP gcRegSavedByref = gcInfo.gcRegByrefSetCur & rsTemp;
- regMaskTP gcRegSavedGCRef = gcInfo.gcRegGCrefSetCur & rsTemp;
- regSet.RemoveMaskVars(rsTemp);
-
- regNumber regNum2 = regNum;
- for (unsigned i = 0; i < curArgTabEntry->numRegs; i++)
- {
- if (regSet.rsMaskUsed & genRegMask(regNum2))
- {
- assert(genRegMask(regNum2) & RBM_CALLEE_TRASH);
- regSet.rsSpillReg(regNum2);
- }
- regNum2 = genRegArgNext(regNum2);
- assert(i + 1 == curArgTabEntry->numRegs || regNum2 != MAX_REG_ARG);
- }
-
- // Restore gc tracking masks.
- gcInfo.gcRegByrefSetCur |= gcRegSavedByref;
- gcInfo.gcRegGCrefSetCur |= gcRegSavedGCRef;
-
- // Set maskvars back to normal
- regSet.AddMaskVars(rsTemp);
- }
-
- /* Evaluate the argument to a register */
-
- /* Check if this is the guess area for the resolve interface call
-         * Pass a size of EA_OFFSET */
- if (curr->gtOper == GT_CLS_VAR && compiler->eeGetJitDataOffs(curr->gtClsVar.gtClsVarHnd) >= 0)
- {
- getEmitter()->emitIns_R_C(ins_Load(TYP_INT), EA_OFFSET, regNum, curr->gtClsVar.gtClsVarHnd, 0);
- regTracker.rsTrackRegTrash(regNum);
-
- /* The value is now in the appropriate register */
-
- genMarkTreeInReg(curr, regNum);
-
- regSet.rsMarkRegUsed(curr);
- }
-#ifdef _TARGET_ARM_
- else if (curr->gtType == TYP_STRUCT)
- {
- GenTree* arg = curr;
- while (arg->gtOper == GT_COMMA)
- {
- GenTreePtr op1 = arg->gtOp.gtOp1;
- genEvalSideEffects(op1);
- genUpdateLife(op1);
- arg = arg->gtOp.gtOp2;
- }
- noway_assert((arg->OperGet() == GT_OBJ) || (arg->OperGet() == GT_LCL_VAR) ||
- (arg->OperGet() == GT_MKREFANY));
-
- // This code passes a TYP_STRUCT by value using
- // the argument registers first and
- // then the lvaOutgoingArgSpaceVar area.
- //
-
- // We prefer to choose low registers here to reduce code bloat
- regMaskTP regNeedMask = RBM_LOW_REGS;
- unsigned firstStackSlot = 0;
- unsigned argAlign = TARGET_POINTER_SIZE;
- size_t originalSize = InferStructOpSizeAlign(arg, &argAlign);
-
- unsigned slots = (unsigned)(roundUp(originalSize, TARGET_POINTER_SIZE) / TARGET_POINTER_SIZE);
- assert(slots > 0);
-
- if (regNum == REG_STK)
- {
- firstStackSlot = 0;
- }
- else
- {
- if (argAlign == (TARGET_POINTER_SIZE * 2))
- {
- assert((regNum & 1) == 0);
- }
-
- // firstStackSlot is an index of the first slot of the struct
- // that is on the stack, in the range [0,slots]. If it is 'slots',
- // then the entire struct is in registers. It is also equal to
- // the number of slots of the struct that are passed in registers.
-
- if (curArgTabEntry->isHfaRegArg)
- {
-                    // HFA arguments that have been assigned to registers must fit entirely in the FP argument registers.
- assert(regNum >= FIRST_FP_ARGREG && "HFA must go in FP register");
- assert(regNum + slots - 1 <= LAST_FP_ARGREG &&
- "HFA argument doesn't fit entirely in FP argument registers");
- firstStackSlot = slots;
- }
- else if (regNum + slots > MAX_REG_ARG)
- {
- firstStackSlot = MAX_REG_ARG - regNum;
- assert(firstStackSlot > 0);
- }
- else
- {
- firstStackSlot = slots;
- }
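-
-                // Illustrative example (assuming MAX_REG_ARG == 4 on ARM): a 3-slot struct whose first
-                // register is r2 gets firstStackSlot == 2, so slots 0 and 1 are passed in r2 and r3
-                // and slot 2 goes to the outgoing argument area.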
-
- if (curArgTabEntry->isHfaRegArg)
- {
- // Mask out the registers used by an HFA arg from the ones used to compute tree into.
- for (unsigned i = regNum; i < regNum + slots; i++)
- {
- regNeedMask &= ~genRegMask(regNumber(i));
- }
- }
- }
-
- // This holds the set of registers corresponding to enregistered promoted struct field variables
- // that go dead after this use of the variable in the argument list.
- regMaskTP deadFieldVarRegs = RBM_NONE;
-
- // If the struct being passed is an OBJ of a local struct variable that is promoted (in the
-            // INDEPENDENT fashion, which doesn't require writes to be written through to the variable's
- // home stack loc) "promotedStructLocalVarDesc" will be set to point to the local variable
- // table entry for the promoted struct local. As we fill slots with the contents of a
- // promoted struct, "bytesOfNextSlotOfCurPromotedStruct" will be the number of filled bytes
- // that indicate another filled slot (if we have a 12-byte struct, it has 3 four byte slots; when we're
- // working on the second slot, "bytesOfNextSlotOfCurPromotedStruct" will be 8, the point at which we're
- // done), and "nextPromotedStructFieldVar" will be the local variable number of the next field variable
- // to be copied.
- LclVarDsc* promotedStructLocalVarDesc = NULL;
- unsigned bytesOfNextSlotOfCurPromotedStruct = 0; // Size of slot.
- unsigned nextPromotedStructFieldVar = BAD_VAR_NUM;
- GenTreePtr structLocalTree = NULL;
-
- BYTE* gcLayout = NULL;
- regNumber regSrc = REG_NA;
- if (arg->gtOper == GT_OBJ)
- {
- // Are we loading a promoted struct local var?
- if (arg->gtObj.gtOp1->gtOper == GT_ADDR && arg->gtObj.gtOp1->gtOp.gtOp1->gtOper == GT_LCL_VAR)
- {
- structLocalTree = arg->gtObj.gtOp1->gtOp.gtOp1;
- unsigned structLclNum = structLocalTree->gtLclVarCommon.gtLclNum;
- LclVarDsc* varDsc = &compiler->lvaTable[structLclNum];
-
- Compiler::lvaPromotionType promotionType = compiler->lvaGetPromotionType(varDsc);
-
- if (varDsc->lvPromoted && promotionType == Compiler::PROMOTION_TYPE_INDEPENDENT) // Otherwise it is
- // guaranteed to
- // live on stack.
- {
- // Fix 388395 ARM JitStress WP7
- noway_assert(structLocalTree->TypeGet() == TYP_STRUCT);
-
- assert(!varDsc->lvAddrExposed); // Compiler::PROMOTION_TYPE_INDEPENDENT ==> not exposed.
- promotedStructLocalVarDesc = varDsc;
- nextPromotedStructFieldVar = promotedStructLocalVarDesc->lvFieldLclStart;
- }
- }
-
- if (promotedStructLocalVarDesc == NULL)
- {
- // If it's not a promoted struct variable, set "regSrc" to the address
- // of the struct local.
- genComputeReg(arg->gtObj.gtOp1, regNeedMask, RegSet::EXACT_REG, RegSet::KEEP_REG);
- noway_assert(arg->gtObj.gtOp1->gtFlags & GTF_REG_VAL);
- regSrc = arg->gtObj.gtOp1->gtRegNum;
- // Remove this register from the set of registers that we pick from, unless slots equals 1
- if (slots > 1)
- regNeedMask &= ~genRegMask(regSrc);
- }
-
- gcLayout = new (compiler, CMK_Codegen) BYTE[slots];
- compiler->info.compCompHnd->getClassGClayout(arg->gtObj.gtClass, gcLayout);
- }
- else if (arg->gtOper == GT_LCL_VAR)
- {
- // Move the address of the LCL_VAR in arg into reg
-
- unsigned varNum = arg->gtLclVarCommon.gtLclNum;
-
- // Are we loading a promoted struct local var?
- structLocalTree = arg;
- unsigned structLclNum = structLocalTree->gtLclVarCommon.gtLclNum;
- LclVarDsc* varDsc = &compiler->lvaTable[structLclNum];
-
- noway_assert(structLocalTree->TypeGet() == TYP_STRUCT);
-
- Compiler::lvaPromotionType promotionType = compiler->lvaGetPromotionType(varDsc);
-
- if (varDsc->lvPromoted && promotionType == Compiler::PROMOTION_TYPE_INDEPENDENT) // Otherwise it is
- // guaranteed to live
- // on stack.
- {
- assert(!varDsc->lvAddrExposed); // Compiler::PROMOTION_TYPE_INDEPENDENT ==> not exposed.
- promotedStructLocalVarDesc = varDsc;
- nextPromotedStructFieldVar = promotedStructLocalVarDesc->lvFieldLclStart;
- }
-
- if (promotedStructLocalVarDesc == NULL)
- {
- regSrc = regSet.rsPickFreeReg(regNeedMask);
- // Remove this register from the set of registers that we pick from, unless slots equals 1
- if (slots > 1)
- regNeedMask &= ~genRegMask(regSrc);
-
- getEmitter()->emitIns_R_S(INS_lea, EA_PTRSIZE, regSrc, varNum, 0);
- regTracker.rsTrackRegTrash(regSrc);
- gcLayout = compiler->lvaGetGcLayout(varNum);
- }
- }
- else if (arg->gtOper == GT_MKREFANY)
- {
- assert(slots == 2);
- assert((firstStackSlot == 1) || (firstStackSlot == 2));
- assert(argOffset == 0); // ???
- PushMkRefAnyArg(arg, curArgTabEntry, regNeedMask);
-
- // Adjust argOffset if part of this guy was pushed onto the stack
- if (firstStackSlot < slots)
- {
- argOffset += TARGET_POINTER_SIZE;
- }
-
- // Skip the copy loop below because we have already placed the argument in the right place
- slots = 0;
- gcLayout = NULL;
- }
- else
- {
- assert(!"Unsupported TYP_STRUCT arg kind");
- gcLayout = new (compiler, CMK_Codegen) BYTE[slots];
- }
-
- if (promotedStructLocalVarDesc != NULL)
- {
-                // We must do the stack parts first, since those might need values
- // from argument registers that will be overwritten in the portion of the
- // loop that writes into the argument registers.
- bytesOfNextSlotOfCurPromotedStruct = (firstStackSlot + 1) * TARGET_POINTER_SIZE;
- // Now find the var number of the first that starts in the first stack slot.
- unsigned fieldVarLim =
- promotedStructLocalVarDesc->lvFieldLclStart + promotedStructLocalVarDesc->lvFieldCnt;
- while (compiler->lvaTable[nextPromotedStructFieldVar].lvFldOffset <
- (firstStackSlot * TARGET_POINTER_SIZE) &&
- nextPromotedStructFieldVar < fieldVarLim)
- {
- nextPromotedStructFieldVar++;
- }
-                // We can reach the limit (meaning there is no field that goes even partly on the stack)
-                // only if the first stack slot is after the last slot.
- assert(nextPromotedStructFieldVar < fieldVarLim || firstStackSlot >= slots);
- }
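-
-            // Illustrative example: for a promoted 12-byte struct with firstStackSlot == 2, the loop
-            // above skips field vars at offsets below 8, so the stack portion below starts with the
-            // field that lands in the first stack slot.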
-
- if (slots > 0) // the mkref case may have set "slots" to zero.
- {
- // First pass the stack portion of the struct (if any)
- //
- int argOffsetOfFirstStackSlot = argOffset;
- for (unsigned i = firstStackSlot; i < slots; i++)
- {
- emitAttr fieldSize;
- if (gcLayout[i] == TYPE_GC_NONE)
- fieldSize = EA_PTRSIZE;
- else if (gcLayout[i] == TYPE_GC_REF)
- fieldSize = EA_GCREF;
- else
- {
- noway_assert(gcLayout[i] == TYPE_GC_BYREF);
- fieldSize = EA_BYREF;
- }
-
- regNumber maxRegArg = regNumber(MAX_REG_ARG);
- if (promotedStructLocalVarDesc != NULL)
- {
- regNumber regTmp = REG_STK;
-
- bool filledExtraSlot =
- genFillSlotFromPromotedStruct(arg, curArgTabEntry, promotedStructLocalVarDesc, fieldSize,
- &nextPromotedStructFieldVar,
- &bytesOfNextSlotOfCurPromotedStruct,
- /*pCurRegNum*/ &maxRegArg, argOffset,
- /*fieldOffsetOfFirstStackSlot*/ firstStackSlot *
- TARGET_POINTER_SIZE,
- argOffsetOfFirstStackSlot, &deadFieldVarRegs, &regTmp);
- if (filledExtraSlot)
- {
- i++;
- argOffset += TARGET_POINTER_SIZE;
- }
- }
- else // (promotedStructLocalVarDesc == NULL)
- {
-                    // When slots > 1, we perform multiple loads/stores, so regTmp cannot be equal to regSrc;
-                    // and although regSrc has been excluded from regNeedMask, regNeedMask is only a *hint*
-                    // to regSet.rsPickFreeReg, so we need to be a little more forceful.
-                    // Otherwise (slots == 1), just re-use the same register.
- //
- regNumber regTmp = regSrc;
- if (slots != 1)
- {
- regMaskTP regSrcUsed;
- regSet.rsLockReg(genRegMask(regSrc), &regSrcUsed);
-
- regTmp = regSet.rsPickFreeReg(regNeedMask);
-
- noway_assert(regTmp != regSrc);
-
- regSet.rsUnlockReg(genRegMask(regSrc), regSrcUsed);
- }
-
- getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), fieldSize, regTmp, regSrc,
- i * TARGET_POINTER_SIZE);
-
- getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), fieldSize, regTmp,
- compiler->lvaOutgoingArgSpaceVar, argOffset);
- regTracker.rsTrackRegTrash(regTmp);
- }
- argOffset += TARGET_POINTER_SIZE;
- }
-
- // Now pass the register portion of the struct
- //
-
- bytesOfNextSlotOfCurPromotedStruct = TARGET_POINTER_SIZE;
- if (promotedStructLocalVarDesc != NULL)
- nextPromotedStructFieldVar = promotedStructLocalVarDesc->lvFieldLclStart;
-
-            // Create a nested loop here so that the first time through the loop
-            // we set up all of the regArg registers except for possibly
-            // the one that would overwrite regSrc. Then in the final pass
-            // (if necessary) we just set up regArg/regSrc with the overwrite.
- //
- bool overwriteRegSrc = false;
- bool needOverwriteRegSrc = false;
- do
- {
- if (needOverwriteRegSrc)
- overwriteRegSrc = true;
-
- for (unsigned i = 0; i < firstStackSlot; i++)
- {
- regNumber regArg = (regNumber)(regNum + i);
-
- if (overwriteRegSrc == false)
- {
- if (regArg == regSrc)
- {
- needOverwriteRegSrc = true;
- continue;
- }
- }
- else
- {
- if (regArg != regSrc)
- continue;
- }
-
- emitAttr fieldSize;
- if (gcLayout[i] == TYPE_GC_NONE)
- fieldSize = EA_PTRSIZE;
- else if (gcLayout[i] == TYPE_GC_REF)
- fieldSize = EA_GCREF;
- else
- {
- noway_assert(gcLayout[i] == TYPE_GC_BYREF);
- fieldSize = EA_BYREF;
- }
-
- regNumber regTmp = REG_STK;
- if (promotedStructLocalVarDesc != NULL)
- {
- bool filledExtraSlot =
- genFillSlotFromPromotedStruct(arg, curArgTabEntry, promotedStructLocalVarDesc,
- fieldSize, &nextPromotedStructFieldVar,
- &bytesOfNextSlotOfCurPromotedStruct,
- /*pCurRegNum*/ &regArg,
- /*argOffset*/ INT32_MAX,
- /*fieldOffsetOfFirstStackSlot*/ INT32_MAX,
- /*argOffsetOfFirstStackSlot*/ INT32_MAX,
- &deadFieldVarRegs, &regTmp);
- if (filledExtraSlot)
- i++;
- }
- else
- {
- getEmitter()->emitIns_R_AR(ins_Load(curArgTabEntry->isHfaRegArg ? TYP_FLOAT : TYP_I_IMPL),
- fieldSize, regArg, regSrc, i * TARGET_POINTER_SIZE);
- }
- regTracker.rsTrackRegTrash(regArg);
- }
- } while (needOverwriteRegSrc != overwriteRegSrc);
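-
-            // Illustrative example: if the register portion is r0-r2 and regSrc happens to be r1, the
-            // first pass of the loop above loads r0 and r2 from the struct, and the second pass loads
-            // r1 last, once regSrc is no longer needed as the source address.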
- }
-
- if ((arg->gtOper == GT_OBJ) && (promotedStructLocalVarDesc == NULL))
- {
- regSet.rsMarkRegFree(genRegMask(regSrc));
- }
-
- if (regNum != REG_STK && promotedStructLocalVarDesc == NULL) // If promoted, we already declared the regs
- // used.
- {
- arg->gtFlags |= GTF_REG_VAL;
- for (unsigned i = 1; i < firstStackSlot; i++)
- {
- arg->gtRegNum = (regNumber)(regNum + i);
- curArgTabEntry->isHfaRegArg ? regSet.SetUsedRegFloat(arg, true) : regSet.rsMarkRegUsed(arg);
- }
- arg->gtRegNum = regNum;
- curArgTabEntry->isHfaRegArg ? regSet.SetUsedRegFloat(arg, true) : regSet.rsMarkRegUsed(arg);
- }
-
- // If we're doing struct promotion, the liveness of the promoted field vars may change after this use,
- // so update liveness.
- genUpdateLife(arg);
-
- // Now, if some copied field locals were enregistered, and they're now dead, update the set of
- // register holding gc pointers.
- if (deadFieldVarRegs != RBM_NONE)
- gcInfo.gcMarkRegSetNpt(deadFieldVarRegs);
- }
- else if (curr->gtType == TYP_LONG || curr->gtType == TYP_ULONG)
- {
- if (curArgTabEntry->regNum == REG_STK)
- {
- // The arg is passed in the outgoing argument area of the stack frame
- genCompIntoFreeRegPair(curr, RBM_NONE, RegSet::FREE_REG);
- assert(curr->gtFlags & GTF_REG_VAL); // should be enregistered after genCompIntoFreeRegPair(curr, 0)
-
- inst_SA_RV(ins_Store(TYP_INT), argOffset + 0, genRegPairLo(curr->gtRegPair), TYP_INT);
- inst_SA_RV(ins_Store(TYP_INT), argOffset + 4, genRegPairHi(curr->gtRegPair), TYP_INT);
- }
- else
- {
- assert(regNum < REG_ARG_LAST);
- regPairNo regPair = gen2regs2pair(regNum, REG_NEXT(regNum));
- genComputeRegPair(curr, regPair, RBM_NONE, RegSet::FREE_REG, false);
- assert(curr->gtRegPair == regPair);
- regSet.rsMarkRegPairUsed(curr);
- }
- }
-#endif // _TARGET_ARM_
- else if (curArgTabEntry->regNum == REG_STK)
- {
- // The arg is passed in the outgoing argument area of the stack frame
- //
- genCodeForTree(curr, 0);
- assert(curr->gtFlags & GTF_REG_VAL); // should be enregistered after genCodeForTree(curr, 0)
-
- inst_SA_RV(ins_Store(curr->gtType), argOffset, curr->gtRegNum, curr->gtType);
-
- if ((genRegMask(curr->gtRegNum) & regSet.rsMaskUsed) == 0)
- gcInfo.gcMarkRegSetNpt(genRegMask(curr->gtRegNum));
- }
- else
- {
- if (!varTypeIsFloating(curr->gtType))
- {
- genComputeReg(curr, genRegMask(regNum), RegSet::EXACT_REG, RegSet::FREE_REG, false);
- assert(curr->gtRegNum == regNum);
- regSet.rsMarkRegUsed(curr);
- }
- else // varTypeIsFloating(curr->gtType)
- {
- if (genIsValidFloatReg(regNum))
- {
- genComputeReg(curr, genRegMaskFloat(regNum, curr->gtType), RegSet::EXACT_REG, RegSet::FREE_REG,
- false);
- assert(curr->gtRegNum == regNum);
- regSet.rsMarkRegUsed(curr);
- }
- else
- {
- genCodeForTree(curr, 0);
- // If we are loading a floating point type into integer registers
- // then it must be for varargs.
- // genCodeForTree will load it into a floating point register,
- // now copy it into the correct integer register(s)
- if (curr->TypeGet() == TYP_FLOAT)
- {
- assert(genRegMask(regNum) & RBM_CALLEE_TRASH);
- regSet.rsSpillRegIfUsed(regNum);
-#ifdef _TARGET_ARM_
- getEmitter()->emitIns_R_R(INS_vmov_f2i, EA_4BYTE, regNum, curr->gtRegNum);
-#else
-#error "Unsupported target"
-#endif
- regTracker.rsTrackRegTrash(regNum);
-
- curr->gtType = TYP_INT; // Change this to TYP_INT in case we need to spill this register
- curr->gtRegNum = regNum;
- regSet.rsMarkRegUsed(curr);
- }
- else
- {
- assert(curr->TypeGet() == TYP_DOUBLE);
- regNumber intRegNumLo = regNum;
- curr->gtType = TYP_LONG; // Change this to TYP_LONG in case we spill this
-#ifdef _TARGET_ARM_
- regNumber intRegNumHi = regNumber(intRegNumLo + 1);
- assert(genRegMask(intRegNumHi) & RBM_CALLEE_TRASH);
- assert(genRegMask(intRegNumLo) & RBM_CALLEE_TRASH);
- regSet.rsSpillRegIfUsed(intRegNumHi);
- regSet.rsSpillRegIfUsed(intRegNumLo);
-
- getEmitter()->emitIns_R_R_R(INS_vmov_d2i, EA_8BYTE, intRegNumLo, intRegNumHi, curr->gtRegNum);
- regTracker.rsTrackRegTrash(intRegNumLo);
- regTracker.rsTrackRegTrash(intRegNumHi);
- curr->gtRegPair = gen2regs2pair(intRegNumLo, intRegNumHi);
- regSet.rsMarkRegPairUsed(curr);
-#else
-#error "Unsupported target"
-#endif
- }
- }
- }
- }
- }
-
- /* If any of the previously loaded arguments were spilled - reload them */
-
- for (lateArgs = call->gtCall.gtCallLateArgs; lateArgs; lateArgs = lateArgs->Rest())
- {
- curr = lateArgs->Current();
- assert(curr);
-
- if (curr->gtFlags & GTF_SPILLED)
- {
- if (isRegPairType(curr->gtType))
- {
- regSet.rsUnspillRegPair(curr, genRegPairMask(curr->gtRegPair), RegSet::KEEP_REG);
- }
- else
- {
- regSet.rsUnspillReg(curr, genRegMask(curr->gtRegNum), RegSet::KEEP_REG);
- }
- }
- }
-}
-
-#ifdef _TARGET_ARM_
-
-// 'Push' a single GT_MKREFANY argument onto a call's argument list.
-// The argument is passed as described by the fgArgTabEntry.
-// If any part of the struct is to be passed in a register, the
-// regNum value will be the register used to pass the
-// first part of the struct.
-// If any part is to go onto the stack, we first generate the
-// value into a register specified by 'regNeedMask' and
-// then store it to the outgoing argument area.
-// When this method returns, both parts of the TypedReference have
-// been placed in their argument locations, but *no* registers have been marked
-// as 'in-use'; that is the responsibility of the caller.
-//
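-// Illustrative example: with regNum == REG_R2 the first field of the TypedReference (the pointer)
-// goes in r2 and the second field (the type) goes in r3; with regNum == REG_STK both fields are
-// stored to consecutive slots of the outgoing argument area.
-//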
-void CodeGen::PushMkRefAnyArg(GenTreePtr mkRefAnyTree, fgArgTabEntryPtr curArgTabEntry, regMaskTP regNeedMask)
-{
- regNumber regNum = curArgTabEntry->regNum;
- regNumber regNum2;
- assert(mkRefAnyTree->gtOper == GT_MKREFANY);
- regMaskTP arg1RegMask = 0;
- int argOffset = curArgTabEntry->slotNum * TARGET_POINTER_SIZE;
-
- // Construct the TypedReference directly into the argument list of the call by
- // 'pushing' the first field of the typed reference: the pointer.
- // Do this by directly generating it into the argument register or outgoing arg area of the stack.
- // Mark it as used so we don't trash it while generating the second field.
- //
- if (regNum == REG_STK)
- {
- genComputeReg(mkRefAnyTree->gtOp.gtOp1, regNeedMask, RegSet::EXACT_REG, RegSet::FREE_REG);
- noway_assert(mkRefAnyTree->gtOp.gtOp1->gtFlags & GTF_REG_VAL);
- regNumber tmpReg1 = mkRefAnyTree->gtOp.gtOp1->gtRegNum;
- inst_SA_RV(ins_Store(TYP_I_IMPL), argOffset, tmpReg1, TYP_I_IMPL);
- regTracker.rsTrackRegTrash(tmpReg1);
- argOffset += TARGET_POINTER_SIZE;
- regNum2 = REG_STK;
- }
- else
- {
- assert(regNum <= REG_ARG_LAST);
- arg1RegMask = genRegMask(regNum);
- genComputeReg(mkRefAnyTree->gtOp.gtOp1, arg1RegMask, RegSet::EXACT_REG, RegSet::KEEP_REG);
- regNum2 = (regNum == REG_ARG_LAST) ? REG_STK : genRegArgNext(regNum);
- }
-
- // Now 'push' the second field of the typed reference: the method table.
- if (regNum2 == REG_STK)
- {
- genComputeReg(mkRefAnyTree->gtOp.gtOp2, regNeedMask, RegSet::EXACT_REG, RegSet::FREE_REG);
- noway_assert(mkRefAnyTree->gtOp.gtOp2->gtFlags & GTF_REG_VAL);
- regNumber tmpReg2 = mkRefAnyTree->gtOp.gtOp2->gtRegNum;
- inst_SA_RV(ins_Store(TYP_I_IMPL), argOffset, tmpReg2, TYP_I_IMPL);
- regTracker.rsTrackRegTrash(tmpReg2);
- }
- else
- {
- assert(regNum2 <= REG_ARG_LAST);
- // We don't have to mark this register as being in use here because it will
- // be done by the caller, and we don't want to double-count it.
- genComputeReg(mkRefAnyTree->gtOp.gtOp2, genRegMask(regNum2), RegSet::EXACT_REG, RegSet::FREE_REG);
- }
-
-    // Now that we are done generating the second part of the TypedReference, we can mark
-    // the first register as free.
-    // In the shared path, the caller will re-mark all registers used by this argument
-    // as being used, so we don't want to double-count this one.
- if (arg1RegMask != 0)
- {
- GenTreePtr op1 = mkRefAnyTree->gtOp.gtOp1;
- if (op1->gtFlags & GTF_SPILLED)
- {
- /* The register that we loaded arg1 into has been spilled -- reload it back into the correct arg register */
-
- regSet.rsUnspillReg(op1, arg1RegMask, RegSet::FREE_REG);
- }
- else
- {
- regSet.rsMarkRegFree(arg1RegMask);
- }
- }
-}
-#endif // _TARGET_ARM_
-
-#endif // FEATURE_FIXED_OUT_ARGS
-
-regMaskTP CodeGen::genLoadIndirectCallTarget(GenTreePtr call)
-{
- assert((gtCallTypes)call->gtCall.gtCallType == CT_INDIRECT);
-
- regMaskTP fptrRegs;
-
- /* Loading the indirect call target might cause one or more of the previously
- loaded argument registers to be spilled. So, we save information about all
- the argument registers, and unspill any of them that get spilled, after
- the call target is loaded.
- */
- struct
- {
- GenTreePtr node;
- union {
- regNumber regNum;
- regPairNo regPair;
- };
- } regArgTab[MAX_REG_ARG];
-
- /* Record the previously loaded arguments, if any */
-
- unsigned regIndex;
- regMaskTP prefRegs = regSet.rsRegMaskFree();
- regMaskTP argRegs = RBM_NONE;
- for (regIndex = 0; regIndex < MAX_REG_ARG; regIndex++)
- {
- regMaskTP mask;
- regNumber regNum = genMapRegArgNumToRegNum(regIndex, TYP_INT);
- GenTreePtr argTree = regSet.rsUsedTree[regNum];
- regArgTab[regIndex].node = argTree;
- if ((argTree != NULL) && (argTree->gtType != TYP_STRUCT)) // We won't spill the struct
- {
- assert(argTree->gtFlags & GTF_REG_VAL);
- if (isRegPairType(argTree->gtType))
- {
- regPairNo regPair = argTree->gtRegPair;
- assert(regNum == genRegPairHi(regPair) || regNum == genRegPairLo(regPair));
- regArgTab[regIndex].regPair = regPair;
- mask = genRegPairMask(regPair);
- }
- else
- {
- assert(regNum == argTree->gtRegNum);
- regArgTab[regIndex].regNum = regNum;
- mask = genRegMask(regNum);
- }
- assert(!(prefRegs & mask));
- argRegs |= mask;
- }
- }
-
- /* Record the register(s) used for the indirect call func ptr */
- fptrRegs = genMakeRvalueAddressable(call->gtCall.gtCallAddr, prefRegs, RegSet::KEEP_REG, false);
-
- /* If any of the previously loaded arguments were spilled, reload them */
-
- for (regIndex = 0; regIndex < MAX_REG_ARG; regIndex++)
- {
- GenTreePtr argTree = regArgTab[regIndex].node;
- if ((argTree != NULL) && (argTree->gtFlags & GTF_SPILLED))
- {
- assert(argTree->gtType != TYP_STRUCT); // We currently don't support spilling structs in argument registers
- if (isRegPairType(argTree->gtType))
- {
- regSet.rsUnspillRegPair(argTree, genRegPairMask(regArgTab[regIndex].regPair), RegSet::KEEP_REG);
- }
- else
- {
- regSet.rsUnspillReg(argTree, genRegMask(regArgTab[regIndex].regNum), RegSet::KEEP_REG);
- }
- }
- }
-
- /* Make sure the target is still addressable while avoiding the argument registers */
-
- fptrRegs = genKeepAddressable(call->gtCall.gtCallAddr, fptrRegs, argRegs);
-
- return fptrRegs;
-}
-
-/*****************************************************************************
- *
- * Generate code for a call. If the call returns a value in register(s), the
- * register mask that describes where the result will be found is returned;
- * otherwise, RBM_NONE is returned.
- */
-
-#ifdef _PREFAST_
-#pragma warning(push)
-#pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
-#endif
-regMaskTP CodeGen::genCodeForCall(GenTreePtr call, bool valUsed)
-{
- emitAttr retSize;
- size_t argSize;
- size_t args;
- regMaskTP retVal;
- emitter::EmitCallType emitCallType;
-
- unsigned saveStackLvl;
-
- BasicBlock* returnLabel = DUMMY_INIT(NULL);
- LclVarDsc* frameListRoot = NULL;
-
- unsigned savCurIntArgReg;
- unsigned savCurFloatArgReg;
-
- unsigned areg;
-
- regMaskTP fptrRegs = RBM_NONE;
- regMaskTP vptrMask = RBM_NONE;
-
-#ifdef DEBUG
- unsigned stackLvl = getEmitter()->emitCurStackLvl;
-
- if (compiler->verbose)
- {
- printf("\t\t\t\t\t\t\tBeg call ");
- Compiler::printTreeID(call);
- printf(" stack %02u [E=%02u]\n", genStackLevel, stackLvl);
- }
-#endif
-
- gtCallTypes callType = (gtCallTypes)call->gtCall.gtCallType;
- IL_OFFSETX ilOffset = BAD_IL_OFFSET;
-
- CORINFO_SIG_INFO* sigInfo = nullptr;
-
-#ifdef DEBUGGING_SUPPORT
- if (compiler->opts.compDbgInfo && compiler->genCallSite2ILOffsetMap != NULL)
- {
- (void)compiler->genCallSite2ILOffsetMap->Lookup(call, &ilOffset);
- }
-#endif
-
- /* Make some sanity checks on the call node */
-
- // This is a call
- noway_assert(call->IsCall());
- // "this" only makes sense for user functions
- noway_assert(call->gtCall.gtCallObjp == 0 || callType == CT_USER_FUNC || callType == CT_INDIRECT);
-    // tailcalls won't be done for helpers or caller-pop args; also check that
-    // the global flag is set
- noway_assert(!call->gtCall.IsTailCall() ||
- (callType != CT_HELPER && !(call->gtFlags & GTF_CALL_POP_ARGS) && compiler->compTailCallUsed));
-
-#ifdef DEBUG
- // Pass the call signature information down into the emitter so the emitter can associate
- // native call sites with the signatures they were generated from.
- if (callType != CT_HELPER)
- {
- sigInfo = call->gtCall.callSig;
- }
-#endif // DEBUG
-
- unsigned pseudoStackLvl = 0;
-
- if (!isFramePointerUsed() && (genStackLevel != 0) && compiler->fgIsThrowHlpBlk(compiler->compCurBB))
- {
- noway_assert(compiler->compCurBB->bbTreeList->gtStmt.gtStmtExpr == call);
-
- pseudoStackLvl = genStackLevel;
-
- noway_assert(!"Blocks with non-empty stack on entry are NYI in the emitter "
- "so fgAddCodeRef() should have set isFramePointerRequired()");
- }
-
- /* Mark the current stack level and list of pointer arguments */
-
- saveStackLvl = genStackLevel;
-
- /*-------------------------------------------------------------------------
- * Set up the registers and arguments
- */
-
- /* We'll keep track of how much we've pushed on the stack */
-
- argSize = 0;
-
- /* We need to get a label for the return address with the proper stack depth. */
- /* For the callee pops case (the default) that is before the args are pushed. */
-
- if ((call->gtFlags & GTF_CALL_UNMANAGED) && !(call->gtFlags & GTF_CALL_POP_ARGS))
- {
- returnLabel = genCreateTempLabel();
- }
-
- /*
- Make sure to save the current argument register status
- in case we have nested calls.
- */
-
- noway_assert(intRegState.rsCurRegArgNum <= MAX_REG_ARG);
- savCurIntArgReg = intRegState.rsCurRegArgNum;
- savCurFloatArgReg = floatRegState.rsCurRegArgNum;
- intRegState.rsCurRegArgNum = 0;
- floatRegState.rsCurRegArgNum = 0;
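-
-    // Illustrative example: for a call such as f(g(x), y), the nested call to g() saves and resets
-    // these counters in the same way, and restores them later in genCodeForCall, so the argument
-    // register bookkeeping of the outer call to f() is not disturbed.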
-
- /* Pass the arguments */
-
- if ((call->gtCall.gtCallObjp != NULL) || (call->gtCall.gtCallArgs != NULL))
- {
- argSize += genPushArgList(call);
- }
-
- /* We need to get a label for the return address with the proper stack depth. */
- /* For the caller pops case (cdecl) that is after the args are pushed. */
-
- if (call->gtFlags & GTF_CALL_UNMANAGED)
- {
- if (call->gtFlags & GTF_CALL_POP_ARGS)
- returnLabel = genCreateTempLabel();
-
- /* Make sure that we now have a label */
- noway_assert(returnLabel != DUMMY_INIT(NULL));
- }
-
- if (callType == CT_INDIRECT)
- {
- fptrRegs = genLoadIndirectCallTarget(call);
- }
-
- /* Make sure any callee-trashed registers are saved */
-
- regMaskTP calleeTrashedRegs = RBM_NONE;
-
-#if GTF_CALL_REG_SAVE
- if (call->gtFlags & GTF_CALL_REG_SAVE)
- {
- /* The return value reg(s) will definitely be trashed */
-
- switch (call->gtType)
- {
- case TYP_INT:
- case TYP_REF:
- case TYP_BYREF:
-#if !CPU_HAS_FP_SUPPORT
- case TYP_FLOAT:
-#endif
- calleeTrashedRegs = RBM_INTRET;
- break;
-
- case TYP_LONG:
-#if !CPU_HAS_FP_SUPPORT
- case TYP_DOUBLE:
-#endif
- calleeTrashedRegs = RBM_LNGRET;
- break;
-
- case TYP_VOID:
-#if CPU_HAS_FP_SUPPORT
- case TYP_FLOAT:
- case TYP_DOUBLE:
-#endif
- calleeTrashedRegs = 0;
- break;
-
- default:
- noway_assert(!"unhandled/unexpected type");
- }
- }
- else
-#endif
- {
- calleeTrashedRegs = RBM_CALLEE_TRASH;
- }
-
- /* Spill any callee-saved registers which are being used */
-
- regMaskTP spillRegs = calleeTrashedRegs & regSet.rsMaskUsed;
-
-    /* Ideally we would save all GC registers to the InlinedCallFrame.
-       Instead, just spill them to temps. */
-
- if (call->gtFlags & GTF_CALL_UNMANAGED)
- spillRegs |= (gcInfo.gcRegGCrefSetCur | gcInfo.gcRegByrefSetCur) & regSet.rsMaskUsed;
-
- // Ignore fptrRegs as it is needed only to perform the indirect call
-
- spillRegs &= ~fptrRegs;
-
- /* Do not spill the argument registers.
- Multi-use of RBM_ARG_REGS should be prevented by genPushArgList() */
-
- noway_assert((regSet.rsMaskMult & call->gtCall.gtCallRegUsedMask) == 0);
- spillRegs &= ~call->gtCall.gtCallRegUsedMask;
-
- if (spillRegs)
- {
- regSet.rsSpillRegs(spillRegs);
- }
-
-#if FEATURE_STACK_FP_X87
- // Spill fp stack
- SpillForCallStackFP();
-
- if (call->gtType == TYP_FLOAT || call->gtType == TYP_DOUBLE)
- {
- // Pick up a reg
- regNumber regReturn = regSet.PickRegFloat();
-
- // Assign reg to tree
- genMarkTreeInReg(call, regReturn);
-
- // Mark as used
- regSet.SetUsedRegFloat(call, true);
-
- // Update fp state
- compCurFPState.Push(regReturn);
- }
-#else
- SpillForCallRegisterFP(call->gtCall.gtCallRegUsedMask);
-#endif
-
- /* If the method returns a GC ref, set size to EA_GCREF or EA_BYREF */
-
- retSize = EA_PTRSIZE;
-
- if (valUsed)
- {
- if (call->gtType == TYP_REF || call->gtType == TYP_ARRAY)
- {
- retSize = EA_GCREF;
- }
- else if (call->gtType == TYP_BYREF)
- {
- retSize = EA_BYREF;
- }
- }
-
- /*-------------------------------------------------------------------------
-     * For caller-pop calls, the GC info will report the arguments as pending
-       arguments, since the caller explicitly pops them. They should also be
-       reported as non-GC arguments, as they effectively go dead at the
-       call site (the callee owns them).
- */
-
- args = (call->gtFlags & GTF_CALL_POP_ARGS) ? -int(argSize) : argSize;
-
-#ifdef PROFILING_SUPPORTED
-
- /*-------------------------------------------------------------------------
- * Generate the profiling hooks for the call
- */
-
- /* Treat special cases first */
-
- /* fire the event at the call site */
- /* alas, right now I can only handle calls via a method handle */
- if (compiler->compIsProfilerHookNeeded() && (callType == CT_USER_FUNC) && call->gtCall.IsTailCall())
- {
- unsigned saveStackLvl2 = genStackLevel;
-
- //
- // Push the profilerHandle
- //
- CLANG_FORMAT_COMMENT_ANCHOR;
-
-#ifdef _TARGET_X86_
- regMaskTP byrefPushedRegs;
- regMaskTP norefPushedRegs;
- regMaskTP pushedArgRegs = genPushRegs(call->gtCall.gtCallRegUsedMask, &byrefPushedRegs, &norefPushedRegs);
-
- if (compiler->compProfilerMethHndIndirected)
- {
- getEmitter()->emitIns_AR_R(INS_push, EA_PTR_DSP_RELOC, REG_NA, REG_NA,
- (ssize_t)compiler->compProfilerMethHnd);
- }
- else
- {
- inst_IV(INS_push, (size_t)compiler->compProfilerMethHnd);
- }
- genSinglePush();
-
- genEmitHelperCall(CORINFO_HELP_PROF_FCN_TAILCALL,
- sizeof(int) * 1, // argSize
- EA_UNKNOWN); // retSize
-
- //
- // Adjust the number of stack slots used by this managed method if necessary.
- //
- if (compiler->fgPtrArgCntMax < 1)
- {
- compiler->fgPtrArgCntMax = 1;
- }
-
- genPopRegs(pushedArgRegs, byrefPushedRegs, norefPushedRegs);
-#elif _TARGET_ARM_
-        // We need r0 (to pass the profiler handle) and another register (the call target) to emit a tailcall callback.
-        // To make r0 available, we add REG_PROFILER_TAIL_SCRATCH as an additional interference for tail prefixed calls.
-        // Here we grab a register to temporarily hold r0, and restore r0 once the callback has been emitted.
-        //
-        // By the time we reach this point the argument registers have been set up (by genPushArgList()), so we don't
-        // want to disturb them; hence the argument registers are locked here.
- regMaskTP usedMask = RBM_NONE;
- regSet.rsLockReg(RBM_ARG_REGS, &usedMask);
-
- regNumber scratchReg = regSet.rsGrabReg(RBM_CALLEE_SAVED);
- regSet.rsLockReg(genRegMask(scratchReg));
-
- emitAttr attr = EA_UNKNOWN;
- if (RBM_R0 & gcInfo.gcRegGCrefSetCur)
- {
- attr = EA_GCREF;
- gcInfo.gcMarkRegSetGCref(scratchReg);
- }
- else if (RBM_R0 & gcInfo.gcRegByrefSetCur)
- {
- attr = EA_BYREF;
- gcInfo.gcMarkRegSetByref(scratchReg);
- }
- else
- {
- attr = EA_4BYTE;
- }
-
- getEmitter()->emitIns_R_R(INS_mov, attr, scratchReg, REG_R0);
- regTracker.rsTrackRegTrash(scratchReg);
-
- if (compiler->compProfilerMethHndIndirected)
- {
- getEmitter()->emitIns_R_AI(INS_ldr, EA_PTR_DSP_RELOC, REG_R0, (ssize_t)compiler->compProfilerMethHnd);
- regTracker.rsTrackRegTrash(REG_R0);
- }
- else
- {
- instGen_Set_Reg_To_Imm(EA_4BYTE, REG_R0, (ssize_t)compiler->compProfilerMethHnd);
- }
-
- genEmitHelperCall(CORINFO_HELP_PROF_FCN_TAILCALL,
- 0, // argSize
- EA_UNKNOWN); // retSize
-
- // Restore back to the state that existed before profiler callback
- gcInfo.gcMarkRegSetNpt(scratchReg);
- getEmitter()->emitIns_R_R(INS_mov, attr, REG_R0, scratchReg);
- regTracker.rsTrackRegTrash(REG_R0);
- regSet.rsUnlockReg(genRegMask(scratchReg));
- regSet.rsUnlockReg(RBM_ARG_REGS, usedMask);
-#else
- NYI("Pushing the profilerHandle & caller's sp for the profiler callout and locking any registers");
-#endif //_TARGET_X86_
-
- /* Restore the stack level */
- genStackLevel = saveStackLvl2;
- }
-
-#endif // PROFILING_SUPPORTED
-
-#ifdef DEBUG
- /*-------------------------------------------------------------------------
- * Generate an ESP check for the call
- */
-
- if (compiler->opts.compStackCheckOnCall
-#if defined(USE_TRANSITION_THUNKS) || defined(USE_DYNAMIC_STACK_ALIGN)
- // check the stacks as frequently as possible
- && !call->IsHelperCall()
-#else
- && call->gtCall.gtCallType == CT_USER_FUNC
-#endif
- )
- {
- noway_assert(compiler->lvaCallEspCheck != 0xCCCCCCCC &&
- compiler->lvaTable[compiler->lvaCallEspCheck].lvDoNotEnregister &&
- compiler->lvaTable[compiler->lvaCallEspCheck].lvOnFrame);
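-        // Record the current SP in lvaCallEspCheck so that it can be compared against the SP after the call returns.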
- getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_SPBASE, compiler->lvaCallEspCheck, 0);
- }
-#endif
-
- /*-------------------------------------------------------------------------
- * Generate the call
- */
-
- bool fPossibleSyncHelperCall = false;
- CorInfoHelpFunc helperNum = CORINFO_HELP_UNDEF; /* only initialized to avoid compiler C4701 warning */
-
- bool fTailCallTargetIsVSD = false;
-
- bool fTailCall = (call->gtCall.gtCallMoreFlags & GTF_CALL_M_TAILCALL) != 0;
-
- /* Check for Delegate.Invoke. If so, we inline it. We get the
- target-object and target-function from the delegate-object, and do
- an indirect call.
- */
-
- if ((call->gtCall.gtCallMoreFlags & GTF_CALL_M_DELEGATE_INV) && !fTailCall)
- {
- noway_assert(call->gtCall.gtCallType == CT_USER_FUNC);
-
- assert((compiler->info.compCompHnd->getMethodAttribs(call->gtCall.gtCallMethHnd) &
- (CORINFO_FLG_DELEGATE_INVOKE | CORINFO_FLG_FINAL)) ==
- (CORINFO_FLG_DELEGATE_INVOKE | CORINFO_FLG_FINAL));
-
- /* Find the offsets of the 'this' pointer and new target */
-
- CORINFO_EE_INFO* pInfo;
- unsigned instOffs; // offset of new 'this' pointer
- unsigned firstTgtOffs; // offset of first target to invoke
- const regNumber regThis = genGetThisArgReg(call);
-
- pInfo = compiler->eeGetEEInfo();
- instOffs = pInfo->offsetOfDelegateInstance;
- firstTgtOffs = pInfo->offsetOfDelegateFirstTarget;
-
-#ifdef _TARGET_ARM_
- if ((call->gtCall.gtCallMoreFlags & GTF_CALL_M_SECURE_DELEGATE_INV))
- {
- getEmitter()->emitIns_R_R_I(INS_add, EA_PTRSIZE, REG_VIRTUAL_STUB_PARAM, regThis,
- pInfo->offsetOfSecureDelegateIndirectCell);
- regTracker.rsTrackRegTrash(REG_VIRTUAL_STUB_PARAM);
- }
-#endif // _TARGET_ARM_
-
- // Grab an available register to use for the CALL indirection
- regNumber indCallReg = regSet.rsGrabReg(RBM_ALLINT);
-
- // Save the invoke-target-function in indCallReg
- // 'mov indCallReg, dword ptr [regThis + firstTgtOffs]'
- getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), EA_PTRSIZE, indCallReg, regThis, firstTgtOffs);
- regTracker.rsTrackRegTrash(indCallReg);
-
- /* Set new 'this' in REG_CALL_THIS - 'mov REG_CALL_THIS, dword ptr [regThis + instOffs]' */
-
- getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), EA_GCREF, regThis, regThis, instOffs);
- regTracker.rsTrackRegTrash(regThis);
- noway_assert(instOffs < 127);
-
- /* Call through indCallReg */
-
- getEmitter()->emitIns_Call(emitter::EC_INDIR_R,
- NULL, // methHnd
- INDEBUG_LDISASM_COMMA(sigInfo) NULL, // addr
- args, retSize, gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur,
- gcInfo.gcRegByrefSetCur, ilOffset, indCallReg);
- }
- else
-
- /*-------------------------------------------------------------------------
- * Virtual and interface calls
- */
-
- switch (call->gtFlags & GTF_CALL_VIRT_KIND_MASK)
- {
- case GTF_CALL_VIRT_STUB:
- {
- regSet.rsSetRegsModified(RBM_VIRTUAL_STUB_PARAM);
-
- // An x86 JIT which uses full stub dispatch must generate only
- // the following stub dispatch calls:
- //
- // (1) isCallRelativeIndirect:
- // call dword ptr [rel32] ; FF 15 ---rel32----
- // (2) isCallRelative:
- // call abc ; E8 ---rel32----
- // (3) isCallRegisterIndirect:
- // 3-byte nop ;
- // call dword ptr [eax] ; FF 10
- //
- // THIS IS VERY TIGHTLY TIED TO THE PREDICATES IN
- // vm\i386\cGenCpu.h, esp. isCallRegisterIndirect.
-
- //
- // Please do not insert any Random NOPs while constructing this VSD call
- //
- getEmitter()->emitDisableRandomNops();
-
- if (!fTailCall)
- {
- // This is code to set up an indirect call to a stub address computed
- // via dictionary lookup. However the dispatch stub receivers aren't set up
- // to accept such calls at the moment.
- if (callType == CT_INDIRECT)
- {
- regNumber indReg;
-
- // -------------------------------------------------------------------------
- // The importer decided we needed a stub call via a computed
- // stub dispatch address, i.e. an address which came from a dictionary lookup.
- // - The dictionary lookup produces an indirected address, suitable for call
- // via "call [REG_VIRTUAL_STUB_PARAM]"
- //
- // This combination will only be generated for shared generic code and when
- // stub dispatch is active.
-
- // No need to null check the this pointer - the dispatch code will deal with this.
-
- noway_assert(genStillAddressable(call->gtCall.gtCallAddr));
-
- // Now put the address in REG_VIRTUAL_STUB_PARAM.
- // This is typically a nop when the register used for
- // the gtCallAddr is REG_VIRTUAL_STUB_PARAM
- //
- inst_RV_TT(INS_mov, REG_VIRTUAL_STUB_PARAM, call->gtCall.gtCallAddr);
- regTracker.rsTrackRegTrash(REG_VIRTUAL_STUB_PARAM);
-
-#if defined(_TARGET_X86_)
- // Emit enough bytes of nops so that this sequence can be distinguished
- // from other virtual stub dispatch calls.
- //
- // NOTE: THIS IS VERY TIGHTLY TIED TO THE PREDICATES IN
- // vm\i386\cGenCpu.h, esp. isCallRegisterIndirect.
- //
- getEmitter()->emitIns_Nop(3);
-
- // Make the virtual stub call:
- // call [REG_VIRTUAL_STUB_PARAM]
- //
- emitCallType = emitter::EC_INDIR_ARD;
-
- indReg = REG_VIRTUAL_STUB_PARAM;
- genDoneAddressable(call->gtCall.gtCallAddr, fptrRegs, RegSet::KEEP_REG);
-
-#elif CPU_LOAD_STORE_ARCH // ARM doesn't allow us to use an indirection for the call
-
- genDoneAddressable(call->gtCall.gtCallAddr, fptrRegs, RegSet::KEEP_REG);
-
- // Make the virtual stub call:
- // ldr indReg, [REG_VIRTUAL_STUB_PARAM]
- // call indReg
- //
- emitCallType = emitter::EC_INDIR_R;
-
- // Now dereference [REG_VIRTUAL_STUB_PARAM] and put it in a new temp register 'indReg'
- //
- indReg = regSet.rsGrabReg(RBM_ALLINT & ~RBM_VIRTUAL_STUB_PARAM);
- assert(call->gtCall.gtCallAddr->gtFlags & GTF_REG_VAL);
- getEmitter()->emitIns_R_R_I(INS_ldr, EA_PTRSIZE, indReg, REG_VIRTUAL_STUB_PARAM, 0);
- regTracker.rsTrackRegTrash(indReg);
-
-#else
-#error "Unknown target for VSD call"
-#endif
-
- getEmitter()->emitIns_Call(emitCallType,
- NULL, // methHnd
- INDEBUG_LDISASM_COMMA(sigInfo) NULL, // addr
- args, retSize, gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur,
- gcInfo.gcRegByrefSetCur, ilOffset, indReg);
- }
- else
- {
- // -------------------------------------------------------------------------
- // Check for a direct stub call.
- //
-
- // Get stub addr. This will return NULL if virtual call stubs are not active
- void* stubAddr = NULL;
-
- stubAddr = (void*)call->gtCall.gtStubCallStubAddr;
-
- noway_assert(stubAddr != NULL);
-
- // -------------------------------------------------------------------------
- // Direct stub calls, though the stubAddr itself may still need to be
-                        // accessed via an indirection.
- //
-
- // No need to null check - the dispatch code will deal with null this.
-
- emitter::EmitCallType callTypeStubAddr = emitter::EC_FUNC_ADDR;
- void* addr = stubAddr;
- int disp = 0;
- regNumber callReg = REG_NA;
-
- if (call->gtCall.gtCallMoreFlags & GTF_CALL_M_VIRTSTUB_REL_INDIRECT)
- {
-#if CPU_LOAD_STORE_ARCH
- callReg = regSet.rsGrabReg(RBM_VIRTUAL_STUB_PARAM);
- noway_assert(callReg == REG_VIRTUAL_STUB_PARAM);
-
- instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, REG_VIRTUAL_STUB_PARAM, (ssize_t)stubAddr);
- // The stub will write-back to this register, so don't track it
- regTracker.rsTrackRegTrash(REG_VIRTUAL_STUB_PARAM);
- getEmitter()->emitIns_R_R_I(INS_ldr, EA_PTRSIZE, REG_JUMP_THUNK_PARAM,
- REG_VIRTUAL_STUB_PARAM, 0);
- regTracker.rsTrackRegTrash(REG_JUMP_THUNK_PARAM);
- callTypeStubAddr = emitter::EC_INDIR_R;
- getEmitter()->emitIns_Call(emitter::EC_INDIR_R,
- NULL, // methHnd
- INDEBUG_LDISASM_COMMA(sigInfo) NULL, // addr
- args, retSize, gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur,
- gcInfo.gcRegByrefSetCur, ilOffset, REG_JUMP_THUNK_PARAM);
-
-#else
- // emit an indirect call
- callTypeStubAddr = emitter::EC_INDIR_C;
- addr = 0;
- disp = (ssize_t)stubAddr;
-#endif
- }
-#if CPU_LOAD_STORE_ARCH
- if (callTypeStubAddr != emitter::EC_INDIR_R)
-#endif
- {
- getEmitter()->emitIns_Call(callTypeStubAddr, call->gtCall.gtCallMethHnd,
- INDEBUG_LDISASM_COMMA(sigInfo) addr, args, retSize,
- gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur,
- gcInfo.gcRegByrefSetCur, ilOffset, callReg, REG_NA, 0, disp);
- }
- }
- }
- else // tailCall is true
- {
-
-// Non-X86 tail calls materialize the null-check in fgMorphTailCall, when it
-// moves the this pointer out of its usual place and into the argument list.
-#ifdef _TARGET_X86_
-
- // Generate "cmp ECX, [ECX]" to trap null pointers
- const regNumber regThis = genGetThisArgReg(call);
- getEmitter()->emitIns_AR_R(INS_cmp, EA_4BYTE, regThis, regThis, 0);
-
-#endif // _TARGET_X86_
-
- if (callType == CT_INDIRECT)
- {
- noway_assert(genStillAddressable(call->gtCall.gtCallAddr));
-
- // Now put the address in EAX.
- inst_RV_TT(INS_mov, REG_TAILCALL_ADDR, call->gtCall.gtCallAddr);
- regTracker.rsTrackRegTrash(REG_TAILCALL_ADDR);
-
- genDoneAddressable(call->gtCall.gtCallAddr, fptrRegs, RegSet::KEEP_REG);
- }
- else
- {
- // importer/EE should guarantee the indirection
- noway_assert(call->gtCall.gtCallMoreFlags & GTF_CALL_M_VIRTSTUB_REL_INDIRECT);
-
- instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, REG_TAILCALL_ADDR,
- ssize_t(call->gtCall.gtStubCallStubAddr));
- }
-
- fTailCallTargetIsVSD = true;
- }
-
- //
- // OK to start inserting random NOPs again
- //
- getEmitter()->emitEnableRandomNops();
- }
- break;
-
- case GTF_CALL_VIRT_VTABLE:
- // stub dispatching is off or this is not a virtual call (could be a tailcall)
- {
- regNumber vptrReg;
- unsigned vtabOffsOfIndirection;
- unsigned vtabOffsAfterIndirection;
-
- noway_assert(callType == CT_USER_FUNC);
-
- vptrReg =
- regSet.rsGrabReg(RBM_ALLINT); // Grab an available register to use for the CALL indirection
- vptrMask = genRegMask(vptrReg);
-
- /* The register no longer holds a live pointer value */
- gcInfo.gcMarkRegSetNpt(vptrMask);
-
- // MOV vptrReg, [REG_CALL_THIS + offs]
- getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), EA_PTRSIZE, vptrReg, genGetThisArgReg(call),
- VPTR_OFFS);
- regTracker.rsTrackRegTrash(vptrReg);
-
- noway_assert(vptrMask & ~call->gtCall.gtCallRegUsedMask);
-
- /* Get hold of the vtable offset (note: this might be expensive) */
-
- compiler->info.compCompHnd->getMethodVTableOffset(call->gtCall.gtCallMethHnd,
- &vtabOffsOfIndirection,
- &vtabOffsAfterIndirection);
-
- /* Get the appropriate vtable chunk */
-
- /* The register no longer holds a live pointer value */
- gcInfo.gcMarkRegSetNpt(vptrMask);
-
- // MOV vptrReg, [REG_CALL_IND_SCRATCH + vtabOffsOfIndirection]
- getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), EA_PTRSIZE, vptrReg, vptrReg,
- vtabOffsOfIndirection);
-
- /* Call through the appropriate vtable slot */
-
- if (fTailCall)
- {
- /* Load the function address: "[vptrReg+vtabOffs] -> reg_intret" */
-
- getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), EA_PTRSIZE, REG_TAILCALL_ADDR, vptrReg,
- vtabOffsAfterIndirection);
- }
- else
- {
-#if CPU_LOAD_STORE_ARCH
- getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), EA_PTRSIZE, vptrReg, vptrReg,
- vtabOffsAfterIndirection);
-
- getEmitter()->emitIns_Call(emitter::EC_INDIR_R, call->gtCall.gtCallMethHnd,
- INDEBUG_LDISASM_COMMA(sigInfo) NULL, // addr
- args, retSize, gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur,
- gcInfo.gcRegByrefSetCur, ilOffset,
- vptrReg); // ireg
-#else
- getEmitter()->emitIns_Call(emitter::EC_FUNC_VIRTUAL, call->gtCall.gtCallMethHnd,
- INDEBUG_LDISASM_COMMA(sigInfo) NULL, // addr
- args, retSize, gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur,
- gcInfo.gcRegByrefSetCur, ilOffset,
- vptrReg, // ireg
- REG_NA, // xreg
- 0, // xmul
- vtabOffsAfterIndirection); // disp
-#endif // CPU_LOAD_STORE_ARCH
- }
- }
- break;
-
- case GTF_CALL_NONVIRT:
- {
- //------------------------ Non-virtual/Indirect calls -------------------------
- // Lots of cases follow
- // - Direct P/Invoke calls
- // - Indirect calls to P/Invoke functions via the P/Invoke stub
- // - Direct Helper calls
- // - Indirect Helper calls
- // - Direct calls to known addresses
- // - Direct calls where address is accessed by one or two indirections
- // - Indirect calls to computed addresses
- // - Tailcall versions of all of the above
-
- CORINFO_METHOD_HANDLE methHnd = call->gtCall.gtCallMethHnd;
-
- //------------------------------------------------------
- // Non-virtual/Indirect calls: Insert a null check on the "this" pointer if needed
- //
- // For (final and private) functions which were called with
- // invokevirtual, but which we call directly, we need to
- // dereference the object pointer to make sure it's not NULL.
- //
-
- if (call->gtFlags & GTF_CALL_NULLCHECK)
- {
- /* Generate "cmp ECX, [ECX]" to trap null pointers */
- const regNumber regThis = genGetThisArgReg(call);
-#if CPU_LOAD_STORE_ARCH
- regNumber indReg =
- regSet.rsGrabReg(RBM_ALLINT); // Grab an available register to use for the indirection
- getEmitter()->emitIns_R_R_I(INS_ldr, EA_4BYTE, indReg, regThis, 0);
- regTracker.rsTrackRegTrash(indReg);
-#else
- getEmitter()->emitIns_AR_R(INS_cmp, EA_4BYTE, regThis, regThis, 0);
-#endif
- }
-
- if (call->gtFlags & GTF_CALL_UNMANAGED)
- {
- //------------------------------------------------------
- // Non-virtual/Indirect calls: PInvoke calls.
-
- noway_assert(compiler->info.compCallUnmanaged != 0);
-
- /* args shouldn't be greater than 64K */
-
- noway_assert((argSize & 0xffff0000) == 0);
-
- /* Remember the varDsc for the callsite-epilog */
-
- frameListRoot = &compiler->lvaTable[compiler->info.compLvFrameListRoot];
-
- // exact codegen is required
- getEmitter()->emitDisableRandomNops();
-
- int nArgSize = 0;
-
- regNumber indCallReg = REG_NA;
-
- if (callType == CT_INDIRECT)
- {
- noway_assert(genStillAddressable(call->gtCall.gtCallAddr));
-
- if (call->gtCall.gtCallAddr->gtFlags & GTF_REG_VAL)
- indCallReg = call->gtCall.gtCallAddr->gtRegNum;
-
- nArgSize = (call->gtFlags & GTF_CALL_POP_ARGS) ? 0 : (int)argSize;
- methHnd = 0;
- }
- else
- {
- noway_assert(callType == CT_USER_FUNC);
- }
-
- regNumber tcbReg;
- tcbReg = genPInvokeCallProlog(frameListRoot, nArgSize, methHnd, returnLabel);
-
- void* addr = NULL;
-
- if (callType == CT_INDIRECT)
- {
- /* Double check that the callee didn't use/trash the
- registers holding the call target.
- */
- noway_assert(tcbReg != indCallReg);
-
- if (indCallReg == REG_NA)
- {
- indCallReg = regSet.rsGrabReg(RBM_ALLINT); // Grab an available register to use for the CALL
- // indirection
-
- /* Please note that this even works with tcbReg == REG_EAX.
- tcbReg contains an interesting value only if frameListRoot is
- an enregistered local that stays alive across the call
- (certainly not EAX). If frameListRoot has been moved into
- EAX, we can trash it since it won't survive across the call
- anyways.
- */
-
- inst_RV_TT(INS_mov, indCallReg, call->gtCall.gtCallAddr);
- regTracker.rsTrackRegTrash(indCallReg);
- }
-
- emitCallType = emitter::EC_INDIR_R;
- }
- else
- {
- noway_assert(callType == CT_USER_FUNC);
-
- void* pAddr;
- addr = compiler->info.compCompHnd->getAddressOfPInvokeFixup(methHnd, (void**)&pAddr);
- if (addr != NULL)
- {
-#if CPU_LOAD_STORE_ARCH
- // Load the address into a register, indirect it and call through a register
- indCallReg = regSet.rsGrabReg(RBM_ALLINT); // Grab an available register to use for the CALL
- // indirection
- instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, indCallReg, (ssize_t)addr);
- getEmitter()->emitIns_R_R_I(INS_ldr, EA_PTRSIZE, indCallReg, indCallReg, 0);
- regTracker.rsTrackRegTrash(indCallReg);
- // Now make the call "call indCallReg"
-
- getEmitter()->emitIns_Call(emitter::EC_INDIR_R,
- methHnd, // methHnd
- INDEBUG_LDISASM_COMMA(sigInfo) // sigInfo
- NULL, // addr
- args,
- retSize, gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur,
- gcInfo.gcRegByrefSetCur, ilOffset, indCallReg);
-
- emitCallType = emitter::EC_INDIR_R;
- break;
-#else
- emitCallType = emitter::EC_FUNC_TOKEN_INDIR;
- indCallReg = REG_NA;
-#endif
- }
- else
- {
- // Double-indirection. Load the address into a register
- // and call indirectly through a register
- indCallReg = regSet.rsGrabReg(RBM_ALLINT); // Grab an available register to use for the CALL
- // indirection
-
-#if CPU_LOAD_STORE_ARCH
- instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, indCallReg, (ssize_t)pAddr);
- getEmitter()->emitIns_R_R_I(INS_ldr, EA_PTRSIZE, indCallReg, indCallReg, 0);
- getEmitter()->emitIns_R_R_I(INS_ldr, EA_PTRSIZE, indCallReg, indCallReg, 0);
- regTracker.rsTrackRegTrash(indCallReg);
-
- emitCallType = emitter::EC_INDIR_R;
-
-#else
- getEmitter()->emitIns_R_AI(INS_mov, EA_PTR_DSP_RELOC, indCallReg, (ssize_t)pAddr);
- regTracker.rsTrackRegTrash(indCallReg);
- emitCallType = emitter::EC_INDIR_ARD;
-
-#endif // CPU_LOAD_STORE_ARCH
- }
- }
-
- getEmitter()->emitIns_Call(emitCallType, compiler->eeMarkNativeTarget(methHnd),
- INDEBUG_LDISASM_COMMA(sigInfo) addr, args, retSize,
- gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur,
- ilOffset, indCallReg);
-
- if (callType == CT_INDIRECT)
- genDoneAddressable(call->gtCall.gtCallAddr, fptrRegs, RegSet::KEEP_REG);
-
- getEmitter()->emitEnableRandomNops();
-
- // Done with PInvoke calls
- break;
- }
-
- if (callType == CT_INDIRECT)
- {
- noway_assert(genStillAddressable(call->gtCall.gtCallAddr));
-
- if (call->gtCall.gtCallCookie)
- {
- //------------------------------------------------------
- // Non-virtual indirect calls via the P/Invoke stub
-
- GenTreePtr cookie = call->gtCall.gtCallCookie;
- GenTreePtr target = call->gtCall.gtCallAddr;
-
- noway_assert((call->gtFlags & GTF_CALL_POP_ARGS) == 0);
-
- noway_assert(cookie->gtOper == GT_CNS_INT ||
- cookie->gtOper == GT_IND && cookie->gtOp.gtOp1->gtOper == GT_CNS_INT);
-
- noway_assert(args == argSize);
-
-#if defined(_TARGET_X86_)
- /* load eax with the real target */
-
- inst_RV_TT(INS_mov, REG_EAX, target);
- regTracker.rsTrackRegTrash(REG_EAX);
-
- if (cookie->gtOper == GT_CNS_INT)
- inst_IV_handle(INS_push, cookie->gtIntCon.gtIconVal);
- else
- inst_TT(INS_push, cookie);
-
- /* Keep track of ESP for EBP-less frames */
- genSinglePush();
-
- argSize += sizeof(void*);
-
-#elif defined(_TARGET_ARM_)
-
- // Ensure that we spill these registers (if caller saved) in the prolog
- regSet.rsSetRegsModified(RBM_PINVOKE_COOKIE_PARAM | RBM_PINVOKE_TARGET_PARAM);
-
- // ARM: load r12 with the real target
- // X64: load r10 with the real target
- inst_RV_TT(INS_mov, REG_PINVOKE_TARGET_PARAM, target);
- regTracker.rsTrackRegTrash(REG_PINVOKE_TARGET_PARAM);
-
- // ARM: load r4 with the pinvoke VASigCookie
- // X64: load r11 with the pinvoke VASigCookie
- if (cookie->gtOper == GT_CNS_INT)
- inst_RV_IV(INS_mov, REG_PINVOKE_COOKIE_PARAM, cookie->gtIntCon.gtIconVal,
- EA_HANDLE_CNS_RELOC);
- else
- inst_RV_TT(INS_mov, REG_PINVOKE_COOKIE_PARAM, cookie);
- regTracker.rsTrackRegTrash(REG_PINVOKE_COOKIE_PARAM);
-
- noway_assert(args == argSize);
-
- // Ensure that we don't trash any of these registers if we have to load
- // the helper call target into a register to invoke it.
- regMaskTP regsUsed;
- regSet.rsLockReg(call->gtCall.gtCallRegUsedMask | RBM_PINVOKE_TARGET_PARAM |
- RBM_PINVOKE_COOKIE_PARAM,
- &regsUsed);
-#else
- NYI("Non-virtual indirect calls via the P/Invoke stub");
-#endif
-
- args = argSize;
- noway_assert((size_t)(int)args == args);
-
- genEmitHelperCall(CORINFO_HELP_PINVOKE_CALLI, (int)args, retSize);
-
-#if defined(_TARGET_ARM_)
- regSet.rsUnlockReg(call->gtCall.gtCallRegUsedMask | RBM_PINVOKE_TARGET_PARAM |
- RBM_PINVOKE_COOKIE_PARAM,
- regsUsed);
-#endif
-
-#ifdef _TARGET_ARM_
- // genEmitHelperCall doesn't record all registers a helper call would trash.
- regTracker.rsTrackRegTrash(REG_PINVOKE_COOKIE_PARAM);
-#endif
- }
- else
- {
- //------------------------------------------------------
- // Non-virtual indirect calls
-
- if (fTailCall)
- {
- inst_RV_TT(INS_mov, REG_TAILCALL_ADDR, call->gtCall.gtCallAddr);
- regTracker.rsTrackRegTrash(REG_TAILCALL_ADDR);
- }
- else
- instEmit_indCall(call, args, retSize);
- }
-
- genDoneAddressable(call->gtCall.gtCallAddr, fptrRegs, RegSet::KEEP_REG);
-
- // Done with indirect calls
- break;
- }
-
- //------------------------------------------------------
- // Non-virtual direct/indirect calls: Work out if the address of the
- // call is known at JIT time (if not it is either an indirect call
-                // or the address must be accessed via a single/double indirection)
-
- noway_assert(callType == CT_USER_FUNC || callType == CT_HELPER);
-
- void* addr;
- InfoAccessType accessType;
-
- helperNum = compiler->eeGetHelperNum(methHnd);
-
- if (callType == CT_HELPER)
- {
- noway_assert(helperNum != CORINFO_HELP_UNDEF);
-
- void* pAddr;
- addr = compiler->compGetHelperFtn(helperNum, (void**)&pAddr);
-
- accessType = IAT_VALUE;
-
- if (!addr)
- {
- accessType = IAT_PVALUE;
- addr = pAddr;
- }
- }
- else
- {
- noway_assert(helperNum == CORINFO_HELP_UNDEF);
-
- CORINFO_ACCESS_FLAGS aflags = CORINFO_ACCESS_ANY;
-
- if (call->gtCall.gtCallMoreFlags & GTF_CALL_M_NONVIRT_SAME_THIS)
- aflags = (CORINFO_ACCESS_FLAGS)(aflags | CORINFO_ACCESS_THIS);
-
- if ((call->gtFlags & GTF_CALL_NULLCHECK) == 0)
- aflags = (CORINFO_ACCESS_FLAGS)(aflags | CORINFO_ACCESS_NONNULL);
-
- CORINFO_CONST_LOOKUP addrInfo;
- compiler->info.compCompHnd->getFunctionEntryPoint(methHnd, &addrInfo, aflags);
-
- accessType = addrInfo.accessType;
- addr = addrInfo.addr;
- }
-
- if (fTailCall)
- {
- noway_assert(callType == CT_USER_FUNC);
-
- switch (accessType)
- {
- case IAT_VALUE:
- //------------------------------------------------------
-                            // Non-virtual direct calls to known addresses
- //
- instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, REG_TAILCALL_ADDR, (ssize_t)addr);
- break;
-
- case IAT_PVALUE:
- //------------------------------------------------------
- // Non-virtual direct calls to addresses accessed by
- // a single indirection.
- //
- // For tailcalls we place the target address in REG_TAILCALL_ADDR
- CLANG_FORMAT_COMMENT_ANCHOR;
-
-#if CPU_LOAD_STORE_ARCH
- {
- regNumber indReg = REG_TAILCALL_ADDR;
- instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, indReg, (ssize_t)addr);
- getEmitter()->emitIns_R_R_I(INS_ldr, EA_4BYTE, indReg, indReg, 0);
- regTracker.rsTrackRegTrash(indReg);
- }
-#else
- getEmitter()->emitIns_R_AI(INS_mov, EA_PTR_DSP_RELOC, REG_TAILCALL_ADDR, (ssize_t)addr);
- regTracker.rsTrackRegTrash(REG_TAILCALL_ADDR);
-#endif
- break;
-
- case IAT_PPVALUE:
- //------------------------------------------------------
- // Non-virtual direct calls to addresses accessed by
- // a double indirection.
- //
- // For tailcalls we place the target address in REG_TAILCALL_ADDR
- CLANG_FORMAT_COMMENT_ANCHOR;
-
-#if CPU_LOAD_STORE_ARCH
- {
- regNumber indReg = REG_TAILCALL_ADDR;
- instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, indReg, (ssize_t)addr);
- getEmitter()->emitIns_R_R_I(INS_ldr, EA_4BYTE, indReg, indReg, 0);
- getEmitter()->emitIns_R_R_I(INS_ldr, EA_4BYTE, indReg, indReg, 0);
- regTracker.rsTrackRegTrash(indReg);
- }
-#else
- getEmitter()->emitIns_R_AI(INS_mov, EA_PTR_DSP_RELOC, REG_TAILCALL_ADDR, (ssize_t)addr);
- getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), EA_PTRSIZE, REG_TAILCALL_ADDR,
- REG_TAILCALL_ADDR, 0);
- regTracker.rsTrackRegTrash(REG_TAILCALL_ADDR);
-#endif
- break;
-
- default:
- noway_assert(!"Bad accessType");
- break;
- }
- }
- else
- {
- switch (accessType)
- {
- regNumber indCallReg;
-
- case IAT_VALUE:
- //------------------------------------------------------
-                            // Non-virtual direct calls to known addresses
- //
- // The vast majority of calls end up here.... Wouldn't
- // it be nice if they all did!
- CLANG_FORMAT_COMMENT_ANCHOR;
-#ifdef _TARGET_ARM_
- if (!arm_Valid_Imm_For_BL((ssize_t)addr))
- {
- // Load the address into a register and call through a register
- indCallReg = regSet.rsGrabReg(RBM_ALLINT); // Grab an available register to use for the
- // CALL indirection
- instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, indCallReg, (ssize_t)addr);
-
- getEmitter()->emitIns_Call(emitter::EC_INDIR_R, methHnd,
- INDEBUG_LDISASM_COMMA(sigInfo) NULL, // addr
- args, retSize, gcInfo.gcVarPtrSetCur,
- gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur, ilOffset,
- indCallReg, // ireg
- REG_NA, 0, 0, // xreg, xmul, disp
- false, // isJump
- emitter::emitNoGChelper(helperNum));
- }
- else
-#endif
- {
- getEmitter()->emitIns_Call(emitter::EC_FUNC_TOKEN, methHnd,
- INDEBUG_LDISASM_COMMA(sigInfo) addr, args, retSize,
- gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur,
- gcInfo.gcRegByrefSetCur, ilOffset, REG_NA, REG_NA, 0,
- 0, /* ireg, xreg, xmul, disp */
- false, /* isJump */
- emitter::emitNoGChelper(helperNum));
- }
- break;
-
- case IAT_PVALUE:
- //------------------------------------------------------
- // Non-virtual direct calls to addresses accessed by
- // a single indirection.
- //
-
- // Load the address into a register, load indirect and call through a register
- CLANG_FORMAT_COMMENT_ANCHOR;
-#if CPU_LOAD_STORE_ARCH
- indCallReg = regSet.rsGrabReg(RBM_ALLINT); // Grab an available register to use for the CALL
- // indirection
-
- instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, indCallReg, (ssize_t)addr);
- getEmitter()->emitIns_R_R_I(INS_ldr, EA_PTRSIZE, indCallReg, indCallReg, 0);
- regTracker.rsTrackRegTrash(indCallReg);
-
- emitCallType = emitter::EC_INDIR_R;
- addr = NULL;
-
-#else
- emitCallType = emitter::EC_FUNC_TOKEN_INDIR;
- indCallReg = REG_NA;
-
-#endif // CPU_LOAD_STORE_ARCH
-
- getEmitter()->emitIns_Call(emitCallType, methHnd, INDEBUG_LDISASM_COMMA(sigInfo) addr, args,
- retSize, gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur,
- gcInfo.gcRegByrefSetCur, ilOffset,
- indCallReg, // ireg
- REG_NA, 0, 0, // xreg, xmul, disp
- false, /* isJump */
- emitter::emitNoGChelper(helperNum));
- break;
-
- case IAT_PPVALUE:
- {
- //------------------------------------------------------
- // Non-virtual direct calls to addresses accessed by
- // a double indirection.
- //
- // Double-indirection. Load the address into a register
- // and call indirectly through the register
-
- noway_assert(helperNum == CORINFO_HELP_UNDEF);
-
- // Grab an available register to use for the CALL indirection
- indCallReg = regSet.rsGrabReg(RBM_ALLINT);
-
-#if CPU_LOAD_STORE_ARCH
- instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, indCallReg, (ssize_t)addr);
- getEmitter()->emitIns_R_R_I(INS_ldr, EA_PTRSIZE, indCallReg, indCallReg, 0);
- getEmitter()->emitIns_R_R_I(INS_ldr, EA_PTRSIZE, indCallReg, indCallReg, 0);
- regTracker.rsTrackRegTrash(indCallReg);
-
- emitCallType = emitter::EC_INDIR_R;
-
-#else
-
- getEmitter()->emitIns_R_AI(INS_mov, EA_PTR_DSP_RELOC, indCallReg, (ssize_t)addr);
- regTracker.rsTrackRegTrash(indCallReg);
-
- emitCallType = emitter::EC_INDIR_ARD;
-
-#endif // CPU_LOAD_STORE_ARCH
-
- getEmitter()->emitIns_Call(emitCallType, methHnd,
- INDEBUG_LDISASM_COMMA(sigInfo) NULL, // addr
- args, retSize, gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur,
- gcInfo.gcRegByrefSetCur, ilOffset,
- indCallReg, // ireg
- REG_NA, 0, 0, // xreg, xmul, disp
- false, // isJump
- emitter::emitNoGChelper(helperNum));
- }
- break;
-
- default:
- noway_assert(!"Bad accessType");
- break;
- }
-
- // tracking of region protected by the monitor in synchronized methods
- if ((helperNum != CORINFO_HELP_UNDEF) && (compiler->info.compFlags & CORINFO_FLG_SYNCH))
- {
- fPossibleSyncHelperCall = true;
- }
- }
- }
- break;
-
- default:
- noway_assert(!"strange call type");
- break;
- }
-
- /*-------------------------------------------------------------------------
- * For tailcalls, REG_INTRET contains the address of the target function,
- * enregistered args are in the correct registers, and the stack arguments
- * have been pushed on the stack. Now call the stub-sliding helper
- */
-
- if (fTailCall)
- {
-
- if (compiler->info.compCallUnmanaged)
- genPInvokeMethodEpilog();
-
-#ifdef _TARGET_X86_
- noway_assert(0 <= (ssize_t)args); // caller-pop args not supported for tailcall
-
- // Push the count of the incoming stack arguments
-
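-        // compArgSize includes the register-passed arguments, so exclude them to get the
-        // number of incoming stack argument slots.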
- unsigned nOldStkArgs =
- (unsigned)((compiler->compArgSize - (intRegState.rsCalleeRegArgCount * sizeof(void*))) / sizeof(void*));
- getEmitter()->emitIns_I(INS_push, EA_4BYTE, nOldStkArgs);
- genSinglePush(); // Keep track of ESP for EBP-less frames
- args += sizeof(void*);
-
- // Push the count of the outgoing stack arguments
-
- getEmitter()->emitIns_I(INS_push, EA_4BYTE, argSize / sizeof(void*));
- genSinglePush(); // Keep track of ESP for EBP-less frames
- args += sizeof(void*);
-
- // Push info about the callee-saved registers to be restored
- // For now, we always spill all registers if compiler->compTailCallUsed
-
- DWORD calleeSavedRegInfo = 1 | // always restore EDI,ESI,EBX
- (fTailCallTargetIsVSD ? 0x2 : 0x0); // Stub dispatch flag
- getEmitter()->emitIns_I(INS_push, EA_4BYTE, calleeSavedRegInfo);
- genSinglePush(); // Keep track of ESP for EBP-less frames
- args += sizeof(void*);
-
- // Push the address of the target function
-
- getEmitter()->emitIns_R(INS_push, EA_4BYTE, REG_TAILCALL_ADDR);
- genSinglePush(); // Keep track of ESP for EBP-less frames
- args += sizeof(void*);
-
-#else // _TARGET_X86_
-
- args = 0;
- retSize = EA_UNKNOWN;
-
-#endif // _TARGET_X86_
-
- if (compiler->getNeedsGSSecurityCookie())
- {
- genEmitGSCookieCheck(true);
- }
-
-        // The TailCall helper does not poll for GC. An explicit GC poll
-        // should have been placed when we morphed this into a tail call.
- noway_assert(compiler->compCurBB->bbFlags & BBF_GC_SAFE_POINT);
-
- // Now call the helper
-
- genEmitHelperCall(CORINFO_HELP_TAILCALL, (int)args, retSize);
- }
-
- /*-------------------------------------------------------------------------
- * Done with call.
- * Trash registers, pop arguments if needed, etc
- */
-
- /* Mark the argument registers as free */
-
- noway_assert(intRegState.rsCurRegArgNum <= MAX_REG_ARG);
-
- for (areg = 0; areg < MAX_REG_ARG; areg++)
- {
- regMaskTP curArgMask = genMapArgNumToRegMask(areg, TYP_INT);
-
- // Is this one of the used argument registers?
- if ((curArgMask & call->gtCall.gtCallRegUsedMask) == 0)
- continue;
-
-#ifdef _TARGET_ARM_
- if (regSet.rsUsedTree[areg] == NULL)
- {
- noway_assert(areg % 2 == 1 &&
- (((areg + 1) >= MAX_REG_ARG) || (regSet.rsUsedTree[areg + 1]->TypeGet() == TYP_STRUCT) ||
- (genTypeStSz(regSet.rsUsedTree[areg + 1]->TypeGet()) == 2)));
- continue;
- }
-#endif
-
- regSet.rsMarkRegFree(curArgMask);
-
- // We keep regSet.rsMaskVars current during codegen, so we have to remove any
- // that have been copied into arg regs.
-
- regSet.RemoveMaskVars(curArgMask);
- gcInfo.gcRegGCrefSetCur &= ~(curArgMask);
- gcInfo.gcRegByrefSetCur &= ~(curArgMask);
- }
-
-#if !FEATURE_STACK_FP_X87
- //-------------------------------------------------------------------------
- // free up the FP args
-
- for (areg = 0; areg < MAX_FLOAT_REG_ARG; areg++)
- {
- regNumber argRegNum = genMapRegArgNumToRegNum(areg, TYP_FLOAT);
- regMaskTP curArgMask = genMapArgNumToRegMask(areg, TYP_FLOAT);
-
- // Is this one of the used argument registers?
- if ((curArgMask & call->gtCall.gtCallRegUsedMask) == 0)
- continue;
-
- regSet.rsMaskUsed &= ~curArgMask;
- regSet.rsUsedTree[argRegNum] = NULL;
- }
-#endif // !FEATURE_STACK_FP_X87
-
- /* restore the old argument register status */
-
- intRegState.rsCurRegArgNum = savCurIntArgReg;
- floatRegState.rsCurRegArgNum = savCurFloatArgReg;
-
- noway_assert(intRegState.rsCurRegArgNum <= MAX_REG_ARG);
-
- /* Mark all trashed registers as such */
-
- if (calleeTrashedRegs)
- regTracker.rsTrashRegSet(calleeTrashedRegs);
-
- regTracker.rsTrashRegsForGCInterruptability();
-
-#ifdef DEBUG
-
- if (!(call->gtFlags & GTF_CALL_POP_ARGS))
- {
- if (compiler->verbose)
- {
- printf("\t\t\t\t\t\t\tEnd call ");
- Compiler::printTreeID(call);
- printf(" stack %02u [E=%02u] argSize=%u\n", saveStackLvl, getEmitter()->emitCurStackLvl, argSize);
- }
- noway_assert(stackLvl == getEmitter()->emitCurStackLvl);
- }
-
-#endif
-
-#if FEATURE_STACK_FP_X87
- /* All float temps must be spilled around function calls */
- if (call->gtType == TYP_FLOAT || call->gtType == TYP_DOUBLE)
- {
- noway_assert(compCurFPState.m_uStackSize == 1);
- }
- else
- {
- noway_assert(compCurFPState.m_uStackSize == 0);
- }
-#else
- if (call->gtType == TYP_FLOAT || call->gtType == TYP_DOUBLE)
- {
-#ifdef _TARGET_ARM_
- if (call->gtCall.IsVarargs() || compiler->opts.compUseSoftFP)
- {
-            // The result for vararg methods is returned in r0, r1, but our callers
-            // expect the return in s0, s1 because of the floating-point type. Do the move now.
- if (call->gtType == TYP_FLOAT)
- {
- inst_RV_RV(INS_vmov_i2f, REG_FLOATRET, REG_INTRET, TYP_FLOAT, EA_4BYTE);
- }
- else
- {
- inst_RV_RV_RV(INS_vmov_i2d, REG_FLOATRET, REG_INTRET, REG_NEXT(REG_INTRET), EA_8BYTE);
- }
- }
-#endif
- genMarkTreeInReg(call, REG_FLOATRET);
- }
-#endif
-
- /* The function will pop all arguments before returning */
-
- genStackLevel = saveStackLvl;
-
- /* No trashed registers may possibly hold a pointer at this point */
- CLANG_FORMAT_COMMENT_ANCHOR;
-
-#ifdef DEBUG
-
- regMaskTP ptrRegs = (gcInfo.gcRegGCrefSetCur | gcInfo.gcRegByrefSetCur) & (calleeTrashedRegs & RBM_ALLINT) &
- ~regSet.rsMaskVars & ~vptrMask;
- if (ptrRegs)
- {
- // A reg may be dead already. The assertion is too strong.
- LclVarDsc* varDsc;
- unsigned varNum;
-
- // use compiler->compCurLife
- for (varNum = 0, varDsc = compiler->lvaTable; varNum < compiler->lvaCount && ptrRegs != 0; varNum++, varDsc++)
- {
- /* Ignore the variable if it's not tracked, not in a register, or a floating-point type */
-
- if (!varDsc->lvTracked)
- continue;
- if (!varDsc->lvRegister)
- continue;
- if (varDsc->IsFloatRegType())
- continue;
-
- /* Get hold of the index and the bitmask for the variable */
-
- unsigned varIndex = varDsc->lvVarIndex;
-
- /* Is this variable live currently? */
-
- if (!VarSetOps::IsMember(compiler, compiler->compCurLife, varIndex))
- {
- regNumber regNum = varDsc->lvRegNum;
- regMaskTP regMask = genRegMask(regNum);
-
- if (varDsc->lvType == TYP_REF || varDsc->lvType == TYP_BYREF)
- ptrRegs &= ~regMask;
- }
- }
- if (ptrRegs)
- {
- printf("Bad call handling for ");
- Compiler::printTreeID(call);
- printf("\n");
- noway_assert(!"A callee trashed reg is holding a GC pointer");
- }
- }
-#endif
-
-#if defined(_TARGET_X86_)
- //-------------------------------------------------------------------------
- // Create a label for tracking of region protected by the monitor in synchronized methods.
- // This needs to be here, rather than above where fPossibleSyncHelperCall is set,
- // so the GC state vars have been updated before creating the label.
-
- if (fPossibleSyncHelperCall)
- {
- switch (helperNum)
- {
- case CORINFO_HELP_MON_ENTER:
- case CORINFO_HELP_MON_ENTER_STATIC:
- noway_assert(compiler->syncStartEmitCookie == NULL);
- compiler->syncStartEmitCookie =
- getEmitter()->emitAddLabel(gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur);
- noway_assert(compiler->syncStartEmitCookie != NULL);
- break;
- case CORINFO_HELP_MON_EXIT:
- case CORINFO_HELP_MON_EXIT_STATIC:
- noway_assert(compiler->syncEndEmitCookie == NULL);
- compiler->syncEndEmitCookie =
- getEmitter()->emitAddLabel(gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur);
- noway_assert(compiler->syncEndEmitCookie != NULL);
- break;
- default:
- break;
- }
- }
-#endif // _TARGET_X86_
-
- if (call->gtFlags & GTF_CALL_UNMANAGED)
- {
- genDefineTempLabel(returnLabel);
-
-#ifdef _TARGET_X86_
- if (getInlinePInvokeCheckEnabled())
- {
- noway_assert(compiler->lvaInlinedPInvokeFrameVar != BAD_VAR_NUM);
- BasicBlock* esp_check;
-
- CORINFO_EE_INFO* pInfo = compiler->eeGetEEInfo();
- /* mov ecx, dword ptr [frame.callSiteTracker] */
-
- getEmitter()->emitIns_R_S(INS_mov, EA_4BYTE, REG_ARG_0, compiler->lvaInlinedPInvokeFrameVar,
- pInfo->inlinedCallFrameInfo.offsetOfCallSiteSP);
- regTracker.rsTrackRegTrash(REG_ARG_0);
-
- /* Generate the conditional jump */
-
- if (!(call->gtFlags & GTF_CALL_POP_ARGS))
- {
- if (argSize)
- {
- getEmitter()->emitIns_R_I(INS_add, EA_PTRSIZE, REG_ARG_0, argSize);
- }
- }
- /* cmp ecx, esp */
-
- getEmitter()->emitIns_R_R(INS_cmp, EA_PTRSIZE, REG_ARG_0, REG_SPBASE);
-
- esp_check = genCreateTempLabel();
-
- emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED);
- inst_JMP(jmpEqual, esp_check);
-
- getEmitter()->emitIns(INS_BREAKPOINT);
-
- /* genCondJump() closes the current emitter block */
-
- genDefineTempLabel(esp_check);
- }
-#endif
- }
-
- /* Are we supposed to pop the arguments? */
- CLANG_FORMAT_COMMENT_ANCHOR;
-
-#if defined(_TARGET_X86_)
- if (call->gtFlags & GTF_CALL_UNMANAGED)
- {
- if ((compiler->opts.eeFlags & CORJIT_FLG_PINVOKE_RESTORE_ESP) ||
- compiler->compStressCompile(Compiler::STRESS_PINVOKE_RESTORE_ESP, 50))
- {
- // P/Invoke signature mismatch resilience - restore ESP to pre-call value. We would ideally
- // take care of the cdecl argument popping here as well but the stack depth tracking logic
- // makes this very hard, i.e. it needs to "see" the actual pop.
-
- CORINFO_EE_INFO* pInfo = compiler->eeGetEEInfo();
-
- if (argSize == 0 || (call->gtFlags & GTF_CALL_POP_ARGS))
- {
- /* mov esp, dword ptr [frame.callSiteTracker] */
- getEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL), EA_PTRSIZE, REG_SPBASE,
- compiler->lvaInlinedPInvokeFrameVar,
- pInfo->inlinedCallFrameInfo.offsetOfCallSiteSP);
- }
- else
- {
- /* mov ecx, dword ptr [frame.callSiteTracker] */
- getEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL), EA_PTRSIZE, REG_ARG_0,
- compiler->lvaInlinedPInvokeFrameVar,
- pInfo->inlinedCallFrameInfo.offsetOfCallSiteSP);
- regTracker.rsTrackRegTrash(REG_ARG_0);
-
- /* lea esp, [ecx + argSize] */
- getEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, REG_SPBASE, REG_ARG_0, (int)argSize);
- }
- }
- }
-#endif // _TARGET_X86_
-
- if (call->gtFlags & GTF_CALL_POP_ARGS)
- {
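-        // For caller-pop calls, 'args' was set to the negative of argSize above.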
- noway_assert(args == (size_t) - (int)argSize);
-
- if (argSize)
- {
- genAdjustSP(argSize);
- }
- }
-
- if (pseudoStackLvl)
- {
- noway_assert(call->gtType == TYP_VOID);
-
- /* Generate NOP */
-
- instGen(INS_nop);
- }
-
- /* What does the function return? */
-
- retVal = RBM_NONE;
-
- switch (call->gtType)
- {
- case TYP_REF:
- case TYP_ARRAY:
- case TYP_BYREF:
- gcInfo.gcMarkRegPtrVal(REG_INTRET, call->TypeGet());
-
- __fallthrough;
-
- case TYP_INT:
-#if !CPU_HAS_FP_SUPPORT
- case TYP_FLOAT:
-#endif
- retVal = RBM_INTRET;
- break;
-
-#ifdef _TARGET_ARM_
- case TYP_STRUCT:
- {
- assert(call->gtCall.gtRetClsHnd != NULL);
- assert(compiler->IsHfa(call->gtCall.gtRetClsHnd));
- int retSlots = compiler->GetHfaCount(call->gtCall.gtRetClsHnd);
- assert(retSlots > 0 && retSlots <= MAX_HFA_RET_SLOTS);
- assert(MAX_HFA_RET_SLOTS < sizeof(int) * 8);
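-            // Build a mask covering the 'retSlots' consecutive floating-point registers,
-            // starting at REG_FLOATRET, that hold the HFA return value.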
- retVal = ((1 << retSlots) - 1) << REG_FLOATRET;
- }
- break;
-#endif
-
- case TYP_LONG:
-#if !CPU_HAS_FP_SUPPORT
- case TYP_DOUBLE:
-#endif
- retVal = RBM_LNGRET;
- break;
-
-#if CPU_HAS_FP_SUPPORT
- case TYP_FLOAT:
- case TYP_DOUBLE:
-
- break;
-#endif
-
- case TYP_VOID:
- break;
-
- default:
- noway_assert(!"unexpected/unhandled fn return type");
- }
-
- // We now have to generate the "call epilog" (if it was a call to unmanaged code).
- /* if it is a call to unmanaged code, frameListRoot must be set */
-
- noway_assert((call->gtFlags & GTF_CALL_UNMANAGED) == 0 || frameListRoot);
-
- if (frameListRoot)
- genPInvokeCallEpilog(frameListRoot, retVal);
-
- if (frameListRoot && (call->gtCall.gtCallMoreFlags & GTF_CALL_M_FRAME_VAR_DEATH))
- {
- if (frameListRoot->lvRegister)
- {
- bool isBorn = false;
- bool isDying = true;
- genUpdateRegLife(frameListRoot, isBorn, isDying DEBUGARG(call));
- }
- }
-
-#ifdef DEBUG
- if (compiler->opts.compStackCheckOnCall
-#if defined(USE_TRANSITION_THUNKS) || defined(USE_DYNAMIC_STACK_ALIGN)
- // check the stack as frequently as possible
- && !call->IsHelperCall()
-#else
- && call->gtCall.gtCallType == CT_USER_FUNC
-#endif
- )
- {
- noway_assert(compiler->lvaCallEspCheck != 0xCCCCCCCC &&
- compiler->lvaTable[compiler->lvaCallEspCheck].lvDoNotEnregister &&
- compiler->lvaTable[compiler->lvaCallEspCheck].lvOnFrame);
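-        // The callee pops its arguments, so SP should now be exactly argSize bytes above
-        // the value that was recorded in lvaCallEspCheck just before the call.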
- if (argSize > 0)
- {
- getEmitter()->emitIns_R_R(INS_mov, EA_4BYTE, REG_ARG_0, REG_SPBASE);
- getEmitter()->emitIns_R_I(INS_sub, EA_4BYTE, REG_ARG_0, argSize);
- getEmitter()->emitIns_S_R(INS_cmp, EA_4BYTE, REG_ARG_0, compiler->lvaCallEspCheck, 0);
- regTracker.rsTrackRegTrash(REG_ARG_0);
- }
- else
- getEmitter()->emitIns_S_R(INS_cmp, EA_4BYTE, REG_SPBASE, compiler->lvaCallEspCheck, 0);
-
- BasicBlock* esp_check = genCreateTempLabel();
- emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED);
- inst_JMP(jmpEqual, esp_check);
- getEmitter()->emitIns(INS_BREAKPOINT);
- genDefineTempLabel(esp_check);
- }
-#endif // DEBUG
-
-#if FEATURE_STACK_FP_X87
- UnspillRegVarsStackFp();
-#endif // FEATURE_STACK_FP_X87
-
- if (call->gtType == TYP_FLOAT || call->gtType == TYP_DOUBLE)
- {
- // Restore return node if necessary
- if (call->gtFlags & GTF_SPILLED)
- {
- UnspillFloat(call);
- }
-
-#if FEATURE_STACK_FP_X87
- // Mark as free
- regSet.SetUsedRegFloat(call, false);
-#endif
- }
-
-#if FEATURE_STACK_FP_X87
-#ifdef DEBUG
- if (compiler->verbose)
- {
- JitDumpFPState();
- }
-#endif
-#endif
-
- return retVal;
-}
-#ifdef _PREFAST_
-#pragma warning(pop)
-#endif
-
-/*****************************************************************************
- *
- * Create and record GC Info for the function.
- */
-#ifdef JIT32_GCENCODER
-void*
-#else
-void
-#endif
-CodeGen::genCreateAndStoreGCInfo(unsigned codeSize, unsigned prologSize, unsigned epilogSize DEBUGARG(void* codePtr))
-{
-#ifdef JIT32_GCENCODER
- return genCreateAndStoreGCInfoJIT32(codeSize, prologSize, epilogSize DEBUGARG(codePtr));
-#else
- genCreateAndStoreGCInfoX64(codeSize, prologSize DEBUGARG(codePtr));
-#endif
-}
-
-#ifdef JIT32_GCENCODER
-void* CodeGen::genCreateAndStoreGCInfoJIT32(unsigned codeSize,
- unsigned prologSize,
- unsigned epilogSize DEBUGARG(void* codePtr))
-{
- BYTE headerBuf[64];
- InfoHdr header;
-
- int s_cached;
-#ifdef DEBUG
- size_t headerSize =
-#endif
- compiler->compInfoBlkSize =
- gcInfo.gcInfoBlockHdrSave(headerBuf, 0, codeSize, prologSize, epilogSize, &header, &s_cached);
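-    // This first call only computes the header size; the header itself is written out
-    // further below (by the second gcInfoBlockHdrSave call), once the info block has been allocated.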
-
- size_t argTabOffset = 0;
- size_t ptrMapSize = gcInfo.gcPtrTableSize(header, codeSize, &argTabOffset);
-
-#if DISPLAY_SIZES
-
- if (genInterruptible)
- {
- gcHeaderISize += compiler->compInfoBlkSize;
- gcPtrMapISize += ptrMapSize;
- }
- else
- {
- gcHeaderNSize += compiler->compInfoBlkSize;
- gcPtrMapNSize += ptrMapSize;
- }
-
-#endif // DISPLAY_SIZES
-
- compiler->compInfoBlkSize += ptrMapSize;
-
- /* Allocate the info block for the method */
-
- compiler->compInfoBlkAddr = (BYTE*)compiler->info.compCompHnd->allocGCInfo(compiler->compInfoBlkSize);
-
-#if 0 // VERBOSE_SIZES
- // TODO-Review: 'dataSize', below, is not defined
-
-// if (compiler->compInfoBlkSize > codeSize && compiler->compInfoBlkSize > 100)
- {
- printf("[%7u VM, %7u+%7u/%7u x86 %03u/%03u%%] %s.%s\n",
- compiler->info.compILCodeSize,
- compiler->compInfoBlkSize,
- codeSize + dataSize,
- codeSize + dataSize - prologSize - epilogSize,
- 100 * (codeSize + dataSize) / compiler->info.compILCodeSize,
- 100 * (codeSize + dataSize + compiler->compInfoBlkSize) / compiler->info.compILCodeSize,
- compiler->info.compClassName,
- compiler->info.compMethodName);
- }
-
-#endif
-
- /* Fill in the info block and return it to the caller */
-
- void* infoPtr = compiler->compInfoBlkAddr;
-
- /* Create the method info block: header followed by GC tracking tables */
-
- compiler->compInfoBlkAddr +=
- gcInfo.gcInfoBlockHdrSave(compiler->compInfoBlkAddr, -1, codeSize, prologSize, epilogSize, &header, &s_cached);
-
- assert(compiler->compInfoBlkAddr == (BYTE*)infoPtr + headerSize);
- compiler->compInfoBlkAddr = gcInfo.gcPtrTableSave(compiler->compInfoBlkAddr, header, codeSize, &argTabOffset);
- assert(compiler->compInfoBlkAddr == (BYTE*)infoPtr + headerSize + ptrMapSize);
-
-#ifdef DEBUG
-
- if (0)
- {
- BYTE* temp = (BYTE*)infoPtr;
- unsigned size = compiler->compInfoBlkAddr - temp;
- BYTE* ptab = temp + headerSize;
-
- noway_assert(size == headerSize + ptrMapSize);
-
- printf("Method info block - header [%u bytes]:", headerSize);
-
- for (unsigned i = 0; i < size; i++)
- {
- if (temp == ptab)
- {
- printf("\nMethod info block - ptrtab [%u bytes]:", ptrMapSize);
- printf("\n %04X: %*c", i & ~0xF, 3 * (i & 0xF), ' ');
- }
- else
- {
- if (!(i % 16))
- printf("\n %04X: ", i);
- }
-
- printf("%02X ", *temp++);
- }
-
- printf("\n");
- }
-
-#endif // DEBUG
-
-#if DUMP_GC_TABLES
-
- if (compiler->opts.dspGCtbls)
- {
- const BYTE* base = (BYTE*)infoPtr;
- unsigned size;
- unsigned methodSize;
- InfoHdr dumpHeader;
-
- printf("GC Info for method %s\n", compiler->info.compFullName);
- printf("GC info size = %3u\n", compiler->compInfoBlkSize);
-
- size = gcInfo.gcInfoBlockHdrDump(base, &dumpHeader, &methodSize);
- // printf("size of header encoding is %3u\n", size);
- printf("\n");
-
- if (compiler->opts.dspGCtbls)
- {
- base += size;
- size = gcInfo.gcDumpPtrTable(base, dumpHeader, methodSize);
- // printf("size of pointer table is %3u\n", size);
- printf("\n");
- noway_assert(compiler->compInfoBlkAddr == (base + size));
- }
- }
-
-#ifdef DEBUG
- if (jitOpts.testMask & 128)
- {
- for (unsigned offs = 0; offs < codeSize; offs++)
- {
- gcInfo.gcFindPtrsInFrame(infoPtr, codePtr, offs);
- }
- }
-#endif // DEBUG
-#endif // DUMP_GC_TABLES
-
- /* Make sure we ended up generating the expected number of bytes */
-
- noway_assert(compiler->compInfoBlkAddr == (BYTE*)infoPtr + compiler->compInfoBlkSize);
-
- return infoPtr;
-}
-
-#else // JIT32_GCENCODER
-
-void CodeGen::genCreateAndStoreGCInfoX64(unsigned codeSize, unsigned prologSize DEBUGARG(void* codePtr))
-{
- IAllocator* allowZeroAlloc = new (compiler, CMK_GC) AllowZeroAllocator(compiler->getAllocatorGC());
- GcInfoEncoder* gcInfoEncoder = new (compiler, CMK_GC)
- GcInfoEncoder(compiler->info.compCompHnd, compiler->info.compMethodInfo, allowZeroAlloc, NOMEM);
- assert(gcInfoEncoder);
-
- // Follow the code pattern of the x86 gc info encoder (genCreateAndStoreGCInfoJIT32).
- gcInfo.gcInfoBlockHdrSave(gcInfoEncoder, codeSize, prologSize);
-
- // First we figure out the encoder ID's for the stack slots and registers.
- gcInfo.gcMakeRegPtrTable(gcInfoEncoder, codeSize, prologSize, GCInfo::MAKE_REG_PTR_MODE_ASSIGN_SLOTS);
- // Now we've requested all the slots we'll need; "finalize" these (make more compact data structures for them).
- gcInfoEncoder->FinalizeSlotIds();
- // Now we can actually use those slot ID's to declare live ranges.
- gcInfo.gcMakeRegPtrTable(gcInfoEncoder, codeSize, prologSize, GCInfo::MAKE_REG_PTR_MODE_DO_WORK);
-
- gcInfoEncoder->Build();
-
- // GC Encoder automatically puts the GC info in the right spot using ICorJitInfo::allocGCInfo(size_t)
- // let's save the values anyway for debugging purposes
- compiler->compInfoBlkAddr = gcInfoEncoder->Emit();
- compiler->compInfoBlkSize = 0; // not exposed by the GCEncoder interface
-}
-#endif
-
-/*****************************************************************************
- * For CEE_LOCALLOC
- */
-
-regNumber CodeGen::genLclHeap(GenTreePtr size)
-{
- noway_assert((genActualType(size->gtType) == TYP_INT) || (genActualType(size->gtType) == TYP_I_IMPL));
-
- // regCnt is a register used to hold both
- // the amount to stack alloc (either in bytes or pointer sized words)
- // and the final stack alloc address to return as the result
- //
- regNumber regCnt = DUMMY_INIT(REG_CORRUPT);
- var_types type = genActualType(size->gtType);
- emitAttr easz = emitTypeSize(type);
-
-#ifdef DEBUG
- // Verify ESP
- if (compiler->opts.compStackCheckOnRet)
- {
- noway_assert(compiler->lvaReturnEspCheck != 0xCCCCCCCC &&
- compiler->lvaTable[compiler->lvaReturnEspCheck].lvDoNotEnregister &&
- compiler->lvaTable[compiler->lvaReturnEspCheck].lvOnFrame);
- getEmitter()->emitIns_S_R(INS_cmp, EA_PTRSIZE, REG_SPBASE, compiler->lvaReturnEspCheck, 0);
-
- BasicBlock* esp_check = genCreateTempLabel();
- emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED);
- inst_JMP(jmpEqual, esp_check);
- getEmitter()->emitIns(INS_BREAKPOINT);
- genDefineTempLabel(esp_check);
- }
-#endif
-
- noway_assert(isFramePointerUsed());
- noway_assert(genStackLevel == 0); // Can't have anything on the stack
-
- BasicBlock* endLabel = NULL;
-#if FEATURE_FIXED_OUT_ARGS
- bool stackAdjusted = false;
-#endif
-
- if (size->IsCnsIntOrI())
- {
-#if FEATURE_FIXED_OUT_ARGS
- // If we have an outgoing arg area then we must adjust the SP
- // essentially popping off the outgoing arg area,
- // We will restore it right before we return from this method
- //
- if (compiler->lvaOutgoingArgSpaceSize > 0)
- {
- assert((compiler->lvaOutgoingArgSpaceSize % STACK_ALIGN) ==
- 0); // This must be true for the stack to remain aligned
- inst_RV_IV(INS_add, REG_SPBASE, compiler->lvaOutgoingArgSpaceSize, EA_PTRSIZE);
- stackAdjusted = true;
- }
-#endif
- size_t amount = size->gtIntCon.gtIconVal;
-
- // Convert amount to be properly STACK_ALIGN and count of DWORD_PTRs
- amount += (STACK_ALIGN - 1);
- amount &= ~(STACK_ALIGN - 1);
- amount >>= STACK_ALIGN_SHIFT; // amount is number of pointer-sized words to locAlloc
- size->gtIntCon.gtIconVal = amount; // update the GT_CNS value in the node
-
- /* If amount is zero then return null in RegCnt */
- if (amount == 0)
- {
- regCnt = regSet.rsGrabReg(RBM_ALLINT);
- instGen_Set_Reg_To_Zero(EA_PTRSIZE, regCnt);
- goto DONE;
- }
-
-        /* For small allocations we will generate up to six 'push 0' instructions inline */
- if (amount <= 6)
- {
- regCnt = regSet.rsGrabReg(RBM_ALLINT);
-#if CPU_LOAD_STORE_ARCH
- regNumber regZero = regSet.rsGrabReg(RBM_ALLINT & ~genRegMask(regCnt));
- // Set 'regZero' to zero
- instGen_Set_Reg_To_Zero(EA_PTRSIZE, regZero);
-#endif
-
- while (amount != 0)
- {
-#if CPU_LOAD_STORE_ARCH
- inst_IV(INS_push, (unsigned)genRegMask(regZero));
-#else
- inst_IV(INS_push_hide, 0); // push_hide means don't track the stack
-#endif
- amount--;
- }
-
- regTracker.rsTrackRegTrash(regCnt);
- // --- move regCnt, ESP
- inst_RV_RV(INS_mov, regCnt, REG_SPBASE, TYP_I_IMPL);
- goto DONE;
- }
- else
- {
- if (!compiler->info.compInitMem)
- {
- // Re-bias amount to be number of bytes to adjust the SP
- amount <<= STACK_ALIGN_SHIFT;
- size->gtIntCon.gtIconVal = amount; // update the GT_CNS value in the node
- if (amount < compiler->eeGetPageSize()) // must be < not <=
- {
- // Since the size is a page or less, simply adjust ESP
-
- // ESP might already be in the guard page, must touch it BEFORE
- // the alloc, not after.
- regCnt = regSet.rsGrabReg(RBM_ALLINT);
- inst_RV_RV(INS_mov, regCnt, REG_SPBASE, TYP_I_IMPL);
-#if CPU_LOAD_STORE_ARCH
- regNumber regTmp = regSet.rsGrabReg(RBM_ALLINT & ~genRegMask(regCnt));
- getEmitter()->emitIns_R_R_I(INS_ldr, EA_PTRSIZE, regTmp, REG_SPBASE, 0);
- regTracker.rsTrackRegTrash(regTmp);
-#else
- getEmitter()->emitIns_AR_R(INS_TEST, EA_4BYTE, REG_SPBASE, REG_SPBASE, 0);
-#endif
- inst_RV_IV(INS_sub, regCnt, amount, EA_PTRSIZE);
- inst_RV_RV(INS_mov, REG_SPBASE, regCnt, TYP_I_IMPL);
- regTracker.rsTrackRegTrash(regCnt);
- goto DONE;
- }
- }
- }
- }
-
- // Compute the size of the block to allocate
- genCompIntoFreeReg(size, 0, RegSet::KEEP_REG);
- noway_assert(size->gtFlags & GTF_REG_VAL);
- regCnt = size->gtRegNum;
-
-#if FEATURE_FIXED_OUT_ARGS
- // If we have an outgoing arg area then we must adjust the SP
- // essentially popping off the outgoing arg area,
- // We will restore it right before we return from this method
- //
- if ((compiler->lvaOutgoingArgSpaceSize > 0) && !stackAdjusted)
- {
- assert((compiler->lvaOutgoingArgSpaceSize % STACK_ALIGN) ==
- 0); // This must be true for the stack to remain aligned
- inst_RV_IV(INS_add, REG_SPBASE, compiler->lvaOutgoingArgSpaceSize, EA_PTRSIZE);
- stackAdjusted = true;
- }
-#endif
-
- // Perform alignment if we don't have a GT_CNS size
- //
- if (!size->IsCnsIntOrI())
- {
- endLabel = genCreateTempLabel();
-
- // If 0 we bail out
- instGen_Compare_Reg_To_Zero(easz, regCnt); // set flags
- emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED);
- inst_JMP(jmpEqual, endLabel);
-
- // Align to STACK_ALIGN
- inst_RV_IV(INS_add, regCnt, (STACK_ALIGN - 1), emitActualTypeSize(type));
-
- if (compiler->info.compInitMem)
- {
-#if ((STACK_ALIGN >> STACK_ALIGN_SHIFT) > 1)
- // regCnt will be the number of pointer-sized words to locAlloc
- // If the shift right won't do the 'and' do it here
- inst_RV_IV(INS_AND, regCnt, ~(STACK_ALIGN - 1), emitActualTypeSize(type));
-#endif
- // --- shr regCnt, 2 ---
- inst_RV_SH(INS_SHIFT_RIGHT_LOGICAL, EA_PTRSIZE, regCnt, STACK_ALIGN_SHIFT);
- }
- else
- {
- // regCnt will be the total number of bytes to locAlloc
-
- inst_RV_IV(INS_AND, regCnt, ~(STACK_ALIGN - 1), emitActualTypeSize(type));
- }
- }
-
- BasicBlock* loop;
- loop = genCreateTempLabel();
-
- if (compiler->info.compInitMem)
- {
- // At this point 'regCnt' is set to the number of pointer-sized words to locAlloc
-
- /* Since we have to zero out the allocated memory AND ensure that
- ESP is always valid by tickling the pages, we will just push 0's
- on the stack */
- CLANG_FORMAT_COMMENT_ANCHOR;
-
-#if defined(_TARGET_ARM_)
- regNumber regZero1 = regSet.rsGrabReg(RBM_ALLINT & ~genRegMask(regCnt));
- regNumber regZero2 = regSet.rsGrabReg(RBM_ALLINT & ~genRegMask(regCnt) & ~genRegMask(regZero1));
- // Set 'regZero1' and 'regZero2' to zero
- instGen_Set_Reg_To_Zero(EA_PTRSIZE, regZero1);
- instGen_Set_Reg_To_Zero(EA_PTRSIZE, regZero2);
-#endif
-
- // Loop:
- genDefineTempLabel(loop);
-
-#if defined(_TARGET_X86_)
-
- inst_IV(INS_push_hide, 0); // --- push 0
- // Are we done?
- inst_RV(INS_dec, regCnt, type);
-
-#elif defined(_TARGET_ARM_)
-
- inst_IV(INS_push, (unsigned)(genRegMask(regZero1) | genRegMask(regZero2)));
- // Are we done?
- inst_RV_IV(INS_sub, regCnt, 2, emitActualTypeSize(type), INS_FLAGS_SET);
-
-#else
- assert(!"Codegen missing");
-#endif // TARGETS
-
- emitJumpKind jmpNotEqual = genJumpKindForOper(GT_NE, CK_SIGNED);
- inst_JMP(jmpNotEqual, loop);
-
- // Move the final value of ESP into regCnt
- inst_RV_RV(INS_mov, regCnt, REG_SPBASE);
- regTracker.rsTrackRegTrash(regCnt);
- }
- else
- {
- // At this point 'regCnt' is set to the total number of bytes to locAlloc
-
- /* We don't need to zero out the allocated memory. However, we do have
- to tickle the pages to ensure that ESP is always valid and is
- in sync with the "stack guard page". Note that in the worst
- case ESP is on the last byte of the guard page. Thus you must
-           touch ESP+0 first not ESP-0x1000.
-
- Another subtlety is that you don't want ESP to be exactly on the
- boundary of the guard page because PUSH is predecrement, thus
- call setup would not touch the guard page but just beyond it */
-
- /* Note that we go through a few hoops so that ESP never points to
- illegal pages at any time during the ticking process
-
- neg REG
- add REG, ESP // reg now holds ultimate ESP
-                jb loop // result is smaller than original ESP (no wrap around)
-                xor REG, REG // Overflow, pick lowest possible number
- loop:
- test ESP, [ESP+0] // X86 - tickle the page
- ldr REGH,[ESP+0] // ARM - tickle the page
- mov REGH, ESP
- sub REGH, PAGE_SIZE
- mov ESP, REGH
- cmp ESP, REG
- jae loop
-
- mov ESP, REG
- end:
- */
- CLANG_FORMAT_COMMENT_ANCHOR;
-
-#ifdef _TARGET_ARM_
-
- inst_RV_RV_RV(INS_sub, regCnt, REG_SPBASE, regCnt, EA_4BYTE, INS_FLAGS_SET);
- inst_JMP(EJ_hs, loop);
-#else
- inst_RV(INS_NEG, regCnt, TYP_I_IMPL);
- inst_RV_RV(INS_add, regCnt, REG_SPBASE, TYP_I_IMPL);
- inst_JMP(EJ_jb, loop);
-#endif
- regTracker.rsTrackRegTrash(regCnt);
-
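-        // The jump above goes straight to the loop when the subtraction did not wrap.
-        // If it did wrap (the request exceeds the current ESP), fall through and clamp
-        // the target ESP in regCnt to zero, the lowest possible address.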
- instGen_Set_Reg_To_Zero(EA_PTRSIZE, regCnt);
-
- genDefineTempLabel(loop);
-
- // This is a workaround to avoid the emitter trying to track the
- // decrement of the ESP - we do the subtraction in another reg
- // instead of adjusting ESP directly.
-
- regNumber regTemp = regSet.rsPickReg();
-
- // Tickle the decremented value, and move back to ESP,
- // note that it has to be done BEFORE the update of ESP since
- // ESP might already be on the guard page. It is OK to leave
- // the final value of ESP on the guard page
- CLANG_FORMAT_COMMENT_ANCHOR;
-
-#if CPU_LOAD_STORE_ARCH
- getEmitter()->emitIns_R_R_I(INS_ldr, EA_4BYTE, regTemp, REG_SPBASE, 0);
-#else
- getEmitter()->emitIns_AR_R(INS_TEST, EA_4BYTE, REG_SPBASE, REG_SPBASE, 0);
-#endif
-
- inst_RV_RV(INS_mov, regTemp, REG_SPBASE, TYP_I_IMPL);
- regTracker.rsTrackRegTrash(regTemp);
-
- inst_RV_IV(INS_sub, regTemp, compiler->eeGetPageSize(), EA_PTRSIZE);
- inst_RV_RV(INS_mov, REG_SPBASE, regTemp, TYP_I_IMPL);
-
- genRecoverReg(size, RBM_ALLINT,
- RegSet::KEEP_REG); // not purely the 'size' tree anymore; though it is derived from 'size'
- noway_assert(size->gtFlags & GTF_REG_VAL);
- regCnt = size->gtRegNum;
- inst_RV_RV(INS_cmp, REG_SPBASE, regCnt, TYP_I_IMPL);
- emitJumpKind jmpGEU = genJumpKindForOper(GT_GE, CK_UNSIGNED);
- inst_JMP(jmpGEU, loop);
-
- // Move the final value to ESP
- inst_RV_RV(INS_mov, REG_SPBASE, regCnt);
- }
- regSet.rsMarkRegFree(genRegMask(regCnt));
-
-DONE:
-
- noway_assert(regCnt != DUMMY_INIT(REG_CORRUPT));
-
- if (endLabel != NULL)
- genDefineTempLabel(endLabel);
-
-#if FEATURE_FIXED_OUT_ARGS
- // If we have an outgoing arg area then we must readjust the SP
- //
- if (stackAdjusted)
- {
- assert(compiler->lvaOutgoingArgSpaceSize > 0);
- assert((compiler->lvaOutgoingArgSpaceSize % STACK_ALIGN) ==
- 0); // This must be true for the stack to remain aligned
- inst_RV_IV(INS_sub, REG_SPBASE, compiler->lvaOutgoingArgSpaceSize, EA_PTRSIZE);
- }
-#endif
-
- /* Write the lvaShadowSPfirst stack frame slot */
- noway_assert(compiler->lvaLocAllocSPvar != BAD_VAR_NUM);
- getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_SPBASE, compiler->lvaLocAllocSPvar, 0);
-
-#if STACK_PROBES
-    // Don't think it is worth the codegen complexity to embed this
- // when it's possible in each of the customized allocas.
- if (compiler->opts.compNeedStackProbes)
- {
- genGenerateStackProbe();
- }
-#endif
-
-#ifdef DEBUG
- // Update new ESP
- if (compiler->opts.compStackCheckOnRet)
- {
- noway_assert(compiler->lvaReturnEspCheck != 0xCCCCCCCC &&
- compiler->lvaTable[compiler->lvaReturnEspCheck].lvDoNotEnregister &&
- compiler->lvaTable[compiler->lvaReturnEspCheck].lvOnFrame);
- getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_SPBASE, compiler->lvaReturnEspCheck, 0);
- }
-#endif
-
- return regCnt;
-}
-
-/*****************************************************************************/
-#ifdef DEBUGGING_SUPPORT
-/*****************************************************************************
- * genSetScopeInfo
- *
- * Called for every scope info piece to record by the main genSetScopeInfo()
- */
-
-void CodeGen::genSetScopeInfo(unsigned which,
- UNATIVE_OFFSET startOffs,
- UNATIVE_OFFSET length,
- unsigned varNum,
- unsigned LVnum,
- bool avail,
- Compiler::siVarLoc& varLoc)
-{
- /* We need to do some mapping while reporting back these variables */
-
- unsigned ilVarNum = compiler->compMap2ILvarNum(varNum);
- noway_assert((int)ilVarNum != ICorDebugInfo::UNKNOWN_ILNUM);
-
-#ifdef _TARGET_X86_
- // Non-x86 platforms are allowed to access all arguments directly
- // so we don't need this code.
-
- // Is this a varargs function?
-
- if (compiler->info.compIsVarArgs && varNum != compiler->lvaVarargsHandleArg &&
- varNum < compiler->info.compArgsCount && !compiler->lvaTable[varNum].lvIsRegArg)
- {
- noway_assert(varLoc.vlType == Compiler::VLT_STK || varLoc.vlType == Compiler::VLT_STK2);
-
- // All stack arguments (except the varargs handle) have to be
- // accessed via the varargs cookie. Discard generated info,
- // and just find its position relative to the varargs handle
-
- PREFIX_ASSUME(compiler->lvaVarargsHandleArg < compiler->info.compArgsCount);
- if (!compiler->lvaTable[compiler->lvaVarargsHandleArg].lvOnFrame)
- {
- noway_assert(!compiler->opts.compDbgCode);
- return;
- }
-
- // Can't check compiler->lvaTable[varNum].lvOnFrame as we don't set it for
- // arguments of vararg functions to avoid reporting them to GC.
- noway_assert(!compiler->lvaTable[varNum].lvRegister);
- unsigned cookieOffset = compiler->lvaTable[compiler->lvaVarargsHandleArg].lvStkOffs;
- unsigned varOffset = compiler->lvaTable[varNum].lvStkOffs;
-
- noway_assert(cookieOffset < varOffset);
- unsigned offset = varOffset - cookieOffset;
- unsigned stkArgSize = compiler->compArgSize - intRegState.rsCalleeRegArgCount * sizeof(void*);
- noway_assert(offset < stkArgSize);
- offset = stkArgSize - offset;
-
- varLoc.vlType = Compiler::VLT_FIXED_VA;
- varLoc.vlFixedVarArg.vlfvOffset = offset;
- }
-
-#endif // _TARGET_X86_
-
- VarName name = NULL;
-
-#ifdef DEBUG
-
- for (unsigned scopeNum = 0; scopeNum < compiler->info.compVarScopesCount; scopeNum++)
- {
- if (LVnum == compiler->info.compVarScopes[scopeNum].vsdLVnum)
- {
- name = compiler->info.compVarScopes[scopeNum].vsdName;
- }
- }
-
-    // Hang on to this info.
-
- TrnslLocalVarInfo& tlvi = genTrnslLocalVarInfo[which];
-
- tlvi.tlviVarNum = ilVarNum;
- tlvi.tlviLVnum = LVnum;
- tlvi.tlviName = name;
- tlvi.tlviStartPC = startOffs;
- tlvi.tlviLength = length;
- tlvi.tlviAvailable = avail;
- tlvi.tlviVarLoc = varLoc;
-
-#endif // DEBUG
-
- compiler->eeSetLVinfo(which, startOffs, length, ilVarNum, LVnum, name, avail, varLoc);
-}
-
-#endif // DEBUGGING_SUPPORT
-
-/*****************************************************************************
- *
- * Return non-zero if the given register is free after the given tree is
- * evaluated (i.e. the register is either not used at all, or it holds a
- * register variable which is not live after the given node).
- * This is only called by genCreateAddrMode, when tree is a GT_ADD, with one
- * constant operand, and one that's in a register. Thus, the only thing we
- * need to determine is whether the register holding op1 is dead.
- */
-bool CodeGen::genRegTrashable(regNumber reg, GenTreePtr tree)
-{
- regMaskTP vars;
- regMaskTP mask = genRegMask(reg);
-
- if (regSet.rsMaskUsed & mask)
- return false;
-
- assert(tree->gtOper == GT_ADD);
- GenTreePtr regValTree = tree->gtOp.gtOp1;
- if (!tree->gtOp.gtOp2->IsCnsIntOrI())
- {
- regValTree = tree->gtOp.gtOp2;
- assert(tree->gtOp.gtOp1->IsCnsIntOrI());
- }
- assert(regValTree->gtFlags & GTF_REG_VAL);
-
- /* At this point, the only way that the register will remain live
- * is if it is itself a register variable that isn't dying.
- */
- assert(regValTree->gtRegNum == reg);
- if (regValTree->IsRegVar() && !regValTree->IsRegVarDeath())
- return false;
- else
- return true;
-}
-
-/*****************************************************************************/
-//
-// This method calculates the USE and DEF values for a statement.
-// It also calls fgSetRngChkTarget for the statement.
-//
-// We refactor out this code from fgPerBlockLocalVarLiveness
-// and add QMARK logic to it.
-//
-// NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE
-//
-// The usage of this method is very limited.
-// We should only call it for the first node in the statement or
-// for the node after the GTF_RELOP_QMARK node.
-//
-// NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE
-
-/*
- Since a GT_QMARK tree can take two paths (i.e. the thenTree Path or the elseTree path),
- when we calculate its fgCurDefSet and fgCurUseSet, we need to combine the results
- from both trees.
-
- Note that the GT_QMARK trees are threaded as shown below with nodes 1 to 11
- linked by gtNext.
-
- The algorithm we use is:
-    (1) We walk these nodes according to the evaluation order (i.e. from node 1 to node 11).
- (2) When we see the GTF_RELOP_QMARK node, we know we are about to split the path.
- We cache copies of current fgCurDefSet and fgCurUseSet.
- (The fact that it is recursively calling itself is for nested QMARK case,
- where we need to remember multiple copies of fgCurDefSet and fgCurUseSet.)
- (3) We walk the thenTree.
- (4) When we see GT_COLON node, we know that we just finished the thenTree.
- We then make a copy of the current fgCurDefSet and fgCurUseSet,
- restore them to the ones before the thenTree, and then continue walking
- the elseTree.
- (5) When we see the GT_QMARK node, we know we just finished the elseTree.
- So we combine the results from the thenTree and elseTree and then return.
-
-
- +--------------------+
- | GT_QMARK 11|
- +----------+---------+
- |
- *
- / \
- / \
- / \
- +---------------------+ +--------------------+
- | GT_<cond> 3 | | GT_COLON 7 |
- | w/ GTF_RELOP_QMARK | | w/ GTF_COLON_COND |
- +----------+----------+ +---------+----------+
- | |
- * *
- / \ / \
- / \ / \
- / \ / \
- 2 1 thenTree 6 elseTree 10
- x | |
- / * *
- +----------------+ / / \ / \
- |prevExpr->gtNext+------/ / \ / \
- +----------------+ / \ / \
- 5 4 9 8
-
-
-*/
-
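-/* For example, for "x = cond ? (a = 1) : b" the thenTree defines 'a' while the elseTree
-   uses 'b'.  When the walk reaches the GT_QMARK, the two paths are combined by
-   intersecting the DEF sets (a def only counts if it happens on both paths) and by
-   unioning the USE sets (a use on either path counts), as the GT_QMARK case below does. */
-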
-GenTreePtr Compiler::fgLegacyPerStatementLocalVarLiveness(GenTreePtr startNode, // The node to start walking with.
- GenTreePtr relopNode, // The node before the startNode.
- // (It should either be NULL or
- // a GTF_RELOP_QMARK node.)
- GenTreePtr asgdLclVar)
-{
- GenTreePtr tree;
-
- VARSET_TP VARSET_INIT(this, defSet_BeforeSplit, fgCurDefSet); // Store the current fgCurDefSet and fgCurUseSet so
-    VARSET_TP VARSET_INIT(this, useSet_BeforeSplit, fgCurUseSet); // we can restore them before entering the elseTree.
-
- bool heapUse_BeforeSplit = fgCurHeapUse;
- bool heapDef_BeforeSplit = fgCurHeapDef;
- bool heapHavoc_BeforeSplit = fgCurHeapHavoc;
-
- VARSET_TP VARSET_INIT_NOCOPY(defSet_AfterThenTree, VarSetOps::MakeEmpty(this)); // These two variables will store
- // the USE and DEF sets after
- VARSET_TP VARSET_INIT_NOCOPY(useSet_AfterThenTree, VarSetOps::MakeEmpty(this)); // evaluating the thenTree.
-
- bool heapUse_AfterThenTree = fgCurHeapUse;
- bool heapDef_AfterThenTree = fgCurHeapDef;
- bool heapHavoc_AfterThenTree = fgCurHeapHavoc;
-
- // relopNode is either NULL or a GTF_RELOP_QMARK node.
- assert(!relopNode || (relopNode->OperKind() & GTK_RELOP) && (relopNode->gtFlags & GTF_RELOP_QMARK));
-
- // If relopNode is NULL, then the startNode must be the 1st node of the statement.
- // If relopNode is non-NULL, then the startNode must be the node right after the GTF_RELOP_QMARK node.
- assert((!relopNode && startNode == compCurStmt->gtStmt.gtStmtList) ||
- (relopNode && startNode == relopNode->gtNext));
-
- for (tree = startNode; tree; tree = tree->gtNext)
- {
- switch (tree->gtOper)
- {
-
- case GT_QMARK:
-
- // This must be a GT_QMARK node whose GTF_RELOP_QMARK node is recursively calling us.
- noway_assert(relopNode && tree->gtOp.gtOp1 == relopNode);
-
- // By the time we see a GT_QMARK, we must have finished processing the elseTree.
-                // So it's time to combine the results
-                // from the thenTree and the elseTree, and then return.
-
- VarSetOps::IntersectionD(this, fgCurDefSet, defSet_AfterThenTree);
- VarSetOps::UnionD(this, fgCurUseSet, useSet_AfterThenTree);
-
- fgCurHeapDef = fgCurHeapDef && heapDef_AfterThenTree;
- fgCurHeapHavoc = fgCurHeapHavoc && heapHavoc_AfterThenTree;
- fgCurHeapUse = fgCurHeapUse || heapUse_AfterThenTree;
-
- // Return the GT_QMARK node itself so the caller can continue from there.
- // NOTE: the caller will get to the next node by doing the "tree = tree->gtNext"
- // in the "for" statement.
- goto _return;
-
- case GT_COLON:
- // By the time we see GT_COLON, we must have just walked the thenTree.
- // So we need to do two things here.
- // (1) Save the current fgCurDefSet and fgCurUseSet so that later we can combine them
- // with the result from the elseTree.
- // (2) Restore fgCurDefSet and fgCurUseSet to the points before the thenTree is walked.
- // and then continue walking the elseTree.
- VarSetOps::Assign(this, defSet_AfterThenTree, fgCurDefSet);
- VarSetOps::Assign(this, useSet_AfterThenTree, fgCurUseSet);
-
- heapDef_AfterThenTree = fgCurHeapDef;
- heapHavoc_AfterThenTree = fgCurHeapHavoc;
- heapUse_AfterThenTree = fgCurHeapUse;
-
- VarSetOps::Assign(this, fgCurDefSet, defSet_BeforeSplit);
- VarSetOps::Assign(this, fgCurUseSet, useSet_BeforeSplit);
-
- fgCurHeapDef = heapDef_BeforeSplit;
- fgCurHeapHavoc = heapHavoc_BeforeSplit;
- fgCurHeapUse = heapUse_BeforeSplit;
-
- break;
-
- case GT_LCL_VAR:
- case GT_LCL_FLD:
- case GT_LCL_VAR_ADDR:
- case GT_LCL_FLD_ADDR:
- case GT_STORE_LCL_VAR:
- case GT_STORE_LCL_FLD:
- fgMarkUseDef(tree->AsLclVarCommon(), asgdLclVar);
- break;
-
- case GT_CLS_VAR:
- // For Volatile indirection, first mutate the global heap
- // see comments in ValueNum.cpp (under case GT_CLS_VAR)
-                // This models Volatile reads as def-then-use of the heap,
- // and allows for a CSE of a subsequent non-volatile read
- if ((tree->gtFlags & GTF_FLD_VOLATILE) != 0)
- {
- // For any Volatile indirection, we must handle it as a
- // definition of the global heap
- fgCurHeapDef = true;
- }
- // If the GT_CLS_VAR is the lhs of an assignment, we'll handle it as a heap def, when we get to
- // assignment.
- // Otherwise, we treat it as a use here.
- if (!fgCurHeapDef && (tree->gtFlags & GTF_CLS_VAR_ASG_LHS) == 0)
- {
- fgCurHeapUse = true;
- }
- break;
-
- case GT_IND:
- // For Volatile indirection, first mutate the global heap
- // see comments in ValueNum.cpp (under case GT_CLS_VAR)
-                // This models Volatile reads as def-then-use of the heap,
- // and allows for a CSE of a subsequent non-volatile read
- if ((tree->gtFlags & GTF_IND_VOLATILE) != 0)
- {
- // For any Volatile indirection, we must handle it as a
- // definition of the global heap
- fgCurHeapDef = true;
- }
-
- // If the GT_IND is the lhs of an assignment, we'll handle it
- // as a heap def, when we get to assignment.
- // Otherwise, we treat it as a use here.
- if ((tree->gtFlags & GTF_IND_ASG_LHS) == 0)
- {
- GenTreeLclVarCommon* dummyLclVarTree = NULL;
- bool dummyIsEntire = false;
- GenTreePtr addrArg = tree->gtOp.gtOp1->gtEffectiveVal(/*commaOnly*/ true);
- if (!addrArg->DefinesLocalAddr(this, /*width doesn't matter*/ 0, &dummyLclVarTree, &dummyIsEntire))
- {
- if (!fgCurHeapDef)
- {
- fgCurHeapUse = true;
- }
- }
- else
- {
- // Defines a local addr
- assert(dummyLclVarTree != nullptr);
- fgMarkUseDef(dummyLclVarTree->AsLclVarCommon(), asgdLclVar);
- }
- }
- break;
-
- // These should have been morphed away to become GT_INDs:
- case GT_FIELD:
- case GT_INDEX:
- unreached();
- break;
-
- // We'll assume these are use-then-defs of the heap.
- case GT_LOCKADD:
- case GT_XADD:
- case GT_XCHG:
- case GT_CMPXCHG:
- if (!fgCurHeapDef)
- {
- fgCurHeapUse = true;
- }
- fgCurHeapDef = true;
- fgCurHeapHavoc = true;
- break;
-
- case GT_MEMORYBARRIER:
-                // Similar to any Volatile indirection, we must handle this as a definition of the global heap
- fgCurHeapDef = true;
- break;
-
- // For now, all calls read/write the heap, the latter in its entirety. Might tighten this case later.
- case GT_CALL:
- {
- GenTreeCall* call = tree->AsCall();
- bool modHeap = true;
- if (call->gtCallType == CT_HELPER)
- {
- CorInfoHelpFunc helpFunc = eeGetHelperNum(call->gtCallMethHnd);
-
- if (!s_helperCallProperties.MutatesHeap(helpFunc) && !s_helperCallProperties.MayRunCctor(helpFunc))
- {
- modHeap = false;
- }
- }
- if (modHeap)
- {
- if (!fgCurHeapDef)
- {
- fgCurHeapUse = true;
- }
- fgCurHeapDef = true;
- fgCurHeapHavoc = true;
- }
- }
-
- // If this is a p/invoke unmanaged call or if this is a tail-call
- // and we have an unmanaged p/invoke call in the method,
- // then we're going to run the p/invoke epilog.
- // So we mark the FrameRoot as used by this instruction.
- // This ensures that the block->bbVarUse will contain
-            // the FrameRoot local var if it is a tracked variable.
-
- if (tree->gtCall.IsUnmanaged() || (tree->gtCall.IsTailCall() && info.compCallUnmanaged))
- {
- /* Get the TCB local and mark it as used */
-
- noway_assert(info.compLvFrameListRoot < lvaCount);
-
- LclVarDsc* varDsc = &lvaTable[info.compLvFrameListRoot];
-
- if (varDsc->lvTracked)
- {
- if (!VarSetOps::IsMember(this, fgCurDefSet, varDsc->lvVarIndex))
- {
- VarSetOps::AddElemD(this, fgCurUseSet, varDsc->lvVarIndex);
- }
- }
- }
-
- break;
-
- default:
-
- // Determine whether it defines a heap location.
- if (tree->OperIsAssignment() || tree->OperIsBlkOp())
- {
- GenTreeLclVarCommon* dummyLclVarTree = NULL;
- if (!tree->DefinesLocal(this, &dummyLclVarTree))
- {
- // If it doesn't define a local, then it might update the heap.
- fgCurHeapDef = true;
- }
- }
-
- // Are we seeing a GT_<cond> for a GT_QMARK node?
- if ((tree->OperKind() & GTK_RELOP) && (tree->gtFlags & GTF_RELOP_QMARK))
- {
- // We are about to enter the parallel paths (i.e. the thenTree and the elseTree).
- // Recursively call fgLegacyPerStatementLocalVarLiveness.
- // At the very beginning of fgLegacyPerStatementLocalVarLiveness, we will cache the values of the
- // current
- // fgCurDefSet and fgCurUseSet into local variables defSet_BeforeSplit and useSet_BeforeSplit.
- // The cached values will be used to restore fgCurDefSet and fgCurUseSet once we see the GT_COLON
- // node.
- tree = fgLegacyPerStatementLocalVarLiveness(tree->gtNext, tree, asgdLclVar);
-
- // We must have been returned here after seeing a GT_QMARK node.
- noway_assert(tree->gtOper == GT_QMARK);
- }
-
- break;
- }
- }
-
-_return:
- return tree;
-}
-
-/*****************************************************************************/
-
-/*****************************************************************************
- * Initialize the TCB local and the NDirect stub, afterwards "push"
- * the hoisted NDirect stub.
- *
- * 'initRegs' is the set of registers which will be zeroed out by the prolog
- * typically initRegs is zero
- *
- * The layout of the NDirect Inlined Call Frame is as follows:
- * (see VM/frames.h and VM/JITInterface.cpp for more information)
- *
- * offset field name when set
- * --------------------------------------------------------------
- * +00h vptr for class InlinedCallFrame method prolog
- * +04h m_Next method prolog
- * +08h m_Datum call site
- * +0ch m_pCallSiteTracker (callsite ESP) call site and zeroed in method prolog
- * +10h m_pCallerReturnAddress call site
- * +14h m_pCalleeSavedRegisters not set by JIT
- * +18h JIT retval spill area (int) before call_gc
- * +1ch JIT retval spill area (long) before call_gc
- * +20h Saved value of EBP method prolog
- */
-
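-/* The prolog code below only initializes the fields marked "method prolog" above; the
-   per-call-site fields are filled in by genPInvokeCallProlog and genPInvokeCallEpilog
-   at each unmanaged call site. */
-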
-regMaskTP CodeGen::genPInvokeMethodProlog(regMaskTP initRegs)
-{
- assert(compiler->compGeneratingProlog);
- noway_assert(!compiler->opts.ShouldUsePInvokeHelpers());
- noway_assert(compiler->info.compCallUnmanaged);
-
- CORINFO_EE_INFO* pInfo = compiler->eeGetEEInfo();
- noway_assert(compiler->lvaInlinedPInvokeFrameVar != BAD_VAR_NUM);
-
- /* let's find out if compLvFrameListRoot is enregistered */
-
- LclVarDsc* varDsc = &compiler->lvaTable[compiler->info.compLvFrameListRoot];
-
- noway_assert(!varDsc->lvIsParam);
- noway_assert(varDsc->lvType == TYP_I_IMPL);
-
- DWORD threadTlsIndex, *pThreadTlsIndex;
-
- threadTlsIndex = compiler->info.compCompHnd->getThreadTLSIndex((void**)&pThreadTlsIndex);
-#if defined(_TARGET_X86_)
- if (threadTlsIndex == (DWORD)-1 || pInfo->osType != CORINFO_WINNT)
-#else
- if (true)
-#endif
- {
- // Instead of calling GetThread(), and getting GS cookie and
- // InlinedCallFrame vptr through indirections, we'll call only one helper.
- // The helper takes frame address in REG_PINVOKE_FRAME, returns TCB in REG_PINVOKE_TCB
- // and uses REG_PINVOKE_SCRATCH as scratch register.
- getEmitter()->emitIns_R_S(INS_lea, EA_PTRSIZE, REG_PINVOKE_FRAME, compiler->lvaInlinedPInvokeFrameVar,
- pInfo->inlinedCallFrameInfo.offsetOfFrameVptr);
- regTracker.rsTrackRegTrash(REG_PINVOKE_FRAME);
-
-        // We're about to trash REG_PINVOKE_TCB, it better not be in use!
- assert((regSet.rsMaskUsed & RBM_PINVOKE_TCB) == 0);
-
- // Don't use the argument registers (including the special argument in
- // REG_PINVOKE_FRAME) for computing the target address.
- regSet.rsLockReg(RBM_ARG_REGS | RBM_PINVOKE_FRAME);
-
- genEmitHelperCall(CORINFO_HELP_INIT_PINVOKE_FRAME, 0, EA_UNKNOWN);
-
- regSet.rsUnlockReg(RBM_ARG_REGS | RBM_PINVOKE_FRAME);
-
- if (varDsc->lvRegister)
- {
- regNumber regTgt = varDsc->lvRegNum;
-
- // we are about to initialize it. So turn the bit off in initRegs to prevent
- // the prolog reinitializing it.
- initRegs &= ~genRegMask(regTgt);
-
- if (regTgt != REG_PINVOKE_TCB)
- {
-                // move TCB to its register if necessary
- getEmitter()->emitIns_R_R(INS_mov, EA_PTRSIZE, regTgt, REG_PINVOKE_TCB);
- regTracker.rsTrackRegTrash(regTgt);
- }
- }
- else
- {
- // move TCB to its stack location
- getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_PINVOKE_TCB,
- compiler->info.compLvFrameListRoot, 0);
- }
-
- // We are done, the rest of this function deals with the inlined case.
- return initRegs;
- }
-
- regNumber regTCB;
-
- if (varDsc->lvRegister)
- {
- regTCB = varDsc->lvRegNum;
-
- // we are about to initialize it. So turn the bit off in initRegs to prevent
- // the prolog reinitializing it.
- initRegs &= ~genRegMask(regTCB);
- }
- else // varDsc is allocated on the Stack
- {
- regTCB = REG_PINVOKE_TCB;
- }
-
-#if !defined(_TARGET_ARM_)
-#define WIN_NT_TLS_OFFSET (0xE10)
-#define WIN_NT5_TLS_HIGHOFFSET (0xf94)
-
- /* get TCB, mov reg, FS:[compiler->info.compEEInfo.threadTlsIndex] */
-
- // TODO-ARM-CQ: should we inline TlsGetValue here?
-
- if (threadTlsIndex < 64)
- {
- // mov reg, FS:[0xE10+threadTlsIndex*4]
- getEmitter()->emitIns_R_C(ins_Load(TYP_I_IMPL), EA_PTRSIZE, regTCB, FLD_GLOBAL_FS,
- WIN_NT_TLS_OFFSET + threadTlsIndex * sizeof(int));
- regTracker.rsTrackRegTrash(regTCB);
- }
- else
- {
- noway_assert(pInfo->osMajor >= 5);
-
- DWORD basePtr = WIN_NT5_TLS_HIGHOFFSET;
- threadTlsIndex -= 64;
-
- // mov reg, FS:[0x2c] or mov reg, fs:[0xf94]
- // mov reg, [reg+threadTlsIndex*4]
-
- getEmitter()->emitIns_R_C(ins_Load(TYP_I_IMPL), EA_PTRSIZE, regTCB, FLD_GLOBAL_FS, basePtr);
- getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), EA_PTRSIZE, regTCB, regTCB, threadTlsIndex * sizeof(int));
- regTracker.rsTrackRegTrash(regTCB);
- }
-#endif
-
- /* save TCB in local var if not enregistered */
-
- if (!varDsc->lvRegister)
- {
- getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, regTCB, compiler->info.compLvFrameListRoot, 0);
- }
-
- /* set frame's vptr */
-
- const void *inlinedCallFrameVptr, **pInlinedCallFrameVptr;
- inlinedCallFrameVptr = compiler->info.compCompHnd->getInlinedCallFrameVptr((void**)&pInlinedCallFrameVptr);
- noway_assert(inlinedCallFrameVptr != NULL); // if we have the TLS index, vptr must also be known
-
- instGen_Store_Imm_Into_Lcl(TYP_I_IMPL, EA_HANDLE_CNS_RELOC, (ssize_t)inlinedCallFrameVptr,
- compiler->lvaInlinedPInvokeFrameVar, pInfo->inlinedCallFrameInfo.offsetOfFrameVptr,
- REG_PINVOKE_SCRATCH);
-
- // Set the GSCookie
- GSCookie gsCookie, *pGSCookie;
- compiler->info.compCompHnd->getGSCookie(&gsCookie, &pGSCookie);
- noway_assert(gsCookie != 0); // if we have the TLS index, GS cookie must also be known
-
- instGen_Store_Imm_Into_Lcl(TYP_I_IMPL, EA_PTRSIZE, (ssize_t)gsCookie, compiler->lvaInlinedPInvokeFrameVar,
- pInfo->inlinedCallFrameInfo.offsetOfGSCookie, REG_PINVOKE_SCRATCH);
-
- /* Get current frame root (mov reg2, [reg+offsetOfThreadFrame]) and
- set next field in frame */
-
- getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), EA_PTRSIZE, REG_PINVOKE_SCRATCH, regTCB,
- pInfo->offsetOfThreadFrame);
- regTracker.rsTrackRegTrash(REG_PINVOKE_SCRATCH);
-
- getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_PINVOKE_SCRATCH,
- compiler->lvaInlinedPInvokeFrameVar, pInfo->inlinedCallFrameInfo.offsetOfFrameLink);
-
- noway_assert(isFramePointerUsed()); // Setup of Pinvoke frame currently requires an EBP style frame
-
- /* set EBP value in frame */
- getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, genFramePointerReg(),
- compiler->lvaInlinedPInvokeFrameVar, pInfo->inlinedCallFrameInfo.offsetOfCalleeSavedFP);
-
- /* reset track field in frame */
- instGen_Store_Imm_Into_Lcl(TYP_I_IMPL, EA_PTRSIZE, 0, compiler->lvaInlinedPInvokeFrameVar,
- pInfo->inlinedCallFrameInfo.offsetOfReturnAddress, REG_PINVOKE_SCRATCH);
-
- /* get address of our frame */
-
- getEmitter()->emitIns_R_S(INS_lea, EA_PTRSIZE, REG_PINVOKE_SCRATCH, compiler->lvaInlinedPInvokeFrameVar,
- pInfo->inlinedCallFrameInfo.offsetOfFrameVptr);
- regTracker.rsTrackRegTrash(REG_PINVOKE_SCRATCH);
-
- /* now "push" our N/direct frame */
-
- getEmitter()->emitIns_AR_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_PINVOKE_SCRATCH, regTCB,
- pInfo->offsetOfThreadFrame);
-
- return initRegs;
-}
-
-/*****************************************************************************
- * Unchain the InlinedCallFrame.
- * Technically, this is not part of the epilog; it is called when we are generating code for a GT_RETURN node
- * or tail call.
- */
-void CodeGen::genPInvokeMethodEpilog()
-{
- noway_assert(compiler->info.compCallUnmanaged);
- noway_assert(!compiler->opts.ShouldUsePInvokeHelpers());
- noway_assert(compiler->compCurBB == compiler->genReturnBB ||
- (compiler->compTailCallUsed && (compiler->compCurBB->bbJumpKind == BBJ_THROW)) ||
- (compiler->compJmpOpUsed && (compiler->compCurBB->bbFlags & BBF_HAS_JMP)));
-
- CORINFO_EE_INFO* pInfo = compiler->eeGetEEInfo();
- noway_assert(compiler->lvaInlinedPInvokeFrameVar != BAD_VAR_NUM);
-
- getEmitter()->emitDisableRandomNops();
- // debug check to make sure that we're not using ESI and/or EDI across this call, except for
- // compLvFrameListRoot.
- unsigned regTrashCheck = 0;
-
- /* XXX Tue 5/29/2007
- * We explicitly add interference for these in CodeGen::rgPredictRegUse. If you change the code
- * sequence or registers used, make sure to update the interference for compiler->genReturnLocal.
- */
- LclVarDsc* varDsc = &compiler->lvaTable[compiler->info.compLvFrameListRoot];
- regNumber reg;
- regNumber reg2 = REG_PINVOKE_FRAME;
-
- //
- // Two cases for epilog invocation:
- //
- // 1. Return
- // We can trash the ESI/EDI registers.
- //
- // 2. Tail call
-    //    When tail called, we'd like to preserve enregistered args
-    //    in ESI/EDI so we can pass them to the callee.
- //
- // For ARM, don't modify SP for storing and restoring the TCB/frame registers.
- // Instead use the reserved local variable slot.
- //
- if (compiler->compCurBB->bbFlags & BBF_HAS_JMP)
- {
- if (compiler->rpMaskPInvokeEpilogIntf & RBM_PINVOKE_TCB)
- {
-#if FEATURE_FIXED_OUT_ARGS
- // Save the register in the reserved local var slot.
- getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_PINVOKE_TCB,
- compiler->lvaPInvokeFrameRegSaveVar, 0);
-#else
- inst_RV(INS_push, REG_PINVOKE_TCB, TYP_I_IMPL);
-#endif
- }
- if (compiler->rpMaskPInvokeEpilogIntf & RBM_PINVOKE_FRAME)
- {
-#if FEATURE_FIXED_OUT_ARGS
- // Save the register in the reserved local var slot.
- getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_PINVOKE_FRAME,
- compiler->lvaPInvokeFrameRegSaveVar, REGSIZE_BYTES);
-#else
- inst_RV(INS_push, REG_PINVOKE_FRAME, TYP_I_IMPL);
-#endif
- }
- }
-
- if (varDsc->lvRegister)
- {
- reg = varDsc->lvRegNum;
- if (reg == reg2)
- reg2 = REG_PINVOKE_TCB;
-
- regTrashCheck |= genRegMask(reg2);
- }
- else
- {
- /* mov esi, [tcb address] */
-
- getEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL), EA_PTRSIZE, REG_PINVOKE_TCB, compiler->info.compLvFrameListRoot,
- 0);
- regTracker.rsTrackRegTrash(REG_PINVOKE_TCB);
- reg = REG_PINVOKE_TCB;
-
- regTrashCheck = RBM_PINVOKE_TCB | RBM_PINVOKE_FRAME;
- }
-
- /* mov edi, [ebp-frame.next] */
-
- getEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL), EA_PTRSIZE, reg2, compiler->lvaInlinedPInvokeFrameVar,
- pInfo->inlinedCallFrameInfo.offsetOfFrameLink);
- regTracker.rsTrackRegTrash(reg2);
-
- /* mov [esi+offsetOfThreadFrame], edi */
-
- getEmitter()->emitIns_AR_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, reg2, reg, pInfo->offsetOfThreadFrame);
-
- noway_assert(!(regSet.rsMaskUsed & regTrashCheck));
-
- if (compiler->genReturnLocal != BAD_VAR_NUM && compiler->lvaTable[compiler->genReturnLocal].lvTracked &&
- compiler->lvaTable[compiler->genReturnLocal].lvRegister)
- {
- // really make sure we're not clobbering compiler->genReturnLocal.
- noway_assert(
- !(genRegMask(compiler->lvaTable[compiler->genReturnLocal].lvRegNum) &
- ((varDsc->lvRegister ? genRegMask(varDsc->lvRegNum) : 0) | RBM_PINVOKE_TCB | RBM_PINVOKE_FRAME)));
- }
-
- (void)regTrashCheck;
-
- // Restore the registers ESI and EDI.
- if (compiler->compCurBB->bbFlags & BBF_HAS_JMP)
- {
- if (compiler->rpMaskPInvokeEpilogIntf & RBM_PINVOKE_FRAME)
- {
-#if FEATURE_FIXED_OUT_ARGS
- // Restore the register from the reserved local var slot.
- getEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL), EA_PTRSIZE, REG_PINVOKE_FRAME,
- compiler->lvaPInvokeFrameRegSaveVar, REGSIZE_BYTES);
-#else
- inst_RV(INS_pop, REG_PINVOKE_FRAME, TYP_I_IMPL);
-#endif
- regTracker.rsTrackRegTrash(REG_PINVOKE_FRAME);
- }
- if (compiler->rpMaskPInvokeEpilogIntf & RBM_PINVOKE_TCB)
- {
-#if FEATURE_FIXED_OUT_ARGS
- // Restore the register from the reserved local var slot.
- getEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL), EA_PTRSIZE, REG_PINVOKE_TCB,
- compiler->lvaPInvokeFrameRegSaveVar, 0);
-#else
- inst_RV(INS_pop, REG_PINVOKE_TCB, TYP_I_IMPL);
-#endif
- regTracker.rsTrackRegTrash(REG_PINVOKE_TCB);
- }
- }
- getEmitter()->emitEnableRandomNops();
-}
-
-/*****************************************************************************
- This function emits the call-site prolog for direct calls to unmanaged code.
- It does all the necessary setup of the InlinedCallFrame.
- frameListRoot specifies the local containing the thread control block.
- argSize or methodToken is the value to be copied into the m_datum
- field of the frame (methodToken may be indirected & have a reloc)
- The function returns the register now containing the thread control block,
- (it could be either enregistered or loaded into one of the scratch registers)
-*/
-
-regNumber CodeGen::genPInvokeCallProlog(LclVarDsc* frameListRoot,
- int argSize,
- CORINFO_METHOD_HANDLE methodToken,
- BasicBlock* returnLabel)
-{
- // Some stack locals might be 'cached' in registers, we need to trash them
- // from the regTracker *and* also ensure the gc tracker does not consider
- // them live (see the next assert). However, they might be live reg vars
- // that are non-pointers CSE'd from pointers.
- // That means the register will be live in rsMaskVars, so we can't just
- // call gcMarkSetNpt().
- {
- regMaskTP deadRegs = regTracker.rsTrashRegsForGCInterruptability() & ~RBM_ARG_REGS;
- gcInfo.gcRegGCrefSetCur &= ~deadRegs;
- gcInfo.gcRegByrefSetCur &= ~deadRegs;
-
-#ifdef DEBUG
- deadRegs &= regSet.rsMaskVars;
- if (deadRegs)
- {
- for (LclVarDsc* varDsc = compiler->lvaTable;
- ((varDsc < (compiler->lvaTable + compiler->lvaCount)) && deadRegs); varDsc++)
- {
- if (!varDsc->lvTracked || !varDsc->lvRegister)
- continue;
-
- if (!VarSetOps::IsMember(compiler, compiler->compCurLife, varDsc->lvVarIndex))
- continue;
-
- regMaskTP varRegMask = genRegMask(varDsc->lvRegNum);
- if (isRegPairType(varDsc->lvType) && varDsc->lvOtherReg != REG_STK)
- varRegMask |= genRegMask(varDsc->lvOtherReg);
-
- if (varRegMask & deadRegs)
- {
- // We found the enregistered var that should not be live if it
- // was a GC pointer.
- noway_assert(!varTypeIsGC(varDsc));
- deadRegs &= ~varRegMask;
- }
- }
- }
-#endif // DEBUG
- }
-
- /* Since we are using the InlinedCallFrame, we should have spilled all
- GC pointers to it - even from callee-saved registers */
-
- noway_assert(((gcInfo.gcRegGCrefSetCur | gcInfo.gcRegByrefSetCur) & ~RBM_ARG_REGS) == 0);
-
- /* must specify only one of these parameters */
- noway_assert((argSize == 0) || (methodToken == NULL));
-
- /* We are about to call unmanaged code directly.
- Before we can do that we have to emit the following sequence:
-
- mov dword ptr [frame.callTarget], MethodToken
- mov dword ptr [frame.callSiteTracker], esp
- mov reg, dword ptr [tcb_address]
- mov byte ptr [tcb+offsetOfGcState], 0
-
- */
-
- CORINFO_EE_INFO* pInfo = compiler->eeGetEEInfo();
-
- noway_assert(compiler->lvaInlinedPInvokeFrameVar != BAD_VAR_NUM);
-
- /* mov dword ptr [frame.callSiteTarget], value */
-
- if (methodToken == NULL)
- {
- /* mov dword ptr [frame.callSiteTarget], argSize */
- instGen_Store_Imm_Into_Lcl(TYP_INT, EA_4BYTE, argSize, compiler->lvaInlinedPInvokeFrameVar,
- pInfo->inlinedCallFrameInfo.offsetOfCallTarget);
- }
- else
- {
- void *embedMethHnd, *pEmbedMethHnd;
-
- embedMethHnd = (void*)compiler->info.compCompHnd->embedMethodHandle(methodToken, &pEmbedMethHnd);
-
- noway_assert((!embedMethHnd) != (!pEmbedMethHnd));
-
- if (embedMethHnd != NULL)
- {
- /* mov dword ptr [frame.callSiteTarget], "MethodDesc" */
-
- instGen_Store_Imm_Into_Lcl(TYP_I_IMPL, EA_HANDLE_CNS_RELOC, (ssize_t)embedMethHnd,
- compiler->lvaInlinedPInvokeFrameVar,
- pInfo->inlinedCallFrameInfo.offsetOfCallTarget);
- }
- else
- {
- /* mov reg, dword ptr [MethodDescIndir]
- mov dword ptr [frame.callSiteTarget], reg */
-
- regNumber reg = regSet.rsPickFreeReg();
-
-#if CPU_LOAD_STORE_ARCH
- instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, reg, (ssize_t)pEmbedMethHnd);
- getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), EA_PTRSIZE, reg, reg, 0);
-#else // !CPU_LOAD_STORE_ARCH
- getEmitter()->emitIns_R_AI(ins_Load(TYP_I_IMPL), EA_PTR_DSP_RELOC, reg, (ssize_t)pEmbedMethHnd);
-#endif // !CPU_LOAD_STORE_ARCH
- regTracker.rsTrackRegTrash(reg);
- getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, reg, compiler->lvaInlinedPInvokeFrameVar,
- pInfo->inlinedCallFrameInfo.offsetOfCallTarget);
- }
- }
-
- regNumber tcbReg = REG_NA;
-
- if (frameListRoot->lvRegister)
- {
- tcbReg = frameListRoot->lvRegNum;
- }
- else
- {
- tcbReg = regSet.rsGrabReg(RBM_ALLINT);
-
- /* mov reg, dword ptr [tcb address] */
-
- getEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL), EA_PTRSIZE, tcbReg,
- (unsigned)(frameListRoot - compiler->lvaTable), 0);
- regTracker.rsTrackRegTrash(tcbReg);
- }
-
-#ifdef _TARGET_X86_
- /* mov dword ptr [frame.callSiteTracker], esp */
-
- getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_SPBASE, compiler->lvaInlinedPInvokeFrameVar,
- pInfo->inlinedCallFrameInfo.offsetOfCallSiteSP);
-#endif // _TARGET_X86_
-
-#if CPU_LOAD_STORE_ARCH
- regNumber tmpReg = regSet.rsGrabReg(RBM_ALLINT & ~genRegMask(tcbReg));
- getEmitter()->emitIns_J_R(INS_adr, EA_PTRSIZE, returnLabel, tmpReg);
- regTracker.rsTrackRegTrash(tmpReg);
- getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, tmpReg, compiler->lvaInlinedPInvokeFrameVar,
- pInfo->inlinedCallFrameInfo.offsetOfReturnAddress);
-#else // !CPU_LOAD_STORE_ARCH
- /* mov dword ptr [frame.callSiteReturnAddress], label */
-
- getEmitter()->emitIns_J_S(ins_Store(TYP_I_IMPL), EA_PTRSIZE, returnLabel, compiler->lvaInlinedPInvokeFrameVar,
- pInfo->inlinedCallFrameInfo.offsetOfReturnAddress);
-#endif // !CPU_LOAD_STORE_ARCH
-
-#if CPU_LOAD_STORE_ARCH
- instGen_Set_Reg_To_Zero(EA_1BYTE, tmpReg);
-
- noway_assert(tmpReg != tcbReg);
-
- getEmitter()->emitIns_AR_R(ins_Store(TYP_BYTE), EA_1BYTE, tmpReg, tcbReg, pInfo->offsetOfGCState);
-#else // !CPU_LOAD_STORE_ARCH
- /* mov byte ptr [tcbReg+offsetOfGcState], 0 */
-
- getEmitter()->emitIns_I_AR(ins_Store(TYP_BYTE), EA_1BYTE, 0, tcbReg, pInfo->offsetOfGCState);
-#endif // !CPU_LOAD_STORE_ARCH
-
- return tcbReg;
-}
-
-/*****************************************************************************
- *
- First we have to mark in the hoisted NDirect stub that we are back
- in managed code. Then we have to check (a global flag) whether GC is
- pending or not. If so, we just call into a jit-helper.
- Right now we have this call always inlined, i.e. we always skip around
- the jit-helper call.
- Note:
- The tcb address is a regular local (initialized in the prolog), so it is either
- enregistered or in the frame:
-
- tcb_reg = [tcb_address is enregistered] OR [mov ecx, tcb_address]
- mov byte ptr[tcb_reg+offsetOfGcState], 1
- cmp 'global GC pending flag', 0
- je @f
- [mov ECX, tcb_reg] OR [ecx was setup above] ; we pass the tcb value to callGC
- [mov [EBP+spill_area+0], eax] ; spill the int return value if any
- [mov [EBP+spill_area+4], edx] ; spill the long return value if any
- call @callGC
- [mov eax, [EBP+spill_area+0] ] ; reload the int return value if any
- [mov edx, [EBP+spill_area+4] ] ; reload the long return value if any
- @f:
- */
-
-void CodeGen::genPInvokeCallEpilog(LclVarDsc* frameListRoot, regMaskTP retVal)
-{
- BasicBlock* clab_nostop;
- CORINFO_EE_INFO* pInfo = compiler->eeGetEEInfo();
- regNumber reg2;
- regNumber reg3;
-
-#ifdef _TARGET_ARM_
- reg3 = REG_R3;
-#else
- reg3 = REG_EDX;
-#endif
-
- getEmitter()->emitDisableRandomNops();
-
- if (frameListRoot->lvRegister)
- {
- /* make sure that register is live across the call */
-
- reg2 = frameListRoot->lvRegNum;
- noway_assert(genRegMask(reg2) & RBM_INT_CALLEE_SAVED);
- }
- else
- {
- /* mov reg2, dword ptr [tcb address] */
- CLANG_FORMAT_COMMENT_ANCHOR;
-
-#ifdef _TARGET_ARM_
- reg2 = REG_R2;
-#else
- reg2 = REG_ECX;
-#endif
-
- getEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL), EA_PTRSIZE, reg2,
- (unsigned)(frameListRoot - compiler->lvaTable), 0);
- regTracker.rsTrackRegTrash(reg2);
- }
-
-#ifdef _TARGET_ARM_
- /* mov r3, 1 */
- /* strb [r2+offsetOfGcState], r3 */
- instGen_Set_Reg_To_Imm(EA_PTRSIZE, reg3, 1);
- getEmitter()->emitIns_AR_R(ins_Store(TYP_BYTE), EA_1BYTE, reg3, reg2, pInfo->offsetOfGCState);
-#else
- /* mov byte ptr [tcb+offsetOfGcState], 1 */
- getEmitter()->emitIns_I_AR(ins_Store(TYP_BYTE), EA_1BYTE, 1, reg2, pInfo->offsetOfGCState);
-#endif
-
- /* test global flag (we return to managed code) */
-
- LONG *addrOfCaptureThreadGlobal, **pAddrOfCaptureThreadGlobal;
-
- addrOfCaptureThreadGlobal =
- compiler->info.compCompHnd->getAddrOfCaptureThreadGlobal((void**)&pAddrOfCaptureThreadGlobal);
- noway_assert((!addrOfCaptureThreadGlobal) != (!pAddrOfCaptureThreadGlobal));
-
- // Can we directly use addrOfCaptureThreadGlobal?
-
- if (addrOfCaptureThreadGlobal)
- {
-#ifdef _TARGET_ARM_
- instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, reg3, (ssize_t)addrOfCaptureThreadGlobal);
- getEmitter()->emitIns_R_R_I(ins_Load(TYP_INT), EA_4BYTE, reg3, reg3, 0);
- regTracker.rsTrackRegTrash(reg3);
- getEmitter()->emitIns_R_I(INS_cmp, EA_4BYTE, reg3, 0);
-#else
- getEmitter()->emitIns_C_I(INS_cmp, EA_PTR_DSP_RELOC, FLD_GLOBAL_DS, (ssize_t)addrOfCaptureThreadGlobal, 0);
-#endif
- }
- else
- {
-#ifdef _TARGET_ARM_
- instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, reg3, (ssize_t)pAddrOfCaptureThreadGlobal);
- getEmitter()->emitIns_R_R_I(ins_Load(TYP_INT), EA_4BYTE, reg3, reg3, 0);
- regTracker.rsTrackRegTrash(reg3);
- getEmitter()->emitIns_R_R_I(ins_Load(TYP_INT), EA_4BYTE, reg3, reg3, 0);
- getEmitter()->emitIns_R_I(INS_cmp, EA_4BYTE, reg3, 0);
-#else // !_TARGET_ARM_
-
- getEmitter()->emitIns_R_AI(ins_Load(TYP_I_IMPL), EA_PTR_DSP_RELOC, REG_ECX,
- (ssize_t)pAddrOfCaptureThreadGlobal);
- regTracker.rsTrackRegTrash(REG_ECX);
-
- getEmitter()->emitIns_I_AR(INS_cmp, EA_4BYTE, 0, REG_ECX, 0);
-
-#endif // !_TARGET_ARM_
- }
-
- /* */
- clab_nostop = genCreateTempLabel();
-
- /* Generate the conditional jump */
- emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED);
- inst_JMP(jmpEqual, clab_nostop);
-
-#ifdef _TARGET_ARM_
-// The helper preserves the return value on ARM
-#else
- /* save return value (if necessary) */
- if (retVal != RBM_NONE)
- {
- if (retVal == RBM_INTRET || retVal == RBM_LNGRET)
- {
- /* push eax */
-
- inst_RV(INS_push, REG_INTRET, TYP_INT);
-
- if (retVal == RBM_LNGRET)
- {
- /* push edx */
-
- inst_RV(INS_push, REG_EDX, TYP_INT);
- }
- }
- }
-#endif
-
- /* emit the call to the EE-helper that stops for GC (or other reasons) */
-
- genEmitHelperCall(CORINFO_HELP_STOP_FOR_GC, 0, /* argSize */
- EA_UNKNOWN); /* retSize */
-
-#ifdef _TARGET_ARM_
-// The helper preserves the return value on ARM
-#else
- /* restore return value (if necessary) */
-
- if (retVal != RBM_NONE)
- {
- if (retVal == RBM_INTRET || retVal == RBM_LNGRET)
- {
- if (retVal == RBM_LNGRET)
- {
- /* pop edx */
-
- inst_RV(INS_pop, REG_EDX, TYP_INT);
- regTracker.rsTrackRegTrash(REG_EDX);
- }
-
- /* pop eax */
-
- inst_RV(INS_pop, REG_INTRET, TYP_INT);
- regTracker.rsTrackRegTrash(REG_INTRET);
- }
- }
-#endif
-
- /* genCondJump() closes the current emitter block */
-
- genDefineTempLabel(clab_nostop);
-
- // This marks the InlinedCallFrame as "inactive". In fully interruptible code, this is not atomic with
- // the above code. So the process is:
- // 1) Return to cooperative mode
- // 2) Check to see if we need to stop for GC
- // 3) Return from the p/invoke (as far as the stack walker is concerned).
-
- /* mov dword ptr [frame.callSiteTracker], 0 */
-
- instGen_Store_Imm_Into_Lcl(TYP_I_IMPL, EA_PTRSIZE, 0, compiler->lvaInlinedPInvokeFrameVar,
- pInfo->inlinedCallFrameInfo.offsetOfReturnAddress);
-
- getEmitter()->emitEnableRandomNops();
-}
-
-/*****************************************************************************/
-
-/*****************************************************************************
-* TRACKING OF FLAGS
-*****************************************************************************/
-
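-/* The code generator remembers which register or local variable the condition flags
-   currently reflect (genFlagsEqReg / genFlagsEqVar), together with the emitter location
-   at which they were set, so that genFlagsAreReg / genFlagsAreVar can report whether the
-   zero and sign flags are still current and a redundant test or compare can be skipped. */
-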
-void CodeGen::genFlagsEqualToNone()
-{
- genFlagsEqReg = REG_NA;
- genFlagsEqVar = (unsigned)-1;
- genFlagsEqLoc.Init();
-}
-
-/*****************************************************************************
- *
- * Record the fact that the flags register has a value that reflects the
- * contents of the given register.
- */
-
-void CodeGen::genFlagsEqualToReg(GenTreePtr tree, regNumber reg)
-{
- genFlagsEqLoc.CaptureLocation(getEmitter());
- genFlagsEqReg = reg;
-
- /* previous setting of flags by a var becomes invalid */
-
- genFlagsEqVar = 0xFFFFFFFF;
-
- /* Set appropriate flags on the tree */
-
- if (tree)
- {
- tree->gtFlags |= GTF_ZSF_SET;
- assert(tree->gtSetFlags());
- }
-}
-
-/*****************************************************************************
- *
- * Record the fact that the flags register has a value that reflects the
- * contents of the given local variable.
- */
-
-void CodeGen::genFlagsEqualToVar(GenTreePtr tree, unsigned var)
-{
- genFlagsEqLoc.CaptureLocation(getEmitter());
- genFlagsEqVar = var;
-
- /* previous setting of flags by a register becomes invalid */
-
- genFlagsEqReg = REG_NA;
-
- /* Set appropriate flags on the tree */
-
- if (tree)
- {
- tree->gtFlags |= GTF_ZSF_SET;
- assert(tree->gtSetFlags());
- }
-}
-
-/*****************************************************************************
- *
- * Return an indication of whether the flags register is set to the current
- * value of the given register/variable. The return value is as follows:
- *
- * false .. nothing
- *  true .. the zero flag (ZF) and sign flag (SF) are set
- */
-
-bool CodeGen::genFlagsAreReg(regNumber reg)
-{
- if ((genFlagsEqReg == reg) && genFlagsEqLoc.IsCurrentLocation(getEmitter()))
- {
- return true;
- }
-
- return false;
-}
-
-bool CodeGen::genFlagsAreVar(unsigned var)
-{
- if ((genFlagsEqVar == var) && genFlagsEqLoc.IsCurrentLocation(getEmitter()))
- {
- return true;
- }
-
- return false;
-}
-
-/*****************************************************************************
- * This utility function returns true iff the execution path from "from"
- * (inclusive) to "to" (exclusive) contains a death of the given var
- */
-bool CodeGen::genContainsVarDeath(GenTreePtr from, GenTreePtr to, unsigned varNum)
-{
- GenTreePtr tree;
- for (tree = from; tree != NULL && tree != to; tree = tree->gtNext)
- {
- if (tree->IsLocal() && (tree->gtFlags & GTF_VAR_DEATH))
- {
- unsigned dyingVarNum = tree->gtLclVarCommon.gtLclNum;
- if (dyingVarNum == varNum)
- return true;
- LclVarDsc* varDsc = &(compiler->lvaTable[varNum]);
- if (varDsc->lvPromoted)
- {
- assert(varDsc->lvType == TYP_STRUCT);
- unsigned firstFieldNum = varDsc->lvFieldLclStart;
-                if (dyingVarNum >= firstFieldNum && dyingVarNum < firstFieldNum + varDsc->lvFieldCnt)
- {
- return true;
- }
- }
- }
- }
- assert(tree != NULL);
- return false;
-}
-
-#endif // LEGACY_BACKEND
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX CodeGenerator XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+#include "jitpch.h"
+#ifdef _MSC_VER
+#pragma hdrstop
+#endif
+#include "codegen.h"
+
+#ifdef LEGACY_BACKEND // This file is NOT used for the '!LEGACY_BACKEND' that uses the linear scan register allocator
+
+#ifdef _TARGET_AMD64_
+#error AMD64 must be !LEGACY_BACKEND
+#endif
+
+#ifdef _TARGET_ARM64_
+#error ARM64 must be !LEGACY_BACKEND
+#endif
+
+#include "gcinfo.h"
+#include "emit.h"
+
+#ifndef JIT32_GCENCODER
+#include "gcinfoencoder.h"
+#endif
+
+/*****************************************************************************
+ *
+ * Determine what variables die between beforeSet and afterSet, and
+ * update the liveness globals accordingly:
+ * compiler->compCurLife, gcInfo.gcVarPtrSetCur, regSet.rsMaskVars, gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur
+ */
+
+void CodeGen::genDyingVars(VARSET_VALARG_TP beforeSet, VARSET_VALARG_TP afterSet)
+{
+ unsigned varNum;
+ LclVarDsc* varDsc;
+ regMaskTP regBit;
+ VARSET_TP VARSET_INIT_NOCOPY(deadSet, VarSetOps::Diff(compiler, beforeSet, afterSet));
+
+ if (VarSetOps::IsEmpty(compiler, deadSet))
+ return;
+
+ /* iterate through the dead variables */
+
+ VARSET_ITER_INIT(compiler, iter, deadSet, varIndex);
+ while (iter.NextElem(compiler, &varIndex))
+ {
+ varNum = compiler->lvaTrackedToVarNum[varIndex];
+ varDsc = compiler->lvaTable + varNum;
+
+ /* Remove this variable from the 'deadSet' bit set */
+
+ noway_assert(VarSetOps::IsMember(compiler, compiler->compCurLife, varIndex));
+
+ VarSetOps::RemoveElemD(compiler, compiler->compCurLife, varIndex);
+
+ noway_assert(!VarSetOps::IsMember(compiler, gcInfo.gcTrkStkPtrLcls, varIndex) ||
+ VarSetOps::IsMember(compiler, gcInfo.gcVarPtrSetCur, varIndex));
+
+ VarSetOps::RemoveElemD(compiler, gcInfo.gcVarPtrSetCur, varIndex);
+
+ /* We are done if the variable is not enregistered */
+
+ if (!varDsc->lvRegister)
+ {
+#ifdef DEBUG
+ if (compiler->verbose)
+ {
+ printf("\t\t\t\t\t\t\tV%02u,T%02u is a dyingVar\n", varNum, varDsc->lvVarIndex);
+ }
+#endif
+ continue;
+ }
+
+#if !FEATURE_FP_REGALLOC
+ // We don't do FP-enreg of vars whose liveness changes in GTF_COLON_COND
+ if (!varDsc->IsFloatRegType())
+#endif
+ {
+ /* Get hold of the appropriate register bit(s) */
+
+ if (varTypeIsFloating(varDsc->TypeGet()))
+ {
+ regBit = genRegMaskFloat(varDsc->lvRegNum, varDsc->TypeGet());
+ }
+ else
+ {
+ regBit = genRegMask(varDsc->lvRegNum);
+ if (isRegPairType(varDsc->lvType) && varDsc->lvOtherReg != REG_STK)
+ regBit |= genRegMask(varDsc->lvOtherReg);
+ }
+
+#ifdef DEBUG
+ if (compiler->verbose)
+ {
+ printf("\t\t\t\t\t\t\tV%02u,T%02u in reg %s is a dyingVar\n", varNum, varDsc->lvVarIndex,
+ compiler->compRegVarName(varDsc->lvRegNum));
+ }
+#endif
+ noway_assert((regSet.rsMaskVars & regBit) != 0);
+
+ regSet.RemoveMaskVars(regBit);
+
+ // Remove GC tracking if any for this register
+
+ if ((regBit & regSet.rsMaskUsed) == 0) // The register may be multi-used
+ gcInfo.gcMarkRegSetNpt(regBit);
+ }
+ }
+}
+
+/*****************************************************************************
+ *
+ * Change the given enregistered local variable node to a register variable node
+ */
+
+void CodeGenInterface::genBashLclVar(GenTreePtr tree, unsigned varNum, LclVarDsc* varDsc)
+{
+ noway_assert(tree->gtOper == GT_LCL_VAR);
+ noway_assert(varDsc->lvRegister);
+
+ if (isRegPairType(varDsc->lvType))
+ {
+ /* Check for the case of a variable that was narrowed to an int */
+
+ if (isRegPairType(tree->gtType))
+ {
+ genMarkTreeInRegPair(tree, gen2regs2pair(varDsc->lvRegNum, varDsc->lvOtherReg));
+ return;
+ }
+
+ noway_assert(tree->gtFlags & GTF_VAR_CAST);
+ noway_assert(tree->gtType == TYP_INT);
+ }
+ else
+ {
+ noway_assert(!isRegPairType(tree->gtType));
+ }
+
+ /* It's a register variable -- modify the node */
+
+ unsigned livenessFlags = (tree->gtFlags & GTF_LIVENESS_MASK);
+
+ ValueNumPair vnp = tree->gtVNPair; // Save the ValueNumPair
+ tree->SetOper(GT_REG_VAR);
+ tree->gtVNPair = vnp; // Preserve the ValueNumPair, as SetOper will clear it.
+
+ tree->gtFlags |= livenessFlags;
+ tree->gtFlags |= GTF_REG_VAL;
+ tree->gtRegNum = varDsc->lvRegNum;
+ tree->gtRegVar.gtRegNum = varDsc->lvRegNum;
+ tree->gtRegVar.SetLclNum(varNum);
+}
+
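+// saveLiveness / restoreLiveness / checkLiveness snapshot, restore, and assert-check the
+// five liveness globals (compCurLife, gcVarPtrSetCur, rsMaskVars, gcRegGCrefSetCur and
+// gcRegByrefSetCur), so a stretch of codegen can be evaluated and the liveness state
+// rolled back (or verified unchanged) afterwards.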
+// inline
+void CodeGen::saveLiveness(genLivenessSet* ls)
+{
+ VarSetOps::Assign(compiler, ls->liveSet, compiler->compCurLife);
+ VarSetOps::Assign(compiler, ls->varPtrSet, gcInfo.gcVarPtrSetCur);
+ ls->maskVars = (regMaskSmall)regSet.rsMaskVars;
+ ls->gcRefRegs = (regMaskSmall)gcInfo.gcRegGCrefSetCur;
+ ls->byRefRegs = (regMaskSmall)gcInfo.gcRegByrefSetCur;
+}
+
+// inline
+void CodeGen::restoreLiveness(genLivenessSet* ls)
+{
+ VarSetOps::Assign(compiler, compiler->compCurLife, ls->liveSet);
+ VarSetOps::Assign(compiler, gcInfo.gcVarPtrSetCur, ls->varPtrSet);
+ regSet.rsMaskVars = ls->maskVars;
+ gcInfo.gcRegGCrefSetCur = ls->gcRefRegs;
+ gcInfo.gcRegByrefSetCur = ls->byRefRegs;
+}
+
+// inline
+void CodeGen::checkLiveness(genLivenessSet* ls)
+{
+ assert(VarSetOps::Equal(compiler, compiler->compCurLife, ls->liveSet));
+ assert(VarSetOps::Equal(compiler, gcInfo.gcVarPtrSetCur, ls->varPtrSet));
+ assert(regSet.rsMaskVars == ls->maskVars);
+ assert(gcInfo.gcRegGCrefSetCur == ls->gcRefRegs);
+ assert(gcInfo.gcRegByrefSetCur == ls->byRefRegs);
+}
+
+// inline
+bool CodeGenInterface::genMarkLclVar(GenTreePtr tree)
+{
+ unsigned varNum;
+ LclVarDsc* varDsc;
+
+ assert(tree->gtOper == GT_LCL_VAR);
+
+ /* Does the variable live in a register? */
+
+ varNum = tree->gtLclVarCommon.gtLclNum;
+ assert(varNum < compiler->lvaCount);
+ varDsc = compiler->lvaTable + varNum;
+
+ if (varDsc->lvRegister)
+ {
+ genBashLclVar(tree, varNum, varDsc);
+ return true;
+ }
+ else
+ {
+ return false;
+ }
+}
+
+// inline
+GenTreePtr CodeGen::genGetAddrModeBase(GenTreePtr tree)
+{
+ bool rev;
+ unsigned mul;
+ unsigned cns;
+ GenTreePtr adr;
+ GenTreePtr idx;
+
+ if (genCreateAddrMode(tree, // address
+ 0, // mode
+ false, // fold
+ RBM_NONE, // reg mask
+ &rev, // reverse ops
+ &adr, // base addr
+ &idx, // index val
+#if SCALED_ADDR_MODES
+ &mul, // scaling
+#endif
+ &cns, // displacement
+ true)) // don't generate code
+ return adr;
+ else
+ return NULL;
+}
+
+// inline
+void CodeGen::genSinglePush()
+{
+ genStackLevel += sizeof(void*);
+}
+
+// inline
+void CodeGen::genSinglePop()
+{
+ genStackLevel -= sizeof(void*);
+}
+
+#if FEATURE_STACK_FP_X87
+// inline
+void CodeGenInterface::genResetFPstkLevel(unsigned newValue /* = 0 */)
+{
+ genFPstkLevel = newValue;
+}
+
+// inline
+unsigned CodeGenInterface::genGetFPstkLevel()
+{
+ return genFPstkLevel;
+}
+
+// inline
+void CodeGenInterface::genIncrementFPstkLevel(unsigned inc /* = 1 */)
+{
+ noway_assert((inc == 0) || genFPstkLevel + inc > genFPstkLevel);
+ genFPstkLevel += inc;
+}
+
+// inline
+void CodeGenInterface::genDecrementFPstkLevel(unsigned dec /* = 1 */)
+{
+ noway_assert((dec == 0) || genFPstkLevel - dec < genFPstkLevel);
+ genFPstkLevel -= dec;
+}
+
+#endif // FEATURE_STACK_FP_X87
+
+/*****************************************************************************
+ *
+ * Generate code that will set the given register to the integer constant.
+ */
+
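+// The register tracker is consulted first: if some register already holds 'val', or a
+// value a small delta away from it, the constant is materialized with a mov/lea/add
+// instead of a full immediate load.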
+void CodeGen::genSetRegToIcon(regNumber reg, ssize_t val, var_types type, insFlags flags)
+{
+ noway_assert(type != TYP_REF || val == NULL);
+
+ /* Does the reg already hold this constant? */
+
+ if (!regTracker.rsIconIsInReg(val, reg))
+ {
+ if (val == 0)
+ {
+ instGen_Set_Reg_To_Zero(emitActualTypeSize(type), reg, flags);
+ }
+#ifdef _TARGET_ARM_
+ // If we can set a register to a constant with a small encoding, then do that.
+ else if (arm_Valid_Imm_For_Small_Mov(reg, val, flags))
+ {
+ instGen_Set_Reg_To_Imm(emitActualTypeSize(type), reg, val, flags);
+ }
+#endif
+ else
+ {
+ /* See if a register holds the value or a close value? */
+ bool constantLoaded = false;
+ ssize_t delta;
+ regNumber srcReg = regTracker.rsIconIsInReg(val, &delta);
+
+ if (srcReg != REG_NA)
+ {
+ if (delta == 0)
+ {
+ inst_RV_RV(INS_mov, reg, srcReg, type, emitActualTypeSize(type), flags);
+ constantLoaded = true;
+ }
+ else
+ {
+#if defined(_TARGET_XARCH_)
+ /* delta should fit inside a byte */
+ if (delta == (signed char)delta)
+ {
+ /* use an lea instruction to set reg */
+ getEmitter()->emitIns_R_AR(INS_lea, emitTypeSize(type), reg, srcReg, (int)delta);
+ constantLoaded = true;
+ }
+#elif defined(_TARGET_ARM_)
+ /* We found a register 'regS' that has the value we need, modulo a small delta.
+ That is, the value we need is 'regS + delta'.
+                       We want to generate one of the following instructions, listed in order of preference:
+
+ adds regD, delta ; 2 bytes. if regD == regS, regD is a low register, and
+ 0<=delta<=255
+ subs regD, delta ; 2 bytes. if regD == regS, regD is a low register, and
+ -255<=delta<=0
+ adds regD, regS, delta ; 2 bytes. if regD and regS are low registers and 0<=delta<=7
+ subs regD, regS, delta ; 2 bytes. if regD and regS are low registers and -7<=delta<=0
+ mov regD, icon ; 4 bytes. icon is a wacky Thumb 12-bit immediate.
+ movw regD, icon ; 4 bytes. 0<=icon<=65535
+ add.w regD, regS, delta ; 4 bytes. delta is a wacky Thumb 12-bit immediate.
+ sub.w regD, regS, delta ; 4 bytes. delta is a wacky Thumb 12-bit immediate.
+ addw regD, regS, delta ; 4 bytes. 0<=delta<=4095
+ subw regD, regS, delta ; 4 bytes. -4095<=delta<=0
+
+ If it wasn't for the desire to generate the "mov reg,icon" forms if possible (and no bigger
+ than necessary), this would be a lot simpler. Note that we might set the overflow flag: we
+ can have regS containing the largest signed int 0x7fffffff and need the smallest signed int
+ 0x80000000. In this case, delta will be 1.
+ */
+
+ bool useAdd = false;
+ regMaskTP regMask = genRegMask(reg);
+ regMaskTP srcRegMask = genRegMask(srcReg);
+
+ if ((flags != INS_FLAGS_NOT_SET) && (reg == srcReg) && (regMask & RBM_LOW_REGS) &&
+ (unsigned_abs(delta) <= 255))
+ {
+ useAdd = true;
+ }
+ else if ((flags != INS_FLAGS_NOT_SET) && (regMask & RBM_LOW_REGS) && (srcRegMask & RBM_LOW_REGS) &&
+ (unsigned_abs(delta) <= 7))
+ {
+ useAdd = true;
+ }
+ else if (arm_Valid_Imm_For_Mov(val))
+ {
+ // fall through to general "!constantLoaded" case below
+ }
+ else if (arm_Valid_Imm_For_Add(delta, flags))
+ {
+ useAdd = true;
+ }
+
+ if (useAdd)
+ {
+ getEmitter()->emitIns_R_R_I(INS_add, EA_4BYTE, reg, srcReg, delta, flags);
+ constantLoaded = true;
+ }
+#else
+ assert(!"Codegen missing");
+#endif
+ }
+ }
+
+ if (!constantLoaded) // Have we loaded it yet?
+ {
+#ifdef _TARGET_X86_
+ if (val == -1)
+ {
+ /* or reg,-1 takes 3 bytes */
+ inst_RV_IV(INS_OR, reg, val, emitActualTypeSize(type));
+ }
+ else
+ /* For SMALL_CODE it is smaller to push a small immediate and
+ then pop it into the dest register */
+ if ((compiler->compCodeOpt() == Compiler::SMALL_CODE) && val == (signed char)val)
+ {
+ /* "mov" has no s(sign)-bit and so always takes 6 bytes,
+ whereas push+pop takes 2+1 bytes */
+
+ inst_IV(INS_push, val);
+ genSinglePush();
+
+ inst_RV(INS_pop, reg, type);
+ genSinglePop();
+ }
+ else
+#endif // _TARGET_X86_
+ {
+ instGen_Set_Reg_To_Imm(emitActualTypeSize(type), reg, val, flags);
+ }
+ }
+ }
+ }
+ regTracker.rsTrackRegIntCns(reg, val);
+ gcInfo.gcMarkRegPtrVal(reg, type);
+}
+
+/*****************************************************************************
+ *
+ * Find an existing register set to the given integer constant, or
+ * pick a register and generate code that will set it to the integer constant.
+ *
+ * If no existing register is set to the constant, it will use regSet.rsPickReg(regBest)
+ * to pick some register to set. NOTE that this means the returned regNumber
+ * might *not* be in regBest. It also implies that you should lock any registers
+ * you don't want spilled (not just mark as used).
+ *
+ */
+
+regNumber CodeGen::genGetRegSetToIcon(ssize_t val, regMaskTP regBest /* = 0 */, var_types type /* = TYP_INT */)
+{
+ regNumber regCns;
+#if REDUNDANT_LOAD
+
+    // Is there already a register holding this constant that we can use?
+ regCns = regTracker.rsIconIsInReg(val);
+
+ if (regCns == REG_NA)
+#endif
+ {
+ // If not, grab a register to hold the constant, preferring
+ // any register besides RBM_TMP_0 so it can hopefully be re-used
+ regCns = regSet.rsPickReg(regBest, regBest & ~RBM_TMP_0);
+
+ // Now set the constant
+ genSetRegToIcon(regCns, val, type);
+ }
+
+    // NOTE: there is no guarantee that regCns is in regBest's mask
+ return regCns;
+}
+
+/*****************************************************************************/
+/*****************************************************************************
+ *
+ * Add the given constant to the specified register.
+ * 'tree' is the resulting tree
+ */
+
+void CodeGen::genIncRegBy(regNumber reg, ssize_t ival, GenTreePtr tree, var_types dstType, bool ovfl)
+{
+ bool setFlags = (tree != NULL) && tree->gtSetFlags();
+
+#ifdef _TARGET_XARCH_
+ /* First check to see if we can generate inc or dec instruction(s) */
+ /* But avoid inc/dec on P4 in general for fast code or inside loops for blended code */
+ if (!ovfl && !compiler->optAvoidIncDec(compiler->compCurBB->getBBWeight(compiler)))
+ {
+ emitAttr size = emitTypeSize(dstType);
+
+ switch (ival)
+ {
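+            // Note: ival == +2/-2 emits two inc/dec instructions by falling through
+            // to the +1/-1 case below.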
+ case 2:
+ inst_RV(INS_inc, reg, dstType, size);
+ __fallthrough;
+ case 1:
+ inst_RV(INS_inc, reg, dstType, size);
+
+ goto UPDATE_LIVENESS;
+
+ case -2:
+ inst_RV(INS_dec, reg, dstType, size);
+ __fallthrough;
+ case -1:
+ inst_RV(INS_dec, reg, dstType, size);
+
+ goto UPDATE_LIVENESS;
+ }
+ }
+#endif
+
+ insFlags flags = setFlags ? INS_FLAGS_SET : INS_FLAGS_DONT_CARE;
+ inst_RV_IV(INS_add, reg, ival, emitActualTypeSize(dstType), flags);
+
+#ifdef _TARGET_XARCH_
+UPDATE_LIVENESS:
+#endif
+
+ if (setFlags)
+ genFlagsEqualToReg(tree, reg);
+
+ regTracker.rsTrackRegTrash(reg);
+
+ gcInfo.gcMarkRegSetNpt(genRegMask(reg));
+
+ if (tree != NULL)
+ {
+ if (!tree->OperIsAssignment())
+ {
+ genMarkTreeInReg(tree, reg);
+ if (varTypeIsGC(tree->TypeGet()))
+ gcInfo.gcMarkRegSetByref(genRegMask(reg));
+ }
+ }
+}
+
+/*****************************************************************************
+ *
+ * Subtract the given constant from the specified register.
+ *  Should only be used for unsigned sub with overflow. Otherwise,
+ *  genIncRegBy() can be used with -ival. We shouldn't use genIncRegBy()
+ * for these cases as the flags are set differently, and the following
+ * check for overflow won't work correctly.
+ * 'tree' is the resulting tree.
+ */
+
+void CodeGen::genDecRegBy(regNumber reg, ssize_t ival, GenTreePtr tree)
+{
+ noway_assert((tree->gtFlags & GTF_OVERFLOW) &&
+ ((tree->gtFlags & GTF_UNSIGNED) || ival == ((tree->gtType == TYP_INT) ? INT32_MIN : SSIZE_T_MIN)));
+ noway_assert(tree->gtType == TYP_INT || tree->gtType == TYP_I_IMPL);
+
+ regTracker.rsTrackRegTrash(reg);
+
+ noway_assert(!varTypeIsGC(tree->TypeGet()));
+ gcInfo.gcMarkRegSetNpt(genRegMask(reg));
+
+ insFlags flags = tree->gtSetFlags() ? INS_FLAGS_SET : INS_FLAGS_DONT_CARE;
+ inst_RV_IV(INS_sub, reg, ival, emitActualTypeSize(tree->TypeGet()), flags);
+
+ if (tree->gtSetFlags())
+ genFlagsEqualToReg(tree, reg);
+
+ if (tree)
+ {
+ genMarkTreeInReg(tree, reg);
+ }
+}
+
+/*****************************************************************************
+ *
+ * Multiply the specified register by the given value.
+ * 'tree' is the resulting tree
+ */
+
+void CodeGen::genMulRegBy(regNumber reg, ssize_t ival, GenTreePtr tree, var_types dstType, bool ovfl)
+{
+ noway_assert(genActualType(dstType) == TYP_INT || genActualType(dstType) == TYP_I_IMPL);
+
+ regTracker.rsTrackRegTrash(reg);
+
+ if (tree)
+ {
+ genMarkTreeInReg(tree, reg);
+ }
+
+ bool use_shift = false;
+ unsigned shift_by = 0;
+
+ if ((dstType >= TYP_INT) && !ovfl && (ival > 0) && ((ival & (ival - 1)) == 0))
+ {
+ use_shift = true;
+ BitScanForwardPtr((ULONG*)&shift_by, (ULONG)ival);
+ }
+
+ if (use_shift)
+ {
+ if (shift_by != 0)
+ {
+ insFlags flags = tree->gtSetFlags() ? INS_FLAGS_SET : INS_FLAGS_DONT_CARE;
+ inst_RV_SH(INS_SHIFT_LEFT_LOGICAL, emitTypeSize(dstType), reg, shift_by, flags);
+ if (tree->gtSetFlags())
+ genFlagsEqualToReg(tree, reg);
+ }
+ }
+ else
+ {
+ instruction ins;
+#ifdef _TARGET_XARCH_
+ ins = getEmitter()->inst3opImulForReg(reg);
+#else
+ ins = INS_mul;
+#endif
+
+ inst_RV_IV(ins, reg, ival, emitActualTypeSize(dstType));
+ }
+}
+
+/*****************************************************************************/
+/*****************************************************************************/
+/*****************************************************************************
+ *
+ * Compute the value 'tree' into a register that's in 'needReg'
+ * (or any free register if 'needReg' is RBM_NONE).
+ *
+ * Note that 'needReg' is just a recommendation unless mustReg==RegSet::EXACT_REG.
+ * If keepReg==RegSet::KEEP_REG, we mark the register as being used.
+ *
+ * If you require that the register returned is trashable, pass true for 'freeOnly'.
+ */
+
+void CodeGen::genComputeReg(
+ GenTreePtr tree, regMaskTP needReg, RegSet::ExactReg mustReg, RegSet::KeepReg keepReg, bool freeOnly)
+{
+ noway_assert(tree->gtType != TYP_VOID);
+
+ regNumber reg;
+ regNumber rg2;
+
+#if FEATURE_STACK_FP_X87
+ noway_assert(genActualType(tree->gtType) == TYP_INT || genActualType(tree->gtType) == TYP_I_IMPL ||
+ genActualType(tree->gtType) == TYP_REF || tree->gtType == TYP_BYREF);
+#elif defined(_TARGET_ARM_)
+ noway_assert(genActualType(tree->gtType) == TYP_INT || genActualType(tree->gtType) == TYP_I_IMPL ||
+ genActualType(tree->gtType) == TYP_REF || tree->gtType == TYP_BYREF ||
+ genActualType(tree->gtType) == TYP_FLOAT || genActualType(tree->gtType) == TYP_DOUBLE ||
+ genActualType(tree->gtType) == TYP_STRUCT);
+#else
+ noway_assert(genActualType(tree->gtType) == TYP_INT || genActualType(tree->gtType) == TYP_I_IMPL ||
+ genActualType(tree->gtType) == TYP_REF || tree->gtType == TYP_BYREF ||
+ genActualType(tree->gtType) == TYP_FLOAT || genActualType(tree->gtType) == TYP_DOUBLE);
+#endif
+
+ /* Generate the value, hopefully into the right register */
+
+ genCodeForTree(tree, needReg);
+ noway_assert(tree->gtFlags & GTF_REG_VAL);
+
+ // There is a workaround in genCodeForTreeLng() that changes the type of the
+ // tree of a GT_MUL with 64 bit result to TYP_INT from TYP_LONG, then calls
+ // genComputeReg(). genCodeForTree(), above, will put the result in gtRegPair for ARM,
+ // or leave it in EAX/EDX for x86, but only set EAX as gtRegNum. There's no point
+ // running the rest of this code, because anything looking at gtRegNum on ARM or
+ // attempting to move from EAX/EDX will be wrong.
+ if ((tree->OperGet() == GT_MUL) && (tree->gtFlags & GTF_MUL_64RSLT))
+ goto REG_OK;
+
+ reg = tree->gtRegNum;
+
+ /* Did the value end up in an acceptable register? */
+
+ if ((mustReg == RegSet::EXACT_REG) && needReg && !(genRegMask(reg) & needReg))
+ {
+ /* Not good enough to satisfy the caller's orders */
+
+ if (varTypeIsFloating(tree))
+ {
+ RegSet::RegisterPreference pref(needReg, RBM_NONE);
+ rg2 = regSet.PickRegFloat(tree->TypeGet(), &pref);
+ }
+ else
+ {
+ rg2 = regSet.rsGrabReg(needReg);
+ }
+ }
+ else
+ {
+ /* Do we have to end up with a free register? */
+
+ if (!freeOnly)
+ goto REG_OK;
+
+ /* Did we luck out and the value got computed into an unused reg? */
+
+ if (genRegMask(reg) & regSet.rsRegMaskFree())
+ goto REG_OK;
+
+ /* Register already in use, so spill previous value */
+
+ if ((mustReg == RegSet::EXACT_REG) && needReg && (genRegMask(reg) & needReg))
+ {
+ rg2 = regSet.rsGrabReg(needReg);
+ if (rg2 == reg)
+ {
+ gcInfo.gcMarkRegPtrVal(reg, tree->TypeGet());
+ tree->gtRegNum = reg;
+ goto REG_OK;
+ }
+ }
+ else
+ {
+ /* OK, let's find a trashable home for the value */
+
+ regMaskTP rv1RegUsed;
+
+ regSet.rsLockReg(genRegMask(reg), &rv1RegUsed);
+ rg2 = regSet.rsPickReg(needReg);
+ regSet.rsUnlockReg(genRegMask(reg), rv1RegUsed);
+ }
+ }
+
+ noway_assert(reg != rg2);
+
+ /* Update the value in the target register */
+
+ regTracker.rsTrackRegCopy(rg2, reg);
+
+ inst_RV_RV(ins_Copy(tree->TypeGet()), rg2, reg, tree->TypeGet());
+
+ /* The value has been transferred to 'reg' */
+
+ if ((genRegMask(reg) & regSet.rsMaskUsed) == 0)
+ gcInfo.gcMarkRegSetNpt(genRegMask(reg));
+
+ gcInfo.gcMarkRegPtrVal(rg2, tree->TypeGet());
+
+ /* The value is now in an appropriate register */
+
+ tree->gtRegNum = rg2;
+
+REG_OK:
+
+ /* Does the caller want us to mark the register as used? */
+
+ if (keepReg == RegSet::KEEP_REG)
+ {
+ /* In case we're computing a value into a register variable */
+
+ genUpdateLife(tree);
+
+ /* Mark the register as 'used' */
+
+ regSet.rsMarkRegUsed(tree);
+ }
+}
+
+/*****************************************************************************
+ *
+ * Same as genComputeReg(), the only difference being that the result is
+ * guaranteed to end up in a trashable register.
+ */
+
+// inline
+void CodeGen::genCompIntoFreeReg(GenTreePtr tree, regMaskTP needReg, RegSet::KeepReg keepReg)
+{
+ genComputeReg(tree, needReg, RegSet::ANY_REG, keepReg, true);
+}
+
+/*****************************************************************************
+ *
+ * The value 'tree' was earlier computed into a register; free up that
+ * register (but also make sure the value is presently in a register).
+ */
+
+void CodeGen::genReleaseReg(GenTreePtr tree)
+{
+ if (tree->gtFlags & GTF_SPILLED)
+ {
+ /* The register has been spilled -- reload it */
+
+ regSet.rsUnspillReg(tree, 0, RegSet::FREE_REG);
+ return;
+ }
+
+ regSet.rsMarkRegFree(genRegMask(tree->gtRegNum));
+}
+
+/*****************************************************************************
+ *
+ * The value 'tree' was earlier computed into a register. Check whether that
+ * register has been spilled (and reload it if so), and if 'keepReg' is RegSet::FREE_REG,
+ * free the register. The caller shouldn't need to be setting GCness of the register
+ * where tree will be recovered to, so we disallow keepReg==RegSet::FREE_REG for GC type trees.
+ */
+
+void CodeGen::genRecoverReg(GenTreePtr tree, regMaskTP needReg, RegSet::KeepReg keepReg)
+{
+ if (tree->gtFlags & GTF_SPILLED)
+ {
+ /* The register has been spilled -- reload it */
+
+ regSet.rsUnspillReg(tree, needReg, keepReg);
+ return;
+ }
+ else if (needReg && (needReg & genRegMask(tree->gtRegNum)) == 0)
+ {
+ /* We need the tree in another register. So move it there */
+
+ noway_assert(tree->gtFlags & GTF_REG_VAL);
+ regNumber oldReg = tree->gtRegNum;
+
+ /* Pick an acceptable register */
+
+ regNumber reg = regSet.rsGrabReg(needReg);
+
+ /* Copy the value */
+
+ inst_RV_RV(INS_mov, reg, oldReg, tree->TypeGet());
+ tree->gtRegNum = reg;
+
+ gcInfo.gcMarkRegPtrVal(tree);
+ regSet.rsMarkRegUsed(tree);
+ regSet.rsMarkRegFree(oldReg, tree);
+
+ regTracker.rsTrackRegCopy(reg, oldReg);
+ }
+
+ /* Free the register if the caller desired so */
+
+ if (keepReg == RegSet::FREE_REG)
+ {
+ regSet.rsMarkRegFree(genRegMask(tree->gtRegNum));
+ // Can't use RegSet::FREE_REG on a GC type
+ noway_assert(!varTypeIsGC(tree->gtType));
+ }
+ else
+ {
+ noway_assert(regSet.rsMaskUsed & genRegMask(tree->gtRegNum));
+ }
+}
+
+/*****************************************************************************
+ *
+ *  Move one half of a register pair to its new register (one half of the new pair).
+ */
+
+// inline
+void CodeGen::genMoveRegPairHalf(GenTreePtr tree, regNumber dst, regNumber src, int off)
+{
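+    // 'src' may be REG_STK, in which case that half of the value still lives in the
+    // local variable's stack home and is loaded from there at offset 'off'.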
+ if (src == REG_STK)
+ {
+ // handle long to unsigned long overflow casts
+ while (tree->gtOper == GT_CAST)
+ {
+ noway_assert(tree->gtType == TYP_LONG);
+ tree = tree->gtCast.CastOp();
+ }
+ noway_assert(tree->gtEffectiveVal()->gtOper == GT_LCL_VAR);
+ noway_assert(tree->gtType == TYP_LONG);
+ inst_RV_TT(ins_Load(TYP_INT), dst, tree, off);
+ regTracker.rsTrackRegTrash(dst);
+ }
+ else
+ {
+ regTracker.rsTrackRegCopy(dst, src);
+ inst_RV_RV(INS_mov, dst, src, TYP_INT);
+ }
+}
+
+/*****************************************************************************
+ *
+ * The given long value is in a register pair, but it's not an acceptable
+ * one. We have to move the value into a register pair in 'needReg' (if
+ * non-zero) or the pair 'newPair' (when 'newPair != REG_PAIR_NONE').
+ *
+ * Important note: if 'needReg' is non-zero, we assume the current pair
+ * has not been marked as free. If, OTOH, 'newPair' is specified, we
+ * assume that the current register pair is marked as used and free it.
+ */
+
+void CodeGen::genMoveRegPair(GenTreePtr tree, regMaskTP needReg, regPairNo newPair)
+{
+ regPairNo oldPair;
+
+ regNumber oldLo;
+ regNumber oldHi;
+ regNumber newLo;
+ regNumber newHi;
+
+ /* Either a target set or a specific pair may be requested */
+
+ noway_assert((needReg != 0) != (newPair != REG_PAIR_NONE));
+
+ /* Get hold of the current pair */
+
+ oldPair = tree->gtRegPair;
+ noway_assert(oldPair != newPair);
+
+ /* Are we supposed to move to a specific pair? */
+
+ if (newPair != REG_PAIR_NONE)
+ {
+ regMaskTP oldMask = genRegPairMask(oldPair);
+ regMaskTP loMask = genRegMask(genRegPairLo(newPair));
+ regMaskTP hiMask = genRegMask(genRegPairHi(newPair));
+ regMaskTP overlap = oldMask & (loMask | hiMask);
+
+ /* First lock any registers that are in both pairs */
+
+ noway_assert((regSet.rsMaskUsed & overlap) == overlap);
+ noway_assert((regSet.rsMaskLock & overlap) == 0);
+ regSet.rsMaskLock |= overlap;
+
+ /* Make sure any additional registers we need are free */
+
+ if ((loMask & regSet.rsMaskUsed) != 0 && (loMask & oldMask) == 0)
+ {
+ regSet.rsGrabReg(loMask);
+ }
+
+ if ((hiMask & regSet.rsMaskUsed) != 0 && (hiMask & oldMask) == 0)
+ {
+ regSet.rsGrabReg(hiMask);
+ }
+
+ /* Unlock those registers we have temporarily locked */
+
+ noway_assert((regSet.rsMaskUsed & overlap) == overlap);
+ noway_assert((regSet.rsMaskLock & overlap) == overlap);
+ regSet.rsMaskLock -= overlap;
+
+ /* We can now free the old pair */
+
+ regSet.rsMarkRegFree(oldMask);
+ }
+ else
+ {
+ /* Pick the new pair based on the caller's stated preference */
+
+ newPair = regSet.rsGrabRegPair(needReg);
+ }
+
+ // If grabbed pair is the same as old one we're done
+ if (newPair == oldPair)
+ {
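+        // The comma expressions in the assert below only initialize the locals it uses;
+        // the check is that neither half of the (unchanged) pair lives on the stack.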
+ noway_assert((oldLo = genRegPairLo(oldPair), oldHi = genRegPairHi(oldPair), newLo = genRegPairLo(newPair),
+ newHi = genRegPairHi(newPair), newLo != REG_STK && newHi != REG_STK));
+ return;
+ }
+
+ /* Move the values from the old pair into the new one */
+
+ oldLo = genRegPairLo(oldPair);
+ oldHi = genRegPairHi(oldPair);
+ newLo = genRegPairLo(newPair);
+ newHi = genRegPairHi(newPair);
+
+ noway_assert(newLo != REG_STK && newHi != REG_STK);
+
+ /* Careful - the register pairs might overlap */
+
+ if (newLo == oldLo)
+ {
+ /* The low registers are identical, just move the upper half */
+
+ noway_assert(newHi != oldHi);
+ genMoveRegPairHalf(tree, newHi, oldHi, sizeof(int));
+ }
+ else
+ {
+ /* The low registers are different, are the upper ones the same? */
+
+ if (newHi == oldHi)
+ {
+ /* Just move the lower half, then */
+ genMoveRegPairHalf(tree, newLo, oldLo, 0);
+ }
+ else
+ {
+ /* Both sets are different - is there an overlap? */
+
+ if (newLo == oldHi)
+ {
+ /* Are high and low simply swapped ? */
+
+ if (newHi == oldLo)
+ {
+#ifdef _TARGET_ARM_
+ /* Let's use XOR swap to reduce register pressure. */
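+                    // Three XORs exchange the two registers in place, so no scratch register is needed.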
+ inst_RV_RV(INS_eor, oldLo, oldHi);
+ inst_RV_RV(INS_eor, oldHi, oldLo);
+ inst_RV_RV(INS_eor, oldLo, oldHi);
+#else
+ inst_RV_RV(INS_xchg, oldHi, oldLo);
+#endif
+ regTracker.rsTrackRegSwap(oldHi, oldLo);
+ }
+ else
+ {
+ /* New lower == old higher, so move higher half first */
+
+ noway_assert(newHi != oldLo);
+ genMoveRegPairHalf(tree, newHi, oldHi, sizeof(int));
+ genMoveRegPairHalf(tree, newLo, oldLo, 0);
+ }
+ }
+ else
+ {
+ /* Move lower half first */
+ genMoveRegPairHalf(tree, newLo, oldLo, 0);
+ genMoveRegPairHalf(tree, newHi, oldHi, sizeof(int));
+ }
+ }
+ }
+
+ /* Record the fact that we're switching to another pair */
+
+ tree->gtRegPair = newPair;
+}
+
+/*****************************************************************************
+ *
+ *  Compute the value 'tree' into the register pair specified by 'needRegPair'.
+ *  If 'needRegPair' is REG_PAIR_NONE then use any free register pair, avoiding
+ *  those in avoidReg.
+ * If 'keepReg' is set to RegSet::KEEP_REG then we mark both registers that the
+ * value ends up in as being used.
+ */
+
+void CodeGen::genComputeRegPair(
+ GenTreePtr tree, regPairNo needRegPair, regMaskTP avoidReg, RegSet::KeepReg keepReg, bool freeOnly)
+{
+ regMaskTP regMask;
+ regPairNo regPair;
+ regMaskTP tmpMask;
+ regMaskTP tmpUsedMask;
+ regNumber rLo;
+ regNumber rHi;
+
+ noway_assert(isRegPairType(tree->gtType));
+
+ if (needRegPair == REG_PAIR_NONE)
+ {
+ if (freeOnly)
+ {
+ regMask = regSet.rsRegMaskFree() & ~avoidReg;
+ if (genMaxOneBit(regMask))
+ regMask = regSet.rsRegMaskFree();
+ }
+ else
+ {
+ regMask = RBM_ALLINT & ~avoidReg;
+ }
+
+ if (genMaxOneBit(regMask))
+ regMask = regSet.rsRegMaskCanGrab();
+ }
+ else
+ {
+ regMask = genRegPairMask(needRegPair);
+ }
+
+ /* Generate the value, hopefully into the right register pair */
+
+ genCodeForTreeLng(tree, regMask, avoidReg);
+
+ noway_assert(tree->gtFlags & GTF_REG_VAL);
+
+ regPair = tree->gtRegPair;
+ tmpMask = genRegPairMask(regPair);
+
+ rLo = genRegPairLo(regPair);
+ rHi = genRegPairHi(regPair);
+
+ /* At least one half is in a real register */
+
+ noway_assert(rLo != REG_STK || rHi != REG_STK);
+
+ /* Did the value end up in an acceptable register pair? */
+
+ if (needRegPair != REG_PAIR_NONE)
+ {
+ if (needRegPair != regPair)
+ {
+ /* This is a workaround. If we specify a regPair for genMoveRegPair */
+            /* it expects the source pair to be marked as used */
+ regSet.rsMarkRegPairUsed(tree);
+ genMoveRegPair(tree, 0, needRegPair);
+ }
+ }
+ else if (freeOnly)
+ {
+ /* Do we have to end up with a free register pair?
+ Something might have gotten freed up above */
+ bool mustMoveReg = false;
+
+ regMask = regSet.rsRegMaskFree() & ~avoidReg;
+
+ if (genMaxOneBit(regMask))
+ regMask = regSet.rsRegMaskFree();
+
+ if ((tmpMask & regMask) != tmpMask || rLo == REG_STK || rHi == REG_STK)
+ {
+ /* Note that we must call genMoveRegPair if one of our registers
+ comes from the used mask, so that it will be properly spilled. */
+
+ mustMoveReg = true;
+ }
+
+ if (genMaxOneBit(regMask))
+ regMask |= regSet.rsRegMaskCanGrab() & ~avoidReg;
+
+ if (genMaxOneBit(regMask))
+ regMask |= regSet.rsRegMaskCanGrab();
+
+ /* Did the value end up in a free register pair? */
+
+ if (mustMoveReg)
+ {
+ /* We'll have to move the value to a free (trashable) pair */
+ genMoveRegPair(tree, regMask, REG_PAIR_NONE);
+ }
+ }
+ else
+ {
+ noway_assert(needRegPair == REG_PAIR_NONE);
+ noway_assert(!freeOnly);
+
+ /* it is possible to have tmpMask also in the regSet.rsMaskUsed */
+ tmpUsedMask = tmpMask & regSet.rsMaskUsed;
+ tmpMask &= ~regSet.rsMaskUsed;
+
+        /* Make sure that the value is in "real" registers */
+ if (rLo == REG_STK)
+ {
+ /* Get one of the desired registers, but exclude rHi */
+
+ regSet.rsLockReg(tmpMask);
+ regSet.rsLockUsedReg(tmpUsedMask);
+
+ regNumber reg = regSet.rsPickReg(regMask);
+
+ regSet.rsUnlockUsedReg(tmpUsedMask);
+ regSet.rsUnlockReg(tmpMask);
+
+ inst_RV_TT(ins_Load(TYP_INT), reg, tree, 0);
+
+ tree->gtRegPair = gen2regs2pair(reg, rHi);
+
+ regTracker.rsTrackRegTrash(reg);
+ gcInfo.gcMarkRegSetNpt(genRegMask(reg));
+ }
+ else if (rHi == REG_STK)
+ {
+ /* Get one of the desired registers, but exclude rLo */
+
+ regSet.rsLockReg(tmpMask);
+ regSet.rsLockUsedReg(tmpUsedMask);
+
+ regNumber reg = regSet.rsPickReg(regMask);
+
+ regSet.rsUnlockUsedReg(tmpUsedMask);
+ regSet.rsUnlockReg(tmpMask);
+
+ inst_RV_TT(ins_Load(TYP_INT), reg, tree, 4);
+
+ tree->gtRegPair = gen2regs2pair(rLo, reg);
+
+ regTracker.rsTrackRegTrash(reg);
+ gcInfo.gcMarkRegSetNpt(genRegMask(reg));
+ }
+ }
+
+ /* Does the caller want us to mark the register as used? */
+
+ if (keepReg == RegSet::KEEP_REG)
+ {
+ /* In case we're computing a value into a register variable */
+
+ genUpdateLife(tree);
+
+ /* Mark the register as 'used' */
+
+ regSet.rsMarkRegPairUsed(tree);
+ }
+}
+
+/*****************************************************************************
+ *
+ * Same as genComputeRegPair(), the only difference being that the result
+ * is guaranteed to end up in a trashable register pair.
+ */
+
+// inline
+void CodeGen::genCompIntoFreeRegPair(GenTreePtr tree, regMaskTP avoidReg, RegSet::KeepReg keepReg)
+{
+ genComputeRegPair(tree, REG_PAIR_NONE, avoidReg, keepReg, true);
+}
+
+/*****************************************************************************
+ *
+ * The value 'tree' was earlier computed into a register pair; free up that
+ * register pair (but also make sure the value is presently in a register
+ * pair).
+ */
+
+void CodeGen::genReleaseRegPair(GenTreePtr tree)
+{
+ if (tree->gtFlags & GTF_SPILLED)
+ {
+ /* The register has been spilled -- reload it */
+
+ regSet.rsUnspillRegPair(tree, 0, RegSet::FREE_REG);
+ return;
+ }
+
+ regSet.rsMarkRegFree(genRegPairMask(tree->gtRegPair));
+}
+
+/*****************************************************************************
+ *
+ * The value 'tree' was earlier computed into a register pair. Check whether
+ * either register of that pair has been spilled (and reload it if so), and
+ *  if 'keepReg' is RegSet::FREE_REG, free the register pair.
+ */
+
+void CodeGen::genRecoverRegPair(GenTreePtr tree, regPairNo regPair, RegSet::KeepReg keepReg)
+{
+ if (tree->gtFlags & GTF_SPILLED)
+ {
+ regMaskTP regMask;
+
+ if (regPair == REG_PAIR_NONE)
+ regMask = RBM_NONE;
+ else
+ regMask = genRegPairMask(regPair);
+
+ /* The register pair has been spilled -- reload it */
+
+ regSet.rsUnspillRegPair(tree, regMask, RegSet::KEEP_REG);
+ }
+
+ /* Does the caller insist on the value being in a specific place? */
+
+ if (regPair != REG_PAIR_NONE && regPair != tree->gtRegPair)
+ {
+ /* No good -- we'll have to move the value to a new place */
+
+ genMoveRegPair(tree, 0, regPair);
+
+ /* Mark the pair as used if appropriate */
+
+ if (keepReg == RegSet::KEEP_REG)
+ regSet.rsMarkRegPairUsed(tree);
+
+ return;
+ }
+
+ /* Free the register pair if the caller desired so */
+
+ if (keepReg == RegSet::FREE_REG)
+ regSet.rsMarkRegFree(genRegPairMask(tree->gtRegPair));
+}
+
+/*****************************************************************************
+ *
+ * Compute the given long value into the specified register pair; don't mark
+ * the register pair as used.
+ */
+
+// inline
+void CodeGen::genEvalIntoFreeRegPair(GenTreePtr tree, regPairNo regPair, regMaskTP avoidReg)
+{
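+    // Compute the value with the pair temporarily marked as used, then recover and
+    // free it so the caller gets the value in 'regPair' without the pair marked as in use.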
+ genComputeRegPair(tree, regPair, avoidReg, RegSet::KEEP_REG);
+ genRecoverRegPair(tree, regPair, RegSet::FREE_REG);
+}
+
+/*****************************************************************************
+ * This helper makes sure that the regpair target of an assignment is
+ * available for use. This needs to be called in genCodeForTreeLng just before
+ * a long assignment, but must not be called until everything has been
+ * evaluated, or else we might try to spill enregistered variables.
+ *
+ */
+
+// inline
+void CodeGen::genMakeRegPairAvailable(regPairNo regPair)
+{
+ /* Make sure the target of the store is available */
+
+ regNumber regLo = genRegPairLo(regPair);
+ regNumber regHi = genRegPairHi(regPair);
+
+ if ((regHi != REG_STK) && (regSet.rsMaskUsed & genRegMask(regHi)))
+ regSet.rsSpillReg(regHi);
+
+ if ((regLo != REG_STK) && (regSet.rsMaskUsed & genRegMask(regLo)))
+ regSet.rsSpillReg(regLo);
+}
+
+/*****************************************************************************/
+/*****************************************************************************
+ *
+ * Return true if the given tree 'addr' can be computed via an addressing mode,
+ * such as "[ebx+esi*4+20]". If the expression isn't an address mode already
+ * try to make it so (but we don't try 'too hard' to accomplish this).
+ *
+ * If we end up needing a register (or two registers) to hold some part(s) of the
+ * address, we return the use register mask via '*useMaskPtr'.
+ *
+ * If keepReg==RegSet::KEEP_REG, the registers (viz. *useMaskPtr) will be marked as
+ * in use. The caller would then be responsible for calling
+ * regSet.rsMarkRegFree(*useMaskPtr).
+ *
+ *  If keepReg==RegSet::FREE_REG, then the caller needs to update the GC-tracking by
+ * calling genDoneAddressable(addr, *useMaskPtr, RegSet::FREE_REG);
+ */
+
+bool CodeGen::genMakeIndAddrMode(GenTreePtr addr,
+ GenTreePtr oper,
+ bool forLea,
+ regMaskTP regMask,
+ RegSet::KeepReg keepReg,
+ regMaskTP* useMaskPtr,
+ bool deferOK)
+{
+ if (addr->gtOper == GT_ARR_ELEM)
+ {
+ regMaskTP regs = genMakeAddrArrElem(addr, oper, RBM_ALLINT, keepReg);
+ *useMaskPtr = regs;
+ return true;
+ }
+
+ bool rev;
+ GenTreePtr rv1;
+ GenTreePtr rv2;
+ bool operIsArrIndex; // is oper an array index
+    GenTreePtr scaledIndex; // points to the scaled value when the scaled addressing mode can't be used
+
+ regMaskTP anyMask = RBM_ALLINT;
+
+ unsigned cns;
+ unsigned mul;
+
+ GenTreePtr tmp;
+ int ixv = INT_MAX; // unset value
+
+ GenTreePtr scaledIndexVal;
+
+ regMaskTP newLiveMask;
+ regMaskTP rv1Mask;
+ regMaskTP rv2Mask;
+
+ /* Deferred address mode forming NYI for x86 */
+
+ noway_assert(deferOK == false);
+
+ noway_assert(oper == NULL ||
+ ((oper->OperIsIndir() || oper->OperIsAtomicOp()) &&
+ ((oper->gtOper == GT_CMPXCHG && oper->gtCmpXchg.gtOpLocation == addr) || oper->gtOp.gtOp1 == addr)));
+ operIsArrIndex = (oper != nullptr && oper->OperGet() == GT_IND && (oper->gtFlags & GTF_IND_ARR_INDEX) != 0);
+
+ if (addr->gtOper == GT_LEA)
+ {
+ rev = (addr->gtFlags & GTF_REVERSE_OPS) != 0;
+ GenTreeAddrMode* lea = addr->AsAddrMode();
+ rv1 = lea->Base();
+ rv2 = lea->Index();
+ mul = lea->gtScale;
+ cns = lea->gtOffset;
+
+ if (rv1 != NULL && rv2 == NULL && cns == 0 && (rv1->gtFlags & GTF_REG_VAL) != 0)
+ {
+ scaledIndex = NULL;
+ goto YES;
+ }
+ }
+ else
+ {
+ // NOTE: FOR NOW THIS ISN'T APPROPRIATELY INDENTED - THIS IS TO MAKE IT
+ // EASIER TO MERGE
+
+ /* Is the complete address already sitting in a register? */
+
+ if ((addr->gtFlags & GTF_REG_VAL) || (addr->gtOper == GT_LCL_VAR && genMarkLclVar(addr)))
+ {
+ genUpdateLife(addr);
+
+ rv1 = addr;
+ rv2 = scaledIndex = 0;
+ cns = 0;
+
+ goto YES;
+ }
+
+ /* Is it an absolute address */
+
+ if (addr->IsCnsIntOrI())
+ {
+ rv1 = rv2 = scaledIndex = 0;
+ // along this code path cns is never used, so place a BOGUS value in it as proof
+ // cns = addr->gtIntCon.gtIconVal;
+ cns = UINT_MAX;
+
+ goto YES;
+ }
+
+ /* Is there a chance of forming an address mode? */
+
+ if (!genCreateAddrMode(addr, forLea ? 1 : 0, false, regMask, &rev, &rv1, &rv2, &mul, &cns))
+ {
+ /* This better not be an array index */
+ noway_assert(!operIsArrIndex);
+
+ return false;
+ }
+ // THIS IS THE END OF THE INAPPROPRIATELY INDENTED SECTION
+ }
+
+ /* For scaled array access, RV2 may not be pointing to the index of the
+ array if the CPU does not support the needed scaling factor. We will
+ make it point to the actual index, and scaledIndex will point to
+ the scaled value */
+
+ scaledIndex = NULL;
+ scaledIndexVal = NULL;
+
+ if (operIsArrIndex && rv2 != NULL && (rv2->gtOper == GT_MUL || rv2->gtOper == GT_LSH) &&
+ rv2->gtOp.gtOp2->IsIntCnsFitsInI32())
+ {
+ scaledIndex = rv2;
+ compiler->optGetArrayRefScaleAndIndex(scaledIndex, &scaledIndexVal DEBUGARG(true));
+
+ noway_assert(scaledIndex->gtOp.gtOp2->IsIntCnsFitsInI32());
+ }
+
+ /* Has the address already been computed? */
+
+ if (addr->gtFlags & GTF_REG_VAL)
+ {
+ if (forLea)
+ return true;
+
+ rv1 = addr;
+ rv2 = NULL;
+ scaledIndex = NULL;
+ genUpdateLife(addr);
+ goto YES;
+ }
+
+ /*
+ Here we have the following operands:
+
+ rv1 ..... base address
+ rv2 ..... offset value (or NULL)
+ mul ..... multiplier for rv2 (or 0)
+ cns ..... additional constant (or 0)
+
+ The first operand must be present (and be an address) unless we're
+ computing an expression via 'LEA'. The scaled operand is optional,
+ but must not be a pointer if present.
+ */
+
+ noway_assert(rv2 == NULL || !varTypeIsGC(rv2->TypeGet()));
+
+ /*-------------------------------------------------------------------------
+ *
+ * Make sure both rv1 and rv2 (if present) are in registers
+ *
+ */
+
+ // Trivial case : Is either rv1 or rv2 a NULL ?
+
+ if (!rv2)
+ {
+ /* A single operand, make sure it's in a register */
+
+ if (cns != 0)
+ {
+            // In the case where "rv1" is already in a register, there's no reason to get it into a
+            // register in "regMask" yet; since there's a non-zero constant that we're going to add,
+            // we can do an LEA.
+ genCodeForTree(rv1, RBM_NONE);
+ }
+ else
+ {
+ genCodeForTree(rv1, regMask);
+ }
+ goto DONE_REGS;
+ }
+ else if (!rv1)
+ {
+ /* A single (scaled) operand, make sure it's in a register */
+
+ genCodeForTree(rv2, 0);
+ goto DONE_REGS;
+ }
+
+ /* At this point, both rv1 and rv2 are non-NULL and we have to make sure
+ they are in registers */
+
+ noway_assert(rv1 && rv2);
+
+ /* If we have to check a constant array index, compare it against
+ the array dimension (see below) but then fold the index with a
+ scaling factor (if any) and additional offset (if any).
+ */
+
+ if (rv2->gtOper == GT_CNS_INT || (scaledIndex != NULL && scaledIndexVal->gtOper == GT_CNS_INT))
+ {
+ if (scaledIndex != NULL)
+ {
+ assert(rv2 == scaledIndex && scaledIndexVal != NULL);
+ rv2 = scaledIndexVal;
+ }
+ /* We must have a range-checked index operation */
+
+ noway_assert(operIsArrIndex);
+
+ /* Get hold of the index value and see if it's a constant */
+
+ if (rv2->IsIntCnsFitsInI32())
+ {
+ ixv = (int)rv2->gtIntCon.gtIconVal;
+ // Maybe I should just set "fold" true in the call to genMakeAddressable above.
+ if (scaledIndex != NULL)
+ {
+ int scale = 1 << ((int)scaledIndex->gtOp.gtOp2->gtIntCon.gtIconVal); // If this truncates, that's OK --
+ // multiple of 2^6.
+ if (mul == 0)
+ {
+ mul = scale;
+ }
+ else
+ {
+ mul *= scale;
+ }
+ }
+ rv2 = scaledIndex = NULL;
+
+ /* Add the scaled index into the added value */
+
+ if (mul)
+ cns += ixv * mul;
+ else
+ cns += ixv;
+
+ /* Make sure 'rv1' is in a register */
+
+ genCodeForTree(rv1, regMask);
+
+ goto DONE_REGS;
+ }
+ }
+
+ if (rv1->gtFlags & GTF_REG_VAL)
+ {
+ /* op1 already in register - how about op2? */
+
+ if (rv2->gtFlags & GTF_REG_VAL)
+ {
+ /* Great - both operands are in registers already. Just update
+ the liveness and we are done. */
+
+ if (rev)
+ {
+ genUpdateLife(rv2);
+ genUpdateLife(rv1);
+ }
+ else
+ {
+ genUpdateLife(rv1);
+ genUpdateLife(rv2);
+ }
+
+ goto DONE_REGS;
+ }
+
+ /* rv1 is in a register, but rv2 isn't */
+
+ if (!rev)
+ {
+ /* rv1 is already materialized in a register. Just update liveness
+ to rv1 and generate code for rv2 */
+
+ genUpdateLife(rv1);
+ regSet.rsMarkRegUsed(rv1, oper);
+ }
+
+ goto GEN_RV2;
+ }
+ else if (rv2->gtFlags & GTF_REG_VAL)
+ {
+ /* rv2 is in a register, but rv1 isn't */
+
+ noway_assert(rv2->gtOper == GT_REG_VAR);
+
+ if (rev)
+ {
+ /* rv2 is already materialized in a register. Update liveness
+ to after rv2 and then hang on to rv2 */
+
+ genUpdateLife(rv2);
+ regSet.rsMarkRegUsed(rv2, oper);
+ }
+
+        /* Generate code for the first operand */
+
+ genCodeForTree(rv1, regMask);
+
+ if (rev)
+ {
+ // Free up rv2 in the right fashion (it might be re-marked if keepReg)
+ regSet.rsMarkRegUsed(rv1, oper);
+ regSet.rsLockUsedReg(genRegMask(rv1->gtRegNum));
+ genReleaseReg(rv2);
+ regSet.rsUnlockUsedReg(genRegMask(rv1->gtRegNum));
+ genReleaseReg(rv1);
+ }
+ else
+ {
+ /* We have evaluated rv1, and now we just need to update liveness
+ to rv2 which was already in a register */
+
+ genUpdateLife(rv2);
+ }
+
+ goto DONE_REGS;
+ }
+
+ if (forLea && !cns)
+ return false;
+
+ /* Make sure we preserve the correct operand order */
+
+ if (rev)
+ {
+ /* Generate the second operand first */
+
+ // Determine what registers go live between rv2 and rv1
+ newLiveMask = genNewLiveRegMask(rv2, rv1);
+
+ rv2Mask = regMask & ~newLiveMask;
+ rv2Mask &= ~rv1->gtRsvdRegs;
+
+ if (rv2Mask == RBM_NONE)
+ {
+ // The regMask hint cannot be honored
+ // We probably have a call that trashes the register(s) in regMask
+ // so ignore the regMask hint, but try to avoid using
+ // the registers in newLiveMask and the rv1->gtRsvdRegs
+ //
+ rv2Mask = RBM_ALLINT & ~newLiveMask;
+ rv2Mask = regSet.rsMustExclude(rv2Mask, rv1->gtRsvdRegs);
+ }
+
+ genCodeForTree(rv2, rv2Mask);
+ regMask &= ~genRegMask(rv2->gtRegNum);
+
+ regSet.rsMarkRegUsed(rv2, oper);
+
+ /* Generate the first operand second */
+
+ genCodeForTree(rv1, regMask);
+ regSet.rsMarkRegUsed(rv1, oper);
+
+ /* Free up both operands in the right order (they might be
+ re-marked as used below)
+ */
+ regSet.rsLockUsedReg(genRegMask(rv1->gtRegNum));
+ genReleaseReg(rv2);
+ regSet.rsUnlockUsedReg(genRegMask(rv1->gtRegNum));
+ genReleaseReg(rv1);
+ }
+ else
+ {
+ /* Get the first operand into a register */
+
+ // Determine what registers go live between rv1 and rv2
+ newLiveMask = genNewLiveRegMask(rv1, rv2);
+
+ rv1Mask = regMask & ~newLiveMask;
+ rv1Mask &= ~rv2->gtRsvdRegs;
+
+ if (rv1Mask == RBM_NONE)
+ {
+ // The regMask hint cannot be honored
+ // We probably have a call that trashes the register(s) in regMask
+ // so ignore the regMask hint, but try to avoid using
+ // the registers in liveMask and the rv2->gtRsvdRegs
+ //
+ rv1Mask = RBM_ALLINT & ~newLiveMask;
+ rv1Mask = regSet.rsMustExclude(rv1Mask, rv2->gtRsvdRegs);
+ }
+
+ genCodeForTree(rv1, rv1Mask);
+ regSet.rsMarkRegUsed(rv1, oper);
+
+ GEN_RV2:
+
+ /* Here, we need to get rv2 in a register. We have either already
+           materialized rv1 into a register, or it was already in one */
+
+ noway_assert(rv1->gtFlags & GTF_REG_VAL);
+ noway_assert(rev || regSet.rsIsTreeInReg(rv1->gtRegNum, rv1));
+
+ /* Generate the second operand as well */
+
+ regMask &= ~genRegMask(rv1->gtRegNum);
+ genCodeForTree(rv2, regMask);
+
+ if (rev)
+ {
+ /* rev==true means the evaluation order is rv2,rv1. We just
+ evaluated rv2, and rv1 was already in a register. Just
+ update liveness to rv1 and we are done. */
+
+ genUpdateLife(rv1);
+ }
+ else
+ {
+ /* We have evaluated rv1 and rv2. Free up both operands in
+ the right order (they might be re-marked as used below) */
+
+ /* Even though we have not explicitly marked rv2 as used,
+ rv2->gtRegNum may be used if rv2 is a multi-use or
+ an enregistered variable. */
+ regMaskTP rv2Used;
+ regSet.rsLockReg(genRegMask(rv2->gtRegNum), &rv2Used);
+
+ /* Check for special case both rv1 and rv2 are the same register */
+ if (rv2Used != genRegMask(rv1->gtRegNum))
+ {
+ genReleaseReg(rv1);
+ regSet.rsUnlockReg(genRegMask(rv2->gtRegNum), rv2Used);
+ }
+ else
+ {
+ regSet.rsUnlockReg(genRegMask(rv2->gtRegNum), rv2Used);
+ genReleaseReg(rv1);
+ }
+ }
+ }
+
+/*-------------------------------------------------------------------------
+ *
+ * At this point, both rv1 and rv2 (if present) are in registers
+ *
+ */
+
+DONE_REGS:
+
+ /* We must verify that 'rv1' and 'rv2' are both sitting in registers */
+
+ if (rv1 && !(rv1->gtFlags & GTF_REG_VAL))
+ return false;
+ if (rv2 && !(rv2->gtFlags & GTF_REG_VAL))
+ return false;
+
+YES:
+
+ // *(intVar1+intVar1) causes problems as we
+ // call regSet.rsMarkRegUsed(op1) and regSet.rsMarkRegUsed(op2). So the calling function
+ // needs to know that it has to call rsFreeReg(reg1) twice. We can't do
+ // that currently as we return a single mask in useMaskPtr.
+
+ if ((keepReg == RegSet::KEEP_REG) && oper && rv1 && rv2 && (rv1->gtFlags & rv2->gtFlags & GTF_REG_VAL))
+ {
+ if (rv1->gtRegNum == rv2->gtRegNum)
+ {
+ noway_assert(!operIsArrIndex);
+ return false;
+ }
+ }
+
+ /* Check either register operand to see if it needs to be saved */
+
+ if (rv1)
+ {
+ noway_assert(rv1->gtFlags & GTF_REG_VAL);
+
+ if (keepReg == RegSet::KEEP_REG)
+ {
+ regSet.rsMarkRegUsed(rv1, oper);
+ }
+ else
+ {
+ /* If the register holds an address, mark it */
+
+ gcInfo.gcMarkRegPtrVal(rv1->gtRegNum, rv1->TypeGet());
+ }
+ }
+
+ if (rv2)
+ {
+ noway_assert(rv2->gtFlags & GTF_REG_VAL);
+
+ if (keepReg == RegSet::KEEP_REG)
+ regSet.rsMarkRegUsed(rv2, oper);
+ }
+
+ if (deferOK)
+ {
+ noway_assert(!scaledIndex);
+ return true;
+ }
+
+ /* Compute the set of registers the address depends on */
+
+ regMaskTP useMask = RBM_NONE;
+
+ if (rv1)
+ {
+ if (rv1->gtFlags & GTF_SPILLED)
+ regSet.rsUnspillReg(rv1, 0, RegSet::KEEP_REG);
+
+ noway_assert(rv1->gtFlags & GTF_REG_VAL);
+ useMask |= genRegMask(rv1->gtRegNum);
+ }
+
+ if (rv2)
+ {
+ if (rv2->gtFlags & GTF_SPILLED)
+ {
+ if (rv1)
+ {
+ regMaskTP lregMask = genRegMask(rv1->gtRegNum);
+ regMaskTP used;
+
+ regSet.rsLockReg(lregMask, &used);
+ regSet.rsUnspillReg(rv2, 0, RegSet::KEEP_REG);
+ regSet.rsUnlockReg(lregMask, used);
+ }
+ else
+ regSet.rsUnspillReg(rv2, 0, RegSet::KEEP_REG);
+ }
+ noway_assert(rv2->gtFlags & GTF_REG_VAL);
+ useMask |= genRegMask(rv2->gtRegNum);
+ }
+
+ /* Tell the caller which registers we need to hang on to */
+
+ *useMaskPtr = useMask;
+
+ return true;
+}
+
+/*****************************************************************************
+ *
+ * 'oper' is an array bounds check (a GT_ARR_BOUNDS_CHECK node).
+ */
+
+void CodeGen::genRangeCheck(GenTreePtr oper)
+{
+ noway_assert(oper->OperGet() == GT_ARR_BOUNDS_CHECK);
+ GenTreeBoundsChk* bndsChk = oper->AsBoundsChk();
+
+ GenTreePtr arrLen = bndsChk->gtArrLen;
+ GenTreePtr arrRef = NULL;
+ int lenOffset = 0;
+
+    // If "arrLen" is an ARR_LENGTH operation, get the array whose length it takes into a register.
+ // Otherwise, if the length is not a constant, get it (the length, not the arr reference) in
+ // a register.
+
+ if (arrLen->OperGet() == GT_ARR_LENGTH)
+ {
+ GenTreeArrLen* arrLenExact = arrLen->AsArrLen();
+ lenOffset = arrLenExact->ArrLenOffset();
+
+#if !CPU_LOAD_STORE_ARCH && !defined(_TARGET_64BIT_)
+ // We always load the length into a register on ARM and x64.
+
+ // 64-bit has to act like LOAD_STORE_ARCH because the array only holds 32-bit
+ // lengths, but the index expression *can* be native int (64-bits)
+ arrRef = arrLenExact->ArrRef();
+ genCodeForTree(arrRef, RBM_ALLINT);
+ noway_assert(arrRef->gtFlags & GTF_REG_VAL);
+ regSet.rsMarkRegUsed(arrRef);
+ noway_assert(regSet.rsMaskUsed & genRegMask(arrRef->gtRegNum));
+#endif
+ }
+#if !CPU_LOAD_STORE_ARCH && !defined(_TARGET_64BIT_)
+ // This is another form in which we have an array reference and a constant length. Don't use
+ // on LOAD_STORE or 64BIT.
+ else if (arrLen->OperGet() == GT_IND && arrLen->gtOp.gtOp1->IsAddWithI32Const(&arrRef, &lenOffset))
+ {
+ genCodeForTree(arrRef, RBM_ALLINT);
+ noway_assert(arrRef->gtFlags & GTF_REG_VAL);
+ regSet.rsMarkRegUsed(arrRef);
+ noway_assert(regSet.rsMaskUsed & genRegMask(arrRef->gtRegNum));
+ }
+#endif
+
+ // If we didn't find one of the special forms above, generate code to evaluate the array length to a register.
+ if (arrRef == NULL)
+ {
+ // (Unless it's a constant.)
+ if (!arrLen->IsCnsIntOrI())
+ {
+ genCodeForTree(arrLen, RBM_ALLINT);
+ regSet.rsMarkRegUsed(arrLen);
+
+ noway_assert(arrLen->gtFlags & GTF_REG_VAL);
+ noway_assert(regSet.rsMaskUsed & genRegMask(arrLen->gtRegNum));
+ }
+ }
+
+ /* Is the array index a constant value? */
+ GenTreePtr index = bndsChk->gtIndex;
+ if (!index->IsCnsIntOrI())
+ {
+ // No, it's not a constant.
+ genCodeForTree(index, RBM_ALLINT);
+ regSet.rsMarkRegUsed(index);
+
+ // If we need "arrRef" or "arrLen", and evaluating "index" displaced whichever of them we're using
+ // from its register, get it back in a register.
+ if (arrRef != NULL)
+ genRecoverReg(arrRef, ~genRegMask(index->gtRegNum), RegSet::KEEP_REG);
+ else if (!arrLen->IsCnsIntOrI())
+ genRecoverReg(arrLen, ~genRegMask(index->gtRegNum), RegSet::KEEP_REG);
+
+ /* Make sure we have the values we expect */
+ noway_assert(index->gtFlags & GTF_REG_VAL);
+ noway_assert(regSet.rsMaskUsed & genRegMask(index->gtRegNum));
+
+ noway_assert(index->TypeGet() == TYP_I_IMPL ||
+ (varTypeIsIntegral(index->TypeGet()) && !varTypeIsLong(index->TypeGet())));
+ var_types indxType = index->TypeGet();
+ if (indxType != TYP_I_IMPL)
+ indxType = TYP_INT;
+
+ if (arrRef != NULL)
+ { // _TARGET_X86_ or X64 when we have a TYP_INT (32-bit) index expression in the index register
+
+ /* Generate "cmp index, [arrRef+LenOffs]" */
+ inst_RV_AT(INS_cmp, emitTypeSize(indxType), indxType, index->gtRegNum, arrRef, lenOffset);
+ }
+ else if (arrLen->IsCnsIntOrI())
+ {
+ ssize_t len = arrLen->AsIntConCommon()->IconValue();
+ inst_RV_IV(INS_cmp, index->gtRegNum, len, EA_4BYTE);
+ }
+ else
+ {
+ inst_RV_RV(INS_cmp, index->gtRegNum, arrLen->gtRegNum, indxType, emitTypeSize(indxType));
+ }
+
+ /* Generate "jae <fail_label>" */
+
+ noway_assert(oper->gtOper == GT_ARR_BOUNDS_CHECK);
+ emitJumpKind jmpGEU = genJumpKindForOper(GT_GE, CK_UNSIGNED);
+ genJumpToThrowHlpBlk(jmpGEU, SCK_RNGCHK_FAIL, bndsChk->gtIndRngFailBB);
+ }
+ else
+ {
+ /* Generate "cmp [rv1+LenOffs], cns" */
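+        // With the array length as the first operand, the sense of the comparison flips:
+        // the out-of-range branches below use "below or equal" rather than "above or equal".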
+
+ bool indIsInt = true;
+#ifdef _TARGET_64BIT_
+ int ixv = 0;
+ ssize_t ixvFull = index->AsIntConCommon()->IconValue();
+ if (ixvFull > INT32_MAX)
+ {
+ indIsInt = false;
+ }
+ else
+ {
+ ixv = (int)ixvFull;
+ }
+#else
+ ssize_t ixvFull = index->AsIntConCommon()->IconValue();
+ int ixv = (int)ixvFull;
+#endif
+ if (arrRef != NULL && indIsInt)
+ { // _TARGET_X86_ or X64 when we have a TYP_INT (32-bit) index expression in the index register
+ /* Generate "cmp [arrRef+LenOffs], ixv" */
+ inst_AT_IV(INS_cmp, EA_4BYTE, arrRef, ixv, lenOffset);
+ // Generate "jbe <fail_label>"
+ emitJumpKind jmpLEU = genJumpKindForOper(GT_LE, CK_UNSIGNED);
+ genJumpToThrowHlpBlk(jmpLEU, SCK_RNGCHK_FAIL, bndsChk->gtIndRngFailBB);
+ }
+ else if (arrLen->IsCnsIntOrI())
+ {
+ ssize_t lenv = arrLen->AsIntConCommon()->IconValue();
+ // Both are constants; decide at compile time.
+ if (!(0 <= ixvFull && ixvFull < lenv))
+ {
+ genJumpToThrowHlpBlk(EJ_jmp, SCK_RNGCHK_FAIL, bndsChk->gtIndRngFailBB);
+ }
+ }
+ else if (!indIsInt)
+ {
+ genJumpToThrowHlpBlk(EJ_jmp, SCK_RNGCHK_FAIL, bndsChk->gtIndRngFailBB);
+ }
+ else
+ {
+ /* Generate "cmp arrLen, ixv" */
+ inst_RV_IV(INS_cmp, arrLen->gtRegNum, ixv, EA_4BYTE);
+ // Generate "jbe <fail_label>"
+ emitJumpKind jmpLEU = genJumpKindForOper(GT_LE, CK_UNSIGNED);
+ genJumpToThrowHlpBlk(jmpLEU, SCK_RNGCHK_FAIL, bndsChk->gtIndRngFailBB);
+ }
+ }
+
+ // Free the registers that were used.
+ if (arrRef != NULL)
+ {
+ regSet.rsMarkRegFree(arrRef->gtRegNum, arrRef);
+ }
+ else if (!arrLen->IsCnsIntOrI())
+ {
+ regSet.rsMarkRegFree(arrLen->gtRegNum, arrLen);
+ }
+
+ if (!index->IsCnsIntOrI())
+ {
+ regSet.rsMarkRegFree(index->gtRegNum, index);
+ }
+}
+
+/*****************************************************************************
+ *
+ * If compiling without REDUNDANT_LOAD, same as genMakeAddressable().
+ * Otherwise, check if rvalue is in register. If so, mark it. Then
+ * call genMakeAddressable(). Needed because genMakeAddressable is used
+ * for both lvalue and rvalue, and we only can do this for rvalue.
+ *  for both lvalues and rvalues, and we can only do this for rvalues.
+
+// inline
+regMaskTP CodeGen::genMakeRvalueAddressable(
+ GenTreePtr tree, regMaskTP needReg, RegSet::KeepReg keepReg, bool forLoadStore, bool smallOK)
+{
+ regNumber reg;
+
+#if REDUNDANT_LOAD
+
+ if (tree->gtOper == GT_LCL_VAR)
+ {
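+        // If the value of this stack local is already cached in a register, mark the
+        // tree as being in that register so genMakeAddressable2 can use it directly.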
+ reg = findStkLclInReg(tree->gtLclVarCommon.gtLclNum);
+
+ if (reg != REG_NA && (needReg == 0 || (genRegMask(reg) & needReg) != 0))
+ {
+ noway_assert(!isRegPairType(tree->gtType));
+
+ genMarkTreeInReg(tree, reg);
+ }
+ }
+
+#endif
+
+ return genMakeAddressable2(tree, needReg, keepReg, forLoadStore, smallOK);
+}
+
+/*****************************************************************************/
+
+bool CodeGen::genIsLocalLastUse(GenTreePtr tree)
+{
+ const LclVarDsc* varDsc = &compiler->lvaTable[tree->gtLclVarCommon.gtLclNum];
+
+ noway_assert(tree->OperGet() == GT_LCL_VAR);
+ noway_assert(varDsc->lvTracked);
+
+ return ((tree->gtFlags & GTF_VAR_DEATH) != 0);
+}
+
+/*****************************************************************************
+ *
+ * This is genMakeAddressable(GT_ARR_ELEM).
+ *  Makes the array-element addressable and returns the addressability registers.
+ *  It also marks them as used if keepReg==RegSet::KEEP_REG.
+ *  tree is the dependent tree.
+ *
+ *  Note that an array-element needs 2 registers to be addressable: the
+ * array-object and the offset. This function marks gtArrObj and gtArrInds[0]
+ * with the 2 registers so that other functions (like instGetAddrMode()) know
+ * where to look for the offset to use.
+ */
+
+regMaskTP CodeGen::genMakeAddrArrElem(GenTreePtr arrElem, GenTreePtr tree, regMaskTP needReg, RegSet::KeepReg keepReg)
+{
+ noway_assert(arrElem->gtOper == GT_ARR_ELEM);
+ noway_assert(!tree || tree->gtOper == GT_IND || tree == arrElem);
+
+ /* Evaluate all the operands. We don't evaluate them into registers yet
+ as GT_ARR_ELEM does not reorder the evaluation of the operands, and
+ hence may use a sub-optimal ordering. We try to improve this
+ situation somewhat by accessing the operands in stages
+ (genMakeAddressable2 + genComputeAddressable and
+ genCompIntoFreeReg + genRecoverReg).
+
+ Note: we compute operands into free regs to avoid multiple uses of
+ the same register. Multi-use would cause problems when we free
+ registers in FIFO order instead of the assumed LIFO order that
+       applies to all types of tree nodes except for GT_ARR_ELEM.
+ */
+
+ GenTreePtr arrObj = arrElem->gtArrElem.gtArrObj;
+ unsigned rank = arrElem->gtArrElem.gtArrRank;
+ var_types elemType = arrElem->gtArrElem.gtArrElemType;
+ regMaskTP addrReg = RBM_NONE;
+ regMaskTP regNeed = RBM_ALLINT;
+
+#if FEATURE_WRITE_BARRIER && !NOGC_WRITE_BARRIERS
+ // In CodeGen::WriteBarrier we set up ARG_1 followed by ARG_0
+ // since the arrObj participates in the lea/add instruction
+ // that computes ARG_0 we should avoid putting it in ARG_1
+ //
+ if (varTypeIsGC(elemType))
+ {
+ regNeed &= ~RBM_ARG_1;
+ }
+#endif
+
+ // Strip off any comma expression.
+ arrObj = genCodeForCommaTree(arrObj);
+
+ // Having generated the code for the comma, we don't care about it anymore.
+ arrElem->gtArrElem.gtArrObj = arrObj;
+
+ // If the array ref is a stack var that's dying here we have to move it
+    // into a register (regalloc already counts on this), as if it's a GC pointer
+    // it can be collected from here on. This is not an issue for locals that are
+    // in a register, as they get marked as used and will be tracked.
+ // The bug that caused this is #100776. (untracked vars?)
+ if (arrObj->OperGet() == GT_LCL_VAR && compiler->optIsTrackedLocal(arrObj) && genIsLocalLastUse(arrObj) &&
+ !genMarkLclVar(arrObj))
+ {
+ genCodeForTree(arrObj, regNeed);
+ regSet.rsMarkRegUsed(arrObj, 0);
+ addrReg = genRegMask(arrObj->gtRegNum);
+ }
+ else
+ {
+ addrReg = genMakeAddressable2(arrObj, regNeed, RegSet::KEEP_REG,
+ true, // forLoadStore
+ false, // smallOK
+ false, // deferOK
+ true); // evalSideEffs
+ }
+
+ unsigned dim;
+ for (dim = 0; dim < rank; dim++)
+ genCompIntoFreeReg(arrElem->gtArrElem.gtArrInds[dim], RBM_NONE, RegSet::KEEP_REG);
+
+ /* Ensure that the array-object is in a register */
+
+ addrReg = genKeepAddressable(arrObj, addrReg);
+ genComputeAddressable(arrObj, addrReg, RegSet::KEEP_REG, regNeed, RegSet::KEEP_REG);
+
+ regNumber arrReg = arrObj->gtRegNum;
+ regMaskTP arrRegMask = genRegMask(arrReg);
+ regMaskTP indRegMask = RBM_ALLINT & ~arrRegMask;
+ regSet.rsLockUsedReg(arrRegMask);
+
+ /* Now process all the indices, do the range check, and compute
+ the offset of the element */
+
+ regNumber accReg = DUMMY_INIT(REG_CORRUPT); // accumulates the offset calculation
+
+ for (dim = 0; dim < rank; dim++)
+ {
+ GenTreePtr index = arrElem->gtArrElem.gtArrInds[dim];
+
+ /* Get the index into a free register (other than the register holding the array) */
+
+ genRecoverReg(index, indRegMask, RegSet::KEEP_REG);
+
+#if CPU_LOAD_STORE_ARCH
+ /* Subtract the lower bound, and do the range check */
+
+ regNumber valueReg = regSet.rsGrabReg(RBM_ALLINT & ~genRegMask(arrReg) & ~genRegMask(index->gtRegNum));
+ getEmitter()->emitIns_R_AR(INS_ldr, EA_4BYTE, valueReg, arrReg,
+ compiler->eeGetArrayDataOffset(elemType) + sizeof(int) * (dim + rank));
+ regTracker.rsTrackRegTrash(valueReg);
+ getEmitter()->emitIns_R_R(INS_sub, EA_4BYTE, index->gtRegNum, valueReg);
+ regTracker.rsTrackRegTrash(index->gtRegNum);
+
+ getEmitter()->emitIns_R_AR(INS_ldr, EA_4BYTE, valueReg, arrReg,
+ compiler->eeGetArrayDataOffset(elemType) + sizeof(int) * dim);
+ getEmitter()->emitIns_R_R(INS_cmp, EA_4BYTE, index->gtRegNum, valueReg);
+#else
+ /* Subtract the lower bound, and do the range check */
+ getEmitter()->emitIns_R_AR(INS_sub, EA_4BYTE, index->gtRegNum, arrReg,
+ compiler->eeGetArrayDataOffset(elemType) + sizeof(int) * (dim + rank));
+ regTracker.rsTrackRegTrash(index->gtRegNum);
+
+ getEmitter()->emitIns_R_AR(INS_cmp, EA_4BYTE, index->gtRegNum, arrReg,
+ compiler->eeGetArrayDataOffset(elemType) + sizeof(int) * dim);
+#endif
+ emitJumpKind jmpGEU = genJumpKindForOper(GT_GE, CK_UNSIGNED);
+ genJumpToThrowHlpBlk(jmpGEU, SCK_RNGCHK_FAIL);
+
+ if (dim == 0)
+ {
+ /* Hang on to the register of the first index */
+
+ noway_assert(accReg == DUMMY_INIT(REG_CORRUPT));
+ accReg = index->gtRegNum;
+ noway_assert(accReg != arrReg);
+ regSet.rsLockUsedReg(genRegMask(accReg));
+ }
+ else
+ {
+ /* Evaluate accReg = accReg*dim_size + index */
+
+ noway_assert(accReg != DUMMY_INIT(REG_CORRUPT));
+#if CPU_LOAD_STORE_ARCH
+ getEmitter()->emitIns_R_AR(INS_ldr, EA_4BYTE, valueReg, arrReg,
+ compiler->eeGetArrayDataOffset(elemType) + sizeof(int) * dim);
+ regTracker.rsTrackRegTrash(valueReg);
+ getEmitter()->emitIns_R_R(INS_MUL, EA_4BYTE, accReg, valueReg);
+#else
+ getEmitter()->emitIns_R_AR(INS_MUL, EA_4BYTE, accReg, arrReg,
+ compiler->eeGetArrayDataOffset(elemType) + sizeof(int) * dim);
+#endif
+
+ inst_RV_RV(INS_add, accReg, index->gtRegNum);
+ regSet.rsMarkRegFree(index->gtRegNum, index);
+ regTracker.rsTrackRegTrash(accReg);
+ }
+ }
+
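+    // If the element size is not one that an addressing mode can scale by, fold it
+    // into the accumulated offset with an explicit multiply.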
+ if (!jitIsScaleIndexMul(arrElem->gtArrElem.gtArrElemSize))
+ {
+ regNumber sizeReg = genGetRegSetToIcon(arrElem->gtArrElem.gtArrElemSize);
+
+ getEmitter()->emitIns_R_R(INS_MUL, EA_4BYTE, accReg, sizeReg);
+ regTracker.rsTrackRegTrash(accReg);
+ }
+
+ regSet.rsUnlockUsedReg(genRegMask(arrReg));
+ regSet.rsUnlockUsedReg(genRegMask(accReg));
+
+ regSet.rsMarkRegFree(genRegMask(arrReg));
+ regSet.rsMarkRegFree(genRegMask(accReg));
+
+ if (keepReg == RegSet::KEEP_REG)
+ {
+ /* We mark the addressability registers on arrObj and gtArrInds[0].
+ instGetAddrMode() knows to work with this. */
+
+ regSet.rsMarkRegUsed(arrObj, tree);
+ regSet.rsMarkRegUsed(arrElem->gtArrElem.gtArrInds[0], tree);
+ }
+
+ return genRegMask(arrReg) | genRegMask(accReg);
+}
+
+/*****************************************************************************
+ *
+ * Make sure the given tree is addressable. 'needReg' is a mask that indicates
+ * the set of registers we would prefer the destination tree to be computed
+ * into (RBM_NONE means no preference).
+ *
+ * 'tree' can subsequently be used with the inst_XX_TT() family of functions.
+ *
+ * If 'keepReg' is RegSet::KEEP_REG, we mark any registers the addressability depends
+ * on as used, and return the mask for that register set (if no registers
+ * are marked as used, RBM_NONE is returned).
+ *
+ *  If 'smallOK' is not true and the datatype being addressed is a byte or short,
+ * then the tree is forced into a register. This is useful when the machine
+ * instruction being emitted does not have a byte or short version.
+ *
+ * The "deferOK" parameter indicates the mode of operation - when it's false,
+ * upon returning an actual address mode must have been formed (i.e. it must
+ * be possible to immediately call one of the inst_TT methods to operate on
+ * the value). When "deferOK" is true, we do whatever it takes to be ready
+ * to form the address mode later - for example, if an index address mode on
+ * a particular CPU requires the use of a specific register, we usually don't
+ * want to immediately grab that register for an address mode that will only
+ * be needed later. The convention is to call genMakeAddressable() with
+ * "deferOK" equal to true, do whatever work is needed to prepare the other
+ * operand, call genMakeAddressable() with "deferOK" equal to false, and
+ * finally call one of the inst_TT methods right after that.
+ *
+ * If we do any other codegen after genMakeAddressable(tree) which can
+ * potentially spill the addressability registers, genKeepAddressable()
+ * needs to be called before accessing the tree again.
+ *
+ * genDoneAddressable() needs to be called when we are done with the tree
+ * to free the addressability registers.
+ */
+
+regMaskTP CodeGen::genMakeAddressable(
+ GenTreePtr tree, regMaskTP needReg, RegSet::KeepReg keepReg, bool smallOK, bool deferOK)
+{
+ GenTreePtr addr = NULL;
+ regMaskTP regMask;
+
+ /* Is the value simply sitting in a register? */
+
+ if (tree->gtFlags & GTF_REG_VAL)
+ {
+ genUpdateLife(tree);
+
+ goto GOT_VAL;
+ }
+
+ // TODO: If the value is for example a cast of float -> int, compute
+ // TODO: the converted value into a stack temp, and leave it there,
+ // TODO: since stack temps are always addressable. This would require
+ // TODO: recording the fact that a particular tree is in a stack temp.
+
+ /* byte/char/short operand -- is this acceptable to the caller? */
+
+ if (varTypeIsSmall(tree->TypeGet()) && !smallOK)
+ goto EVAL_TREE;
+
+ // Evaluate non-last elements of comma expressions, to get to the last.
+ tree = genCodeForCommaTree(tree);
+
+ switch (tree->gtOper)
+ {
+ case GT_LCL_FLD:
+
+ // We only use GT_LCL_FLD for lvDoNotEnregister vars, so we don't have
+ // to worry about it being enregistered.
+ noway_assert(compiler->lvaTable[tree->gtLclFld.gtLclNum].lvRegister == 0);
+
+ genUpdateLife(tree);
+ return 0;
+
+ case GT_LCL_VAR:
+
+ if (!genMarkLclVar(tree))
+ {
+ genUpdateLife(tree);
+ return 0;
+ }
+
+ __fallthrough; // it turns out the variable lives in a register
+
+ case GT_REG_VAR:
+
+ genUpdateLife(tree);
+
+ goto GOT_VAL;
+
+ case GT_CLS_VAR:
+
+ return 0;
+
+ case GT_CNS_INT:
+#ifdef _TARGET_64BIT_
+ // Non-relocs will be sign extended, so we don't have to enregister
+ // constants that are equivalent to a sign-extended int.
+ // Relocs can be left alone if they are RIP-relative.
+ if ((genTypeSize(tree->TypeGet()) > 4) &&
+ (!tree->IsIntCnsFitsInI32() ||
+ (tree->IsIconHandle() &&
+ (IMAGE_REL_BASED_REL32 != compiler->eeGetRelocTypeHint((void*)tree->gtIntCon.gtIconVal)))))
+ {
+ break;
+ }
+#endif // _TARGET_64BIT_
+ __fallthrough;
+
+ case GT_CNS_LNG:
+ case GT_CNS_DBL:
+            // When constant folding is disabled (e.g. for MinOpts), constants can
+            // show up in places we don't like, so force them into a register now
+            // to prevent that.
+ if (compiler->opts.OptEnabled(CLFLG_CONSTANTFOLD))
+ return 0;
+ break;
+
+ case GT_IND:
+ case GT_NULLCHECK:
+
+ /* Try to make the address directly addressable */
+
+ if (genMakeIndAddrMode(tree->gtOp.gtOp1, tree, false, /* not for LEA */
+ needReg, keepReg, &regMask, deferOK))
+ {
+ genUpdateLife(tree);
+ return regMask;
+ }
+
+ /* No good, we'll have to load the address into a register */
+
+ addr = tree;
+ tree = tree->gtOp.gtOp1;
+ break;
+
+ default:
+ break;
+ }
+
+EVAL_TREE:
+
+ /* Here we need to compute the value 'tree' into a register */
+
+ genCodeForTree(tree, needReg);
+
+GOT_VAL:
+
+ noway_assert(tree->gtFlags & GTF_REG_VAL);
+
+ if (isRegPairType(tree->gtType))
+ {
+ /* Are we supposed to hang on to the register? */
+
+ if (keepReg == RegSet::KEEP_REG)
+ regSet.rsMarkRegPairUsed(tree);
+
+ regMask = genRegPairMask(tree->gtRegPair);
+ }
+ else
+ {
+ /* Are we supposed to hang on to the register? */
+
+ if (keepReg == RegSet::KEEP_REG)
+ regSet.rsMarkRegUsed(tree, addr);
+
+ regMask = genRegMask(tree->gtRegNum);
+ }
+
+ return regMask;
+}
+
+/*****************************************************************************
+ * Compute a tree (which was previously made addressable using
+ * genMakeAddressable()) into a register.
+ *  addrReg - the register(s) currently forming the address (as returned by genMakeAddressable())
+ *  keptReg - whether those address registers were marked as used by genMakeAddressable()
+ *  needReg - mask of preferred registers.
+ *  keepReg - should the computed register be marked as used by the tree
+ *  freeOnly - target register needs to be a scratch register
+ */
+
+void CodeGen::genComputeAddressable(GenTreePtr tree,
+ regMaskTP addrReg,
+ RegSet::KeepReg keptReg,
+ regMaskTP needReg,
+ RegSet::KeepReg keepReg,
+ bool freeOnly)
+{
+ noway_assert(genStillAddressable(tree));
+ noway_assert(varTypeIsIntegralOrI(tree->TypeGet()));
+
+ genDoneAddressable(tree, addrReg, keptReg);
+
+ regNumber reg;
+
+ if (tree->gtFlags & GTF_REG_VAL)
+ {
+ reg = tree->gtRegNum;
+
+ if (freeOnly && !(genRegMask(reg) & regSet.rsRegMaskFree()))
+ goto MOVE_REG;
+ }
+ else
+ {
+ if (tree->OperIsConst())
+ {
+ /* Need to handle consts separately as we don't want to emit
+ "mov reg, 0" (emitter doesn't like that). Also, genSetRegToIcon()
+ handles consts better for SMALL_CODE */
+
+ noway_assert(tree->IsCnsIntOrI());
+ reg = genGetRegSetToIcon(tree->gtIntCon.gtIconVal, needReg, tree->gtType);
+ }
+ else
+ {
+ MOVE_REG:
+ reg = regSet.rsPickReg(needReg);
+
+ inst_RV_TT(INS_mov, reg, tree);
+ regTracker.rsTrackRegTrash(reg);
+ }
+ }
+
+ genMarkTreeInReg(tree, reg);
+
+ if (keepReg == RegSet::KEEP_REG)
+ regSet.rsMarkRegUsed(tree);
+ else
+ gcInfo.gcMarkRegPtrVal(tree);
+}
+
+/*****************************************************************************
+ *  Like genMakeAddressable(), but with more control: 'forLoadStore' and
+ *  'evalSideEffs' let the caller force the value to be evaluated into a
+ *  register (e.g. on load/store architectures, or when an exception-raising
+ *  indirection must actually be evaluated) instead of just made addressable.
+ */
+
+regMaskTP CodeGen::genMakeAddressable2(GenTreePtr tree,
+ regMaskTP needReg,
+ RegSet::KeepReg keepReg,
+ bool forLoadStore,
+ bool smallOK,
+ bool deferOK,
+ bool evalSideEffs)
+
+{
+ bool evalToReg = false;
+
+ if (evalSideEffs && (tree->gtOper == GT_IND) && (tree->gtFlags & GTF_EXCEPT))
+ evalToReg = true;
+
+#if CPU_LOAD_STORE_ARCH
+ if (!forLoadStore)
+ evalToReg = true;
+#endif
+
+ if (evalToReg)
+ {
+ genCodeForTree(tree, needReg);
+
+ noway_assert(tree->gtFlags & GTF_REG_VAL);
+
+ if (isRegPairType(tree->gtType))
+ {
+ /* Are we supposed to hang on to the register? */
+
+ if (keepReg == RegSet::KEEP_REG)
+ regSet.rsMarkRegPairUsed(tree);
+
+ return genRegPairMask(tree->gtRegPair);
+ }
+ else
+ {
+ /* Are we supposed to hang on to the register? */
+
+ if (keepReg == RegSet::KEEP_REG)
+ regSet.rsMarkRegUsed(tree);
+
+ return genRegMask(tree->gtRegNum);
+ }
+ }
+ else
+ {
+ return genMakeAddressable(tree, needReg, keepReg, smallOK, deferOK);
+ }
+}
+
+/*****************************************************************************
+ *
+ * The given tree was previously passed to genMakeAddressable(); return
+ * 'true' if the operand is still addressable.
+ */
+
+// inline
+bool CodeGen::genStillAddressable(GenTreePtr tree)
+{
+ /* Has the value (or one or more of its sub-operands) been spilled? */
+
+ if (tree->gtFlags & (GTF_SPILLED | GTF_SPILLED_OPER))
+ return false;
+
+ return true;
+}
+
+/*****************************************************************************
+ *
+ * Recursive helper to restore complex address modes. The 'lockPhase'
+ * argument indicates whether we're in the 'lock' or 'reload' phase.
+ */
+
+regMaskTP CodeGen::genRestoreAddrMode(GenTreePtr addr, GenTreePtr tree, bool lockPhase)
+{
+ regMaskTP regMask = RBM_NONE;
+
+ /* Have we found a spilled value? */
+
+ if (tree->gtFlags & GTF_SPILLED)
+ {
+ /* Do nothing if we're locking, otherwise reload and lock */
+
+ if (!lockPhase)
+ {
+ /* Unspill the register */
+
+ regSet.rsUnspillReg(tree, 0, RegSet::FREE_REG);
+
+ /* The value should now be sitting in a register */
+
+ noway_assert(tree->gtFlags & GTF_REG_VAL);
+ regMask = genRegMask(tree->gtRegNum);
+
+ /* Mark the register as used for the address */
+
+ regSet.rsMarkRegUsed(tree, addr);
+
+ /* Lock the register until we're done with the entire address */
+
+ regSet.rsMaskLock |= regMask;
+ }
+
+ return regMask;
+ }
+
+ /* Is this sub-tree sitting in a register? */
+
+ if (tree->gtFlags & GTF_REG_VAL)
+ {
+ regMask = genRegMask(tree->gtRegNum);
+
+ /* Lock the register if we're in the locking phase */
+
+ if (lockPhase)
+ regSet.rsMaskLock |= regMask;
+ }
+ else
+ {
+ /* Process any sub-operands of this node */
+
+ unsigned kind = tree->OperKind();
+
+ if (kind & GTK_SMPOP)
+ {
+ /* Unary/binary operator */
+
+ if (tree->gtOp.gtOp1)
+ regMask |= genRestoreAddrMode(addr, tree->gtOp.gtOp1, lockPhase);
+ if (tree->gtGetOp2())
+ regMask |= genRestoreAddrMode(addr, tree->gtOp.gtOp2, lockPhase);
+ }
+ else if (tree->gtOper == GT_ARR_ELEM)
+ {
+ /* gtArrObj is the array-object and gtArrInds[0] is marked with the register
+ which holds the offset-calculation */
+
+ regMask |= genRestoreAddrMode(addr, tree->gtArrElem.gtArrObj, lockPhase);
+ regMask |= genRestoreAddrMode(addr, tree->gtArrElem.gtArrInds[0], lockPhase);
+ }
+ else if (tree->gtOper == GT_CMPXCHG)
+ {
+ regMask |= genRestoreAddrMode(addr, tree->gtCmpXchg.gtOpLocation, lockPhase);
+ }
+ else
+ {
+ /* Must be a leaf/constant node */
+
+ noway_assert(kind & (GTK_LEAF | GTK_CONST));
+ }
+ }
+
+ return regMask;
+}
+
+/*****************************************************************************
+ *
+ * The given tree was previously passed to genMakeAddressable, but since then
+ * some of its registers are known to have been spilled; do whatever it takes
+ * to make the operand addressable again (typically by reloading any spilled
+ * registers).
+ */
+
+regMaskTP CodeGen::genRestAddressable(GenTreePtr tree, regMaskTP addrReg, regMaskTP lockMask)
+{
+ noway_assert((regSet.rsMaskLock & lockMask) == lockMask);
+
+ /* Is this a 'simple' register spill? */
+
+ if (tree->gtFlags & GTF_SPILLED)
+ {
+ /* The mask must match the original register/regpair */
+
+ if (isRegPairType(tree->gtType))
+ {
+ noway_assert(addrReg == genRegPairMask(tree->gtRegPair));
+
+ regSet.rsUnspillRegPair(tree, /* restore it anywhere */ RBM_NONE, RegSet::KEEP_REG);
+
+ addrReg = genRegPairMask(tree->gtRegPair);
+ }
+ else
+ {
+ noway_assert(addrReg == genRegMask(tree->gtRegNum));
+
+ regSet.rsUnspillReg(tree, /* restore it anywhere */ RBM_NONE, RegSet::KEEP_REG);
+
+ addrReg = genRegMask(tree->gtRegNum);
+ }
+
+ noway_assert((regSet.rsMaskLock & lockMask) == lockMask);
+ regSet.rsMaskLock -= lockMask;
+
+ return addrReg;
+ }
+
+ /* We have a complex address mode with some of its sub-operands spilled */
+
+ noway_assert((tree->gtFlags & GTF_REG_VAL) == 0);
+ noway_assert((tree->gtFlags & GTF_SPILLED_OPER) != 0);
+
+ /*
+ We'll proceed in several phases:
+
+ 1. Lock any registers that are part of the address mode and
+ have not been spilled. This prevents these registers from
+ getting spilled in step 2.
+
+ 2. Reload any registers that have been spilled; lock each
+ one right after it is reloaded.
+
+ 3. Unlock all the registers.
+ */
+
+ addrReg = genRestoreAddrMode(tree, tree, true);
+ addrReg |= genRestoreAddrMode(tree, tree, false);
+
+ /* Unlock all registers that the address mode uses */
+
+ lockMask |= addrReg;
+
+ noway_assert((regSet.rsMaskLock & lockMask) == lockMask);
+ regSet.rsMaskLock -= lockMask;
+
+ return addrReg;
+}
+
+/*****************************************************************************
+ *
+ * The given tree was previously passed to genMakeAddressable, but since then
+ * some of its registers might have been spilled ('addrReg' is the set of
+ * registers used by the address). This function makes sure the operand is
+ * still addressable (while avoiding any of the registers in 'avoidMask'),
+ * and returns the (possibly modified) set of registers that are used by
+ * the address (these will be marked as used on exit).
+ */
+
+regMaskTP CodeGen::genKeepAddressable(GenTreePtr tree, regMaskTP addrReg, regMaskTP avoidMask)
+{
+ /* Is the operand still addressable? */
+
+ tree = tree->gtEffectiveVal(/*commaOnly*/ true); // Strip off commas for this purpose.
+
+ if (!genStillAddressable(tree))
+ {
+ if (avoidMask)
+ {
+ // Temporarily lock 'avoidMask' while we restore addressability
+ // genRestAddressable will unlock the 'avoidMask' for us
+ // avoidMask must already be marked as a used reg in regSet.rsMaskUsed
+            // In regSet.rsRegMaskFree() we require that all locked registers be marked as used
+ //
+ regSet.rsLockUsedReg(avoidMask);
+ }
+
+ addrReg = genRestAddressable(tree, addrReg, avoidMask);
+
+ noway_assert((regSet.rsMaskLock & avoidMask) == 0);
+ }
+
+ return addrReg;
+}
+
+/*****************************************************************************
+ *
+ * After we're finished with the given operand (which was previously marked
+ * by calling genMakeAddressable), this function must be called to free any
+ * registers that may have been used by the address.
+ * keptReg indicates if the addressability registers were marked as used
+ * by genMakeAddressable().
+ */
+
+void CodeGen::genDoneAddressable(GenTreePtr tree, regMaskTP addrReg, RegSet::KeepReg keptReg)
+{
+ if (keptReg == RegSet::FREE_REG)
+ {
+ // We exclude regSet.rsMaskUsed since the registers may be multi-used.
+ // ie. There may be a pending use in a higher-up tree.
+
+ addrReg &= ~regSet.rsMaskUsed;
+
+ /* addrReg was not marked as used. So just reset its GC info */
+ if (addrReg)
+ {
+ gcInfo.gcMarkRegSetNpt(addrReg);
+ }
+ }
+ else
+ {
+ /* addrReg was marked as used. So we need to free it up (which
+ will also reset its GC info) */
+
+ regSet.rsMarkRegFree(addrReg);
+ }
+}
+
+/*****************************************************************************/
+/*****************************************************************************
+ *
+ * Make sure the given floating point value is addressable, and return a tree
+ * that will yield the value as an addressing mode (this tree may differ from
+ * the one passed in, BTW). If the only way to make the value addressable is
+ * to evaluate into the FP stack, we do this and return zero.
+ */
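+
+// Caller-side sketch of the contract above (illustrative only; the names
+// 'srcTree' and 'addrMask' are hypothetical):
+//
+//     regMaskTP  addrMask;
+//     GenTreePtr addr = genMakeAddrOrFPstk(srcTree, &addrMask, false);
+//     if (addr != NULL)
+//     {
+//         /* 'addr' can be used as a memory operand for the FP instruction */
+//     }
+//     else
+//     {
+//         /* the value was computed onto the x87 FP stack instead */
+//     }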
+
+GenTreePtr CodeGen::genMakeAddrOrFPstk(GenTreePtr tree, regMaskTP* regMaskPtr, bool roundResult)
+{
+ *regMaskPtr = 0;
+
+ switch (tree->gtOper)
+ {
+ case GT_LCL_VAR:
+ case GT_LCL_FLD:
+ case GT_CLS_VAR:
+ return tree;
+
+ case GT_CNS_DBL:
+ if (tree->gtType == TYP_FLOAT)
+ {
+ float f = forceCastToFloat(tree->gtDblCon.gtDconVal);
+ return genMakeConst(&f, TYP_FLOAT, tree, false);
+ }
+ return genMakeConst(&tree->gtDblCon.gtDconVal, tree->gtType, tree, true);
+
+ case GT_IND:
+ case GT_NULLCHECK:
+
+ /* Try to make the address directly addressable */
+
+ if (genMakeIndAddrMode(tree->gtOp.gtOp1, tree, false, /* not for LEA */
+ 0, RegSet::FREE_REG, regMaskPtr, false))
+ {
+ genUpdateLife(tree);
+ return tree;
+ }
+
+ break;
+
+ default:
+ break;
+ }
+#if FEATURE_STACK_FP_X87
+ /* We have no choice but to compute the value 'tree' onto the FP stack */
+
+ genCodeForTreeFlt(tree);
+#endif
+ return 0;
+}
+
+/*****************************************************************************/
+/*****************************************************************************
+ *
+ * Display a string literal value (debug only).
+ */
+
+#ifdef DEBUG
+#endif
+
+/*****************************************************************************
+ *
+ * Generate code to check that the GS cookie wasn't thrashed by a buffer
+ * overrun. If pushReg is true, preserve all registers around code sequence.
+ * Otherwise, ECX may be modified.
+ *
+ * TODO-ARM-Bug?: pushReg is not implemented (is it needed for ARM?)
+ */
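+//
+// A rough sketch of the sequence emitted for the x86 JIT (non-AOT) case below,
+// assuming the cookie lives in a frame slot (the offset and label name are
+// illustrative only):
+//
+//     cmp   dword ptr [ebp-XX], <gsGlobalSecurityCookieVal>
+//     je    L_cookieOk
+//     call  CORINFO_HELP_FAIL_FAST
+//   L_cookieOk:
+//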
+void CodeGen::genEmitGSCookieCheck(bool pushReg)
+{
+ // Make sure that EAX didn't die in the return expression
+ if (!pushReg && (compiler->info.compRetType == TYP_REF))
+ gcInfo.gcRegGCrefSetCur |= RBM_INTRET;
+
+ // Add cookie check code for unsafe buffers
+ BasicBlock* gsCheckBlk;
+ regMaskTP byrefPushedRegs = RBM_NONE;
+ regMaskTP norefPushedRegs = RBM_NONE;
+ regMaskTP pushedRegs = RBM_NONE;
+
+ noway_assert(compiler->gsGlobalSecurityCookieAddr || compiler->gsGlobalSecurityCookieVal);
+
+ if (compiler->gsGlobalSecurityCookieAddr == NULL)
+ {
+ // JIT case
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#if CPU_LOAD_STORE_ARCH
+
+ regNumber reg = regSet.rsGrabReg(RBM_ALLINT);
+ getEmitter()->emitIns_R_S(ins_Load(TYP_INT), EA_4BYTE, reg, compiler->lvaGSSecurityCookie, 0);
+ regTracker.rsTrackRegTrash(reg);
+
+ if (arm_Valid_Imm_For_Alu(compiler->gsGlobalSecurityCookieVal) ||
+ arm_Valid_Imm_For_Alu(~compiler->gsGlobalSecurityCookieVal))
+ {
+ getEmitter()->emitIns_R_I(INS_cmp, EA_4BYTE, reg, compiler->gsGlobalSecurityCookieVal);
+ }
+ else
+ {
+ // Load CookieVal into a register
+ regNumber immReg = regSet.rsGrabReg(RBM_ALLINT & ~genRegMask(reg));
+ instGen_Set_Reg_To_Imm(EA_4BYTE, immReg, compiler->gsGlobalSecurityCookieVal);
+ getEmitter()->emitIns_R_R(INS_cmp, EA_4BYTE, reg, immReg);
+ }
+#else
+ getEmitter()->emitIns_S_I(INS_cmp, EA_PTRSIZE, compiler->lvaGSSecurityCookie, 0,
+ (int)compiler->gsGlobalSecurityCookieVal);
+#endif
+ }
+ else
+ {
+ regNumber regGSCheck;
+ regMaskTP regMaskGSCheck;
+#if CPU_LOAD_STORE_ARCH
+ regGSCheck = regSet.rsGrabReg(RBM_ALLINT);
+ regMaskGSCheck = genRegMask(regGSCheck);
+#else
+ // Don't pick the 'this' register
+ if (compiler->lvaKeepAliveAndReportThis() && compiler->lvaTable[compiler->info.compThisArg].lvRegister &&
+ (compiler->lvaTable[compiler->info.compThisArg].lvRegNum == REG_ECX))
+ {
+ regGSCheck = REG_EDX;
+ regMaskGSCheck = RBM_EDX;
+ }
+ else
+ {
+ regGSCheck = REG_ECX;
+ regMaskGSCheck = RBM_ECX;
+ }
+
+ // NGen case
+ if (pushReg && (regMaskGSCheck & (regSet.rsMaskUsed | regSet.rsMaskVars | regSet.rsMaskLock)))
+ {
+ pushedRegs = genPushRegs(regMaskGSCheck, &byrefPushedRegs, &norefPushedRegs);
+ }
+ else
+ {
+ noway_assert((regMaskGSCheck & (regSet.rsMaskUsed | regSet.rsMaskVars | regSet.rsMaskLock)) == 0);
+ }
+#endif
+#if defined(_TARGET_ARM_)
+ instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, regGSCheck, (ssize_t)compiler->gsGlobalSecurityCookieAddr);
+ getEmitter()->emitIns_R_R_I(INS_ldr, EA_PTRSIZE, regGSCheck, regGSCheck, 0);
+#else
+ getEmitter()->emitIns_R_C(ins_Load(TYP_I_IMPL), EA_PTR_DSP_RELOC, regGSCheck, FLD_GLOBAL_DS,
+ (ssize_t)compiler->gsGlobalSecurityCookieAddr);
+#endif // !_TARGET_ARM_
+ regTracker.rsTrashRegSet(regMaskGSCheck);
+#ifdef _TARGET_ARM_
+ regNumber regTmp = regSet.rsGrabReg(RBM_ALLINT & ~genRegMask(regGSCheck));
+ getEmitter()->emitIns_R_S(INS_ldr, EA_PTRSIZE, regTmp, compiler->lvaGSSecurityCookie, 0);
+ regTracker.rsTrackRegTrash(regTmp);
+ getEmitter()->emitIns_R_R(INS_cmp, EA_PTRSIZE, regTmp, regGSCheck);
+#else
+ getEmitter()->emitIns_S_R(INS_cmp, EA_PTRSIZE, regGSCheck, compiler->lvaGSSecurityCookie, 0);
+#endif
+ }
+
+ gsCheckBlk = genCreateTempLabel();
+ emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED);
+ inst_JMP(jmpEqual, gsCheckBlk);
+ genEmitHelperCall(CORINFO_HELP_FAIL_FAST, 0, EA_UNKNOWN);
+ genDefineTempLabel(gsCheckBlk);
+
+ genPopRegs(pushedRegs, byrefPushedRegs, norefPushedRegs);
+}
+
+/*****************************************************************************
+ *
+ * Generate any side effects within the given expression tree.
+ */
+
+void CodeGen::genEvalSideEffects(GenTreePtr tree)
+{
+ genTreeOps oper;
+ unsigned kind;
+
+AGAIN:
+
+ /* Does this sub-tree contain any side-effects? */
+ if (tree->gtFlags & GTF_SIDE_EFFECT)
+ {
+#if FEATURE_STACK_FP_X87
+ /* Remember the current FP stack level */
+ int iTemps = genNumberTemps();
+#endif
+ if (tree->OperIsIndir())
+ {
+ regMaskTP addrReg = genMakeAddressable(tree, RBM_ALLINT, RegSet::KEEP_REG, true, false);
+
+ if (tree->gtFlags & GTF_REG_VAL)
+ {
+ gcInfo.gcMarkRegPtrVal(tree);
+ genDoneAddressable(tree, addrReg, RegSet::KEEP_REG);
+ }
+            // GTF_IND_ARR_INDEX trees have already de-referenced the pointer, and so
+            // do not need an additional null-check
+            /* Do this only if the GTF_EXCEPT or GTF_IND_VOLATILE flag is set on the indir */
+            else if (((tree->gtFlags & GTF_IND_ARR_INDEX) == 0) &&
+                     ((tree->gtFlags & (GTF_EXCEPT | GTF_IND_VOLATILE)) != 0))
+ {
+ /* Compare against any register to do null-check */
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#if defined(_TARGET_XARCH_)
+ inst_TT_RV(INS_cmp, tree, REG_TMP_0, 0, EA_1BYTE);
+ genDoneAddressable(tree, addrReg, RegSet::KEEP_REG);
+#elif CPU_LOAD_STORE_ARCH
+ if (varTypeIsFloating(tree->TypeGet()))
+ {
+ genComputeAddressableFloat(tree, addrReg, RBM_NONE, RegSet::KEEP_REG, RBM_ALLFLOAT,
+ RegSet::FREE_REG);
+ }
+ else
+ {
+ genComputeAddressable(tree, addrReg, RegSet::KEEP_REG, RBM_NONE, RegSet::FREE_REG);
+ }
+#ifdef _TARGET_ARM_
+ if (tree->gtFlags & GTF_IND_VOLATILE)
+ {
+ // Emit a memory barrier instruction after the load
+ instGen_MemoryBarrier();
+ }
+#endif
+#else
+ NYI("TARGET");
+#endif
+ }
+ else
+ {
+ genDoneAddressable(tree, addrReg, RegSet::KEEP_REG);
+ }
+ }
+ else
+ {
+ /* Generate the expression and throw it away */
+ genCodeForTree(tree, RBM_ALL(tree->TypeGet()));
+ if (tree->gtFlags & GTF_REG_VAL)
+ {
+ gcInfo.gcMarkRegPtrVal(tree);
+ }
+ }
+#if FEATURE_STACK_FP_X87
+ /* If the tree computed a value on the FP stack, pop the stack */
+ if (genNumberTemps() > iTemps)
+ {
+ noway_assert(genNumberTemps() == iTemps + 1);
+ genDiscardStackFP(tree);
+ }
+#endif
+ return;
+ }
+
+ noway_assert(tree->gtOper != GT_ASG);
+
+ /* Walk the tree, just to mark any dead values appropriately */
+
+ oper = tree->OperGet();
+ kind = tree->OperKind();
+
+ /* Is this a constant or leaf node? */
+
+ if (kind & (GTK_CONST | GTK_LEAF))
+ {
+#if FEATURE_STACK_FP_X87
+ if (tree->IsRegVar() && isFloatRegType(tree->gtType) && tree->IsRegVarDeath())
+ {
+ genRegVarDeathStackFP(tree);
+ FlatFPX87_Unload(&compCurFPState, tree->gtRegNum);
+ }
+#endif
+ genUpdateLife(tree);
+ gcInfo.gcMarkRegPtrVal(tree);
+ return;
+ }
+
+ /* Must be a 'simple' unary/binary operator */
+
+ noway_assert(kind & GTK_SMPOP);
+
+ if (tree->gtGetOp2())
+ {
+ genEvalSideEffects(tree->gtOp.gtOp1);
+
+ tree = tree->gtOp.gtOp2;
+ goto AGAIN;
+ }
+ else
+ {
+ tree = tree->gtOp.gtOp1;
+ if (tree)
+ goto AGAIN;
+ }
+}
+
+/*****************************************************************************
+ *
+ * A persistent pointer value is being overwritten, record it for the GC.
+ *
+ * tgt : the destination being written to
+ * assignVal : the value being assigned (the source). It must currently be in a register.
+ * tgtAddrReg : the set of registers being used by "tgt"
+ *
+ * Returns : the mask of the scratch register that was used.
+ * RBM_NONE if a write-barrier is not needed.
+ */
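+
+// Caller-side sketch of the contract above (illustrative; the surrounding
+// store logic is simplified and the names are hypothetical):
+//
+//     regMaskTP wbUsedRegs = WriteBarrier(tgt, value, tgtAddrReg);
+//     if (wbUsedRegs == RBM_NONE)
+//     {
+//         /* no barrier was required; emit the plain store of 'value' into 'tgt' */
+//     }
+//     /* otherwise the barrier helper call emitted by WriteBarrier performed the store */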
+
+regMaskTP CodeGen::WriteBarrier(GenTreePtr tgt, GenTreePtr assignVal, regMaskTP tgtAddrReg)
+{
+ noway_assert(assignVal->gtFlags & GTF_REG_VAL);
+
+ GCInfo::WriteBarrierForm wbf = gcInfo.gcIsWriteBarrierCandidate(tgt, assignVal);
+ if (wbf == GCInfo::WBF_NoBarrier)
+ return RBM_NONE;
+
+ regMaskTP resultRegMask = RBM_NONE;
+
+#if FEATURE_WRITE_BARRIER
+
+ regNumber reg = assignVal->gtRegNum;
+
+#if defined(_TARGET_X86_) && NOGC_WRITE_BARRIERS
+#ifdef DEBUG
+ if (wbf != GCInfo::WBF_NoBarrier_CheckNotHeapInDebug) // This one is always a call to a C++ method.
+ {
+#endif
+ const static int regToHelper[2][8] = {
+ // If the target is known to be in managed memory
+ {
+ CORINFO_HELP_ASSIGN_REF_EAX, CORINFO_HELP_ASSIGN_REF_ECX, -1, CORINFO_HELP_ASSIGN_REF_EBX, -1,
+ CORINFO_HELP_ASSIGN_REF_EBP, CORINFO_HELP_ASSIGN_REF_ESI, CORINFO_HELP_ASSIGN_REF_EDI,
+ },
+
+ // Don't know if the target is in managed memory
+ {
+ CORINFO_HELP_CHECKED_ASSIGN_REF_EAX, CORINFO_HELP_CHECKED_ASSIGN_REF_ECX, -1,
+ CORINFO_HELP_CHECKED_ASSIGN_REF_EBX, -1, CORINFO_HELP_CHECKED_ASSIGN_REF_EBP,
+ CORINFO_HELP_CHECKED_ASSIGN_REF_ESI, CORINFO_HELP_CHECKED_ASSIGN_REF_EDI,
+ },
+ };
+
+ noway_assert(regToHelper[0][REG_EAX] == CORINFO_HELP_ASSIGN_REF_EAX);
+ noway_assert(regToHelper[0][REG_ECX] == CORINFO_HELP_ASSIGN_REF_ECX);
+ noway_assert(regToHelper[0][REG_EBX] == CORINFO_HELP_ASSIGN_REF_EBX);
+ noway_assert(regToHelper[0][REG_ESP] == -1);
+ noway_assert(regToHelper[0][REG_EBP] == CORINFO_HELP_ASSIGN_REF_EBP);
+ noway_assert(regToHelper[0][REG_ESI] == CORINFO_HELP_ASSIGN_REF_ESI);
+ noway_assert(regToHelper[0][REG_EDI] == CORINFO_HELP_ASSIGN_REF_EDI);
+
+ noway_assert(regToHelper[1][REG_EAX] == CORINFO_HELP_CHECKED_ASSIGN_REF_EAX);
+ noway_assert(regToHelper[1][REG_ECX] == CORINFO_HELP_CHECKED_ASSIGN_REF_ECX);
+ noway_assert(regToHelper[1][REG_EBX] == CORINFO_HELP_CHECKED_ASSIGN_REF_EBX);
+ noway_assert(regToHelper[1][REG_ESP] == -1);
+ noway_assert(regToHelper[1][REG_EBP] == CORINFO_HELP_CHECKED_ASSIGN_REF_EBP);
+ noway_assert(regToHelper[1][REG_ESI] == CORINFO_HELP_CHECKED_ASSIGN_REF_ESI);
+ noway_assert(regToHelper[1][REG_EDI] == CORINFO_HELP_CHECKED_ASSIGN_REF_EDI);
+
+ noway_assert((reg != REG_ESP) && (reg != REG_WRITE_BARRIER));
+
+ /*
+ Generate the following code:
+
+ lea edx, tgt
+ call write_barrier_helper_reg
+
+ First grab the RBM_WRITE_BARRIER register for the target address.
+ */
+
+ regNumber rg1;
+ bool trashOp1;
+
+ if ((tgtAddrReg & RBM_WRITE_BARRIER) == 0)
+ {
+ rg1 = regSet.rsGrabReg(RBM_WRITE_BARRIER);
+
+ regSet.rsMaskUsed |= RBM_WRITE_BARRIER;
+ regSet.rsMaskLock |= RBM_WRITE_BARRIER;
+
+ trashOp1 = false;
+ }
+ else
+ {
+ rg1 = REG_WRITE_BARRIER;
+
+ trashOp1 = true;
+ }
+
+ noway_assert(rg1 == REG_WRITE_BARRIER);
+
+ /* Generate "lea EDX, [addr-mode]" */
+
+ noway_assert(tgt->gtType == TYP_REF);
+ tgt->gtType = TYP_BYREF;
+ inst_RV_TT(INS_lea, rg1, tgt, 0, EA_BYREF);
+
+ /* Free up anything that was tied up by the LHS */
+ genDoneAddressable(tgt, tgtAddrReg, RegSet::KEEP_REG);
+
+ // In case "tgt" was a comma:
+ tgt = tgt->gtEffectiveVal();
+
+ regTracker.rsTrackRegTrash(rg1);
+ gcInfo.gcMarkRegSetNpt(genRegMask(rg1));
+ gcInfo.gcMarkRegPtrVal(rg1, TYP_BYREF);
+
+ /* Call the proper vm helper */
+
+ // enforced by gcIsWriteBarrierCandidate
+ noway_assert(tgt->gtOper == GT_IND || tgt->gtOper == GT_CLS_VAR);
+
+ unsigned tgtAnywhere = 0;
+ if ((tgt->gtOper == GT_IND) &&
+ ((tgt->gtFlags & GTF_IND_TGTANYWHERE) || (tgt->gtOp.gtOp1->TypeGet() == TYP_I_IMPL)))
+ {
+ tgtAnywhere = 1;
+ }
+
+ int helper = regToHelper[tgtAnywhere][reg];
+ resultRegMask = genRegMask(reg);
+
+ gcInfo.gcMarkRegSetNpt(RBM_WRITE_BARRIER); // byref EDX is killed in the call
+
+ genEmitHelperCall(helper,
+ 0, // argSize
+ EA_PTRSIZE); // retSize
+
+ if (!trashOp1)
+ {
+ regSet.rsMaskUsed &= ~RBM_WRITE_BARRIER;
+ regSet.rsMaskLock &= ~RBM_WRITE_BARRIER;
+ }
+
+ return resultRegMask;
+
+#ifdef DEBUG
+ }
+ else
+#endif
+#endif // defined(_TARGET_X86_) && NOGC_WRITE_BARRIERS
+
+#if defined(DEBUG) || !(defined(_TARGET_X86_) && NOGC_WRITE_BARRIERS)
+ {
+ /*
+ Generate the following code (or its equivalent on the given target):
+
+ mov arg1, srcReg
+ lea arg0, tgt
+ call write_barrier_helper
+
+ First, setup REG_ARG_1 with the GC ref that we are storing via the Write Barrier
+ */
+
+ if (reg != REG_ARG_1)
+ {
+ // We may need to spill whatever is in the ARG_1 register
+ //
+ if ((regSet.rsMaskUsed & RBM_ARG_1) != 0)
+ {
+ regSet.rsSpillReg(REG_ARG_1);
+ }
+
+ inst_RV_RV(INS_mov, REG_ARG_1, reg, TYP_REF);
+ }
+ resultRegMask = RBM_ARG_1;
+
+ regTracker.rsTrackRegTrash(REG_ARG_1);
+ gcInfo.gcMarkRegSetNpt(REG_ARG_1);
+ gcInfo.gcMarkRegSetGCref(RBM_ARG_1); // gcref in ARG_1
+
+ bool free_arg1 = false;
+ if ((regSet.rsMaskUsed & RBM_ARG_1) == 0)
+ {
+ regSet.rsMaskUsed |= RBM_ARG_1;
+ free_arg1 = true;
+ }
+
+ // Then we setup REG_ARG_0 with the target address to store into via the Write Barrier
+
+ /* Generate "lea R0, [addr-mode]" */
+
+ noway_assert(tgt->gtType == TYP_REF);
+ tgt->gtType = TYP_BYREF;
+
+ tgtAddrReg = genKeepAddressable(tgt, tgtAddrReg);
+
+ // We may need to spill whatever is in the ARG_0 register
+ //
+ if (((tgtAddrReg & RBM_ARG_0) == 0) && // tgtAddrReg does not contain REG_ARG_0
+ ((regSet.rsMaskUsed & RBM_ARG_0) != 0) && // and regSet.rsMaskUsed contains REG_ARG_0
+ (reg != REG_ARG_0)) // unless REG_ARG_0 contains the REF value being written, which we're finished with.
+ {
+ regSet.rsSpillReg(REG_ARG_0);
+ }
+
+ inst_RV_TT(INS_lea, REG_ARG_0, tgt, 0, EA_BYREF);
+
+ /* Free up anything that was tied up by the LHS */
+ genDoneAddressable(tgt, tgtAddrReg, RegSet::KEEP_REG);
+
+ regTracker.rsTrackRegTrash(REG_ARG_0);
+ gcInfo.gcMarkRegSetNpt(REG_ARG_0);
+ gcInfo.gcMarkRegSetByref(RBM_ARG_0); // byref in ARG_0
+
+#ifdef _TARGET_ARM_
+#if NOGC_WRITE_BARRIERS
+ // Finally, we may be required to spill whatever is in the further argument registers
+ // trashed by the call. The write barrier trashes some further registers --
+ // either the standard volatile var set, or, if we're using assembly barriers, a more specialized set.
+
+ regMaskTP volatileRegsTrashed = RBM_CALLEE_TRASH_NOGC;
+#else
+ regMaskTP volatileRegsTrashed = RBM_CALLEE_TRASH;
+#endif
+ // Spill any other registers trashed by the write barrier call and currently in use.
+ regMaskTP mustSpill = (volatileRegsTrashed & regSet.rsMaskUsed & ~(RBM_ARG_0 | RBM_ARG_1));
+ if (mustSpill)
+ regSet.rsSpillRegs(mustSpill);
+#endif // _TARGET_ARM_
+
+ bool free_arg0 = false;
+ if ((regSet.rsMaskUsed & RBM_ARG_0) == 0)
+ {
+ regSet.rsMaskUsed |= RBM_ARG_0;
+ free_arg0 = true;
+ }
+
+ // genEmitHelperCall might need to grab a register
+ // so don't let it spill one of the arguments
+ //
+ regMaskTP reallyUsedRegs = RBM_NONE;
+ regSet.rsLockReg(RBM_ARG_0 | RBM_ARG_1, &reallyUsedRegs);
+
+ genGCWriteBarrier(tgt, wbf);
+
+ regSet.rsUnlockReg(RBM_ARG_0 | RBM_ARG_1, reallyUsedRegs);
+ gcInfo.gcMarkRegSetNpt(RBM_ARG_0 | RBM_ARG_1); // byref ARG_0 and reg ARG_1 are killed by the call
+
+ if (free_arg0)
+ {
+ regSet.rsMaskUsed &= ~RBM_ARG_0;
+ }
+ if (free_arg1)
+ {
+ regSet.rsMaskUsed &= ~RBM_ARG_1;
+ }
+
+ return resultRegMask;
+ }
+#endif // defined(DEBUG) || !(defined(_TARGET_X86_) && NOGC_WRITE_BARRIERS)
+
+#else // !FEATURE_WRITE_BARRIER
+
+ NYI("FEATURE_WRITE_BARRIER unimplemented");
+ return resultRegMask;
+
+#endif // !FEATURE_WRITE_BARRIER
+}
+
+#ifdef _TARGET_X86_
+/*****************************************************************************
+ *
+ * Generate the appropriate conditional jump(s) right after the high 32 bits
+ * of two long values have been compared.
+ */
+
+void CodeGen::genJccLongHi(genTreeOps cmp, BasicBlock* jumpTrue, BasicBlock* jumpFalse, bool isUnsigned)
+{
+ if (cmp != GT_NE)
+ {
+ jumpFalse->bbFlags |= BBF_JMP_TARGET | BBF_HAS_LABEL;
+ }
+
+ switch (cmp)
+ {
+ case GT_EQ:
+ inst_JMP(EJ_jne, jumpFalse);
+ break;
+
+ case GT_NE:
+ inst_JMP(EJ_jne, jumpTrue);
+ break;
+
+ case GT_LT:
+ case GT_LE:
+ if (isUnsigned)
+ {
+ inst_JMP(EJ_ja, jumpFalse);
+ inst_JMP(EJ_jb, jumpTrue);
+ }
+ else
+ {
+ inst_JMP(EJ_jg, jumpFalse);
+ inst_JMP(EJ_jl, jumpTrue);
+ }
+ break;
+
+ case GT_GE:
+ case GT_GT:
+ if (isUnsigned)
+ {
+ inst_JMP(EJ_jb, jumpFalse);
+ inst_JMP(EJ_ja, jumpTrue);
+ }
+ else
+ {
+ inst_JMP(EJ_jl, jumpFalse);
+ inst_JMP(EJ_jg, jumpTrue);
+ }
+ break;
+
+ default:
+ noway_assert(!"expected a comparison operator");
+ }
+}
+
+/*****************************************************************************
+ *
+ * Generate the appropriate conditional jump(s) right after the low 32 bits
+ * of two long values have been compared.
+ */
+
+void CodeGen::genJccLongLo(genTreeOps cmp, BasicBlock* jumpTrue, BasicBlock* jumpFalse)
+{
+ switch (cmp)
+ {
+ case GT_EQ:
+ inst_JMP(EJ_je, jumpTrue);
+ break;
+
+ case GT_NE:
+ inst_JMP(EJ_jne, jumpTrue);
+ break;
+
+ case GT_LT:
+ inst_JMP(EJ_jb, jumpTrue);
+ break;
+
+ case GT_LE:
+ inst_JMP(EJ_jbe, jumpTrue);
+ break;
+
+ case GT_GE:
+ inst_JMP(EJ_jae, jumpTrue);
+ break;
+
+ case GT_GT:
+ inst_JMP(EJ_ja, jumpTrue);
+ break;
+
+ default:
+ noway_assert(!"expected comparison");
+ }
+}
+#elif defined(_TARGET_ARM_)
+/*****************************************************************************
+*
+* Generate the appropriate conditional jump(s) right after the high 32 bits
+* of two long values have been compared.
+*/
+
+void CodeGen::genJccLongHi(genTreeOps cmp, BasicBlock* jumpTrue, BasicBlock* jumpFalse, bool isUnsigned)
+{
+ if (cmp != GT_NE)
+ {
+ jumpFalse->bbFlags |= BBF_JMP_TARGET | BBF_HAS_LABEL;
+ }
+
+ switch (cmp)
+ {
+ case GT_EQ:
+ inst_JMP(EJ_ne, jumpFalse);
+ break;
+
+ case GT_NE:
+ inst_JMP(EJ_ne, jumpTrue);
+ break;
+
+ case GT_LT:
+ case GT_LE:
+ if (isUnsigned)
+ {
+ inst_JMP(EJ_hi, jumpFalse);
+ inst_JMP(EJ_lo, jumpTrue);
+ }
+ else
+ {
+ inst_JMP(EJ_gt, jumpFalse);
+ inst_JMP(EJ_lt, jumpTrue);
+ }
+ break;
+
+ case GT_GE:
+ case GT_GT:
+ if (isUnsigned)
+ {
+ inst_JMP(EJ_lo, jumpFalse);
+ inst_JMP(EJ_hi, jumpTrue);
+ }
+ else
+ {
+ inst_JMP(EJ_lt, jumpFalse);
+ inst_JMP(EJ_gt, jumpTrue);
+ }
+ break;
+
+ default:
+ noway_assert(!"expected a comparison operator");
+ }
+}
+
+/*****************************************************************************
+*
+* Generate the appropriate conditional jump(s) right after the low 32 bits
+* of two long values have been compared.
+*/
+
+void CodeGen::genJccLongLo(genTreeOps cmp, BasicBlock* jumpTrue, BasicBlock* jumpFalse)
+{
+ switch (cmp)
+ {
+ case GT_EQ:
+ inst_JMP(EJ_eq, jumpTrue);
+ break;
+
+ case GT_NE:
+ inst_JMP(EJ_ne, jumpTrue);
+ break;
+
+ case GT_LT:
+ inst_JMP(EJ_lo, jumpTrue);
+ break;
+
+ case GT_LE:
+ inst_JMP(EJ_ls, jumpTrue);
+ break;
+
+ case GT_GE:
+ inst_JMP(EJ_hs, jumpTrue);
+ break;
+
+ case GT_GT:
+ inst_JMP(EJ_hi, jumpTrue);
+ break;
+
+ default:
+ noway_assert(!"expected comparison");
+ }
+}
+#endif
+/*****************************************************************************
+ *
+ * Called by genCondJump() for TYP_LONG.
+ */
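+
+// For illustration, a signed 'op1 < op2' TYP_LONG comparison on x86 is lowered
+// (via genJccLongHi/genJccLongLo above) into a sequence along these lines, where
+// op1.hi/op1.lo etc. stand for the high/low 32-bit halves:
+//
+//     cmp  op1.hi, op2.hi
+//     jg   jumpFalse
+//     jl   jumpTrue
+//     cmp  op1.lo, op2.lo
+//     jb   jumpTrue
+//     ; fall through to jumpFalse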
+
+void CodeGen::genCondJumpLng(GenTreePtr cond, BasicBlock* jumpTrue, BasicBlock* jumpFalse, bool bFPTransition)
+{
+ noway_assert(jumpTrue && jumpFalse);
+ noway_assert((cond->gtFlags & GTF_REVERSE_OPS) == false); // Done in genCondJump()
+ noway_assert(cond->gtOp.gtOp1->gtType == TYP_LONG);
+
+ GenTreePtr op1 = cond->gtOp.gtOp1;
+ GenTreePtr op2 = cond->gtOp.gtOp2;
+ genTreeOps cmp = cond->OperGet();
+
+ regMaskTP addrReg;
+
+ /* Are we comparing against a constant? */
+
+ if (op2->gtOper == GT_CNS_LNG)
+ {
+ __int64 lval = op2->gtLngCon.gtLconVal;
+ regNumber rTmp;
+
+ // We're "done" evaluating op2; let's strip any commas off op1 before we
+ // evaluate it.
+ op1 = genCodeForCommaTree(op1);
+
+ /* We can generate better code for some special cases */
+ instruction ins = INS_invalid;
+ bool useIncToSetFlags = false;
+ bool specialCaseCmp = false;
+
+ if (cmp == GT_EQ)
+ {
+ if (lval == 0)
+ {
+ /* op1 == 0 */
+ ins = INS_OR;
+ useIncToSetFlags = false;
+ specialCaseCmp = true;
+ }
+ else if (lval == -1)
+ {
+ /* op1 == -1 */
+ ins = INS_AND;
+ useIncToSetFlags = true;
+ specialCaseCmp = true;
+ }
+ }
+ else if (cmp == GT_NE)
+ {
+ if (lval == 0)
+ {
+ /* op1 != 0 */
+ ins = INS_OR;
+ useIncToSetFlags = false;
+ specialCaseCmp = true;
+ }
+ else if (lval == -1)
+ {
+ /* op1 != -1 */
+ ins = INS_AND;
+ useIncToSetFlags = true;
+ specialCaseCmp = true;
+ }
+ }
+
+ if (specialCaseCmp)
+ {
+ /* Make the comparand addressable */
+
+ addrReg = genMakeRvalueAddressable(op1, 0, RegSet::KEEP_REG, false, true);
+
+ regMaskTP tmpMask = regSet.rsRegMaskCanGrab();
+ insFlags flags = useIncToSetFlags ? INS_FLAGS_DONT_CARE : INS_FLAGS_SET;
+
+ if (op1->gtFlags & GTF_REG_VAL)
+ {
+ regPairNo regPair = op1->gtRegPair;
+ regNumber rLo = genRegPairLo(regPair);
+ regNumber rHi = genRegPairHi(regPair);
+ if (tmpMask & genRegMask(rLo))
+ {
+ rTmp = rLo;
+ }
+ else if (tmpMask & genRegMask(rHi))
+ {
+ rTmp = rHi;
+ rHi = rLo;
+ }
+ else
+ {
+ rTmp = regSet.rsGrabReg(tmpMask);
+ inst_RV_RV(INS_mov, rTmp, rLo, TYP_INT);
+ }
+
+ /* The register is now trashed */
+ regTracker.rsTrackRegTrash(rTmp);
+
+ if (rHi != REG_STK)
+ {
+ /* Set the flags using INS_AND | INS_OR */
+ inst_RV_RV(ins, rTmp, rHi, TYP_INT, EA_4BYTE, flags);
+ }
+ else
+ {
+ /* Set the flags using INS_AND | INS_OR */
+ inst_RV_TT(ins, rTmp, op1, 4, EA_4BYTE, flags);
+ }
+ }
+ else // op1 is not GTF_REG_VAL
+ {
+ rTmp = regSet.rsGrabReg(tmpMask);
+
+ /* Load the low 32-bits of op1 */
+ inst_RV_TT(ins_Load(TYP_INT), rTmp, op1, 0);
+
+ /* The register is now trashed */
+ regTracker.rsTrackRegTrash(rTmp);
+
+ /* Set the flags using INS_AND | INS_OR */
+ inst_RV_TT(ins, rTmp, op1, 4, EA_4BYTE, flags);
+ }
+
+ /* Free up the addrReg(s) if any */
+ genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);
+
+            /* Comparing against -1 also requires an inc instruction */
+ if (useIncToSetFlags)
+ {
+ /* Make sure the inc will set the flags */
+ assert(cond->gtSetFlags());
+ genIncRegBy(rTmp, 1, cond, TYP_INT);
+ }
+
+#if FEATURE_STACK_FP_X87
+ // We may need a transition block
+ if (bFPTransition)
+ {
+ jumpTrue = genTransitionBlockStackFP(&compCurFPState, compiler->compCurBB, jumpTrue);
+ }
+#endif
+ emitJumpKind jmpKind = genJumpKindForOper(cmp, CK_SIGNED);
+ inst_JMP(jmpKind, jumpTrue);
+ }
+ else // specialCaseCmp == false
+ {
+ /* Make the comparand addressable */
+ addrReg = genMakeRvalueAddressable(op1, 0, RegSet::FREE_REG, false, true);
+
+ /* Compare the high part first */
+
+ int ival = (int)(lval >> 32);
+
+ /* Comparing a register against 0 is easier */
+
+ if (!ival && (op1->gtFlags & GTF_REG_VAL) && (rTmp = genRegPairHi(op1->gtRegPair)) != REG_STK)
+ {
+ /* Generate 'test rTmp, rTmp' */
+ instGen_Compare_Reg_To_Zero(emitTypeSize(op1->TypeGet()), rTmp); // set flags
+ }
+ else
+ {
+ if (!(op1->gtFlags & GTF_REG_VAL) && (op1->gtOper == GT_CNS_LNG))
+ {
+ /* Special case: comparison of two constants */
+ // Needed as gtFoldExpr() doesn't fold longs
+
+ noway_assert(addrReg == 0);
+ int op1_hiword = (int)(op1->gtLngCon.gtLconVal >> 32);
+
+ /* Get the constant operand into a register */
+ rTmp = genGetRegSetToIcon(op1_hiword);
+
+ /* Generate 'cmp rTmp, ival' */
+
+ inst_RV_IV(INS_cmp, rTmp, ival, EA_4BYTE);
+ }
+ else
+ {
+ /* Generate 'cmp op1, ival' */
+
+ inst_TT_IV(INS_cmp, op1, ival, 4);
+ }
+ }
+
+#if FEATURE_STACK_FP_X87
+ // We may need a transition block
+ if (bFPTransition)
+ {
+ jumpTrue = genTransitionBlockStackFP(&compCurFPState, compiler->compCurBB, jumpTrue);
+ }
+#endif
+ /* Generate the appropriate jumps */
+
+ if (cond->gtFlags & GTF_UNSIGNED)
+ genJccLongHi(cmp, jumpTrue, jumpFalse, true);
+ else
+ genJccLongHi(cmp, jumpTrue, jumpFalse);
+
+ /* Compare the low part second */
+
+ ival = (int)lval;
+
+ /* Comparing a register against 0 is easier */
+
+ if (!ival && (op1->gtFlags & GTF_REG_VAL) && (rTmp = genRegPairLo(op1->gtRegPair)) != REG_STK)
+ {
+ /* Generate 'test rTmp, rTmp' */
+ instGen_Compare_Reg_To_Zero(emitTypeSize(op1->TypeGet()), rTmp); // set flags
+ }
+ else
+ {
+ if (!(op1->gtFlags & GTF_REG_VAL) && (op1->gtOper == GT_CNS_LNG))
+ {
+ /* Special case: comparison of two constants */
+ // Needed as gtFoldExpr() doesn't fold longs
+
+ noway_assert(addrReg == 0);
+ int op1_loword = (int)op1->gtLngCon.gtLconVal;
+
+ /* get the constant operand into a register */
+ rTmp = genGetRegSetToIcon(op1_loword);
+
+ /* Generate 'cmp rTmp, ival' */
+
+ inst_RV_IV(INS_cmp, rTmp, ival, EA_4BYTE);
+ }
+ else
+ {
+ /* Generate 'cmp op1, ival' */
+
+ inst_TT_IV(INS_cmp, op1, ival, 0);
+ }
+ }
+
+ /* Generate the appropriate jumps */
+ genJccLongLo(cmp, jumpTrue, jumpFalse);
+
+ genDoneAddressable(op1, addrReg, RegSet::FREE_REG);
+ }
+ }
+ else // (op2->gtOper != GT_CNS_LNG)
+ {
+
+ /* The operands would be reversed by physically swapping them */
+
+ noway_assert((cond->gtFlags & GTF_REVERSE_OPS) == 0);
+
+ /* Generate the first operand into a register pair */
+
+ genComputeRegPair(op1, REG_PAIR_NONE, op2->gtRsvdRegs, RegSet::KEEP_REG, false);
+ noway_assert(op1->gtFlags & GTF_REG_VAL);
+
+#if CPU_LOAD_STORE_ARCH
+ /* Generate the second operand into a register pair */
+ // Fix 388442 ARM JitStress WP7
+ genComputeRegPair(op2, REG_PAIR_NONE, genRegPairMask(op1->gtRegPair), RegSet::KEEP_REG, false);
+ noway_assert(op2->gtFlags & GTF_REG_VAL);
+ regSet.rsLockUsedReg(genRegPairMask(op2->gtRegPair));
+#else
+ /* Make the second operand addressable */
+
+ addrReg = genMakeRvalueAddressable(op2, RBM_ALLINT & ~genRegPairMask(op1->gtRegPair), RegSet::KEEP_REG, false);
+#endif
+ /* Make sure the first operand hasn't been spilled */
+
+ genRecoverRegPair(op1, REG_PAIR_NONE, RegSet::KEEP_REG);
+ noway_assert(op1->gtFlags & GTF_REG_VAL);
+
+ regPairNo regPair = op1->gtRegPair;
+
+#if !CPU_LOAD_STORE_ARCH
+ /* Make sure 'op2' is still addressable while avoiding 'op1' (regPair) */
+
+ addrReg = genKeepAddressable(op2, addrReg, genRegPairMask(regPair));
+#endif
+
+#if FEATURE_STACK_FP_X87
+ // We may need a transition block
+ if (bFPTransition)
+ {
+ jumpTrue = genTransitionBlockStackFP(&compCurFPState, compiler->compCurBB, jumpTrue);
+ }
+#endif
+
+ /* Perform the comparison - high parts */
+
+ inst_RV_TT(INS_cmp, genRegPairHi(regPair), op2, 4);
+
+ if (cond->gtFlags & GTF_UNSIGNED)
+ genJccLongHi(cmp, jumpTrue, jumpFalse, true);
+ else
+ genJccLongHi(cmp, jumpTrue, jumpFalse);
+
+ /* Compare the low parts */
+
+ inst_RV_TT(INS_cmp, genRegPairLo(regPair), op2, 0);
+ genJccLongLo(cmp, jumpTrue, jumpFalse);
+
+ /* Free up anything that was tied up by either operand */
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#if CPU_LOAD_STORE_ARCH
+
+ // Fix 388442 ARM JitStress WP7
+ regSet.rsUnlockUsedReg(genRegPairMask(op2->gtRegPair));
+ genReleaseRegPair(op2);
+#else
+ genDoneAddressable(op2, addrReg, RegSet::KEEP_REG);
+#endif
+ genReleaseRegPair(op1);
+ }
+}
+
+/*****************************************************************************
+ * gen_fcomp_FN, gen_fcomp_FS_TT, gen_fcompp_FS
+ * Called by genCondJumpFlt() to generate the fcomp instruction appropriate
+ * to the architecture we're running on.
+ *
+ * P5:
+ * gen_fcomp_FN: fcomp ST(0), stk
+ * gen_fcomp_FS_TT: fcomp ST(0), addr
+ * gen_fcompp_FS: fcompp
+ * These are followed by fnstsw, sahf to get the flags in EFLAGS.
+ *
+ * P6:
+ * gen_fcomp_FN: fcomip ST(0), stk
+ * gen_fcomp_FS_TT: fld addr, fcomip ST(0), ST(1), fstp ST(0)
+ * (and reverse the branch condition since addr comes first)
+ * gen_fcompp_FS: fcomip, fstp
+ * These instructions will correctly set the EFLAGS register.
+ *
+ * Return value: These functions return true if the instruction has
+ * already placed its result in the EFLAGS register.
+ */
+
+bool CodeGen::genUse_fcomip()
+{
+ return compiler->opts.compUseFCOMI;
+}
+
+/*****************************************************************************
+ *
+ * Sets the flag for the TYP_INT/TYP_REF comparison.
+ * We try to use the flags if they have already been set by a prior
+ * instruction.
+ * e.g. i++; if(i<0) {}  Here, the "i++;" will have set the sign flag. We don't
+ * need to compare again with zero; just use an "INS_js".
+ *
+ * Returns the flags the following jump/set instruction should use.
+ */
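+
+// For illustration, the "i++; if (i < 0)" case mentioned above can be emitted
+// without an explicit compare (the register name is hypothetical):
+//
+//     inc  eax          ; the inc already sets SF/ZF
+//     js   L_negative   ; so a separate 'cmp eax, 0' is not needed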
+
+emitJumpKind CodeGen::genCondSetFlags(GenTreePtr cond)
+{
+ noway_assert(cond->OperIsCompare());
+ noway_assert(varTypeIsI(genActualType(cond->gtOp.gtOp1->gtType)));
+
+ GenTreePtr op1 = cond->gtOp.gtOp1;
+ GenTreePtr op2 = cond->gtOp.gtOp2;
+ genTreeOps cmp = cond->OperGet();
+
+ if (cond->gtFlags & GTF_REVERSE_OPS)
+ {
+ /* Don't forget to modify the condition as well */
+
+ cond->gtOp.gtOp1 = op2;
+ cond->gtOp.gtOp2 = op1;
+ cond->SetOper(GenTree::SwapRelop(cmp));
+ cond->gtFlags &= ~GTF_REVERSE_OPS;
+
+ /* Get hold of the new values */
+
+ cmp = cond->OperGet();
+ op1 = cond->gtOp.gtOp1;
+ op2 = cond->gtOp.gtOp2;
+ }
+
+ // Note that op1's type may get bashed. So save it early
+
+ var_types op1Type = op1->TypeGet();
+ bool unsignedCmp = (cond->gtFlags & GTF_UNSIGNED) != 0;
+ emitAttr size = EA_UNKNOWN;
+
+ regMaskTP regNeed;
+ regMaskTP addrReg1 = RBM_NONE;
+ regMaskTP addrReg2 = RBM_NONE;
+ emitJumpKind jumpKind = EJ_COUNT; // Initialize with an invalid value
+
+ bool byteCmp;
+ bool shortCmp;
+
+ regMaskTP newLiveMask;
+ regNumber op1Reg;
+
+ /* Are we comparing against a constant? */
+
+ if (op2->IsCnsIntOrI())
+ {
+ ssize_t ival = op2->gtIntConCommon.IconValue();
+
+ /* unsigned less than comparisons with 1 ('< 1' )
+ should be transformed into '== 0' to potentially
+ suppress a tst instruction.
+ */
+ if ((ival == 1) && (cmp == GT_LT) && unsignedCmp)
+ {
+ op2->gtIntCon.gtIconVal = ival = 0;
+ cond->gtOper = cmp = GT_EQ;
+ }
+
+ /* Comparisons against 0 can be easier */
+
+ if (ival == 0)
+ {
+ // if we can safely change the comparison to unsigned we do so
+ if (!unsignedCmp && varTypeIsSmall(op1->TypeGet()) && varTypeIsUnsigned(op1->TypeGet()))
+ {
+ unsignedCmp = true;
+ }
+
+ /* unsigned comparisons with 0 should be transformed into
+ '==0' or '!= 0' to potentially suppress a tst instruction. */
+
+ if (unsignedCmp)
+ {
+ if (cmp == GT_GT)
+ cond->gtOper = cmp = GT_NE;
+ else if (cmp == GT_LE)
+ cond->gtOper = cmp = GT_EQ;
+ }
+
+ /* Is this a simple zero/non-zero test? */
+
+ if (cmp == GT_EQ || cmp == GT_NE)
+ {
+ /* Is the operand an "AND" operation? */
+
+ if (op1->gtOper == GT_AND)
+ {
+ GenTreePtr an1 = op1->gtOp.gtOp1;
+ GenTreePtr an2 = op1->gtOp.gtOp2;
+
+ /* Check for the case "expr & icon" */
+
+ if (an2->IsIntCnsFitsInI32())
+ {
+ int iVal = (int)an2->gtIntCon.gtIconVal;
+
+ /* make sure that constant is not out of an1's range */
+
+ switch (an1->gtType)
+ {
+ case TYP_BOOL:
+ case TYP_BYTE:
+ if (iVal & 0xffffff00)
+ goto NO_TEST_FOR_AND;
+ break;
+ case TYP_CHAR:
+ case TYP_SHORT:
+ if (iVal & 0xffff0000)
+ goto NO_TEST_FOR_AND;
+ break;
+ default:
+ break;
+ }
+
+ if (an1->IsCnsIntOrI())
+ {
+ // Special case - Both operands of AND are consts
+ genComputeReg(an1, 0, RegSet::EXACT_REG, RegSet::KEEP_REG);
+ addrReg1 = genRegMask(an1->gtRegNum);
+ }
+ else
+ {
+ addrReg1 = genMakeAddressable(an1, RBM_NONE, RegSet::KEEP_REG, true);
+ }
+#if CPU_LOAD_STORE_ARCH
+ if ((an1->gtFlags & GTF_REG_VAL) == 0)
+ {
+ genComputeAddressable(an1, addrReg1, RegSet::KEEP_REG, RBM_NONE, RegSet::KEEP_REG);
+ if (arm_Valid_Imm_For_Alu(iVal))
+ {
+ inst_RV_IV(INS_TEST, an1->gtRegNum, iVal, emitActualTypeSize(an1->gtType));
+ }
+ else
+ {
+ regNumber regTmp = regSet.rsPickFreeReg();
+ instGen_Set_Reg_To_Imm(EmitSize(an2), regTmp, iVal);
+ inst_RV_RV(INS_TEST, an1->gtRegNum, regTmp);
+ }
+ genReleaseReg(an1);
+ addrReg1 = RBM_NONE;
+ }
+ else
+#endif
+ {
+#ifdef _TARGET_XARCH_
+ // Check to see if we can use a smaller immediate.
+ if ((an1->gtFlags & GTF_REG_VAL) && ((iVal & 0x0000FFFF) == iVal))
+ {
+ var_types testType =
+ (var_types)(((iVal & 0x000000FF) == iVal) ? TYP_UBYTE : TYP_USHORT);
+#if CPU_HAS_BYTE_REGS
+ // if we don't have byte-able register, switch to the 2-byte form
+ if ((testType == TYP_UBYTE) && !(genRegMask(an1->gtRegNum) & RBM_BYTE_REGS))
+ {
+ testType = TYP_USHORT;
+ }
+#endif // CPU_HAS_BYTE_REGS
+
+ inst_TT_IV(INS_TEST, an1, iVal, testType);
+ }
+ else
+#endif // _TARGET_XARCH_
+ {
+ inst_TT_IV(INS_TEST, an1, iVal);
+ }
+ }
+
+ goto DONE;
+
+ NO_TEST_FOR_AND:;
+ }
+
+ // TODO: Check for other cases that can generate 'test',
+ // TODO: also check for a 64-bit integer zero test which
+ // TODO: could generate 'or lo, hi' followed by jz/jnz.
+ }
+ }
+
+ // See what Jcc instruction we would use if we can take advantage of
+ // the knowledge of EFLAGs.
+
+ if (unsignedCmp)
+ {
+ /*
+ Unsigned comparison to 0. Using this table:
+
+ ----------------------------------------------------
+ | Comparison | Flags Checked | Instruction Used |
+ ----------------------------------------------------
+ | == 0 | ZF = 1 | je |
+ ----------------------------------------------------
+ | != 0 | ZF = 0 | jne |
+ ----------------------------------------------------
+ | < 0 | always FALSE | N/A |
+ ----------------------------------------------------
+ | <= 0 | ZF = 1 | je |
+ ----------------------------------------------------
+ | >= 0 | always TRUE | N/A |
+ ----------------------------------------------------
+ | > 0 | ZF = 0 | jne |
+ ----------------------------------------------------
+ */
+ switch (cmp)
+ {
+#ifdef _TARGET_ARM_
+ case GT_EQ:
+ jumpKind = EJ_eq;
+ break;
+ case GT_NE:
+ jumpKind = EJ_ne;
+ break;
+ case GT_LT:
+ jumpKind = EJ_NONE;
+ break;
+ case GT_LE:
+ jumpKind = EJ_eq;
+ break;
+ case GT_GE:
+ jumpKind = EJ_NONE;
+ break;
+ case GT_GT:
+ jumpKind = EJ_ne;
+ break;
+#elif defined(_TARGET_X86_)
+ case GT_EQ:
+ jumpKind = EJ_je;
+ break;
+ case GT_NE:
+ jumpKind = EJ_jne;
+ break;
+ case GT_LT:
+ jumpKind = EJ_NONE;
+ break;
+ case GT_LE:
+ jumpKind = EJ_je;
+ break;
+ case GT_GE:
+ jumpKind = EJ_NONE;
+ break;
+ case GT_GT:
+ jumpKind = EJ_jne;
+ break;
+#endif // TARGET
+ default:
+ noway_assert(!"Unexpected comparison OpCode");
+ break;
+ }
+ }
+ else
+ {
+ /*
+ Signed comparison to 0. Using this table:
+
+ -----------------------------------------------------
+ | Comparison | Flags Checked | Instruction Used |
+ -----------------------------------------------------
+ | == 0 | ZF = 1 | je |
+ -----------------------------------------------------
+ | != 0 | ZF = 0 | jne |
+ -----------------------------------------------------
+ | < 0 | SF = 1 | js |
+ -----------------------------------------------------
+ | <= 0 | N/A | N/A |
+ -----------------------------------------------------
+ | >= 0 | SF = 0 | jns |
+ -----------------------------------------------------
+ | > 0 | N/A | N/A |
+ -----------------------------------------------------
+ */
+
+ switch (cmp)
+ {
+#ifdef _TARGET_ARM_
+ case GT_EQ:
+ jumpKind = EJ_eq;
+ break;
+ case GT_NE:
+ jumpKind = EJ_ne;
+ break;
+ case GT_LT:
+ jumpKind = EJ_mi;
+ break;
+ case GT_LE:
+ jumpKind = EJ_NONE;
+ break;
+ case GT_GE:
+ jumpKind = EJ_pl;
+ break;
+ case GT_GT:
+ jumpKind = EJ_NONE;
+ break;
+#elif defined(_TARGET_X86_)
+ case GT_EQ:
+ jumpKind = EJ_je;
+ break;
+ case GT_NE:
+ jumpKind = EJ_jne;
+ break;
+ case GT_LT:
+ jumpKind = EJ_js;
+ break;
+ case GT_LE:
+ jumpKind = EJ_NONE;
+ break;
+ case GT_GE:
+ jumpKind = EJ_jns;
+ break;
+ case GT_GT:
+ jumpKind = EJ_NONE;
+ break;
+#endif // TARGET
+ default:
+ noway_assert(!"Unexpected comparison OpCode");
+ break;
+ }
+ assert(jumpKind == genJumpKindForOper(cmp, CK_LOGICAL));
+ }
+ assert(jumpKind != EJ_COUNT); // Ensure that it was assigned a valid value above
+
+ /* Is the value a simple local variable? */
+
+ if (op1->gtOper == GT_LCL_VAR)
+ {
+ /* Is the flags register set to the value? */
+
+ if (genFlagsAreVar(op1->gtLclVarCommon.gtLclNum))
+ {
+ if (jumpKind != EJ_NONE)
+ {
+ addrReg1 = RBM_NONE;
+ genUpdateLife(op1);
+ goto DONE_FLAGS;
+ }
+ }
+ }
+
+ /* Make the comparand addressable */
+ addrReg1 = genMakeRvalueAddressable(op1, RBM_NONE, RegSet::KEEP_REG, false, true);
+
+ /* Are the condition flags set based on the value? */
+
+ unsigned flags = (op1->gtFlags & GTF_ZSF_SET);
+
+ if (op1->gtFlags & GTF_REG_VAL)
+ {
+ if (genFlagsAreReg(op1->gtRegNum))
+ {
+ flags |= GTF_ZSF_SET;
+ }
+ }
+
+ if (flags)
+ {
+ if (jumpKind != EJ_NONE)
+ {
+ goto DONE_FLAGS;
+ }
+ }
+
+ /* Is the value in a register? */
+
+ if (op1->gtFlags & GTF_REG_VAL)
+ {
+ regNumber reg = op1->gtRegNum;
+
+ /* With a 'test' we can do any signed test or any test for equality */
+
+ if (!(cond->gtFlags & GTF_UNSIGNED) || cmp == GT_EQ || cmp == GT_NE)
+ {
+ emitAttr compareSize = emitTypeSize(op1->TypeGet());
+
+                // If we have a GT_REG_VAR then the register will be properly sign/zero extended,
+                // but only up to 4 bytes
+ if ((op1->gtOper == GT_REG_VAR) && (compareSize < EA_4BYTE))
+ {
+ compareSize = EA_4BYTE;
+ }
+
+#if CPU_HAS_BYTE_REGS
+ // Make sure if we require a byte compare that we have a byte-able register
+ if ((compareSize != EA_1BYTE) || ((genRegMask(op1->gtRegNum) & RBM_BYTE_REGS) != 0))
+#endif // CPU_HAS_BYTE_REGS
+ {
+ /* Generate 'test reg, reg' */
+ instGen_Compare_Reg_To_Zero(compareSize, reg);
+ goto DONE;
+ }
+ }
+ }
+ }
+
+ else // if (ival != 0)
+ {
+ bool smallOk = true;
+
+        /* Make sure that the constant is not out of op1's range.
+           If it is, we need to perform an int-with-int comparison,
+           so we set smallOk to false and op1 gets loaded into a
+           register.
+        */
+
+ /* If op1 is TYP_SHORT, and is followed by an unsigned
+ * comparison, we can use smallOk. But we don't know which
+ * flags will be needed. This probably doesn't happen often.
+ */
+ var_types gtType = op1->TypeGet();
+
+ switch (gtType)
+ {
+ case TYP_BYTE:
+ if (ival != (signed char)ival)
+ smallOk = false;
+ break;
+ case TYP_BOOL:
+ case TYP_UBYTE:
+ if (ival != (unsigned char)ival)
+ smallOk = false;
+ break;
+
+ case TYP_SHORT:
+ if (ival != (signed short)ival)
+ smallOk = false;
+ break;
+ case TYP_CHAR:
+ if (ival != (unsigned short)ival)
+ smallOk = false;
+ break;
+
+#ifdef _TARGET_64BIT_
+ case TYP_INT:
+ if (!FitsIn<INT32>(ival))
+ smallOk = false;
+ break;
+ case TYP_UINT:
+ if (!FitsIn<UINT32>(ival))
+ smallOk = false;
+ break;
+#endif // _TARGET_64BIT_
+
+ default:
+ break;
+ }
+
+ if (smallOk && // constant is in op1's range
+ !unsignedCmp && // signed comparison
+ varTypeIsSmall(gtType) && // smalltype var
+ varTypeIsUnsigned(gtType)) // unsigned type
+ {
+ unsignedCmp = true;
+ }
+
+ /* Make the comparand addressable */
+ addrReg1 = genMakeRvalueAddressable(op1, RBM_NONE, RegSet::KEEP_REG, false, smallOk);
+ }
+
+ // #if defined(DEBUGGING_SUPPORT)
+
+ /* Special case: comparison of two constants */
+
+ // Needed if Importer doesn't call gtFoldExpr()
+
+ if (!(op1->gtFlags & GTF_REG_VAL) && (op1->IsCnsIntOrI()))
+ {
+ // noway_assert(compiler->opts.MinOpts() || compiler->opts.compDbgCode);
+
+ /* Workaround: get the constant operand into a register */
+ genComputeReg(op1, RBM_NONE, RegSet::ANY_REG, RegSet::KEEP_REG);
+
+ noway_assert(addrReg1 == RBM_NONE);
+ noway_assert(op1->gtFlags & GTF_REG_VAL);
+
+ addrReg1 = genRegMask(op1->gtRegNum);
+ }
+
+ // #endif
+
+ /* Compare the operand against the constant */
+
+ if (op2->IsIconHandle())
+ {
+ inst_TT_IV(INS_cmp, op1, ival, 0, EA_HANDLE_CNS_RELOC);
+ }
+ else
+ {
+ inst_TT_IV(INS_cmp, op1, ival);
+ }
+ goto DONE;
+ }
+
+ //---------------------------------------------------------------------
+ //
+ // We reach here if op2 was not a GT_CNS_INT
+ //
+
+ byteCmp = false;
+ shortCmp = false;
+
+ if (op1Type == op2->gtType)
+ {
+ shortCmp = varTypeIsShort(op1Type);
+ byteCmp = varTypeIsByte(op1Type);
+ }
+
+ noway_assert(op1->gtOper != GT_CNS_INT);
+
+ if (op2->gtOper == GT_LCL_VAR)
+ genMarkLclVar(op2);
+
+ assert(((addrReg1 | addrReg2) & regSet.rsMaskUsed) == (addrReg1 | addrReg2));
+ assert(((addrReg1 & addrReg2) & regSet.rsMaskMult) == (addrReg1 & addrReg2));
+
+ /* Are we comparing against a register? */
+
+ if (op2->gtFlags & GTF_REG_VAL)
+ {
+ /* Make the comparands addressable and mark as used */
+
+ assert(addrReg1 == RBM_NONE);
+ addrReg1 = genMakeAddressable2(op1, RBM_NONE, RegSet::KEEP_REG, false, true);
+
+ /* Is the size of the comparison byte/char/short ? */
+
+ if (varTypeIsSmall(op1->TypeGet()))
+ {
+ /* Is op2 sitting in an appropriate register? */
+
+ if (varTypeIsByte(op1->TypeGet()) && !isByteReg(op2->gtRegNum))
+ goto NO_SMALL_CMP;
+
+ /* Is op2 of the right type for a small comparison */
+
+ if (op2->gtOper == GT_REG_VAR)
+ {
+ if (op1->gtType != compiler->lvaGetRealType(op2->gtRegVar.gtLclNum))
+ goto NO_SMALL_CMP;
+ }
+ else
+ {
+ if (op1->gtType != op2->gtType)
+ goto NO_SMALL_CMP;
+ }
+
+ if (varTypeIsUnsigned(op1->TypeGet()))
+ unsignedCmp = true;
+ }
+
+ assert(addrReg2 == RBM_NONE);
+
+ genComputeReg(op2, RBM_NONE, RegSet::ANY_REG, RegSet::KEEP_REG);
+ addrReg2 = genRegMask(op2->gtRegNum);
+ addrReg1 = genKeepAddressable(op1, addrReg1, addrReg2);
+ assert(((addrReg1 | addrReg2) & regSet.rsMaskUsed) == (addrReg1 | addrReg2));
+ assert(((addrReg1 & addrReg2) & regSet.rsMaskMult) == (addrReg1 & addrReg2));
+
+ /* Compare against the register */
+
+ inst_TT_RV(INS_cmp, op1, op2->gtRegNum);
+
+ goto DONE;
+
+ NO_SMALL_CMP:
+
+ // op1 has been made addressable and is marked as in use
+ // op2 is un-generated
+ assert(addrReg2 == 0);
+
+ if ((op1->gtFlags & GTF_REG_VAL) == 0)
+ {
+ regNumber reg1 = regSet.rsPickReg();
+
+ noway_assert(varTypeIsSmall(op1->TypeGet()));
+ instruction ins = ins_Move_Extend(op1->TypeGet(), (op1->gtFlags & GTF_REG_VAL) != 0);
+
+ // regSet.rsPickReg can cause one of the trees within this address mode to get spilled
+ // so we need to make sure it is still valid. Note that at this point, reg1 is
+ // *not* marked as in use, and it is possible for it to be used in the address
+        // mode expression, but that is OK, because we are done with the expression after
+        // this. We only need reg1.
+ addrReg1 = genKeepAddressable(op1, addrReg1);
+ inst_RV_TT(ins, reg1, op1);
+ regTracker.rsTrackRegTrash(reg1);
+
+ genDoneAddressable(op1, addrReg1, RegSet::KEEP_REG);
+ addrReg1 = 0;
+
+ genMarkTreeInReg(op1, reg1);
+
+ regSet.rsMarkRegUsed(op1);
+ addrReg1 = genRegMask(op1->gtRegNum);
+ }
+
+ assert(((addrReg1 | addrReg2) & regSet.rsMaskUsed) == (addrReg1 | addrReg2));
+ assert(((addrReg1 & addrReg2) & regSet.rsMaskMult) == (addrReg1 & addrReg2));
+
+ goto DONE_OP1;
+ }
+
+ // We come here if op2 is not enregistered or not in a "good" register.
+
+ assert(addrReg1 == 0);
+
+ // Determine what registers go live between op1 and op2
+ newLiveMask = genNewLiveRegMask(op1, op2);
+
+ // Setup regNeed with the set of register that we suggest for op1 to be in
+ //
+ regNeed = RBM_ALLINT;
+
+ // avoid selecting registers that get newly born in op2
+ regNeed = regSet.rsNarrowHint(regNeed, ~newLiveMask);
+
+ // avoid selecting op2 reserved regs
+ regNeed = regSet.rsNarrowHint(regNeed, ~op2->gtRsvdRegs);
+
+#if CPU_HAS_BYTE_REGS
+ // if necessary setup regNeed to select just the byte-able registers
+ if (byteCmp)
+ regNeed = regSet.rsNarrowHint(RBM_BYTE_REGS, regNeed);
+#endif // CPU_HAS_BYTE_REGS
+
+ // Compute the first comparand into some register, regNeed here is simply a hint because RegSet::ANY_REG is used.
+ //
+ genComputeReg(op1, regNeed, RegSet::ANY_REG, RegSet::FREE_REG);
+ noway_assert(op1->gtFlags & GTF_REG_VAL);
+
+ op1Reg = op1->gtRegNum;
+
+ // Setup regNeed with the set of register that we require for op1 to be in
+ //
+ regNeed = RBM_ALLINT;
+
+#if CPU_HAS_BYTE_REGS
+ // if necessary setup regNeed to select just the byte-able registers
+ if (byteCmp)
+ regNeed &= RBM_BYTE_REGS;
+#endif // CPU_HAS_BYTE_REGS
+
+ // avoid selecting registers that get newly born in op2, as using them will force a spill temp to be used.
+ regNeed = regSet.rsMustExclude(regNeed, newLiveMask);
+
+ // avoid selecting op2 reserved regs, as using them will force a spill temp to be used.
+ regNeed = regSet.rsMustExclude(regNeed, op2->gtRsvdRegs);
+
+ // Did we end up in an acceptable register?
+ // and do we have an acceptable free register available to grab?
+ //
+ if (((genRegMask(op1Reg) & regNeed) == 0) && ((regSet.rsRegMaskFree() & regNeed) != 0))
+ {
+ // Grab an acceptable register
+ regNumber newReg = regSet.rsGrabReg(regNeed);
+
+ noway_assert(op1Reg != newReg);
+
+ /* Update the value in the target register */
+
+ regTracker.rsTrackRegCopy(newReg, op1Reg);
+
+ inst_RV_RV(ins_Copy(op1->TypeGet()), newReg, op1Reg, op1->TypeGet());
+
+ /* The value has been transferred to 'reg' */
+
+ if ((genRegMask(op1Reg) & regSet.rsMaskUsed) == 0)
+ gcInfo.gcMarkRegSetNpt(genRegMask(op1Reg));
+
+ gcInfo.gcMarkRegPtrVal(newReg, op1->TypeGet());
+
+ /* The value is now in an appropriate register */
+
+ op1->gtRegNum = newReg;
+ }
+ noway_assert(op1->gtFlags & GTF_REG_VAL);
+ op1Reg = op1->gtRegNum;
+
+ genUpdateLife(op1);
+
+ /* Mark the register as 'used' */
+ regSet.rsMarkRegUsed(op1);
+
+ addrReg1 = genRegMask(op1Reg);
+
+ assert(((addrReg1 | addrReg2) & regSet.rsMaskUsed) == (addrReg1 | addrReg2));
+ assert(((addrReg1 & addrReg2) & regSet.rsMaskMult) == (addrReg1 & addrReg2));
+
+DONE_OP1:
+
+ assert(((addrReg1 | addrReg2) & regSet.rsMaskUsed) == (addrReg1 | addrReg2));
+ assert(((addrReg1 & addrReg2) & regSet.rsMaskMult) == (addrReg1 & addrReg2));
+ noway_assert(op1->gtFlags & GTF_REG_VAL);
+
+    // Set up regNeed with either RBM_ALLINT or the RBM_BYTE_REGS subset;
+    // when byteCmp is true we will perform a byte-sized cmp instruction,
+    // and that instruction requires that any registers used are byte-addressable ones.
+ //
+ regNeed = RBM_ALLINT;
+
+#if CPU_HAS_BYTE_REGS
+ // if necessary setup regNeed to select just the byte-able registers
+ if (byteCmp)
+ regNeed &= RBM_BYTE_REGS;
+#endif // CPU_HAS_BYTE_REGS
+
+ /* Make the comparand addressable */
+ assert(addrReg2 == 0);
+ addrReg2 = genMakeRvalueAddressable(op2, regNeed, RegSet::KEEP_REG, false, (byteCmp | shortCmp));
+
+ /* Make sure the first operand is still in a register; if
+ it's been spilled, we have to make sure it's reloaded
+ into a byte-addressable register if needed.
+       Pass keepReg=RegSet::KEEP_REG; otherwise the pointer lifetimes will be wrong.
+ */
+
+ assert(addrReg1 != 0);
+ genRecoverReg(op1, regNeed, RegSet::KEEP_REG);
+
+ noway_assert(op1->gtFlags & GTF_REG_VAL);
+ noway_assert(!byteCmp || isByteReg(op1->gtRegNum));
+
+ addrReg1 = genRegMask(op1->gtRegNum);
+ regSet.rsLockUsedReg(addrReg1);
+
+ /* Make sure that op2 is addressable. If we are going to do a
+ byte-comparison, we need it to be in a byte register. */
+
+ if (byteCmp && (op2->gtFlags & GTF_REG_VAL))
+ {
+ genRecoverReg(op2, regNeed, RegSet::KEEP_REG);
+ addrReg2 = genRegMask(op2->gtRegNum);
+ }
+ else
+ {
+ addrReg2 = genKeepAddressable(op2, addrReg2);
+ }
+
+ regSet.rsUnlockUsedReg(addrReg1);
+
+ assert(((addrReg1 | addrReg2) & regSet.rsMaskUsed) == (addrReg1 | addrReg2));
+ assert(((addrReg1 & addrReg2) & regSet.rsMaskMult) == (addrReg1 & addrReg2));
+
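+    // For a byte or short compare, use the exact small operand size (a small unsigned
+    // op1 type forces an unsigned compare); otherwise use the actual (widened) size.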
+ if (byteCmp || shortCmp)
+ {
+ size = emitTypeSize(op2->TypeGet());
+ if (varTypeIsUnsigned(op1Type))
+ unsignedCmp = true;
+ }
+ else
+ {
+ size = emitActualTypeSize(op2->TypeGet());
+ }
+
+ /* Perform the comparison */
+ inst_RV_TT(INS_cmp, op1->gtRegNum, op2, 0, size);
+
+DONE:
+
+ jumpKind = genJumpKindForOper(cmp, unsignedCmp ? CK_UNSIGNED : CK_SIGNED);
+
+DONE_FLAGS: // We have determined what jumpKind to use
+
+ genUpdateLife(cond);
+
+ /* The condition value is dead at the jump that follows */
+
+ assert(((addrReg1 | addrReg2) & regSet.rsMaskUsed) == (addrReg1 | addrReg2));
+ assert(((addrReg1 & addrReg2) & regSet.rsMaskMult) == (addrReg1 & addrReg2));
+ genDoneAddressable(op1, addrReg1, RegSet::KEEP_REG);
+ genDoneAddressable(op2, addrReg2, RegSet::KEEP_REG);
+
+ noway_assert(jumpKind != EJ_COUNT); // Ensure that it was assigned a valid value
+
+ return jumpKind;
+}
+
+/*****************************************************************************/
+/*****************************************************************************/
+/*****************************************************************************
+ *
+ * Generate code to jump to the jump target of the current basic block if
+ * the given relational operator yields 'true'.
+ */
+
+void CodeGen::genCondJump(GenTreePtr cond, BasicBlock* destTrue, BasicBlock* destFalse, bool bStackFPFixup)
+{
+ BasicBlock* jumpTrue;
+ BasicBlock* jumpFalse;
+
+ GenTreePtr op1 = cond->gtOp.gtOp1;
+ GenTreePtr op2 = cond->gtOp.gtOp2;
+ genTreeOps cmp = cond->OperGet();
+
+ if (destTrue)
+ {
+ jumpTrue = destTrue;
+ jumpFalse = destFalse;
+ }
+ else
+ {
+ noway_assert(compiler->compCurBB->bbJumpKind == BBJ_COND);
+
+ jumpTrue = compiler->compCurBB->bbJumpDest;
+ jumpFalse = compiler->compCurBB->bbNext;
+ }
+
+ noway_assert(cond->OperIsCompare());
+
+ /* Make sure the more expensive operand is 'op1' */
+ noway_assert((cond->gtFlags & GTF_REVERSE_OPS) == 0);
+
+ if (cond->gtFlags & GTF_REVERSE_OPS) // TODO: note that this is now dead code, since the above is a noway_assert()
+ {
+ /* Don't forget to modify the condition as well */
+
+ cond->gtOp.gtOp1 = op2;
+ cond->gtOp.gtOp2 = op1;
+ cond->SetOper(GenTree::SwapRelop(cmp));
+ cond->gtFlags &= ~GTF_REVERSE_OPS;
+
+ /* Get hold of the new values */
+
+ cmp = cond->OperGet();
+ op1 = cond->gtOp.gtOp1;
+ op2 = cond->gtOp.gtOp2;
+ }
+
+ /* What is the type of the operand? */
+
+ switch (genActualType(op1->gtType))
+ {
+ case TYP_INT:
+ case TYP_REF:
+ case TYP_BYREF:
+ emitJumpKind jumpKind;
+
+ // Check if we can use the currently set flags. Else set them
+
+ jumpKind = genCondSetFlags(cond);
+
+#if FEATURE_STACK_FP_X87
+ if (bStackFPFixup)
+ {
+ genCondJmpInsStackFP(jumpKind, jumpTrue, jumpFalse);
+ }
+ else
+#endif
+ {
+ /* Generate the conditional jump */
+ inst_JMP(jumpKind, jumpTrue);
+ }
+
+ return;
+
+ case TYP_LONG:
+#if FEATURE_STACK_FP_X87
+ if (bStackFPFixup)
+ {
+ genCondJumpLngStackFP(cond, jumpTrue, jumpFalse);
+ }
+ else
+#endif
+ {
+ genCondJumpLng(cond, jumpTrue, jumpFalse);
+ }
+ return;
+
+ case TYP_FLOAT:
+ case TYP_DOUBLE:
+#if FEATURE_STACK_FP_X87
+ genCondJumpFltStackFP(cond, jumpTrue, jumpFalse, bStackFPFixup);
+#else
+ genCondJumpFloat(cond, jumpTrue, jumpFalse);
+#endif
+ return;
+
+ default:
+#ifdef DEBUG
+ compiler->gtDispTree(cond);
+#endif
+ unreached(); // unexpected/unsupported 'jtrue' operands type
+ }
+}
+
+/*****************************************************************************
+ * Spill registers to check callers can handle it.
+ */
+
+#ifdef DEBUG
+
+void CodeGen::genStressRegs(GenTreePtr tree)
+{
+ if (regSet.rsStressRegs() < 2)
+ return;
+
+ /* Spill as many registers as possible. Callers should be prepared
+ to handle this case.
+ But don't spill trees with no size (TYP_STRUCT comes to mind) */
+
+ {
+ regMaskTP spillRegs = regSet.rsRegMaskCanGrab() & regSet.rsMaskUsed;
+ regNumber regNum;
+ regMaskTP regBit;
+
+ for (regNum = REG_FIRST, regBit = 1; regNum < REG_COUNT; regNum = REG_NEXT(regNum), regBit <<= 1)
+ {
+ if ((spillRegs & regBit) && (regSet.rsUsedTree[regNum] != NULL) &&
+ (genTypeSize(regSet.rsUsedTree[regNum]->TypeGet()) > 0))
+ {
+ regSet.rsSpillReg(regNum);
+
+ spillRegs &= regSet.rsMaskUsed;
+
+ if (!spillRegs)
+ break;
+ }
+ }
+ }
+
+ regMaskTP trashRegs = regSet.rsRegMaskFree();
+
+ if (trashRegs == RBM_NONE)
+ return;
+
+ /* It is sometimes reasonable to expect that calling genCodeForTree()
+ on certain trees won't spill anything */
+
+ if ((compiler->compCurStmt == compiler->compCurBB->bbTreeList) && (compiler->compCurBB->bbCatchTyp) &&
+ handlerGetsXcptnObj(compiler->compCurBB->bbCatchTyp))
+ {
+ trashRegs &= ~(RBM_EXCEPTION_OBJECT);
+ }
+
+ // If genCodeForTree() effectively gets called a second time on the same tree
+
+ if (tree->gtFlags & GTF_REG_VAL)
+ {
+ noway_assert(varTypeIsIntegralOrI(tree->TypeGet()));
+ trashRegs &= ~genRegMask(tree->gtRegNum);
+ }
+
+ if (tree->gtType == TYP_INT && tree->OperIsSimple())
+ {
+ GenTreePtr op1 = tree->gtOp.gtOp1;
+ GenTreePtr op2 = tree->gtOp.gtOp2;
+ if (op1 && (op1->gtFlags & GTF_REG_VAL))
+ trashRegs &= ~genRegMask(op1->gtRegNum);
+ if (op2 && (op2->gtFlags & GTF_REG_VAL))
+ trashRegs &= ~genRegMask(op2->gtRegNum);
+ }
+
+ if (compiler->compCurBB == compiler->genReturnBB)
+ {
+ if (compiler->info.compCallUnmanaged)
+ {
+ LclVarDsc* varDsc = &compiler->lvaTable[compiler->info.compLvFrameListRoot];
+ if (varDsc->lvRegister)
+ trashRegs &= ~genRegMask(varDsc->lvRegNum);
+ }
+ }
+
+ /* Now trash the registers. We use regSet.rsModifiedRegsMask, else we will have
+ to save/restore the register. We try to be as unintrusive
+ as possible */
+
+ noway_assert((REG_INT_LAST - REG_INT_FIRST) == 7);
+ // This is obviously false for ARM, but this function is never called.
+ for (regNumber reg = REG_INT_FIRST; reg <= REG_INT_LAST; reg = REG_NEXT(reg))
+ {
+ regMaskTP regMask = genRegMask(reg);
+
+ if (regSet.rsRegsModified(regMask & trashRegs))
+ genSetRegToIcon(reg, 0);
+ }
+}
+
+#endif // DEBUG
+
+/*****************************************************************************
+ *
+ * Generate code for a GTK_CONST tree
+ */
+
+void CodeGen::genCodeForTreeConst(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg)
+{
+ noway_assert(tree->IsCnsIntOrI());
+
+ ssize_t ival = tree->gtIntConCommon.IconValue();
+ regMaskTP needReg = destReg;
+ regNumber reg;
+ bool needReloc = compiler->opts.compReloc && tree->IsIconHandle();
+
+#if REDUNDANT_LOAD
+
+ /* If we are targeting destReg and ival is zero */
+ /* we would rather xor needReg than copy another register */
+
+ if (!needReloc)
+ {
+ bool reuseConstantInReg = false;
+
+ if (destReg == RBM_NONE)
+ reuseConstantInReg = true;
+
+#ifdef _TARGET_ARM_
+ // If we can set a register to a constant with a small encoding, then do that.
+ // Assume we'll get a low register if needReg has low registers as options.
+ if (!reuseConstantInReg &&
+ !arm_Valid_Imm_For_Small_Mov((needReg & RBM_LOW_REGS) ? REG_R0 : REG_R8, ival, INS_FLAGS_DONT_CARE))
+ {
+ reuseConstantInReg = true;
+ }
+#else
+ if (!reuseConstantInReg && ival != 0)
+ reuseConstantInReg = true;
+#endif
+
+ if (reuseConstantInReg)
+ {
+ /* Is the constant already in register? If so, use this register */
+
+ reg = regTracker.rsIconIsInReg(ival);
+ if (reg != REG_NA)
+ goto REG_LOADED;
+ }
+ }
+
+#endif // REDUNDANT_LOAD
+
+ reg = regSet.rsPickReg(needReg, bestReg);
+
+ /* If the constant is a handle, we need a reloc to be applied to it */
+
+ if (needReloc)
+ {
+ instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, reg, ival);
+ regTracker.rsTrackRegTrash(reg);
+ }
+ else
+ {
+ genSetRegToIcon(reg, ival, tree->TypeGet());
+ }
+
+REG_LOADED:
+
+#ifdef DEBUG
+ /* Special case: GT_CNS_INT - Restore the current live set if it was changed */
+
+ if (!genTempLiveChg)
+ {
+ VarSetOps::Assign(compiler, compiler->compCurLife, genTempOldLife);
+ genTempLiveChg = true;
+ }
+#endif
+
+ gcInfo.gcMarkRegPtrVal(reg, tree->TypeGet()); // In case the handle is a GC object (for eg, frozen strings)
+ genCodeForTree_DONE(tree, reg);
+}
+
+/*****************************************************************************
+ *
+ * Generate code for a GTK_LEAF tree
+ */
+
+void CodeGen::genCodeForTreeLeaf(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg)
+{
+ genTreeOps oper = tree->OperGet();
+ regNumber reg = DUMMY_INIT(REG_CORRUPT);
+ regMaskTP regs = regSet.rsMaskUsed;
+ regMaskTP needReg = destReg;
+ size_t size;
+
+ noway_assert(tree->OperKind() & GTK_LEAF);
+
+ switch (oper)
+ {
+ case GT_REG_VAR:
+ NO_WAY("GT_REG_VAR should have been caught above");
+ break;
+
+ case GT_LCL_VAR:
+
+ /* Does the variable live in a register? */
+
+ if (genMarkLclVar(tree))
+ {
+ genCodeForTree_REG_VAR1(tree);
+ return;
+ }
+
+#if REDUNDANT_LOAD
+
+ /* Is the local variable already in register? */
+
+ reg = findStkLclInReg(tree->gtLclVarCommon.gtLclNum);
+
+ if (reg != REG_NA)
+ {
+ /* Use the register the variable happens to be in */
+ regMaskTP regMask = genRegMask(reg);
+
+ // If the register that it was in isn't one of the needRegs
+ // then try to move it into a needReg register
+
+ if (((regMask & needReg) == 0) && (regSet.rsRegMaskCanGrab() & needReg))
+ {
+ regNumber rg2 = reg;
+ reg = regSet.rsPickReg(needReg, bestReg);
+ if (reg != rg2)
+ {
+ regMask = genRegMask(reg);
+ inst_RV_RV(INS_mov, reg, rg2, tree->TypeGet());
+ }
+ }
+
+ gcInfo.gcMarkRegPtrVal(reg, tree->TypeGet());
+ regTracker.rsTrackRegLclVar(reg, tree->gtLclVarCommon.gtLclNum);
+ break;
+ }
+
+#endif
+ goto MEM_LEAF;
+
+ case GT_LCL_FLD:
+
+ // We only use GT_LCL_FLD for lvDoNotEnregister vars, so we don't have
+ // to worry about it being enregistered.
+ noway_assert(compiler->lvaTable[tree->gtLclFld.gtLclNum].lvRegister == 0);
+ goto MEM_LEAF;
+
+ case GT_CLS_VAR:
+
+ MEM_LEAF:
+
+ /* Pick a register for the value */
+
+ reg = regSet.rsPickReg(needReg, bestReg);
+
+ /* Load the variable into the register */
+
+ size = genTypeSize(tree->gtType);
+
+ if (size < EA_4BYTE)
+ {
+ instruction ins = ins_Move_Extend(tree->TypeGet(), (tree->gtFlags & GTF_REG_VAL) != 0);
+ inst_RV_TT(ins, reg, tree, 0);
+
+ /* We've now "promoted" the tree-node to TYP_INT */
+
+ tree->gtType = TYP_INT;
+ }
+ else
+ {
+ inst_RV_TT(INS_mov, reg, tree, 0);
+ }
+
+ regTracker.rsTrackRegTrash(reg);
+
+ gcInfo.gcMarkRegPtrVal(reg, tree->TypeGet());
+
+ switch (oper)
+ {
+ case GT_CLS_VAR:
+ regTracker.rsTrackRegClsVar(reg, tree);
+ break;
+ case GT_LCL_VAR:
+ regTracker.rsTrackRegLclVar(reg, tree->gtLclVarCommon.gtLclNum);
+ break;
+ case GT_LCL_FLD:
+ break;
+ default:
+ noway_assert(!"Unexpected oper");
+ }
+
+#ifdef _TARGET_ARM_
+ if (tree->gtFlags & GTF_IND_VOLATILE)
+ {
+ // Emit a memory barrier instruction after the load
+ instGen_MemoryBarrier();
+ }
+#endif
+
+ break;
+
+ case GT_NO_OP:
+ // The VM does certain things with actual NOP instructions
+ // so generate something small that has no effect, but isn't
+ // a typical NOP
+ if (tree->gtFlags & GTF_NO_OP_NO)
+ {
+#ifdef _TARGET_XARCH_
+ // The VM expects 0x66 0x90 for a 2-byte NOP, not 0x90 0x90
+ instGen(INS_nop);
+ instGen(INS_nop);
+#elif defined(_TARGET_ARM_)
+                // The VM isn't checking yet; when it does, hopefully it will
+                // be fooled by the wider variant.
+ instGen(INS_nopw);
+#else
+ NYI("Non-nop NO_OP");
+#endif
+ }
+ else
+ {
+ instGen(INS_nop);
+ }
+ reg = REG_STK;
+ break;
+
+#if !FEATURE_EH_FUNCLETS
+ case GT_END_LFIN:
+
+ /* Have to clear the shadowSP of the nesting level which
+ encloses the finally */
+
+ unsigned finallyNesting;
+ finallyNesting = (unsigned)tree->gtVal.gtVal1;
+ noway_assert(tree->gtVal.gtVal1 <
+ compiler->compHndBBtabCount); // assert we didn't truncate with the cast above.
+ noway_assert(finallyNesting < compiler->compHndBBtabCount);
+
+ // The last slot is reserved for ICodeManager::FixContext(ppEndRegion)
+ unsigned filterEndOffsetSlotOffs;
+ PREFIX_ASSUME(compiler->lvaLclSize(compiler->lvaShadowSPslotsVar) >
+ sizeof(void*)); // below doesn't underflow.
+ filterEndOffsetSlotOffs = (unsigned)(compiler->lvaLclSize(compiler->lvaShadowSPslotsVar) - (sizeof(void*)));
+
+ unsigned curNestingSlotOffs;
+ curNestingSlotOffs = filterEndOffsetSlotOffs - ((finallyNesting + 1) * sizeof(void*));
+ instGen_Store_Imm_Into_Lcl(TYP_I_IMPL, EA_PTRSIZE, 0, compiler->lvaShadowSPslotsVar, curNestingSlotOffs);
+ reg = REG_STK;
+ break;
+#endif // !FEATURE_EH_FUNCLETS
+
+ case GT_CATCH_ARG:
+
+ noway_assert(compiler->compCurBB->bbCatchTyp && handlerGetsXcptnObj(compiler->compCurBB->bbCatchTyp));
+
+            /* Catch arguments get passed in a register. genCodeForBBlist()
+               would have marked it as holding a GC object, but not as used. */
+
+ noway_assert(gcInfo.gcRegGCrefSetCur & RBM_EXCEPTION_OBJECT);
+ reg = REG_EXCEPTION_OBJECT;
+ break;
+
+ case GT_JMP:
+ genCodeForTreeLeaf_GT_JMP(tree);
+ return;
+
+ case GT_MEMORYBARRIER:
+ // Emit the memory barrier instruction
+ instGen_MemoryBarrier();
+ reg = REG_STK;
+ break;
+
+ default:
+#ifdef DEBUG
+ compiler->gtDispTree(tree);
+#endif
+ noway_assert(!"unexpected leaf");
+ }
+
+ noway_assert(reg != DUMMY_INIT(REG_CORRUPT));
+ genCodeForTree_DONE(tree, reg);
+}
+
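+/*****************************************************************************
+ *
+ *  Generate code for (and discard the values of) the first operands of a
+ *  chain of GT_COMMA nodes, returning the final non-comma operand.
+ */
+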
+GenTreePtr CodeGen::genCodeForCommaTree(GenTreePtr tree)
+{
+ while (tree->OperGet() == GT_COMMA)
+ {
+ GenTreePtr op1 = tree->gtOp.gtOp1;
+ genCodeForTree(op1, RBM_NONE);
+ gcInfo.gcMarkRegPtrVal(op1);
+
+ tree = tree->gtOp.gtOp2;
+ }
+ return tree;
+}
+
+/*****************************************************************************
+ *
+ *  Generate code for a leaf node of type GT_JMP
+ */
+
+void CodeGen::genCodeForTreeLeaf_GT_JMP(GenTreePtr tree)
+{
+ noway_assert(compiler->compCurBB->bbFlags & BBF_HAS_JMP);
+
+#ifdef PROFILING_SUPPORTED
+ if (compiler->compIsProfilerHookNeeded())
+ {
+ /* fire the event at the call site */
+ unsigned saveStackLvl2 = genStackLevel;
+
+ compiler->info.compProfilerCallback = true;
+
+#ifdef _TARGET_X86_
+ //
+ // Push the profilerHandle
+ //
+ regMaskTP byrefPushedRegs;
+ regMaskTP norefPushedRegs;
+ regMaskTP pushedArgRegs =
+ genPushRegs(RBM_ARG_REGS & (regSet.rsMaskUsed | regSet.rsMaskVars | regSet.rsMaskLock), &byrefPushedRegs,
+ &norefPushedRegs);
+
+ if (compiler->compProfilerMethHndIndirected)
+ {
+ getEmitter()->emitIns_AR_R(INS_push, EA_PTR_DSP_RELOC, REG_NA, REG_NA,
+ (ssize_t)compiler->compProfilerMethHnd);
+ }
+ else
+ {
+ inst_IV(INS_push, (size_t)compiler->compProfilerMethHnd);
+ }
+ genSinglePush();
+
+ genEmitHelperCall(CORINFO_HELP_PROF_FCN_TAILCALL,
+ sizeof(int) * 1, // argSize
+ EA_UNKNOWN); // retSize
+
+ //
+ // Adjust the number of stack slots used by this managed method if necessary.
+ //
+ if (compiler->fgPtrArgCntMax < 1)
+ {
+ compiler->fgPtrArgCntMax = 1;
+ }
+
+ genPopRegs(pushedArgRegs, byrefPushedRegs, norefPushedRegs);
+#elif _TARGET_ARM_
+        // For GT_JMP nodes, when running under the ARM profiler, we have added r0 as a used register in order to
+        // evaluate the GT_JMP node. To emit the tailcall callback we need r0 to pass the profiler handle; any free
+        // register could be used as the call target.
+ regNumber argReg = regSet.rsGrabReg(RBM_PROFILER_JMP_USED);
+ noway_assert(argReg == REG_PROFILER_JMP_ARG);
+ regSet.rsLockReg(RBM_PROFILER_JMP_USED);
+
+ if (compiler->compProfilerMethHndIndirected)
+ {
+ getEmitter()->emitIns_R_AI(INS_ldr, EA_PTR_DSP_RELOC, argReg, (ssize_t)compiler->compProfilerMethHnd);
+ regTracker.rsTrackRegTrash(argReg);
+ }
+ else
+ {
+ instGen_Set_Reg_To_Imm(EA_4BYTE, argReg, (ssize_t)compiler->compProfilerMethHnd);
+ }
+
+ genEmitHelperCall(CORINFO_HELP_PROF_FCN_TAILCALL,
+ 0, // argSize
+ EA_UNKNOWN); // retSize
+
+ regSet.rsUnlockReg(RBM_PROFILER_JMP_USED);
+#else
+ NYI("Pushing the profilerHandle & caller's sp for the profiler callout and locking 'arguments'");
+#endif //_TARGET_X86_
+
+ /* Restore the stack level */
+ genStackLevel = saveStackLvl2;
+ }
+#endif // PROFILING_SUPPORTED
+
+ /* This code is cloned from the regular processing of GT_RETURN values. We have to remember to
+ * call genPInvokeMethodEpilog anywhere that we have a method return. We should really
+ * generate trees for the PInvoke prolog and epilog so we can remove these special cases.
+ */
+
+ if (compiler->info.compCallUnmanaged)
+ {
+ genPInvokeMethodEpilog();
+ }
+
+ // Make sure register arguments are in their initial registers
+ // and stack arguments are put back as well.
+ //
+ // This does not deal with circular dependencies of register
+ // arguments, which is safe because RegAlloc prevents that by
+ // not enregistering any RegArgs when a JMP opcode is used.
+
+ if (compiler->info.compArgsCount == 0)
+ {
+ return;
+ }
+
+ unsigned varNum;
+ LclVarDsc* varDsc;
+
+ // First move any enregistered stack arguments back to the stack
+ for (varNum = 0, varDsc = compiler->lvaTable; varNum < compiler->info.compArgsCount; varNum++, varDsc++)
+ {
+ noway_assert(varDsc->lvIsParam);
+ if (varDsc->lvIsRegArg || !varDsc->lvRegister)
+ continue;
+
+ /* Argument was passed on the stack, but ended up in a register
+ * Store it back to the stack */
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifndef _TARGET_64BIT_
+ if (varDsc->TypeGet() == TYP_LONG)
+ {
+ /* long - at least the low half must be enregistered */
+
+ getEmitter()->emitIns_S_R(ins_Store(TYP_INT), EA_4BYTE, varDsc->lvRegNum, varNum, 0);
+
+ /* Is the upper half also enregistered? */
+
+ if (varDsc->lvOtherReg != REG_STK)
+ {
+ getEmitter()->emitIns_S_R(ins_Store(TYP_INT), EA_4BYTE, varDsc->lvOtherReg, varNum, sizeof(int));
+ }
+ }
+ else
+#endif // _TARGET_64BIT_
+ {
+ getEmitter()->emitIns_S_R(ins_Store(varDsc->TypeGet()), emitTypeSize(varDsc->TypeGet()), varDsc->lvRegNum,
+ varNum, 0);
+ }
+ }
+
+#ifdef _TARGET_ARM_
+ regMaskTP fixedArgsMask = RBM_NONE;
+#endif
+
+ // Next move any un-enregistered register arguments back to their register
+ for (varNum = 0, varDsc = compiler->lvaTable; varNum < compiler->info.compArgsCount; varNum++, varDsc++)
+ {
+ /* Is this variable a register arg? */
+
+ if (!varDsc->lvIsRegArg)
+ continue;
+
+ /* Register argument */
+
+ noway_assert(isRegParamType(genActualType(varDsc->TypeGet())));
+ noway_assert(!varDsc->lvRegister);
+
+ /* Reload it from the stack */
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifndef _TARGET_64BIT_
+ if (varDsc->TypeGet() == TYP_LONG)
+ {
+ /* long - at least the low half must be enregistered */
+
+ getEmitter()->emitIns_R_S(ins_Load(TYP_INT), EA_4BYTE, varDsc->lvArgReg, varNum, 0);
+ regTracker.rsTrackRegTrash(varDsc->lvArgReg);
+
+            /* Also assume the upper half is enregistered */
+
+ getEmitter()->emitIns_R_S(ins_Load(TYP_INT), EA_4BYTE, genRegArgNext(varDsc->lvArgReg), varNum,
+ sizeof(int));
+ regTracker.rsTrackRegTrash(genRegArgNext(varDsc->lvArgReg));
+
+#ifdef _TARGET_ARM_
+ fixedArgsMask |= genRegMask(varDsc->lvArgReg);
+ fixedArgsMask |= genRegMask(genRegArgNext(varDsc->lvArgReg));
+#endif
+ }
+ else
+#endif // _TARGET_64BIT_
+#ifdef _TARGET_ARM_
+ if (varDsc->lvIsHfaRegArg())
+ {
+ const var_types elemType = varDsc->GetHfaType();
+ const instruction loadOp = ins_Load(elemType);
+ const emitAttr size = emitTypeSize(elemType);
+ regNumber argReg = varDsc->lvArgReg;
+ const unsigned maxSize = min(varDsc->lvSize(), (LAST_FP_ARGREG + 1 - argReg) * REGSIZE_BYTES);
+
+ for (unsigned ofs = 0; ofs < maxSize; ofs += (unsigned)size)
+ {
+ getEmitter()->emitIns_R_S(loadOp, size, argReg, varNum, ofs);
+ assert(genIsValidFloatReg(argReg)); // we don't use register tracking for FP
+ argReg = regNextOfType(argReg, elemType);
+ }
+ }
+ else if (varDsc->TypeGet() == TYP_STRUCT)
+ {
+ const var_types elemType = TYP_INT; // we pad everything out to at least 4 bytes
+ const instruction loadOp = ins_Load(elemType);
+ const emitAttr size = emitTypeSize(elemType);
+ regNumber argReg = varDsc->lvArgReg;
+ const unsigned maxSize = min(varDsc->lvSize(), (REG_ARG_LAST + 1 - argReg) * REGSIZE_BYTES);
+
+ for (unsigned ofs = 0; ofs < maxSize; ofs += (unsigned)size)
+ {
+ getEmitter()->emitIns_R_S(loadOp, size, argReg, varNum, ofs);
+ regTracker.rsTrackRegTrash(argReg);
+
+ fixedArgsMask |= genRegMask(argReg);
+
+ argReg = genRegArgNext(argReg);
+ }
+ }
+ else
+#endif //_TARGET_ARM_
+ {
+ var_types loadType = varDsc->TypeGet();
+ regNumber argReg = varDsc->lvArgReg; // incoming arg register
+ bool twoParts = false;
+
+ if (compiler->info.compIsVarArgs && isFloatRegType(loadType))
+ {
+#ifndef _TARGET_64BIT_
+ if (loadType == TYP_DOUBLE)
+ twoParts = true;
+#endif
+ loadType = TYP_I_IMPL;
+ assert(isValidIntArgReg(argReg));
+ }
+
+ getEmitter()->emitIns_R_S(ins_Load(loadType), emitTypeSize(loadType), argReg, varNum, 0);
+ regTracker.rsTrackRegTrash(argReg);
+
+#ifdef _TARGET_ARM_
+ fixedArgsMask |= genRegMask(argReg);
+#endif
+ if (twoParts)
+ {
+ argReg = genRegArgNext(argReg);
+ assert(isValidIntArgReg(argReg));
+
+ getEmitter()->emitIns_R_S(ins_Load(loadType), emitTypeSize(loadType), argReg, varNum, REGSIZE_BYTES);
+ regTracker.rsTrackRegTrash(argReg);
+
+#ifdef _TARGET_ARM_
+ fixedArgsMask |= genRegMask(argReg);
+#endif
+ }
+ }
+ }
+
+#ifdef _TARGET_ARM_
+ // Check if we have any non-fixed args possibly in the arg registers.
+ if (compiler->info.compIsVarArgs && (fixedArgsMask & RBM_ARG_REGS) != RBM_ARG_REGS)
+ {
+ noway_assert(compiler->lvaTable[compiler->lvaVarargsHandleArg].lvOnFrame);
+
+ regNumber regDeclArgs = REG_ARG_FIRST;
+
+ // Skip the 'this' pointer.
+ if (!compiler->info.compIsStatic)
+ {
+ regDeclArgs = REG_NEXT(regDeclArgs);
+ }
+
+ // Skip the 'generic context.'
+ if (compiler->info.compMethodInfo->args.callConv & CORINFO_CALLCONV_PARAMTYPE)
+ {
+ regDeclArgs = REG_NEXT(regDeclArgs);
+ }
+
+ // Skip any 'return buffer arg.'
+ if (compiler->info.compRetBuffArg != BAD_VAR_NUM)
+ {
+ regDeclArgs = REG_NEXT(regDeclArgs);
+ }
+
+ // Skip the 'vararg cookie.'
+ regDeclArgs = REG_NEXT(regDeclArgs);
+
+ // Also add offset for the vararg cookie.
+ int offset = REGSIZE_BYTES;
+
+ // Load all the variable arguments in registers back to their registers.
+ for (regNumber reg = regDeclArgs; reg <= REG_ARG_LAST; reg = REG_NEXT(reg))
+ {
+ if (!(fixedArgsMask & genRegMask(reg)))
+ {
+ getEmitter()->emitIns_R_S(ins_Load(TYP_INT), EA_4BYTE, reg, compiler->lvaVarargsHandleArg, offset);
+ regTracker.rsTrackRegTrash(reg);
+ }
+ offset += REGSIZE_BYTES;
+ }
+ }
+#endif // _TARGET_ARM_
+}
+
+/*****************************************************************************
+ *
+ * Check if a variable is assigned to in a tree. The variable number is
+ *  passed in pCallbackData. If the variable is assigned to, return
+ * Compiler::WALK_ABORT. Otherwise return Compiler::WALK_CONTINUE.
+ */
+Compiler::fgWalkResult CodeGen::fgIsVarAssignedTo(GenTreePtr* pTree, Compiler::fgWalkData* data)
+{
+ GenTreePtr tree = *pTree;
+ if ((tree->OperIsAssignment()) && (tree->gtOp.gtOp1->OperGet() == GT_LCL_VAR) &&
+ (tree->gtOp.gtOp1->gtLclVarCommon.gtLclNum == (unsigned)(size_t)data->pCallbackData))
+ {
+ return Compiler::WALK_ABORT;
+ }
+
+ return Compiler::WALK_CONTINUE;
+}
+
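+/*****************************************************************************
+ *
+ *  If 'tree' is a local variable that currently lives in a (non-floating-point)
+ *  register, return that register; otherwise return REG_NA.
+ */
+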
+regNumber CodeGen::genIsEnregisteredIntVariable(GenTreePtr tree)
+{
+ unsigned varNum;
+ LclVarDsc* varDsc;
+
+ if (tree->gtOper == GT_LCL_VAR)
+ {
+ /* Does the variable live in a register? */
+
+ varNum = tree->gtLclVarCommon.gtLclNum;
+ noway_assert(varNum < compiler->lvaCount);
+ varDsc = compiler->lvaTable + varNum;
+
+ if (!varDsc->IsFloatRegType() && varDsc->lvRegister)
+ {
+ return varDsc->lvRegNum;
+ }
+ }
+
+ return REG_NA;
+}
+
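+/*****************************************************************************
+ *
+ *  Unspill any registers that are live in the saved liveness set 'ls' but are
+ *  missing from the current live set, reloading any that were spilled.
+ */
+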
+// inline
+void CodeGen::unspillLiveness(genLivenessSet* ls)
+{
+ // Only try to unspill the registers that are missing from the currentLiveRegs
+ //
+ regMaskTP cannotSpillMask = ls->maskVars | ls->gcRefRegs | ls->byRefRegs;
+ regMaskTP currentLiveRegs = regSet.rsMaskVars | gcInfo.gcRegGCrefSetCur | gcInfo.gcRegByrefSetCur;
+ cannotSpillMask &= ~currentLiveRegs;
+
+ // Typically this will always be true and we will return
+ //
+ if (cannotSpillMask == 0)
+ return;
+
+ for (regNumber reg = REG_INT_FIRST; reg <= REG_INT_LAST; reg = REG_NEXT(reg))
+ {
+ // Is this a register that we cannot leave in the spilled state?
+ //
+ if ((cannotSpillMask & genRegMask(reg)) == 0)
+ continue;
+
+ RegSet::SpillDsc* spill = regSet.rsSpillDesc[reg];
+
+ // Was it spilled, if not then skip it.
+ //
+ if (!spill)
+ continue;
+
+ noway_assert(spill->spillTree->gtFlags & GTF_SPILLED);
+
+ regSet.rsUnspillReg(spill->spillTree, genRegMask(reg), RegSet::KEEP_REG);
+ }
+}
+
+/*****************************************************************************
+ *
+ * Generate code for a qmark colon
+ */
+
+void CodeGen::genCodeForQmark(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg)
+{
+ GenTreePtr op1 = tree->gtOp.gtOp1;
+ GenTreePtr op2 = tree->gtOp.gtOp2;
+ regNumber reg;
+ regMaskTP regs = regSet.rsMaskUsed;
+ regMaskTP needReg = destReg;
+
+ noway_assert(compiler->compQmarkUsed);
+ noway_assert(tree->gtOper == GT_QMARK);
+ noway_assert(op1->OperIsCompare());
+ noway_assert(op2->gtOper == GT_COLON);
+
+ GenTreePtr thenNode = op2->AsColon()->ThenNode();
+ GenTreePtr elseNode = op2->AsColon()->ElseNode();
+
+ /* If elseNode is a Nop node you must reverse the
+ thenNode and elseNode prior to reaching here!
+       (If both 'else' and 'then' are Nops, the whole qmark will have been optimized away.) */
+
+ noway_assert(!elseNode->IsNothingNode());
+
+ /* Try to implement the qmark colon using a CMOV. If we can't for
+ whatever reason, this will return false and we will implement
+ it using regular branching constructs. */
+
+ if (genCodeForQmarkWithCMOV(tree, destReg, bestReg))
+ return;
+
+ /*
+ This is a ?: operator; generate code like this:
+
+ condition_compare
+ jmp_if_true lab_true
+
+ lab_false:
+ op1 (false = 'else' part)
+ jmp lab_done
+
+ lab_true:
+ op2 (true = 'then' part)
+
+ lab_done:
+
+
+ NOTE: If no 'then' part we do not generate the 'jmp lab_done'
+ or the 'lab_done' label
+ */
+
+ BasicBlock* lab_true;
+ BasicBlock* lab_false;
+ BasicBlock* lab_done;
+
+ genLivenessSet entryLiveness;
+ genLivenessSet exitLiveness;
+
+ lab_true = genCreateTempLabel();
+ lab_false = genCreateTempLabel();
+
+#if FEATURE_STACK_FP_X87
+    /* Spill any registers that hold partial values so that the exit liveness
+       from both sides is the same */
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef DEBUG
+ regMaskTP spillMask = regSet.rsMaskUsedFloat | regSet.rsMaskLockedFloat | regSet.rsMaskRegVarFloat;
+
+ // spillMask should be the whole FP stack
+ noway_assert(compCurFPState.m_uStackSize == genCountBits(spillMask));
+#endif
+
+ SpillTempsStackFP(regSet.rsMaskUsedFloat);
+ noway_assert(regSet.rsMaskUsedFloat == 0);
+#endif
+
+ /* Before we generate code for qmark, we spill all the currently used registers
+ that conflict with the registers used in the qmark tree. This is to avoid
+ introducing spills that only occur on either the 'then' or 'else' side of
+ the tree, but not both identically. We need to be careful with enregistered
+ variables that are used; see below.
+ */
+
+ if (regSet.rsMaskUsed)
+ {
+ /* If regSet.rsMaskUsed overlaps with regSet.rsMaskVars (multi-use of the enregistered
+ variable), then it may not get spilled. However, the variable may
+ then go dead within thenNode/elseNode, at which point regSet.rsMaskUsed
+ may get spilled from one side and not the other. So unmark regSet.rsMaskVars
+ before spilling regSet.rsMaskUsed */
+
+ regMaskTP rsAdditionalCandidates = regSet.rsMaskUsed & regSet.rsMaskVars;
+ regMaskTP rsAdditional = RBM_NONE;
+
+ // For each multi-use of an enregistered variable, we need to determine if
+ // it can get spilled inside the qmark colon. This can only happen if
+ // its life ends somewhere in the qmark colon. We have the following
+ // cases:
+ // 1) Variable is dead at the end of the colon -- needs to be spilled
+ // 2) Variable is alive at the end of the colon -- needs to be spilled
+ // iff it is assigned to in the colon. In order to determine that, we
+ // examine the GTF_ASG flag to see if any assignments were made in the
+ // colon. If there are any, we need to do a tree walk to see if this
+ // variable is the target of an assignment. This treewalk should not
+ // happen frequently.
+ if (rsAdditionalCandidates)
+ {
+#ifdef DEBUG
+ if (compiler->verbose)
+ {
+ Compiler::printTreeID(tree);
+ printf(": Qmark-Colon additional spilling candidates are ");
+ dspRegMask(rsAdditionalCandidates);
+ printf("\n");
+ }
+#endif
+
+ // If any candidates are not alive at the GT_QMARK node, then they
+ // need to be spilled
+
+ VARSET_TP VARSET_INIT(compiler, rsLiveNow, compiler->compCurLife);
+ VARSET_TP VARSET_INIT_NOCOPY(rsLiveAfter, compiler->fgUpdateLiveSet(compiler->compCurLife,
+ compiler->compCurLifeTree, tree));
+
+ VARSET_TP VARSET_INIT_NOCOPY(regVarLiveNow,
+ VarSetOps::Intersection(compiler, compiler->raRegVarsMask, rsLiveNow));
+
+ VARSET_ITER_INIT(compiler, iter, regVarLiveNow, varIndex);
+ while (iter.NextElem(compiler, &varIndex))
+ {
+ // Find the variable in compiler->lvaTable
+ unsigned varNum = compiler->lvaTrackedToVarNum[varIndex];
+ LclVarDsc* varDsc = compiler->lvaTable + varNum;
+
+#if !FEATURE_FP_REGALLOC
+ if (varDsc->IsFloatRegType())
+ continue;
+#endif
+
+ noway_assert(varDsc->lvRegister);
+
+ regMaskTP regBit;
+
+ if (varTypeIsFloating(varDsc->TypeGet()))
+ {
+ regBit = genRegMaskFloat(varDsc->lvRegNum, varDsc->TypeGet());
+ }
+ else
+ {
+ regBit = genRegMask(varDsc->lvRegNum);
+
+ // For longs we may need to spill both regs
+ if (isRegPairType(varDsc->lvType) && varDsc->lvOtherReg != REG_STK)
+ regBit |= genRegMask(varDsc->lvOtherReg);
+ }
+
+ // Is it one of our reg-use vars? If not, we don't need to spill it.
+ regBit &= rsAdditionalCandidates;
+ if (!regBit)
+ continue;
+
+ // Is the variable live at the end of the colon?
+ if (VarSetOps::IsMember(compiler, rsLiveAfter, varIndex))
+ {
+ // Variable is alive at the end of the colon. Was it assigned
+ // to inside the colon?
+
+ if (!(op2->gtFlags & GTF_ASG))
+ continue;
+
+ if (compiler->fgWalkTreePre(&op2, CodeGen::fgIsVarAssignedTo, (void*)(size_t)varNum) ==
+ Compiler::WALK_ABORT)
+ {
+ // Variable was assigned to, so we need to spill it.
+
+ rsAdditional |= regBit;
+#ifdef DEBUG
+ if (compiler->verbose)
+ {
+ Compiler::printTreeID(tree);
+ printf(": Qmark-Colon candidate ");
+ dspRegMask(regBit);
+ printf("\n");
+ printf(" is assigned to inside colon and will be spilled\n");
+ }
+#endif
+ }
+ }
+ else
+ {
+ // Variable is not alive at the end of the colon. We need to spill it.
+
+ rsAdditional |= regBit;
+#ifdef DEBUG
+ if (compiler->verbose)
+ {
+ Compiler::printTreeID(tree);
+ printf(": Qmark-Colon candidate ");
+ dspRegMask(regBit);
+ printf("\n");
+ printf(" is alive at end of colon and will be spilled\n");
+ }
+#endif
+ }
+ }
+
+#ifdef DEBUG
+ if (compiler->verbose)
+ {
+ Compiler::printTreeID(tree);
+ printf(": Qmark-Colon approved additional spilling candidates are ");
+ dspRegMask(rsAdditional);
+ printf("\n");
+ }
+#endif
+ }
+
+ noway_assert((rsAdditionalCandidates | rsAdditional) == rsAdditionalCandidates);
+
+ // We only need to spill registers that are modified by the qmark tree, as specified in tree->gtUsedRegs.
+ // If we ever need to use and spill a register while generating code that is not in tree->gtUsedRegs,
+ // we will have unbalanced spills and generate bad code.
+ regMaskTP rsSpill =
+ ((regSet.rsMaskUsed & ~(regSet.rsMaskVars | regSet.rsMaskResvd)) | rsAdditional) & tree->gtUsedRegs;
+
+#ifdef DEBUG
+ // Under register stress, regSet.rsPickReg() ignores the recommended registers and always picks
+ // 'bad' registers, causing spills. So, just force all used registers to get spilled
+ // in the stress case, to avoid the problem we're trying to resolve here. Thus, any spills
+ // that occur within the qmark condition, 'then' case, or 'else' case, will have to be
+ // unspilled while generating that same tree.
+
+ if (regSet.rsStressRegs() >= 1)
+ {
+ rsSpill |= regSet.rsMaskUsed & ~(regSet.rsMaskVars | regSet.rsMaskLock | regSet.rsMaskResvd);
+ }
+#endif // DEBUG
+
+ if (rsSpill)
+ {
+ // Remember which registers hold pointers. We will spill
+ // them, but the code that follows will fetch reg vars from
+            // the registers, so we need that GC info.
+ regMaskTP gcRegSavedByref = gcInfo.gcRegByrefSetCur & rsAdditional;
+ regMaskTP gcRegSavedGCRef = gcInfo.gcRegGCrefSetCur & rsAdditional;
+
+ // regSet.rsSpillRegs() will assert if we try to spill any enregistered variables.
+ // So, pretend there aren't any, and spill them anyway. This will only occur
+ // if rsAdditional is non-empty.
+ regMaskTP rsTemp = regSet.rsMaskVars;
+ regSet.ClearMaskVars();
+
+ regSet.rsSpillRegs(rsSpill);
+
+ // Restore gc tracking masks.
+ gcInfo.gcRegByrefSetCur |= gcRegSavedByref;
+ gcInfo.gcRegGCrefSetCur |= gcRegSavedGCRef;
+
+ // Set regSet.rsMaskVars back to normal
+ regSet.rsMaskVars = rsTemp;
+ }
+ }
+
+ // Generate the conditional jump but without doing any StackFP fixups.
+ genCondJump(op1, lab_true, lab_false, false);
+
+ /* Save the current liveness, register status, and GC pointers */
+ /* This is the liveness information upon entry */
+ /* to both the then and else parts of the qmark */
+
+ saveLiveness(&entryLiveness);
+
+ /* Clear the liveness of any local variables that are dead upon */
+ /* entry to the else part. */
+
+ /* Subtract the liveSet upon entry of the then part (op1->gtNext) */
+ /* from the "colon or op2" liveSet */
+ genDyingVars(compiler->compCurLife, tree->gtQmark.gtElseLiveSet);
+
+ /* genCondJump() closes the current emitter block */
+
+ genDefineTempLabel(lab_false);
+
+#if FEATURE_STACK_FP_X87
+ // Store fpstate
+
+ QmarkStateStackFP tempFPState;
+ bool bHasFPUState = !compCurFPState.IsEmpty();
+ genQMarkBeforeElseStackFP(&tempFPState, tree->gtQmark.gtElseLiveSet, op1->gtNext);
+#endif
+
+ /* Does the operator yield a value? */
+
+ if (tree->gtType == TYP_VOID)
+ {
+ /* Generate the code for the else part of the qmark */
+
+ genCodeForTree(elseNode, needReg, bestReg);
+
+ /* The type is VOID, so we shouldn't have computed a value */
+
+ noway_assert(!(elseNode->gtFlags & GTF_REG_VAL));
+
+ /* Save the current liveness, register status, and GC pointers */
+        /* This is the liveness information upon exit of the else part of the qmark */
+
+ saveLiveness(&exitLiveness);
+
+ /* Is there a 'then' part? */
+
+ if (thenNode->IsNothingNode())
+ {
+#if FEATURE_STACK_FP_X87
+ if (bHasFPUState)
+ {
+ // We had FP state on entry just after the condition, so potentially, the else
+ // node may have to do transition work.
+ lab_done = genCreateTempLabel();
+
+ /* Generate jmp lab_done */
+
+ inst_JMP(EJ_jmp, lab_done);
+
+ /* No 'then' - just generate the 'lab_true' label */
+
+ genDefineTempLabel(lab_true);
+
+ // We need to do this after defining the lab_false label
+ genQMarkAfterElseBlockStackFP(&tempFPState, compiler->compCurLife, op2->gtNext);
+ genQMarkAfterThenBlockStackFP(&tempFPState);
+ genDefineTempLabel(lab_done);
+ }
+ else
+#endif // FEATURE_STACK_FP_X87
+ {
+ /* No 'then' - just generate the 'lab_true' label */
+ genDefineTempLabel(lab_true);
+ }
+ }
+ else
+ {
+ lab_done = genCreateTempLabel();
+
+ /* Generate jmp lab_done */
+
+ inst_JMP(EJ_jmp, lab_done);
+
+ /* Restore the liveness that we had upon entry of the then part of the qmark */
+
+ restoreLiveness(&entryLiveness);
+
+ /* Clear the liveness of any local variables that are dead upon */
+ /* entry to the then part. */
+ genDyingVars(compiler->compCurLife, tree->gtQmark.gtThenLiveSet);
+
+ /* Generate lab_true: */
+
+ genDefineTempLabel(lab_true);
+#if FEATURE_STACK_FP_X87
+ // We need to do this after defining the lab_false label
+ genQMarkAfterElseBlockStackFP(&tempFPState, compiler->compCurLife, op2->gtNext);
+#endif
+ /* Enter the then part - trash all registers */
+
+ regTracker.rsTrackRegClr();
+
+ /* Generate the code for the then part of the qmark */
+
+ genCodeForTree(thenNode, needReg, bestReg);
+
+ /* The type is VOID, so we shouldn't have computed a value */
+
+ noway_assert(!(thenNode->gtFlags & GTF_REG_VAL));
+
+ unspillLiveness(&exitLiveness);
+
+ /* Verify that the exit liveness information is the same for the two parts of the qmark */
+
+ checkLiveness(&exitLiveness);
+#if FEATURE_STACK_FP_X87
+ genQMarkAfterThenBlockStackFP(&tempFPState);
+#endif
+ /* Define the "result" label */
+
+ genDefineTempLabel(lab_done);
+ }
+
+ /* Join of the two branches - trash all registers */
+
+ regTracker.rsTrackRegClr();
+
+ /* We're just about done */
+
+ genUpdateLife(tree);
+ }
+ else
+ {
+ /* Generate code for a qmark that generates a value */
+
+ /* Generate the code for the else part of the qmark */
+
+ noway_assert(elseNode->IsNothingNode() == false);
+
+ /* Compute the elseNode into any free register */
+ genComputeReg(elseNode, needReg, RegSet::ANY_REG, RegSet::FREE_REG, true);
+ noway_assert(elseNode->gtFlags & GTF_REG_VAL);
+ noway_assert(elseNode->gtRegNum != REG_NA);
+
+ /* Record the chosen register */
+ reg = elseNode->gtRegNum;
+ regs = genRegMask(reg);
+
+ /* Save the current liveness, register status, and GC pointers */
+ /* This is the liveness information upon exit of the else part of the qmark */
+
+ saveLiveness(&exitLiveness);
+
+ /* Generate jmp lab_done */
+ lab_done = genCreateTempLabel();
+
+#ifdef DEBUG
+ // We will use this to assert we don't emit instructions if we decide not to
+ // do the jmp
+ unsigned emittedInstructions = getEmitter()->emitInsCount;
+ bool bSkippedJump = false;
+#endif
+        // We would like to know here if the else node is really going to generate
+        // code, as if it isn't, we're generating a jump to the next instruction here.
+ // What you would really like is to be able to go back and remove the jump, but
+ // we have no way of doing that right now.
+
+ if (
+#if FEATURE_STACK_FP_X87
+ !bHasFPUState && // If there is no FPU state, we won't need an x87 transition
+#endif
+ genIsEnregisteredIntVariable(thenNode) == reg)
+ {
+#ifdef DEBUG
+ // For the moment, fix this easy case (enregistered else node), which
+ // is the one that happens all the time.
+
+ bSkippedJump = true;
+#endif
+ }
+ else
+ {
+ inst_JMP(EJ_jmp, lab_done);
+ }
+
+ /* Restore the liveness that we had upon entry of the else part of the qmark */
+
+ restoreLiveness(&entryLiveness);
+
+ /* Clear the liveness of any local variables that are dead upon */
+ /* entry to the then part. */
+ genDyingVars(compiler->compCurLife, tree->gtQmark.gtThenLiveSet);
+
+ /* Generate lab_true: */
+ genDefineTempLabel(lab_true);
+#if FEATURE_STACK_FP_X87
+ // Store FP state
+
+ // We need to do this after defining the lab_true label
+ genQMarkAfterElseBlockStackFP(&tempFPState, compiler->compCurLife, op2->gtNext);
+#endif
+ /* Enter the then part - trash all registers */
+
+ regTracker.rsTrackRegClr();
+
+ /* Generate the code for the then part of the qmark */
+
+ noway_assert(thenNode->IsNothingNode() == false);
+
+ /* This must place a value into the chosen register */
+ genComputeReg(thenNode, regs, RegSet::EXACT_REG, RegSet::FREE_REG, true);
+
+ noway_assert(thenNode->gtFlags & GTF_REG_VAL);
+ noway_assert(thenNode->gtRegNum == reg);
+
+ unspillLiveness(&exitLiveness);
+
+ /* Verify that the exit liveness information is the same for the two parts of the qmark */
+ checkLiveness(&exitLiveness);
+#if FEATURE_STACK_FP_X87
+ genQMarkAfterThenBlockStackFP(&tempFPState);
+#endif
+
+#ifdef DEBUG
+ noway_assert(bSkippedJump == false || getEmitter()->emitInsCount == emittedInstructions);
+#endif
+
+ /* Define the "result" label */
+ genDefineTempLabel(lab_done);
+
+ /* Join of the two branches - trash all registers */
+
+ regTracker.rsTrackRegClr();
+
+ /* Check whether this subtree has freed up any variables */
+
+ genUpdateLife(tree);
+
+ genMarkTreeInReg(tree, reg);
+ }
+}
+
+/*****************************************************************************
+ *
+ * Generate code for a qmark colon using the CMOV instruction. It's OK
+ * to return false when we can't easily implement it using a cmov (leading
+ * genCodeForQmark to implement it using branches).
+ */
+
+bool CodeGen::genCodeForQmarkWithCMOV(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg)
+{
+#ifdef _TARGET_XARCH_
+ GenTreePtr cond = tree->gtOp.gtOp1;
+ GenTreePtr colon = tree->gtOp.gtOp2;
+ // Warning: this naming of the local vars is backwards!
+ GenTreePtr thenNode = colon->gtOp.gtOp1;
+ GenTreePtr elseNode = colon->gtOp.gtOp2;
+ GenTreePtr alwaysNode, predicateNode;
+ regNumber reg;
+ regMaskTP needReg = destReg;
+
+ noway_assert(tree->gtOper == GT_QMARK);
+ noway_assert(cond->OperIsCompare());
+ noway_assert(colon->gtOper == GT_COLON);
+
+#ifdef DEBUG
+ if (JitConfig.JitNoCMOV())
+ {
+ return false;
+ }
+#endif
+
+ /* Can only implement CMOV on processors that support it */
+
+ if (!compiler->opts.compUseCMOV)
+ {
+ return false;
+ }
+
+ /* thenNode better be a local or a constant */
+
+ if ((thenNode->OperGet() != GT_CNS_INT) && (thenNode->OperGet() != GT_LCL_VAR))
+ {
+ return false;
+ }
+
+ /* elseNode better be a local or a constant or nothing */
+
+ if ((elseNode->OperGet() != GT_CNS_INT) && (elseNode->OperGet() != GT_LCL_VAR))
+ {
+ return false;
+ }
+
+ /* can't handle two constants here */
+
+ if ((thenNode->OperGet() == GT_CNS_INT) && (elseNode->OperGet() == GT_CNS_INT))
+ {
+ return false;
+ }
+
+ /* let's not handle comparisons of non-integer types */
+
+ if (!varTypeIsI(cond->gtOp.gtOp1->gtType))
+ {
+ return false;
+ }
+
+ /* Choose nodes for predicateNode and alwaysNode. Swap cond if necessary.
+ The biggest constraint is that cmov doesn't take an integer argument.
+ */
+
+ bool reverseCond = false;
+ if (elseNode->OperGet() == GT_CNS_INT)
+ {
+ // else node is a constant
+
+ alwaysNode = elseNode;
+ predicateNode = thenNode;
+ reverseCond = true;
+ }
+ else
+ {
+ alwaysNode = thenNode;
+ predicateNode = elseNode;
+ }
+
+ // If the live set in alwaysNode is not the same as in tree, then
+ // the variable in predicate node dies here. This is a dangerous
+ // case that we don't handle (genComputeReg could overwrite
+ // the value of the variable in the predicate node).
+
+ // This assert is just paranoid (we've already asserted it above)
+ assert(predicateNode->OperGet() == GT_LCL_VAR);
+ if ((predicateNode->gtFlags & GTF_VAR_DEATH) != 0)
+ {
+ return false;
+ }
+
+    // Past this point we are committing to use CMOV.
+
+ if (reverseCond)
+ {
+ compiler->gtReverseCond(cond);
+ }
+
+ emitJumpKind jumpKind = genCondSetFlags(cond);
+
+ // Compute the always node into any free register. If it's a constant,
+ // we need to generate the mov instruction here (otherwise genComputeReg might
+ // modify the flags, as in xor reg,reg).
+
+ if (alwaysNode->OperGet() == GT_CNS_INT)
+ {
+ reg = regSet.rsPickReg(needReg, bestReg);
+ inst_RV_IV(INS_mov, reg, alwaysNode->gtIntCon.gtIconVal, emitActualTypeSize(alwaysNode->TypeGet()));
+ gcInfo.gcMarkRegPtrVal(reg, alwaysNode->TypeGet());
+ regTracker.rsTrackRegTrash(reg);
+ }
+ else
+ {
+ genComputeReg(alwaysNode, needReg, RegSet::ANY_REG, RegSet::FREE_REG, true);
+ noway_assert(alwaysNode->gtFlags & GTF_REG_VAL);
+ noway_assert(alwaysNode->gtRegNum != REG_NA);
+
+ // Record the chosen register
+
+ reg = alwaysNode->gtRegNum;
+ }
+
+ regNumber regPredicate = REG_NA;
+
+ // Is predicateNode an enregistered variable?
+
+ if (genMarkLclVar(predicateNode))
+ {
+ // Variable lives in a register
+
+ regPredicate = predicateNode->gtRegNum;
+ }
+#if REDUNDANT_LOAD
+ else
+ {
+ // Checks if the variable happens to be in any of the registers
+
+ regPredicate = findStkLclInReg(predicateNode->gtLclVarCommon.gtLclNum);
+ }
+#endif
+
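+    // Map each emitJumpKind to the corresponding conditional move instruction.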
+ const static instruction EJtoCMOV[] = {INS_nop, INS_nop, INS_cmovo, INS_cmovno, INS_cmovb, INS_cmovae,
+ INS_cmove, INS_cmovne, INS_cmovbe, INS_cmova, INS_cmovs, INS_cmovns,
+ INS_cmovpe, INS_cmovpo, INS_cmovl, INS_cmovge, INS_cmovle, INS_cmovg};
+
+ noway_assert((unsigned)jumpKind < (sizeof(EJtoCMOV) / sizeof(EJtoCMOV[0])));
+ instruction cmov_ins = EJtoCMOV[jumpKind];
+
+ noway_assert(insIsCMOV(cmov_ins));
+
+ if (regPredicate != REG_NA)
+ {
+ // regPredicate is in a register
+
+ inst_RV_RV(cmov_ins, reg, regPredicate, predicateNode->TypeGet());
+ }
+ else
+ {
+ // regPredicate is in memory
+
+ inst_RV_TT(cmov_ins, reg, predicateNode, NULL);
+ }
+ gcInfo.gcMarkRegPtrVal(reg, predicateNode->TypeGet());
+ regTracker.rsTrackRegTrash(reg);
+
+ genUpdateLife(alwaysNode);
+ genUpdateLife(predicateNode);
+ genCodeForTree_DONE_LIFE(tree, reg);
+ return true;
+#else
+ return false;
+#endif
+}
+
+#ifdef _TARGET_XARCH_
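+/*****************************************************************************
+ *
+ *  Generate code for a GT_MUL that is evaluated with the one-operand mul/imul
+ *  instruction, which takes its first operand in EAX and trashes both EAX and
+ *  EDX; the result is returned in EAX.
+ */
+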
+void CodeGen::genCodeForMultEAX(GenTreePtr tree)
+{
+ GenTreePtr op1 = tree->gtOp.gtOp1;
+ GenTreePtr op2 = tree->gtGetOp2();
+ bool ovfl = tree->gtOverflow();
+ regNumber reg = DUMMY_INIT(REG_CORRUPT);
+ regMaskTP addrReg;
+
+ noway_assert(tree->OperGet() == GT_MUL);
+
+ /* We'll evaluate 'op1' first */
+
+ regMaskTP op1Mask = regSet.rsMustExclude(RBM_EAX, op2->gtRsvdRegs);
+
+ /* Generate the op1 into op1Mask and hold on to it. freeOnly=true */
+
+ genComputeReg(op1, op1Mask, RegSet::ANY_REG, RegSet::KEEP_REG, true);
+ noway_assert(op1->gtFlags & GTF_REG_VAL);
+
+ // If op2 is a constant we need to load the constant into a register
+ if (op2->OperKind() & GTK_CONST)
+ {
+ genCodeForTree(op2, RBM_EDX); // since EDX is going to be spilled anyway
+ noway_assert(op2->gtFlags & GTF_REG_VAL);
+ regSet.rsMarkRegUsed(op2);
+ addrReg = genRegMask(op2->gtRegNum);
+ }
+ else
+ {
+ /* Make the second operand addressable */
+ // Try to avoid EAX.
+ addrReg = genMakeRvalueAddressable(op2, RBM_ALLINT & ~RBM_EAX, RegSet::KEEP_REG, false);
+ }
+
+ /* Make sure the first operand is still in a register */
+ // op1 *must* go into EAX.
+ genRecoverReg(op1, RBM_EAX, RegSet::KEEP_REG);
+ noway_assert(op1->gtFlags & GTF_REG_VAL);
+
+ reg = op1->gtRegNum;
+
+ // For 8 bit operations, we need to pick byte addressable registers
+
+ if (ovfl && varTypeIsByte(tree->TypeGet()) && !(genRegMask(reg) & RBM_BYTE_REGS))
+ {
+ regNumber byteReg = regSet.rsGrabReg(RBM_BYTE_REGS);
+
+ inst_RV_RV(INS_mov, byteReg, reg);
+
+ regTracker.rsTrackRegTrash(byteReg);
+ regSet.rsMarkRegFree(genRegMask(reg));
+
+ reg = byteReg;
+ op1->gtRegNum = reg;
+ regSet.rsMarkRegUsed(op1);
+ }
+
+ /* Make sure the operand is still addressable */
+ addrReg = genKeepAddressable(op2, addrReg, genRegMask(reg));
+
+ /* Free up the operand, if it's a regvar */
+
+ genUpdateLife(op2);
+
+ /* The register is about to be trashed */
+
+ regTracker.rsTrackRegTrash(reg);
+
+ // For overflow instructions, tree->TypeGet() is the accurate type,
+ // and gives us the size for the operands.
+
+ emitAttr opSize = emitTypeSize(tree->TypeGet());
+
+ /* Compute the new value */
+
+ noway_assert(op1->gtRegNum == REG_EAX);
+
+ // Make sure Edx is free (unless used by op2 itself)
+ bool op2Released = false;
+
+ if ((addrReg & RBM_EDX) == 0)
+ {
+        // op2 does not use Edx, so make sure no one else does either
+ regSet.rsGrabReg(RBM_EDX);
+ }
+ else if (regSet.rsMaskMult & RBM_EDX)
+ {
+ /* Edx is used by op2 and some other trees.
+ Spill the other trees besides op2. */
+
+ regSet.rsGrabReg(RBM_EDX);
+ op2Released = true;
+
+ /* keepReg==RegSet::FREE_REG so that the other multi-used trees
+ don't get marked as unspilled as well. */
+ regSet.rsUnspillReg(op2, RBM_EDX, RegSet::FREE_REG);
+ }
+
+ instruction ins;
+
+ if (tree->gtFlags & GTF_UNSIGNED)
+ ins = INS_mulEAX;
+ else
+ ins = INS_imulEAX;
+
+ inst_TT(ins, op2, 0, 0, opSize);
+
+ /* Both EAX and EDX are now trashed */
+
+ regTracker.rsTrackRegTrash(REG_EAX);
+ regTracker.rsTrackRegTrash(REG_EDX);
+
+ /* Free up anything that was tied up by the operand */
+
+ if (!op2Released)
+ genDoneAddressable(op2, addrReg, RegSet::KEEP_REG);
+
+ /* The result will be where the first operand is sitting */
+
+ /* We must use RegSet::KEEP_REG since op1 can have a GC pointer here */
+ genRecoverReg(op1, 0, RegSet::KEEP_REG);
+
+ reg = op1->gtRegNum;
+ noway_assert(reg == REG_EAX);
+
+ genReleaseReg(op1);
+
+ /* Do we need an overflow check */
+
+ if (ovfl)
+ genCheckOverflow(tree);
+
+ genCodeForTree_DONE(tree, reg);
+}
+#endif // _TARGET_XARCH_
+
+#ifdef _TARGET_ARM_
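+/*****************************************************************************
+ *
+ *  Generate code for a GT_MUL on ARM using umull/smull, computing the full
+ *  64-bit product into a register pair (and optionally checking for overflow).
+ */
+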
+void CodeGen::genCodeForMult64(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg)
+{
+ GenTreePtr op1 = tree->gtOp.gtOp1;
+ GenTreePtr op2 = tree->gtGetOp2();
+
+ noway_assert(tree->OperGet() == GT_MUL);
+
+ /* Generate the first operand into some register */
+
+ genComputeReg(op1, RBM_ALLINT, RegSet::ANY_REG, RegSet::KEEP_REG);
+ noway_assert(op1->gtFlags & GTF_REG_VAL);
+
+ /* Generate the second operand into some register */
+
+ genComputeReg(op2, RBM_ALLINT, RegSet::ANY_REG, RegSet::KEEP_REG);
+ noway_assert(op2->gtFlags & GTF_REG_VAL);
+
+ /* Make sure the first operand is still in a register */
+ genRecoverReg(op1, 0, RegSet::KEEP_REG);
+ noway_assert(op1->gtFlags & GTF_REG_VAL);
+
+ /* Free up the operands */
+ genUpdateLife(tree);
+
+ genReleaseReg(op1);
+ genReleaseReg(op2);
+
+ regNumber regLo = regSet.rsPickReg(destReg, bestReg);
+ regNumber regHi;
+
+ regSet.rsLockReg(genRegMask(regLo));
+ regHi = regSet.rsPickReg(destReg & ~genRegMask(regLo));
+ regSet.rsUnlockReg(genRegMask(regLo));
+
+ instruction ins;
+ if (tree->gtFlags & GTF_UNSIGNED)
+ ins = INS_umull;
+ else
+ ins = INS_smull;
+
+ getEmitter()->emitIns_R_R_R_R(ins, EA_4BYTE, regLo, regHi, op1->gtRegNum, op2->gtRegNum);
+ regTracker.rsTrackRegTrash(regHi);
+ regTracker.rsTrackRegTrash(regLo);
+
+ /* Do we need an overflow check */
+
+ if (tree->gtOverflow())
+ {
+ // Keep regLo [and regHi] locked while generating code for the gtOverflow() case
+ //
+ regSet.rsLockReg(genRegMask(regLo));
+
+ if (tree->gtFlags & GTF_MUL_64RSLT)
+ regSet.rsLockReg(genRegMask(regHi));
+
+ regNumber regTmpHi = regHi;
+ if ((tree->gtFlags & GTF_UNSIGNED) == 0)
+ {
+ getEmitter()->emitIns_R_I(INS_cmp, EA_4BYTE, regLo, 0x80000000);
+ regTmpHi = regSet.rsPickReg(RBM_ALLINT);
+ getEmitter()->emitIns_R_R_I(INS_adc, EA_4BYTE, regTmpHi, regHi, 0);
+ regTracker.rsTrackRegTrash(regTmpHi);
+ }
+ getEmitter()->emitIns_R_I(INS_cmp, EA_4BYTE, regTmpHi, 0);
+
+        // Jump to the block which will throw the exception
+ emitJumpKind jmpNotEqual = genJumpKindForOper(GT_NE, CK_SIGNED);
+ genJumpToThrowHlpBlk(jmpNotEqual, SCK_OVERFLOW);
+
+ // Unlock regLo [and regHi] after generating code for the gtOverflow() case
+ //
+ regSet.rsUnlockReg(genRegMask(regLo));
+
+ if (tree->gtFlags & GTF_MUL_64RSLT)
+ regSet.rsUnlockReg(genRegMask(regHi));
+ }
+
+ genUpdateLife(tree);
+
+ if (tree->gtFlags & GTF_MUL_64RSLT)
+ genMarkTreeInRegPair(tree, gen2regs2pair(regLo, regHi));
+ else
+ genMarkTreeInReg(tree, regLo);
+}
+#endif // _TARGET_ARM_
+
+/*****************************************************************************
+ *
+ * Generate code for a simple binary arithmetic or logical operator.
+ * Handles GT_AND, GT_OR, GT_XOR, GT_ADD, GT_SUB, GT_MUL.
+ */
+
+void CodeGen::genCodeForTreeSmpBinArithLogOp(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg)
+{
+ instruction ins;
+ genTreeOps oper = tree->OperGet();
+ const var_types treeType = tree->TypeGet();
+ GenTreePtr op1 = tree->gtOp.gtOp1;
+ GenTreePtr op2 = tree->gtGetOp2();
+ insFlags flags = tree->gtSetFlags() ? INS_FLAGS_SET : INS_FLAGS_DONT_CARE;
+ regNumber reg = DUMMY_INIT(REG_CORRUPT);
+ regMaskTP needReg = destReg;
+
+ /* Figure out what instruction to generate */
+
+ bool isArith;
+ switch (oper)
+ {
+ case GT_AND:
+ ins = INS_AND;
+ isArith = false;
+ break;
+ case GT_OR:
+ ins = INS_OR;
+ isArith = false;
+ break;
+ case GT_XOR:
+ ins = INS_XOR;
+ isArith = false;
+ break;
+ case GT_ADD:
+ ins = INS_add;
+ isArith = true;
+ break;
+ case GT_SUB:
+ ins = INS_sub;
+ isArith = true;
+ break;
+ case GT_MUL:
+ ins = INS_MUL;
+ isArith = true;
+ break;
+ default:
+ unreached();
+ }
+
+#ifdef _TARGET_XARCH_
+ /* Special case: try to use the 3 operand form "imul reg, op1, icon" */
+
+ if ((oper == GT_MUL) &&
+ op2->IsIntCnsFitsInI32() && // op2 is a constant that fits in a sign-extended 32-bit immediate
+ !op1->IsCnsIntOrI() && // op1 is not a constant
+ (tree->gtFlags & GTF_MUL_64RSLT) == 0 && // tree not marked with MUL_64RSLT
+ !varTypeIsByte(treeType) && // No encoding for say "imul al,al,imm"
+ !tree->gtOverflow()) // 3 operand imul doesn't set flags
+ {
+ /* Make the first operand addressable */
+
+ regMaskTP addrReg = genMakeRvalueAddressable(op1, needReg & ~op2->gtRsvdRegs, RegSet::FREE_REG, false);
+
+ /* Grab a register for the target */
+
+ reg = regSet.rsPickReg(needReg, bestReg);
+
+#if LEA_AVAILABLE
+ /* Compute the value into the target: reg=op1*op2_icon */
+ if (op2->gtIntCon.gtIconVal == 3 || op2->gtIntCon.gtIconVal == 5 || op2->gtIntCon.gtIconVal == 9)
+ {
+ regNumber regSrc;
+ if (op1->gtFlags & GTF_REG_VAL)
+ {
+ regSrc = op1->gtRegNum;
+ }
+ else
+ {
+ inst_RV_TT(INS_mov, reg, op1, 0, emitActualTypeSize(op1->TypeGet()));
+ regSrc = reg;
+ }
+ getEmitter()->emitIns_R_ARX(INS_lea, emitActualTypeSize(treeType), reg, regSrc, regSrc,
+ (op2->gtIntCon.gtIconVal & -2), 0);
+ }
+ else
+#endif // LEA_AVAILABLE
+ {
+ /* Compute the value into the target: reg=op1*op2_icon */
+ inst_RV_TT_IV(INS_MUL, reg, op1, (int)op2->gtIntCon.gtIconVal);
+ }
+
+ /* The register has been trashed now */
+
+ regTracker.rsTrackRegTrash(reg);
+
+ /* The address is no longer live */
+
+ genDoneAddressable(op1, addrReg, RegSet::FREE_REG);
+
+ genCodeForTree_DONE(tree, reg);
+ return;
+ }
+#endif // _TARGET_XARCH_
+
+ bool ovfl = false;
+
+ if (isArith)
+ {
+ // We only reach here for GT_ADD, GT_SUB and GT_MUL.
+ assert((oper == GT_ADD) || (oper == GT_SUB) || (oper == GT_MUL));
+
+ ovfl = tree->gtOverflow();
+
+        /* We record the accurate (small) types in trees only when we need to
+ * check for overflow. Otherwise we record genActualType()
+ */
+
+ noway_assert(ovfl || (treeType == genActualType(treeType)));
+
+#if LEA_AVAILABLE
+
+ /* Can we use an 'lea' to compute the result?
+ Can't use 'lea' for overflow as it doesn't set flags
+ Can't use 'lea' unless we have at least two free registers */
+ {
+ bool bEnoughRegs = genRegCountForLiveIntEnregVars(tree) + // Live intreg variables
+ genCountBits(regSet.rsMaskLock) + // Locked registers
+                               2 // We will need two registers
+ <= genCountBits(RBM_ALLINT & ~(doubleAlignOrFramePointerUsed() ? RBM_FPBASE : 0));
+
+ regMaskTP regs = RBM_NONE; // OUT argument
+ if (!ovfl && bEnoughRegs && genMakeIndAddrMode(tree, NULL, true, needReg, RegSet::FREE_REG, &regs, false))
+ {
+ emitAttr size;
+
+ /* Is the value now computed in some register? */
+
+ if (tree->gtFlags & GTF_REG_VAL)
+ {
+ genCodeForTree_REG_VAR1(tree);
+ return;
+ }
+
+ /* If we can reuse op1/2's register directly, and 'tree' is
+ a simple expression (ie. not in scaled index form),
+ might as well just use "add" instead of "lea" */
+
+ // However, if we're in a context where we want to evaluate "tree" into a specific
+ // register different from the reg we'd use in this optimization, then it doesn't
+ // make sense to do the "add", since we'd also have to do a "mov."
+ if (op1->gtFlags & GTF_REG_VAL)
+ {
+ reg = op1->gtRegNum;
+
+ if ((genRegMask(reg) & regSet.rsRegMaskFree()) && (genRegMask(reg) & needReg))
+ {
+ if (op2->gtFlags & GTF_REG_VAL)
+ {
+ /* Simply add op2 to the register */
+
+ inst_RV_TT(INS_add, reg, op2, 0, emitTypeSize(treeType), flags);
+
+ if (tree->gtSetFlags())
+ genFlagsEqualToReg(tree, reg);
+
+ goto DONE_LEA_ADD;
+ }
+ else if (op2->OperGet() == GT_CNS_INT)
+ {
+ /* Simply add op2 to the register */
+
+ genIncRegBy(reg, op2->gtIntCon.gtIconVal, tree, treeType);
+
+ goto DONE_LEA_ADD;
+ }
+ }
+ }
+
+ if (op2->gtFlags & GTF_REG_VAL)
+ {
+ reg = op2->gtRegNum;
+
+ if ((genRegMask(reg) & regSet.rsRegMaskFree()) && (genRegMask(reg) & needReg))
+ {
+ if (op1->gtFlags & GTF_REG_VAL)
+ {
+ /* Simply add op1 to the register */
+
+ inst_RV_TT(INS_add, reg, op1, 0, emitTypeSize(treeType), flags);
+
+ if (tree->gtSetFlags())
+ genFlagsEqualToReg(tree, reg);
+
+ goto DONE_LEA_ADD;
+ }
+ }
+ }
+
+ // The expression either requires a scaled-index form, or the
+ // op1 or op2's register can't be targeted; the latter can
+ // happen when op1 or op2 are enregistered variables.
+
+ reg = regSet.rsPickReg(needReg, bestReg);
+ size = emitActualTypeSize(treeType);
+
+ /* Generate "lea reg, [addr-mode]" */
+
+ inst_RV_AT(INS_lea, size, treeType, reg, tree, 0, flags);
+
+#ifndef _TARGET_XARCH_
+ // Don't call genFlagsEqualToReg on x86/x64
+ // as 'lea' does not set the flags
+ if (tree->gtSetFlags())
+ genFlagsEqualToReg(tree, reg);
+#endif
+
+ DONE_LEA_ADD:
+ /* The register has been trashed now */
+ regTracker.rsTrackRegTrash(reg);
+
+ genDoneAddressable(tree, regs, RegSet::FREE_REG);
+
+ /* The following could be an 'inner' pointer!!! */
+
+ noway_assert(treeType == TYP_BYREF || !varTypeIsGC(treeType));
+
+ if (treeType == TYP_BYREF)
+ {
+ genUpdateLife(tree);
+
+ gcInfo.gcMarkRegSetNpt(genRegMask(reg)); // in case "reg" was a TYP_GCREF before
+ gcInfo.gcMarkRegPtrVal(reg, TYP_BYREF);
+ }
+
+ genCodeForTree_DONE(tree, reg);
+ return;
+ }
+ }
+
+#endif // LEA_AVAILABLE
+
+ noway_assert((varTypeIsGC(treeType) == false) || (treeType == TYP_BYREF && (ins == INS_add || ins == INS_sub)));
+ }
+
+ /* The following makes an assumption about gtSetEvalOrder(this) */
+
+ noway_assert((tree->gtFlags & GTF_REVERSE_OPS) == 0);
+
+ /* Compute a useful register mask */
+ needReg = regSet.rsMustExclude(needReg, op2->gtRsvdRegs);
+ needReg = regSet.rsNarrowHint(needReg, regSet.rsRegMaskFree());
+
+ // Determine what registers go live between op1 and op2
+ // Don't bother checking if op1 is already in a register.
+ // This is not just for efficiency; if it's already in a
+ // register then it may already be considered "evaluated"
+ // for the purposes of liveness, in which case genNewLiveRegMask
+ // will assert
+ if (!op1->InReg())
+ {
+ regMaskTP newLiveMask = genNewLiveRegMask(op1, op2);
+ if (newLiveMask)
+ {
+ needReg = regSet.rsNarrowHint(needReg, ~newLiveMask);
+ }
+ }
+
+#if CPU_HAS_BYTE_REGS
+ /* 8-bit operations can only be done in the byte-regs */
+ if (varTypeIsByte(treeType))
+ needReg = regSet.rsNarrowHint(RBM_BYTE_REGS, needReg);
+#endif // CPU_HAS_BYTE_REGS
+
+ // Try selecting one of the 'bestRegs'
+ needReg = regSet.rsNarrowHint(needReg, bestReg);
+
+ /* Special case: small_val & small_mask */
+
+ if (varTypeIsSmall(op1->TypeGet()) && op2->IsCnsIntOrI() && oper == GT_AND)
+ {
+ size_t and_val = op2->gtIntCon.gtIconVal;
+ size_t andMask;
+ var_types typ = op1->TypeGet();
+
+ switch (typ)
+ {
+ case TYP_BOOL:
+ case TYP_BYTE:
+ case TYP_UBYTE:
+ andMask = 0x000000FF;
+ break;
+ case TYP_SHORT:
+ case TYP_CHAR:
+ andMask = 0x0000FFFF;
+ break;
+ default:
+ noway_assert(!"unexpected type");
+ return;
+ }
+
+ // Is the 'and_val' completely contained within the bits found in 'andMask'
+ if ((and_val & ~andMask) == 0)
+ {
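+ // For example, with a TYP_SHORT op1 and a mask like 0x00FF the constant fits
+ // entirely within the small type, so op1 can be loaded with a zero-extending
+ // (unsigned) load; when and_val == andMask the 'and' itself is omitted below.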
+ // We must use unsigned instructions when loading op1
+ if (varTypeIsByte(typ))
+ {
+ op1->gtType = TYP_UBYTE;
+ }
+ else // varTypeIsShort(typ)
+ {
+ assert(varTypeIsShort(typ));
+ op1->gtType = TYP_CHAR;
+ }
+
+ /* Generate the first operand into a scratch register */
+
+ op1 = genCodeForCommaTree(op1);
+ genComputeReg(op1, needReg, RegSet::ANY_REG, RegSet::FREE_REG, true);
+
+ noway_assert(op1->gtFlags & GTF_REG_VAL);
+
+ regNumber op1Reg = op1->gtRegNum;
+
+ // Did we end up in an acceptable register?
+ // and do we have an acceptable free register available to grab?
+ //
+ if (((genRegMask(op1Reg) & needReg) == 0) && ((regSet.rsRegMaskFree() & needReg) != 0))
+ {
+ // See if we can pick a register from bestReg
+ bestReg &= needReg;
+
+ // Grab an acceptable register
+ regNumber newReg;
+ if ((bestReg & regSet.rsRegMaskFree()) != 0)
+ newReg = regSet.rsGrabReg(bestReg);
+ else
+ newReg = regSet.rsGrabReg(needReg);
+
+ noway_assert(op1Reg != newReg);
+
+ /* Update the value in the target register */
+
+ regTracker.rsTrackRegCopy(newReg, op1Reg);
+
+ inst_RV_RV(ins_Copy(op1->TypeGet()), newReg, op1Reg, op1->TypeGet());
+
+ /* The value has been transferred to 'reg' */
+
+ if ((genRegMask(op1Reg) & regSet.rsMaskUsed) == 0)
+ gcInfo.gcMarkRegSetNpt(genRegMask(op1Reg));
+
+ gcInfo.gcMarkRegPtrVal(newReg, op1->TypeGet());
+
+ /* The value is now in an appropriate register */
+
+ op1->gtRegNum = newReg;
+ }
+ noway_assert(op1->gtFlags & GTF_REG_VAL);
+ genUpdateLife(op1);
+
+ /* Mark the register as 'used' */
+ regSet.rsMarkRegUsed(op1);
+ reg = op1->gtRegNum;
+
+ if (and_val != andMask) // Does the "and" mask only cover some of the bits?
+ {
+ /* "and" the value */
+
+ inst_RV_IV(INS_AND, reg, and_val, EA_4BYTE, flags);
+ }
+
+#ifdef DEBUG
+ /* Update the live set of register variables */
+ if (compiler->opts.varNames)
+ genUpdateLife(tree);
+#endif
+
+ /* Now we can update the register pointer information */
+
+ genReleaseReg(op1);
+ gcInfo.gcMarkRegPtrVal(reg, treeType);
+
+ genCodeForTree_DONE_LIFE(tree, reg);
+ return;
+ }
+ }
+
+#ifdef _TARGET_XARCH_
+
+ // Do we have to use the special "imul" instruction
+ // which has eax as the implicit operand?
+ //
+ bool multEAX = false;
+
+ if (oper == GT_MUL)
+ {
+ if (tree->gtFlags & GTF_MUL_64RSLT)
+ {
+ /* Only multiplying with EAX will leave the 64-bit
+ * result in EDX:EAX */
+
+ multEAX = true;
+ }
+ else if (ovfl)
+ {
+ if (tree->gtFlags & GTF_UNSIGNED)
+ {
+ /* "mul reg/mem" always has EAX as default operand */
+
+ multEAX = true;
+ }
+ else if (varTypeIsSmall(treeType))
+ {
+ /* Only the "imul with EAX" encoding has the 'w' bit
+ * to specify the size of the operands */
+
+ multEAX = true;
+ }
+ }
+ }
+
+ if (multEAX)
+ {
+ noway_assert(oper == GT_MUL);
+
+ return genCodeForMultEAX(tree);
+ }
+#endif // _TARGET_XARCH_
+
+#ifdef _TARGET_ARM_
+
+ // Do we have to use the special 32x32 => 64 bit multiply
+ //
+ bool mult64 = false;
+
+ if (oper == GT_MUL)
+ {
+ if (tree->gtFlags & GTF_MUL_64RSLT)
+ {
+ mult64 = true;
+ }
+ else if (ovfl)
+ {
+ // We always must use the 32x32 => 64 bit multiply
+ // to detect overflow
+ mult64 = true;
+ }
+ }
+
+ if (mult64)
+ {
+ noway_assert(oper == GT_MUL);
+
+ return genCodeForMult64(tree, destReg, bestReg);
+ }
+#endif // _TARGET_ARM_
+
+ /* Generate the first operand into a scratch register */
+
+ op1 = genCodeForCommaTree(op1);
+ genComputeReg(op1, needReg, RegSet::ANY_REG, RegSet::FREE_REG, true);
+
+ noway_assert(op1->gtFlags & GTF_REG_VAL);
+
+ regNumber op1Reg = op1->gtRegNum;
+
+ // Set up needReg with the set of registers that we require op1 to be in
+ //
+ needReg = RBM_ALLINT;
+
+ /* Compute a useful register mask */
+ needReg = regSet.rsMustExclude(needReg, op2->gtRsvdRegs);
+ needReg = regSet.rsNarrowHint(needReg, regSet.rsRegMaskFree());
+
+#if CPU_HAS_BYTE_REGS
+ /* 8-bit operations can only be done in the byte-regs */
+ if (varTypeIsByte(treeType))
+ needReg = regSet.rsNarrowHint(RBM_BYTE_REGS, needReg);
+#endif // CPU_HAS_BYTE_REGS
+
+ // Did we end up in an acceptable register?
+ // and do we have an acceptable free register available to grab?
+ //
+ if (((genRegMask(op1Reg) & needReg) == 0) && ((regSet.rsRegMaskFree() & needReg) != 0))
+ {
+ // See if we can pick a register from bestReg
+ bestReg &= needReg;
+
+ // Grab an acceptable register
+ regNumber newReg;
+ if ((bestReg & regSet.rsRegMaskFree()) != 0)
+ newReg = regSet.rsGrabReg(bestReg);
+ else
+ newReg = regSet.rsGrabReg(needReg);
+
+ noway_assert(op1Reg != newReg);
+
+ /* Update the value in the target register */
+
+ regTracker.rsTrackRegCopy(newReg, op1Reg);
+
+ inst_RV_RV(ins_Copy(op1->TypeGet()), newReg, op1Reg, op1->TypeGet());
+
+ /* The value has been transferred to 'reg' */
+
+ if ((genRegMask(op1Reg) & regSet.rsMaskUsed) == 0)
+ gcInfo.gcMarkRegSetNpt(genRegMask(op1Reg));
+
+ gcInfo.gcMarkRegPtrVal(newReg, op1->TypeGet());
+
+ /* The value is now in an appropriate register */
+
+ op1->gtRegNum = newReg;
+ }
+ noway_assert(op1->gtFlags & GTF_REG_VAL);
+ op1Reg = op1->gtRegNum;
+
+ genUpdateLife(op1);
+
+ /* Mark the register as 'used' */
+ regSet.rsMarkRegUsed(op1);
+
+ bool isSmallConst = false;
+
+#ifdef _TARGET_ARM_
+ if ((op2->gtOper == GT_CNS_INT) && arm_Valid_Imm_For_Instr(ins, op2->gtIntCon.gtIconVal, INS_FLAGS_DONT_CARE))
+ {
+ isSmallConst = true;
+ }
+#endif
+ /* Make the second operand addressable */
+
+ regMaskTP addrReg = genMakeRvalueAddressable(op2, RBM_ALLINT, RegSet::KEEP_REG, isSmallConst);
+
+#if CPU_LOAD_STORE_ARCH
+ genRecoverReg(op1, RBM_ALLINT, RegSet::KEEP_REG);
+#else // !CPU_LOAD_STORE_ARCH
+ /* Is op1 spilled and op2 in a register? */
+
+ if ((op1->gtFlags & GTF_SPILLED) && (op2->gtFlags & GTF_REG_VAL) && (ins != INS_sub))
+ {
+ noway_assert(ins == INS_add || ins == INS_MUL || ins == INS_AND || ins == INS_OR || ins == INS_XOR);
+
+ // genMakeRvalueAddressable(GT_LCL_VAR) shouldn't spill anything
+ noway_assert(op2->gtOper != GT_LCL_VAR ||
+ varTypeIsSmall(compiler->lvaTable[op2->gtLclVarCommon.gtLclNum].TypeGet()));
+
+ reg = op2->gtRegNum;
+ regMaskTP regMask = genRegMask(reg);
+
+ /* Is the register holding op2 available? */
+
+ if (regMask & regSet.rsMaskVars)
+ {
+ }
+ else
+ {
+ /* Get the temp we spilled into. */
+
+ TempDsc* temp = regSet.rsUnspillInPlace(op1, op1->gtRegNum);
+
+ /* For 8-bit operations, we need to make sure that op2 is
+ in a byte-addressable register */
+
+ if (varTypeIsByte(treeType) && !(regMask & RBM_BYTE_REGS))
+ {
+ regNumber byteReg = regSet.rsGrabReg(RBM_BYTE_REGS);
+
+ inst_RV_RV(INS_mov, byteReg, reg);
+ regTracker.rsTrackRegTrash(byteReg);
+
+ /* op2 couldn't have spilled as it was not sitting in
+ RBM_BYTE_REGS, and regSet.rsGrabReg() will only spill its args */
+ noway_assert(op2->gtFlags & GTF_REG_VAL);
+
+ regSet.rsUnlockReg(regMask);
+ regSet.rsMarkRegFree(regMask);
+
+ reg = byteReg;
+ regMask = genRegMask(reg);
+ op2->gtRegNum = reg;
+ regSet.rsMarkRegUsed(op2);
+ }
+
+ inst_RV_ST(ins, reg, temp, 0, treeType);
+
+ regTracker.rsTrackRegTrash(reg);
+
+ /* Free the temp */
+
+ compiler->tmpRlsTemp(temp);
+
+ /* 'add'/'sub' set all CC flags, others only ZF */
+
+ /* If we need to check overflow, for small types, the
+ * flags can't be used as we perform the arithmetic
+ * operation (on small registers) and then sign extend it
+ *
+ * NOTE : If we ever don't need to sign-extend the result,
+ * we can use the flags
+ */
+
+ if (tree->gtSetFlags())
+ {
+ genFlagsEqualToReg(tree, reg);
+ }
+
+ /* The result is where the second operand is sitting. Mark result reg as free */
+ regSet.rsMarkRegFree(genRegMask(reg));
+
+ gcInfo.gcMarkRegPtrVal(reg, treeType);
+
+ goto CHK_OVF;
+ }
+ }
+#endif // !CPU_LOAD_STORE_ARCH
+
+ /* Make sure the first operand is still in a register */
+ regSet.rsLockUsedReg(addrReg);
+ genRecoverReg(op1, 0, RegSet::KEEP_REG);
+ noway_assert(op1->gtFlags & GTF_REG_VAL);
+ regSet.rsUnlockUsedReg(addrReg);
+
+ reg = op1->gtRegNum;
+
+ // For 8 bit operations, we need to pick byte addressable registers
+
+ if (varTypeIsByte(treeType) && !(genRegMask(reg) & RBM_BYTE_REGS))
+ {
+ regNumber byteReg = regSet.rsGrabReg(RBM_BYTE_REGS);
+
+ inst_RV_RV(INS_mov, byteReg, reg);
+
+ regTracker.rsTrackRegTrash(byteReg);
+ regSet.rsMarkRegFree(genRegMask(reg));
+
+ reg = byteReg;
+ op1->gtRegNum = reg;
+ regSet.rsMarkRegUsed(op1);
+ }
+
+ /* Make sure the operand is still addressable */
+ addrReg = genKeepAddressable(op2, addrReg, genRegMask(reg));
+
+ /* Free up the operand, if it's a regvar */
+
+ genUpdateLife(op2);
+
+ /* The register is about to be trashed */
+
+ regTracker.rsTrackRegTrash(reg);
+
+ bool op2Released = false;
+
+ // For overflow instructions, tree->gtType is the accurate type,
+ // and gives us the size for the operands.
+
+ emitAttr opSize = emitTypeSize(treeType);
+
+ /* Compute the new value */
+
+ if (isArith && !op2->InReg() && (op2->OperKind() & GTK_CONST)
+#if !CPU_HAS_FP_SUPPORT
+ && (treeType == TYP_INT || treeType == TYP_I_IMPL)
+#endif
+ )
+ {
+ ssize_t ival = op2->gtIntCon.gtIconVal;
+
+ if (oper == GT_ADD)
+ {
+ genIncRegBy(reg, ival, tree, treeType, ovfl);
+ }
+ else if (oper == GT_SUB)
+ {
+ if (ovfl && ((tree->gtFlags & GTF_UNSIGNED) ||
+ (ival == ((treeType == TYP_INT) ? INT32_MIN : SSIZE_T_MIN))) // -0x80000000 == 0x80000000.
+ // Therefore we can't use -ival.
+ )
+ {
+ /* For unsigned overflow, we have to use INS_sub to set
+ the flags correctly */
+
+ genDecRegBy(reg, ival, tree);
+ }
+ else
+ {
+ /* Else, we simply add the negative of the value */
+
+ genIncRegBy(reg, -ival, tree, treeType, ovfl);
+ }
+ }
+ else if (oper == GT_MUL)
+ {
+ genMulRegBy(reg, ival, tree, treeType, ovfl);
+ }
+ }
+ else
+ {
+ // op2 could be a GT_COMMA (i.e. an assignment for a CSE def)
+ op2 = op2->gtEffectiveVal();
+ if (varTypeIsByte(treeType) && op2->InReg())
+ {
+ noway_assert(genRegMask(reg) & RBM_BYTE_REGS);
+
+ regNumber op2reg = op2->gtRegNum;
+ regMaskTP op2regMask = genRegMask(op2reg);
+
+ if (!(op2regMask & RBM_BYTE_REGS))
+ {
+ regNumber byteReg = regSet.rsGrabReg(RBM_BYTE_REGS);
+
+ inst_RV_RV(INS_mov, byteReg, op2reg);
+ regTracker.rsTrackRegTrash(byteReg);
+
+ genDoneAddressable(op2, addrReg, RegSet::KEEP_REG);
+ op2Released = true;
+
+ op2->gtRegNum = byteReg;
+ }
+ }
+
+ inst_RV_TT(ins, reg, op2, 0, opSize, flags);
+ }
+
+ /* Free up anything that was tied up by the operand */
+
+ if (!op2Released)
+ genDoneAddressable(op2, addrReg, RegSet::KEEP_REG);
+
+ /* The result will be where the first operand is sitting */
+
+ /* We must use RegSet::KEEP_REG since op1 can have a GC pointer here */
+ genRecoverReg(op1, 0, RegSet::KEEP_REG);
+
+ reg = op1->gtRegNum;
+
+ /* 'add'/'sub' set all CC flags, others only ZF+SF */
+
+ if (tree->gtSetFlags())
+ genFlagsEqualToReg(tree, reg);
+
+ genReleaseReg(op1);
+
+#if !CPU_LOAD_STORE_ARCH
+CHK_OVF:
+#endif // !CPU_LOAD_STORE_ARCH
+
+ /* Do we need an overflow check */
+
+ if (ovfl)
+ genCheckOverflow(tree);
+
+ genCodeForTree_DONE(tree, reg);
+}
+
+/*****************************************************************************
+ *
+ * Generate code for a simple binary arithmetic or logical assignment operator: x <op>= y.
+ * Handles GT_ASG_AND, GT_ASG_OR, GT_ASG_XOR, GT_ASG_ADD, GT_ASG_SUB.
+ */
+
+void CodeGen::genCodeForTreeSmpBinArithLogAsgOp(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg)
+{
+ instruction ins;
+ const genTreeOps oper = tree->OperGet();
+ const var_types treeType = tree->TypeGet();
+ GenTreePtr op1 = tree->gtOp.gtOp1;
+ GenTreePtr op2 = tree->gtGetOp2();
+ insFlags flags = tree->gtSetFlags() ? INS_FLAGS_SET : INS_FLAGS_DONT_CARE;
+ regNumber reg = DUMMY_INIT(REG_CORRUPT);
+ regMaskTP needReg = destReg;
+ regMaskTP addrReg;
+
+ /* Figure out what instruction to generate */
+
+ bool isArith;
+ switch (oper)
+ {
+ case GT_ASG_AND:
+ ins = INS_AND;
+ isArith = false;
+ break;
+ case GT_ASG_OR:
+ ins = INS_OR;
+ isArith = false;
+ break;
+ case GT_ASG_XOR:
+ ins = INS_XOR;
+ isArith = false;
+ break;
+ case GT_ASG_ADD:
+ ins = INS_add;
+ isArith = true;
+ break;
+ case GT_ASG_SUB:
+ ins = INS_sub;
+ isArith = true;
+ break;
+ default:
+ unreached();
+ }
+
+ bool ovfl = false;
+
+ if (isArith)
+ {
+ // We only reach here for GT_ASG_SUB, GT_ASG_ADD.
+
+ ovfl = tree->gtOverflow();
+
+ // We can't use += with overflow if the target's value must remain unchanged
+ // when the "+" raises an overflow exception
+ noway_assert(!ovfl ||
+ ((op1->gtOper == GT_LCL_VAR || op1->gtOper == GT_LCL_FLD) && !compiler->compCurBB->hasTryIndex()));
+
+ /* Do not allow overflow instructions with refs/byrefs */
+
+ noway_assert(!ovfl || !varTypeIsGC(treeType));
+
+ // We disallow overflow and byte-ops here as it is too much trouble
+ noway_assert(!ovfl || !varTypeIsByte(treeType));
+
+ /* Is the second operand a constant? */
+
+ if (op2->IsIntCnsFitsInI32())
+ {
+ int ival = (int)op2->gtIntCon.gtIconVal;
+
+ /* What is the target of the assignment? */
+
+ switch (op1->gtOper)
+ {
+ case GT_REG_VAR:
+
+ REG_VAR4:
+
+ reg = op1->gtRegVar.gtRegNum;
+
+ /* No registers are needed for addressing */
+
+ addrReg = RBM_NONE;
+#if !CPU_LOAD_STORE_ARCH
+ INCDEC_REG:
+#endif
+ /* We're adding a constant to a register */
+
+ if (oper == GT_ASG_ADD)
+ genIncRegBy(reg, ival, tree, treeType, ovfl);
+ else if (ovfl && ((tree->gtFlags & GTF_UNSIGNED) ||
+ ival == ((treeType == TYP_INT) ? INT32_MIN : SSIZE_T_MIN)) // -0x80000000 ==
+ // 0x80000000.
+ // Therefore we can't
+ // use -ival.
+ )
+ /* For unsigned overflow, we have to use INS_sub to set
+ the flags correctly */
+ genDecRegBy(reg, ival, tree);
+ else
+ genIncRegBy(reg, -ival, tree, treeType, ovfl);
+
+ break;
+
+ case GT_LCL_VAR:
+
+ /* Does the variable live in a register? */
+
+ if (genMarkLclVar(op1))
+ goto REG_VAR4;
+
+ __fallthrough;
+
+ default:
+
+ /* Make the target addressable for load/store */
+ addrReg = genMakeAddressable2(op1, needReg, RegSet::KEEP_REG, true, true);
+
+#if !CPU_LOAD_STORE_ARCH
+ // For CPU_LOAD_STORE_ARCH, we always load from memory then store to memory
+
+ /* For small types with overflow check, we need to
+ sign/zero extend the result, so we need it in a reg */
+
+ if (ovfl && genTypeSize(treeType) < sizeof(int))
+#endif // !CPU_LOAD_STORE_ARCH
+ {
+ // Load op1 into a reg
+
+ reg = regSet.rsGrabReg(RBM_ALLINT & ~addrReg);
+
+ inst_RV_TT(INS_mov, reg, op1);
+
+ // Issue the add/sub and the overflow check
+
+ inst_RV_IV(ins, reg, ival, emitActualTypeSize(treeType), flags);
+ regTracker.rsTrackRegTrash(reg);
+
+ if (ovfl)
+ {
+ genCheckOverflow(tree);
+ }
+
+ /* Store the (sign/zero extended) result back to
+ the stack location of the variable */
+
+ inst_TT_RV(ins_Store(op1->TypeGet()), op1, reg);
+
+ break;
+ }
+#if !CPU_LOAD_STORE_ARCH
+ else
+ {
+ /* Add/subtract the new value into/from the target */
+
+ if (op1->gtFlags & GTF_REG_VAL)
+ {
+ reg = op1->gtRegNum;
+ goto INCDEC_REG;
+ }
+
+ /* Special case: inc/dec (up to P3, or for small code, or blended code outside loops) */
+ if (!ovfl && (ival == 1 || ival == -1) &&
+ !compiler->optAvoidIncDec(compiler->compCurBB->getBBWeight(compiler)))
+ {
+ noway_assert(oper == GT_ASG_SUB || oper == GT_ASG_ADD);
+ if (oper == GT_ASG_SUB)
+ ival = -ival;
+
+ ins = (ival > 0) ? INS_inc : INS_dec;
+ inst_TT(ins, op1);
+ }
+ else
+ {
+ inst_TT_IV(ins, op1, ival);
+ }
+
+ if ((op1->gtOper == GT_LCL_VAR) && (!ovfl || treeType == TYP_INT))
+ {
+ if (tree->gtSetFlags())
+ genFlagsEqualToVar(tree, op1->gtLclVarCommon.gtLclNum);
+ }
+
+ break;
+ }
+#endif // !CPU_LOAD_STORE_ARCH
+ } // end switch (op1->gtOper)
+
+ genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);
+
+ genCodeForTreeSmpOpAsg_DONE_ASSG(tree, addrReg, reg, ovfl);
+ return;
+ } // end if (op2->IsIntCnsFitsInI32())
+ } // end if (isArith)
+
+ noway_assert(!varTypeIsGC(treeType) || ins == INS_sub || ins == INS_add);
+
+ /* Is the target a register or local variable? */
+
+ switch (op1->gtOper)
+ {
+ case GT_LCL_VAR:
+
+ /* Does the target variable live in a register? */
+
+ if (!genMarkLclVar(op1))
+ break;
+
+ __fallthrough;
+
+ case GT_REG_VAR:
+
+ /* Get hold of the target register */
+
+ reg = op1->gtRegVar.gtRegNum;
+
+ /* Make sure the target of the store is available */
+
+ if (regSet.rsMaskUsed & genRegMask(reg))
+ {
+ regSet.rsSpillReg(reg);
+ }
+
+ /* Make the RHS addressable */
+
+ addrReg = genMakeRvalueAddressable(op2, 0, RegSet::KEEP_REG, false);
+
+ /* Compute the new value into the target register */
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#if CPU_HAS_BYTE_REGS
+
+ // Fix 383833 X86 ILGEN
+ regNumber reg2;
+ if ((op2->gtFlags & GTF_REG_VAL) != 0)
+ {
+ reg2 = op2->gtRegNum;
+ }
+ else
+ {
+ reg2 = REG_STK;
+ }
+
+ // We can only generate a byte ADD,SUB,OR,AND operation when reg and reg2 are both BYTE registers.
+ // When op2 is in memory, reg2==REG_STK and we will need to force op2 into a register.
+ //
+ if (varTypeIsByte(treeType) &&
+ (((genRegMask(reg) & RBM_BYTE_REGS) == 0) || ((genRegMask(reg2) & RBM_BYTE_REGS) == 0)))
+ {
+ // We will force op2 into a register (via sign/zero extending load)
+ // for the cases where op2 is in memory and thus could have
+ // an unmapped page just beyond its location
+ //
+ if ((op2->OperIsIndir() || (op2->gtOper == GT_CLS_VAR)) && varTypeIsSmall(op2->TypeGet()))
+ {
+ genCodeForTree(op2, 0);
+ assert((op2->gtFlags & GTF_REG_VAL) != 0);
+ }
+
+ inst_RV_TT(ins, reg, op2, 0, EA_4BYTE, flags);
+
+ bool canOmit = false;
+
+ if (varTypeIsUnsigned(treeType))
+ {
+ // When op2 is a byte sized constant we can omit the zero extend instruction
+ if ((op2->gtOper == GT_CNS_INT) && ((op2->gtIntCon.gtIconVal & 0xFF) == op2->gtIntCon.gtIconVal))
+ {
+ canOmit = true;
+ }
+ }
+ else // treeType is signed
+ {
+ // When op2 is a positive 7-bit or smaller constant
+ // we can omit the sign extension sequence.
+ if ((op2->gtOper == GT_CNS_INT) && ((op2->gtIntCon.gtIconVal & 0x7F) == op2->gtIntCon.gtIconVal))
+ {
+ canOmit = true;
+ }
+ }
+
+ if (!canOmit)
+ {
+ // If reg is a byte reg then we can use a movzx/movsx instruction
+ //
+ if ((genRegMask(reg) & RBM_BYTE_REGS) != 0)
+ {
+ instruction extendIns = ins_Move_Extend(treeType, true);
+ inst_RV_RV(extendIns, reg, reg, treeType, emitTypeSize(treeType));
+ }
+ else // we can't encode a movzx/movsx instruction
+ {
+ if (varTypeIsUnsigned(treeType))
+ {
+ // otherwise, we must zero the upper 24 bits of 'reg'
+ inst_RV_IV(INS_AND, reg, 0xFF, EA_4BYTE);
+ }
+ else // treeType is signed
+ {
+ // otherwise, we must sign extend the result in the non-byteable register 'reg'
+ // We will shift the register left 24 bits, thus putting the sign-bit into the high bit
+ // then we do an arithmetic shift back 24 bits, which propagates the sign bit correctly.
+ //
+ inst_RV_SH(INS_SHIFT_LEFT_LOGICAL, EA_4BYTE, reg, 24);
+ inst_RV_SH(INS_SHIFT_RIGHT_ARITHM, EA_4BYTE, reg, 24);
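+ // e.g. for reg holding 0x000000F0 (the byte value -16):
+ //   shl reg, 24 -> 0xF0000000;  sar reg, 24 -> 0xFFFFFFF0 (-16, sign-extended)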
+ }
+ }
+ }
+ }
+ else
+#endif // CPU_HAS_BYTE_REGS
+ {
+ inst_RV_TT(ins, reg, op2, 0, emitTypeSize(treeType), flags);
+ }
+
+ /* The zero flag is now equal to the register value */
+
+ if (tree->gtSetFlags())
+ genFlagsEqualToReg(tree, reg);
+
+ /* Remember that we trashed the target */
+
+ regTracker.rsTrackRegTrash(reg);
+
+ /* Free up anything that was tied up by the RHS */
+
+ genDoneAddressable(op2, addrReg, RegSet::KEEP_REG);
+
+ genCodeForTreeSmpOpAsg_DONE_ASSG(tree, addrReg, reg, ovfl);
+ return;
+
+ default:
+ break;
+ } // end switch (op1->gtOper)
+
+#if !CPU_LOAD_STORE_ARCH
+ /* Special case: "x ^= -1" is actually "not(x)" */
+
+ if (oper == GT_ASG_XOR)
+ {
+ if (op2->gtOper == GT_CNS_INT && op2->gtIntCon.gtIconVal == -1)
+ {
+ addrReg = genMakeAddressable(op1, RBM_ALLINT, RegSet::KEEP_REG, true);
+ inst_TT(INS_NOT, op1);
+ genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);
+
+ genCodeForTreeSmpOpAsg_DONE_ASSG(tree, addrReg, tree->gtRegNum, ovfl);
+ return;
+ }
+ }
+#endif // !CPU_LOAD_STORE_ARCH
+
+ /* Setup target mask for op2 (byte-regs for small operands) */
+
+ unsigned needMask;
+ needMask = (varTypeIsByte(treeType)) ? RBM_BYTE_REGS : RBM_ALLINT;
+
+ /* Is the second operand a constant? */
+
+ if (op2->IsIntCnsFitsInI32())
+ {
+ int ival = (int)op2->gtIntCon.gtIconVal;
+
+ /* Make the target addressable */
+ addrReg = genMakeAddressable(op1, needReg, RegSet::FREE_REG, true);
+
+ inst_TT_IV(ins, op1, ival, 0, emitTypeSize(treeType), flags);
+
+ genDoneAddressable(op1, addrReg, RegSet::FREE_REG);
+
+ genCodeForTreeSmpOpAsg_DONE_ASSG(tree, addrReg, tree->gtRegNum, ovfl);
+ return;
+ }
+
+ /* Is the value or the address to be computed first? */
+
+ if (tree->gtFlags & GTF_REVERSE_OPS)
+ {
+ /* Compute the new value into a register */
+
+ genComputeReg(op2, needMask, RegSet::EXACT_REG, RegSet::KEEP_REG);
+
+ /* Make the target addressable for load/store */
+ addrReg = genMakeAddressable2(op1, 0, RegSet::KEEP_REG, true, true);
+ regSet.rsLockUsedReg(addrReg);
+
+#if !CPU_LOAD_STORE_ARCH
+ // For CPU_LOAD_STORE_ARCH, we always load from memory then store to memory
+ /* For small types with overflow check, we need to
+ sign/zero extend the result, so we need it in a reg */
+
+ if (ovfl && genTypeSize(treeType) < sizeof(int))
+#endif // !CPU_LOAD_STORE_ARCH
+ {
+ reg = regSet.rsPickReg();
+ regSet.rsLockReg(genRegMask(reg));
+
+ noway_assert(genIsValidReg(reg));
+
+ /* Generate "ldr reg, [var]" */
+
+ inst_RV_TT(ins_Load(op1->TypeGet()), reg, op1);
+
+ if (op1->gtOper == GT_LCL_VAR)
+ regTracker.rsTrackRegLclVar(reg, op1->gtLclVar.gtLclNum);
+ else
+ regTracker.rsTrackRegTrash(reg);
+
+ /* Make sure the new value is in a register */
+
+ genRecoverReg(op2, 0, RegSet::KEEP_REG);
+
+ /* Compute the new value */
+
+ inst_RV_RV(ins, reg, op2->gtRegNum, treeType, emitTypeSize(treeType), flags);
+
+ if (ovfl)
+ genCheckOverflow(tree);
+
+ /* Move the new value back to the variable */
+ /* Generate "str reg, [var]" */
+
+ inst_TT_RV(ins_Store(op1->TypeGet()), op1, reg);
+ regSet.rsUnlockReg(genRegMask(reg));
+
+ if (op1->gtOper == GT_LCL_VAR)
+ regTracker.rsTrackRegLclVar(reg, op1->gtLclVarCommon.gtLclNum);
+ }
+#if !CPU_LOAD_STORE_ARCH
+ else
+ {
+ /* Make sure the new value is in a register */
+
+ genRecoverReg(op2, 0, RegSet::KEEP_REG);
+
+ /* Add the new value into the target */
+
+ inst_TT_RV(ins, op1, op2->gtRegNum);
+ }
+#endif // !CPU_LOAD_STORE_ARCH
+ /* Free up anything that was tied up either side */
+ regSet.rsUnlockUsedReg(addrReg);
+ genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);
+ genReleaseReg(op2);
+ }
+ else
+ {
+ /* Make the target addressable */
+
+ addrReg = genMakeAddressable2(op1, RBM_ALLINT & ~op2->gtRsvdRegs, RegSet::KEEP_REG, true, true);
+
+ /* Compute the new value into a register */
+
+ genComputeReg(op2, needMask, RegSet::EXACT_REG, RegSet::KEEP_REG);
+ regSet.rsLockUsedReg(genRegMask(op2->gtRegNum));
+
+ /* Make sure the target is still addressable */
+
+ addrReg = genKeepAddressable(op1, addrReg);
+ regSet.rsLockUsedReg(addrReg);
+
+#if !CPU_LOAD_STORE_ARCH
+ // For CPU_LOAD_STORE_ARCH, we always load from memory then store to memory
+
+ /* For small types with overflow check, we need to
+ sign/zero extend the result, so we need it in a reg */
+
+ if (ovfl && genTypeSize(treeType) < sizeof(int))
+#endif // !CPU_LOAD_STORE_ARCH
+ {
+ reg = regSet.rsPickReg();
+
+ inst_RV_TT(INS_mov, reg, op1);
+
+ inst_RV_RV(ins, reg, op2->gtRegNum, treeType, emitTypeSize(treeType), flags);
+ regTracker.rsTrackRegTrash(reg);
+
+ if (ovfl)
+ genCheckOverflow(tree);
+
+ inst_TT_RV(ins_Store(op1->TypeGet()), op1, reg);
+
+ if (op1->gtOper == GT_LCL_VAR)
+ regTracker.rsTrackRegLclVar(reg, op1->gtLclVar.gtLclNum);
+ }
+#if !CPU_LOAD_STORE_ARCH
+ else
+ {
+ /* Add the new value into the target */
+
+ inst_TT_RV(ins, op1, op2->gtRegNum);
+ }
+#endif
+
+ /* Free up anything that was tied up either side */
+ regSet.rsUnlockUsedReg(addrReg);
+ genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);
+
+ regSet.rsUnlockUsedReg(genRegMask(op2->gtRegNum));
+ genReleaseReg(op2);
+ }
+
+ genCodeForTreeSmpOpAsg_DONE_ASSG(tree, addrReg, reg, ovfl);
+}
+
+/*****************************************************************************
+ *
+ * Generate code for GT_UMOD.
+ */
+
+void CodeGen::genCodeForUnsignedMod(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg)
+{
+ assert(tree->OperGet() == GT_UMOD);
+
+ GenTreePtr op1 = tree->gtOp.gtOp1;
+ GenTreePtr op2 = tree->gtOp.gtOp2;
+ const var_types treeType = tree->TypeGet();
+ regMaskTP needReg = destReg;
+ regNumber reg;
+
+ /* Is this a division by an integer constant? */
+
+ noway_assert(op2);
+ if (compiler->fgIsUnsignedModOptimizable(op2))
+ {
+ /* Generate the operand into some register */
+
+ genCompIntoFreeReg(op1, needReg, RegSet::FREE_REG);
+ noway_assert(op1->gtFlags & GTF_REG_VAL);
+
+ reg = op1->gtRegNum;
+
+ /* Generate the appropriate sequence */
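+ // For an unsigned dividend, x % 2^n is simply x & (2^n - 1);
+ // e.g. x % 8 becomes "and reg, 7".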
+ size_t ival = op2->gtIntCon.gtIconVal - 1;
+ inst_RV_IV(INS_AND, reg, ival, emitActualTypeSize(treeType));
+
+ /* The register is now trashed */
+
+ regTracker.rsTrackRegTrash(reg);
+
+ genCodeForTree_DONE(tree, reg);
+ return;
+ }
+
+ genCodeForGeneralDivide(tree, destReg, bestReg);
+}
+
+/*****************************************************************************
+ *
+ * Generate code for GT_MOD.
+ */
+
+void CodeGen::genCodeForSignedMod(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg)
+{
+ assert(tree->OperGet() == GT_MOD);
+
+ GenTreePtr op1 = tree->gtOp.gtOp1;
+ GenTreePtr op2 = tree->gtOp.gtOp2;
+ const var_types treeType = tree->TypeGet();
+ regMaskTP needReg = destReg;
+ regNumber reg;
+
+ /* Is this a division by an integer constant? */
+
+ noway_assert(op2);
+ if (compiler->fgIsSignedModOptimizable(op2))
+ {
+ ssize_t ival = op2->gtIntCon.gtIconVal;
+ BasicBlock* skip = genCreateTempLabel();
+
+ /* Generate the operand into some register */
+
+ genCompIntoFreeReg(op1, needReg, RegSet::FREE_REG);
+ noway_assert(op1->gtFlags & GTF_REG_VAL);
+
+ reg = op1->gtRegNum;
+
+ /* Generate the appropriate sequence */
+
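+ // Signed x % 2^n: mask with (2^n - 1) | 0x80000000 so that the sign bit survives
+ // and the flags tell us whether the negative-remainder fixup below is needed.
+ // e.g. x = -5, n = 2: and -> 0x80000003 (negative, so take the fixup path);
+ //      dec -> 0x80000002; or 0xFFFFFFFC -> 0xFFFFFFFE; inc -> 0xFFFFFFFF (-1),
+ // which matches -5 % 4 == -1.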
+ inst_RV_IV(INS_AND, reg, (int)(ival - 1) | 0x80000000, EA_4BYTE, INS_FLAGS_SET);
+
+ /* The register is now trashed */
+
+ regTracker.rsTrackRegTrash(reg);
+
+ /* Check and branch for a positive value */
+ emitJumpKind jmpGEL = genJumpKindForOper(GT_GE, CK_LOGICAL);
+ inst_JMP(jmpGEL, skip);
+
+ /* Generate the rest of the sequence and we're done */
+
+ genIncRegBy(reg, -1, NULL, treeType);
+ ival = -ival;
+ if ((treeType == TYP_LONG) && ((int)ival != ival))
+ {
+ regNumber immReg = regSet.rsGrabReg(RBM_ALLINT & ~genRegMask(reg));
+ instGen_Set_Reg_To_Imm(EA_8BYTE, immReg, ival);
+ inst_RV_RV(INS_OR, reg, immReg, TYP_LONG);
+ }
+ else
+ {
+ inst_RV_IV(INS_OR, reg, (int)ival, emitActualTypeSize(treeType));
+ }
+ genIncRegBy(reg, 1, NULL, treeType);
+
+ /* Define the 'skip' label and we're done */
+
+ genDefineTempLabel(skip);
+
+ genCodeForTree_DONE(tree, reg);
+ return;
+ }
+
+ genCodeForGeneralDivide(tree, destReg, bestReg);
+}
+
+/*****************************************************************************
+ *
+ * Generate code for GT_UDIV.
+ */
+
+void CodeGen::genCodeForUnsignedDiv(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg)
+{
+ assert(tree->OperGet() == GT_UDIV);
+
+ GenTreePtr op1 = tree->gtOp.gtOp1;
+ GenTreePtr op2 = tree->gtOp.gtOp2;
+ const var_types treeType = tree->TypeGet();
+ regMaskTP needReg = destReg;
+ regNumber reg;
+
+ /* Is this a division by an integer constant? */
+
+ noway_assert(op2);
+ if (compiler->fgIsUnsignedDivOptimizable(op2))
+ {
+ size_t ival = op2->gtIntCon.gtIconVal;
+
+ /* Division by 1 must be handled elsewhere */
+
+ noway_assert(ival != 1 || compiler->opts.MinOpts());
+
+ /* Generate the operand into some register */
+
+ genCompIntoFreeReg(op1, needReg, RegSet::FREE_REG);
+ noway_assert(op1->gtFlags & GTF_REG_VAL);
+
+ reg = op1->gtRegNum;
+
+ /* Generate "shr reg, log2(value)" */
+
+ inst_RV_SH(INS_SHIFT_RIGHT_LOGICAL, emitTypeSize(treeType), reg, genLog2(ival));
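+ // e.g. an unsigned divide by 8 becomes "shr reg, 3".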
+
+ /* The register is now trashed */
+
+ regTracker.rsTrackRegTrash(reg);
+
+ genCodeForTree_DONE(tree, reg);
+ return;
+ }
+
+ genCodeForGeneralDivide(tree, destReg, bestReg);
+}
+
+/*****************************************************************************
+ *
+ * Generate code for GT_DIV.
+ */
+
+void CodeGen::genCodeForSignedDiv(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg)
+{
+ assert(tree->OperGet() == GT_DIV);
+
+ GenTreePtr op1 = tree->gtOp.gtOp1;
+ GenTreePtr op2 = tree->gtOp.gtOp2;
+ const var_types treeType = tree->TypeGet();
+ regMaskTP needReg = destReg;
+ regNumber reg;
+
+ /* Is this a division by an integer constant? */
+
+ noway_assert(op2);
+ if (compiler->fgIsSignedDivOptimizable(op2))
+ {
+ ssize_t ival_s = op2->gtIntConCommon.IconValue();
+ assert(ival_s > 0); // Postcondition of compiler->fgIsSignedDivOptimizable...
+ size_t ival = static_cast<size_t>(ival_s);
+
+ /* Division by 1 must be handled elsewhere */
+
+ noway_assert(ival != 1);
+
+ BasicBlock* onNegDivisee = genCreateTempLabel();
+
+ /* Generate the operand into some register */
+
+ genCompIntoFreeReg(op1, needReg, RegSet::FREE_REG);
+ noway_assert(op1->gtFlags & GTF_REG_VAL);
+
+ reg = op1->gtRegNum;
+
+ if (ival == 2)
+ {
+ /* Generate "sar reg, log2(value)" */
+
+ inst_RV_SH(INS_SHIFT_RIGHT_ARITHM, emitTypeSize(treeType), reg, genLog2(ival), INS_FLAGS_SET);
+
+ // Check and branch for a positive value, skipping the INS_ADDC instruction
+ emitJumpKind jmpGEL = genJumpKindForOper(GT_GE, CK_LOGICAL);
+ inst_JMP(jmpGEL, onNegDivisee);
+
+ // Add the carry flag to 'reg'
+ inst_RV_IV(INS_ADDC, reg, 0, emitActualTypeSize(treeType));
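+ // e.g. -3 / 2: "sar reg, 1" leaves -2 and puts the shifted-out bit in the carry;
+ // the "adc reg, 0" above adds that carry back in, giving -1 (truncation toward zero).
+ // A non-negative dividend branches over the adc, since the sar result is already correct.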
+
+ /* Define the 'onNegDivisee' label and we're done */
+
+ genDefineTempLabel(onNegDivisee);
+
+ /* The register is now trashed */
+
+ regTracker.rsTrackRegTrash(reg);
+
+ /* The result is the same as the operand */
+
+ reg = op1->gtRegNum;
+ }
+ else
+ {
+ /* Generate the following sequence */
+ /*
+ test reg, reg
+ jns onNegDivisee
+ add reg, ival-1
+ onNegDivisee:
+ sar reg, log2(ival)
+ */
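+ // e.g. -7 / 4: the dividend is negative, so "add reg, 3" gives -4 and
+ // "sar reg, 2" gives -1, the truncated (round-toward-zero) quotient;
+ // a non-negative dividend skips the add and the arithmetic shift alone suffices.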
+
+ instGen_Compare_Reg_To_Zero(emitTypeSize(treeType), reg);
+
+ // Check and branch for a positive value, skipping the INS_add instruction
+ emitJumpKind jmpGEL = genJumpKindForOper(GT_GE, CK_LOGICAL);
+ inst_JMP(jmpGEL, onNegDivisee);
+
+ inst_RV_IV(INS_add, reg, (int)ival - 1, emitActualTypeSize(treeType));
+
+ /* Define the 'onNegDivisee' label and we're done */
+
+ genDefineTempLabel(onNegDivisee);
+
+ /* Generate "sar reg, log2(value)" */
+
+ inst_RV_SH(INS_SHIFT_RIGHT_ARITHM, emitTypeSize(treeType), reg, genLog2(ival));
+
+ /* The register is now trashed */
+
+ regTracker.rsTrackRegTrash(reg);
+
+ /* The result is the same as the operand */
+
+ reg = op1->gtRegNum;
+ }
+
+ genCodeForTree_DONE(tree, reg);
+ return;
+ }
+
+ genCodeForGeneralDivide(tree, destReg, bestReg);
+}
+
+/*****************************************************************************
+ *
+ * Generate code for a general divide. Handles the general case for GT_UMOD, GT_MOD, GT_UDIV, GT_DIV
+ * (if op2 is not a power of 2 constant).
+ */
+
+void CodeGen::genCodeForGeneralDivide(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg)
+{
+ assert(tree->OperGet() == GT_UMOD || tree->OperGet() == GT_MOD || tree->OperGet() == GT_UDIV ||
+ tree->OperGet() == GT_DIV);
+
+ GenTreePtr op1 = tree->gtOp.gtOp1;
+ GenTreePtr op2 = tree->gtOp.gtOp2;
+ const var_types treeType = tree->TypeGet();
+ regMaskTP needReg = destReg;
+ regNumber reg;
+ instruction ins;
+ bool gotOp1;
+ regMaskTP addrReg;
+
+#if USE_HELPERS_FOR_INT_DIV
+ noway_assert(!"Unreachable: fgMorph should have transformed this into a JitHelper");
+#endif
+
+#if defined(_TARGET_XARCH_)
+
+ /* Which operand are we supposed to evaluate first? */
+
+ if (tree->gtFlags & GTF_REVERSE_OPS)
+ {
+ /* We'll evaluate 'op2' first */
+
+ gotOp1 = false;
+ destReg &= ~op1->gtRsvdRegs;
+
+ /* Also if op1 is an enregistered LCL_VAR then exclude its register as well */
+ if (op1->gtOper == GT_LCL_VAR)
+ {
+ unsigned varNum = op1->gtLclVarCommon.gtLclNum;
+ noway_assert(varNum < compiler->lvaCount);
+ LclVarDsc* varDsc = compiler->lvaTable + varNum;
+ if (varDsc->lvRegister)
+ {
+ destReg &= ~genRegMask(varDsc->lvRegNum);
+ }
+ }
+ }
+ else
+ {
+ /* We'll evaluate 'op1' first */
+
+ gotOp1 = true;
+
+ regMaskTP op1Mask;
+ if (RBM_EAX & op2->gtRsvdRegs)
+ op1Mask = RBM_ALLINT & ~op2->gtRsvdRegs;
+ else
+ op1Mask = RBM_EAX; // EAX would be ideal
+
+ /* Generate the dividend into EAX and hold on to it. freeOnly=true */
+
+ genComputeReg(op1, op1Mask, RegSet::ANY_REG, RegSet::KEEP_REG, true);
+ }
+
+ /* We want to avoid using EAX or EDX for the second operand */
+
+ destReg = regSet.rsMustExclude(destReg, RBM_EAX | RBM_EDX);
+
+ /* Make the second operand addressable */
+ op2 = genCodeForCommaTree(op2);
+
+ /* Special case: if op2 is a local var we are done */
+
+ if (op2->gtOper == GT_LCL_VAR || op2->gtOper == GT_LCL_FLD)
+ {
+ if ((op2->gtFlags & GTF_REG_VAL) == 0)
+ addrReg = genMakeRvalueAddressable(op2, destReg, RegSet::KEEP_REG, false);
+ else
+ addrReg = 0;
+ }
+ else
+ {
+ genComputeReg(op2, destReg, RegSet::ANY_REG, RegSet::KEEP_REG);
+
+ noway_assert(op2->gtFlags & GTF_REG_VAL);
+ addrReg = genRegMask(op2->gtRegNum);
+ }
+
+ /* Make sure we have the dividend in EAX */
+
+ if (gotOp1)
+ {
+ /* We've previously computed op1 into EAX */
+
+ genRecoverReg(op1, RBM_EAX, RegSet::KEEP_REG);
+ }
+ else
+ {
+ /* Compute op1 into EAX and hold on to it */
+
+ genComputeReg(op1, RBM_EAX, RegSet::EXACT_REG, RegSet::KEEP_REG, true);
+ }
+
+ noway_assert(op1->gtFlags & GTF_REG_VAL);
+ noway_assert(op1->gtRegNum == REG_EAX);
+
+ /* We can now safely (we think) grab EDX */
+
+ regSet.rsGrabReg(RBM_EDX);
+ regSet.rsLockReg(RBM_EDX);
+
+ /* Convert the integer in EAX into a un/signed long in EDX:EAX */
+
+ const genTreeOps oper = tree->OperGet();
+
+ if (oper == GT_UMOD || oper == GT_UDIV)
+ instGen_Set_Reg_To_Zero(EA_PTRSIZE, REG_EDX);
+ else
+ instGen(INS_cdq);
+
+ /* Make sure the divisor is still addressable */
+
+ addrReg = genKeepAddressable(op2, addrReg, RBM_EAX);
+
+ /* Perform the division */
+
+ if (oper == GT_UMOD || oper == GT_UDIV)
+ inst_TT(INS_UNSIGNED_DIVIDE, op2);
+ else
+ inst_TT(INS_SIGNED_DIVIDE, op2);
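+ // idiv/div divide EDX:EAX by the operand, leaving the quotient in EAX and the
+ // remainder in EDX; that determines which register holds the result below.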
+
+ /* Free up anything tied up by the divisor's address */
+
+ genDoneAddressable(op2, addrReg, RegSet::KEEP_REG);
+
+ /* Unlock and free EDX */
+
+ regSet.rsUnlockReg(RBM_EDX);
+
+ /* Free up op1 (which is in EAX) as well */
+
+ genReleaseReg(op1);
+
+ /* Both EAX and EDX are now trashed */
+
+ regTracker.rsTrackRegTrash(REG_EAX);
+ regTracker.rsTrackRegTrash(REG_EDX);
+
+ /* Figure out which register the result is in */
+
+ reg = (oper == GT_DIV || oper == GT_UDIV) ? REG_EAX : REG_EDX;
+
+ /* Don't forget to mark the first operand as using EAX and EDX */
+
+ op1->gtRegNum = reg;
+
+ genCodeForTree_DONE(tree, reg);
+
+#elif defined(_TARGET_ARM_)
+
+ /* Which operand are we supposed to evaluate first? */
+
+ if (tree->gtFlags & GTF_REVERSE_OPS)
+ {
+ /* We'll evaluate 'op2' first */
+
+ gotOp1 = false;
+ destReg &= ~op1->gtRsvdRegs;
+
+ /* Also if op1 is an enregistered LCL_VAR then exclude its register as well */
+ if (op1->gtOper == GT_LCL_VAR)
+ {
+ unsigned varNum = op1->gtLclVarCommon.gtLclNum;
+ noway_assert(varNum < compiler->lvaCount);
+ LclVarDsc* varDsc = compiler->lvaTable + varNum;
+ if (varDsc->lvRegister)
+ {
+ destReg &= ~genRegMask(varDsc->lvRegNum);
+ }
+ }
+ }
+ else
+ {
+ /* We'll evaluate 'op1' first */
+
+ gotOp1 = true;
+ regMaskTP op1Mask = RBM_ALLINT & ~op2->gtRsvdRegs;
+
+ /* Generate the dividend into a register and hold on to it. */
+
+ genComputeReg(op1, op1Mask, RegSet::ANY_REG, RegSet::KEEP_REG, true);
+ }
+
+ /* Evaluate the second operand into a register and hold onto it. */
+
+ genComputeReg(op2, destReg, RegSet::ANY_REG, RegSet::KEEP_REG);
+
+ noway_assert(op2->gtFlags & GTF_REG_VAL);
+ addrReg = genRegMask(op2->gtRegNum);
+
+ if (gotOp1)
+ {
+ // Recover op1 if spilled
+ genRecoverReg(op1, RBM_NONE, RegSet::KEEP_REG);
+ }
+ else
+ {
+ /* Compute op1 into any register and hold on to it */
+ genComputeReg(op1, RBM_ALLINT, RegSet::ANY_REG, RegSet::KEEP_REG, true);
+ }
+ noway_assert(op1->gtFlags & GTF_REG_VAL);
+
+ reg = regSet.rsPickReg(needReg, bestReg);
+
+ // Perform the division
+
+ const genTreeOps oper = tree->OperGet();
+
+ if (oper == GT_UMOD || oper == GT_UDIV)
+ ins = INS_udiv;
+ else
+ ins = INS_sdiv;
+
+ getEmitter()->emitIns_R_R_R(ins, EA_4BYTE, reg, op1->gtRegNum, op2->gtRegNum);
+
+ if (oper == GT_UMOD || oper == GT_MOD)
+ {
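+ // Derive the remainder from the quotient: reg = op1 - (op1 / op2) * op2.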
+ getEmitter()->emitIns_R_R_R(INS_mul, EA_4BYTE, reg, op2->gtRegNum, reg);
+ getEmitter()->emitIns_R_R_R(INS_sub, EA_4BYTE, reg, op1->gtRegNum, reg);
+ }
+ /* Free up op1 and op2 */
+ genReleaseReg(op1);
+ genReleaseReg(op2);
+
+ genCodeForTree_DONE(tree, reg);
+
+#else
+#error "Unknown _TARGET_"
+#endif
+}
+
+/*****************************************************************************
+ *
+ * Generate code for an assignment shift (x <op>= ). Handles GT_ASG_LSH, GT_ASG_RSH, GT_ASG_RSZ.
+ */
+
+void CodeGen::genCodeForAsgShift(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg)
+{
+ assert(tree->OperGet() == GT_ASG_LSH || tree->OperGet() == GT_ASG_RSH || tree->OperGet() == GT_ASG_RSZ);
+
+ const genTreeOps oper = tree->OperGet();
+ GenTreePtr op1 = tree->gtOp.gtOp1;
+ GenTreePtr op2 = tree->gtOp.gtOp2;
+ const var_types treeType = tree->TypeGet();
+ insFlags flags = tree->gtSetFlags() ? INS_FLAGS_SET : INS_FLAGS_DONT_CARE;
+ regMaskTP needReg = destReg;
+ regNumber reg;
+ instruction ins;
+ regMaskTP addrReg;
+
+ switch (oper)
+ {
+ case GT_ASG_LSH:
+ ins = INS_SHIFT_LEFT_LOGICAL;
+ break;
+ case GT_ASG_RSH:
+ ins = INS_SHIFT_RIGHT_ARITHM;
+ break;
+ case GT_ASG_RSZ:
+ ins = INS_SHIFT_RIGHT_LOGICAL;
+ break;
+ default:
+ unreached();
+ }
+
+ noway_assert(!varTypeIsGC(treeType));
+ noway_assert(op2);
+
+ /* Shifts by a constant amount are easier */
+
+ if (op2->IsCnsIntOrI())
+ {
+ /* Make the target addressable */
+
+ addrReg = genMakeAddressable(op1, needReg, RegSet::KEEP_REG, true);
+
+ /* Are we shifting a register left by 1 bit? */
+
+ if ((oper == GT_ASG_LSH) && (op2->gtIntCon.gtIconVal == 1) && (op1->gtFlags & GTF_REG_VAL))
+ {
+ /* The target lives in a register */
+
+ reg = op1->gtRegNum;
+
+ /* "add reg, reg" is cheaper than "shl reg, 1" */
+
+ inst_RV_RV(INS_add, reg, reg, treeType, emitActualTypeSize(treeType), flags);
+ }
+ else
+ {
+#if CPU_LOAD_STORE_ARCH
+ if ((op1->gtFlags & GTF_REG_VAL) == 0)
+ {
+ regSet.rsLockUsedReg(addrReg);
+
+ // Load op1 into a reg
+
+ reg = regSet.rsPickReg(RBM_ALLINT);
+
+ inst_RV_TT(INS_mov, reg, op1);
+
+ // Issue the shift
+
+ inst_RV_IV(ins, reg, (int)op2->gtIntCon.gtIconVal, emitActualTypeSize(treeType), flags);
+ regTracker.rsTrackRegTrash(reg);
+
+ /* Store the (sign/zero extended) result back to the stack location of the variable */
+
+ inst_TT_RV(ins_Store(op1->TypeGet()), op1, reg);
+
+ regSet.rsUnlockUsedReg(addrReg);
+ }
+ else
+#endif // CPU_LOAD_STORE_ARCH
+ {
+ /* Shift by the constant value */
+
+ inst_TT_SH(ins, op1, (int)op2->gtIntCon.gtIconVal);
+ }
+ }
+
+ /* If the target is a register, it has a new value */
+
+ if (op1->gtFlags & GTF_REG_VAL)
+ regTracker.rsTrackRegTrash(op1->gtRegNum);
+
+ genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);
+
+ /* The zero flag is now equal to the target value */
+ /* X86: But only if the shift count is != 0 */
+
+ if (op2->gtIntCon.gtIconVal != 0)
+ {
+ if (tree->gtSetFlags())
+ {
+ if (op1->gtOper == GT_LCL_VAR)
+ {
+ genFlagsEqualToVar(tree, op1->gtLclVarCommon.gtLclNum);
+ }
+ else if (op1->gtOper == GT_REG_VAR)
+ {
+ genFlagsEqualToReg(tree, op1->gtRegNum);
+ }
+ }
+ }
+ else
+ {
+ // It is possible for the shift count to equal 0 with valid
+ // IL, and not be optimized away, in the case where the node
+ // is of a small type. The sequence of instructions looks like
+ // ldsfld, shr, stsfld, executed on a char field. This will
+ // never happen with code produced by our compilers, because the
+ // compilers will insert a conv.u2 before the stsfld (which will
+ // lead us down a different codepath in the JIT and optimize away
+ // the shift by zero). This case is not worth optimizing and we
+ // will just make sure to generate correct code for it.
+
+ genFlagsEqualToNone();
+ }
+ }
+ else
+ {
+ regMaskTP op2Regs = RBM_NONE;
+ if (REG_SHIFT != REG_NA)
+ op2Regs = RBM_SHIFT;
+
+ regMaskTP tempRegs;
+
+ if (tree->gtFlags & GTF_REVERSE_OPS)
+ {
+ tempRegs = regSet.rsMustExclude(op2Regs, op1->gtRsvdRegs);
+ genCodeForTree(op2, tempRegs);
+ regSet.rsMarkRegUsed(op2);
+
+ tempRegs = regSet.rsMustExclude(RBM_ALLINT, genRegMask(op2->gtRegNum));
+ addrReg = genMakeAddressable(op1, tempRegs, RegSet::KEEP_REG, true);
+
+ genRecoverReg(op2, op2Regs, RegSet::KEEP_REG);
+ }
+ else
+ {
+ /* Make the target addressable avoiding op2->RsvdRegs [and RBM_SHIFT] */
+ regMaskTP excludeMask = op2->gtRsvdRegs;
+ if (REG_SHIFT != REG_NA)
+ excludeMask |= RBM_SHIFT;
+
+ tempRegs = regSet.rsMustExclude(RBM_ALLINT, excludeMask);
+ addrReg = genMakeAddressable(op1, tempRegs, RegSet::KEEP_REG, true);
+
+ /* Load the shift count into the necessary register */
+ genComputeReg(op2, op2Regs, RegSet::EXACT_REG, RegSet::KEEP_REG);
+ }
+
+ /* Make sure the address registers are still here */
+ addrReg = genKeepAddressable(op1, addrReg, op2Regs);
+
+#ifdef _TARGET_XARCH_
+ /* Perform the shift */
+ inst_TT_CL(ins, op1);
+#else
+ /* Perform the shift */
+ noway_assert(op2->gtFlags & GTF_REG_VAL);
+ op2Regs = genRegMask(op2->gtRegNum);
+
+ regSet.rsLockUsedReg(addrReg | op2Regs);
+ inst_TT_RV(ins, op1, op2->gtRegNum, 0, emitTypeSize(treeType), flags);
+ regSet.rsUnlockUsedReg(addrReg | op2Regs);
+#endif
+ /* Free the address registers */
+ genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);
+
+ /* If the value is in a register, it's now trash */
+
+ if (op1->gtFlags & GTF_REG_VAL)
+ regTracker.rsTrackRegTrash(op1->gtRegNum);
+
+ /* Release the op2 [RBM_SHIFT] operand */
+
+ genReleaseReg(op2);
+ }
+
+ genCodeForTreeSmpOpAsg_DONE_ASSG(tree, addrReg, /* unused for ovfl=false */ REG_NA, /* ovfl */ false);
+}
+
+/*****************************************************************************
+ *
+ * Generate code for a shift. Handles GT_LSH, GT_RSH, GT_RSZ.
+ */
+
+void CodeGen::genCodeForShift(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg)
+{
+ assert(tree->OperIsShift());
+
+ const genTreeOps oper = tree->OperGet();
+ GenTreePtr op1 = tree->gtOp.gtOp1;
+ GenTreePtr op2 = tree->gtOp.gtOp2;
+ const var_types treeType = tree->TypeGet();
+ insFlags flags = tree->gtSetFlags() ? INS_FLAGS_SET : INS_FLAGS_DONT_CARE;
+ regMaskTP needReg = destReg;
+ regNumber reg;
+ instruction ins;
+
+ switch (oper)
+ {
+ case GT_LSH:
+ ins = INS_SHIFT_LEFT_LOGICAL;
+ break;
+ case GT_RSH:
+ ins = INS_SHIFT_RIGHT_ARITHM;
+ break;
+ case GT_RSZ:
+ ins = INS_SHIFT_RIGHT_LOGICAL;
+ break;
+ default:
+ unreached();
+ }
+
+ /* Is the shift count constant? */
+ noway_assert(op2);
+ if (op2->IsIntCnsFitsInI32())
+ {
+ // TODO: Check to see if we could generate a LEA instead!
+
+ /* Compute the left operand into any free register */
+
+ genCompIntoFreeReg(op1, needReg, RegSet::KEEP_REG);
+
+ noway_assert(op1->gtFlags & GTF_REG_VAL);
+ reg = op1->gtRegNum;
+
+ /* Are we shifting left by 1 bit? (or 2 bits for fast code) */
+
+ // On ARM, until proven otherwise by performance numbers, just do the shift.
+ // It's no bigger than add (16 bits for low registers, 32 bits for high registers).
+ // It's smaller than two "add reg, reg".
+
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifndef _TARGET_ARM_
+ if (oper == GT_LSH)
+ {
+ emitAttr size = emitActualTypeSize(treeType);
+ if (op2->gtIntConCommon.IconValue() == 1)
+ {
+ /* "add reg, reg" is smaller and faster than "shl reg, 1" */
+ inst_RV_RV(INS_add, reg, reg, treeType, size, flags);
+ }
+ else if ((op2->gtIntConCommon.IconValue() == 2) && (compiler->compCodeOpt() == Compiler::FAST_CODE))
+ {
+ /* two "add reg, reg" instructions are faster than "shl reg, 2" */
+ inst_RV_RV(INS_add, reg, reg, treeType);
+ inst_RV_RV(INS_add, reg, reg, treeType, size, flags);
+ }
+ else
+ goto DO_SHIFT_BY_CNS;
+ }
+ else
+#endif // _TARGET_ARM_
+ {
+#ifndef _TARGET_ARM_
+ DO_SHIFT_BY_CNS:
+#endif // _TARGET_ARM_
+ // If we are shifting 'reg' by zero bits and do not need the flags to be set
+ // then we can just skip emitting the instruction as 'reg' is already correct.
+ //
+ if ((op2->gtIntConCommon.IconValue() != 0) || tree->gtSetFlags())
+ {
+ /* Generate the appropriate shift instruction */
+ inst_RV_SH(ins, emitTypeSize(treeType), reg, (int)op2->gtIntConCommon.IconValue(), flags);
+ }
+ }
+ }
+ else
+ {
+ /* Calculate a useful register mask for computing op1 */
+ needReg = regSet.rsNarrowHint(regSet.rsRegMaskFree(), needReg);
+ regMaskTP op2RegMask;
+#ifdef _TARGET_XARCH_
+ op2RegMask = RBM_ECX;
+#else
+ op2RegMask = RBM_NONE;
+#endif
+ needReg = regSet.rsMustExclude(needReg, op2RegMask);
+
+ regMaskTP tempRegs;
+
+ /* Which operand are we supposed to evaluate first? */
+ if (tree->gtFlags & GTF_REVERSE_OPS)
+ {
+ /* Load the shift count [into ECX on XARCH] */
+ tempRegs = regSet.rsMustExclude(op2RegMask, op1->gtRsvdRegs);
+ genComputeReg(op2, tempRegs, RegSet::EXACT_REG, RegSet::KEEP_REG, false);
+
+ /* We must not target the register that is holding op2 */
+ needReg = regSet.rsMustExclude(needReg, genRegMask(op2->gtRegNum));
+
+ /* Now evaluate 'op1' into a free register */
+ genComputeReg(op1, needReg, RegSet::ANY_REG, RegSet::KEEP_REG, true);
+
+ /* Recover op2 into ECX */
+ genRecoverReg(op2, op2RegMask, RegSet::KEEP_REG);
+ }
+ else
+ {
+ /* Compute op1 into a register, trying to avoid op2->rsvdRegs and ECX */
+ tempRegs = regSet.rsMustExclude(needReg, op2->gtRsvdRegs);
+ genComputeReg(op1, tempRegs, RegSet::ANY_REG, RegSet::KEEP_REG, true);
+
+ /* Load the shift count [into ECX on XARCH] */
+ genComputeReg(op2, op2RegMask, RegSet::EXACT_REG, RegSet::KEEP_REG, false);
+ }
+
+ noway_assert(op2->gtFlags & GTF_REG_VAL);
+#ifdef _TARGET_XARCH_
+ noway_assert(genRegMask(op2->gtRegNum) == op2RegMask);
+#endif
+ // Check for the case of op1 being spilled during the evaluation of op2
+ if (op1->gtFlags & GTF_SPILLED)
+ {
+ // The register has been spilled -- reload it to any register except ECX
+ regSet.rsLockUsedReg(op2RegMask);
+ regSet.rsUnspillReg(op1, 0, RegSet::KEEP_REG);
+ regSet.rsUnlockUsedReg(op2RegMask);
+ }
+
+ noway_assert(op1->gtFlags & GTF_REG_VAL);
+ reg = op1->gtRegNum;
+
+#ifdef _TARGET_ARM_
+ /* Perform the shift */
+ getEmitter()->emitIns_R_R(ins, EA_4BYTE, reg, op2->gtRegNum, flags);
+#else
+ /* Perform the shift */
+ inst_RV_CL(ins, reg);
+#endif
+ genReleaseReg(op2);
+ }
+
+ noway_assert(op1->gtFlags & GTF_REG_VAL);
+ noway_assert(reg == op1->gtRegNum);
+
+ /* The register is now trashed */
+ genReleaseReg(op1);
+ regTracker.rsTrackRegTrash(reg);
+
+ genCodeForTree_DONE(tree, reg);
+}
+
+/*****************************************************************************
+ *
+ * Generate code for a top-level relational operator (not one that is part of a GT_JTRUE tree).
+ * Handles GT_EQ, GT_NE, GT_LT, GT_LE, GT_GE, GT_GT.
+ */
+
+void CodeGen::genCodeForRelop(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg)
+{
+ assert(tree->OperGet() == GT_EQ || tree->OperGet() == GT_NE || tree->OperGet() == GT_LT ||
+ tree->OperGet() == GT_LE || tree->OperGet() == GT_GE || tree->OperGet() == GT_GT);
+
+ const genTreeOps oper = tree->OperGet();
+ GenTreePtr op1 = tree->gtOp.gtOp1;
+ const var_types treeType = tree->TypeGet();
+ regMaskTP needReg = destReg;
+ regNumber reg;
+
+ // Longs and float comparisons are converted to "?:"
+ noway_assert(!compiler->fgMorphRelopToQmark(op1));
+
+ // Check if we can use the currently set flags. Else set them
+
+ emitJumpKind jumpKind = genCondSetFlags(tree);
+
+ // Grab a register to materialize the bool value into
+
+ bestReg = regSet.rsRegMaskCanGrab() & RBM_BYTE_REGS;
+
+ // Check that the predictor did the right job
+ noway_assert(bestReg);
+
+ // If needReg is in bestReg then use it
+ if (needReg & bestReg)
+ reg = regSet.rsGrabReg(needReg & bestReg);
+ else
+ reg = regSet.rsGrabReg(bestReg);
+
+#if defined(_TARGET_ARM_)
+
+ // Generate:
+ // jump-if-true L_true
+ // mov reg, 0
+ // jmp L_end
+ // L_true:
+ // mov reg, 1
+ // L_end:
+
+ BasicBlock* L_true;
+ BasicBlock* L_end;
+
+ L_true = genCreateTempLabel();
+ L_end = genCreateTempLabel();
+
+ inst_JMP(jumpKind, L_true);
+ getEmitter()->emitIns_R_I(INS_mov, EA_4BYTE, reg, 0); // Executes when the cond is false
+ inst_JMP(EJ_jmp, L_end);
+ genDefineTempLabel(L_true);
+ getEmitter()->emitIns_R_I(INS_mov, EA_4BYTE, reg, 1); // Executes when the cond is true
+ genDefineTempLabel(L_end);
+
+ regTracker.rsTrackRegTrash(reg);
+
+#elif defined(_TARGET_XARCH_)
+ regMaskTP regs = genRegMask(reg);
+ noway_assert(regs & RBM_BYTE_REGS);
+
+ // Set (lower byte of) reg according to the flags
+
+ /* Look for the special case where we just want to transfer the carry bit */
+
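+ // "sbb reg, reg" leaves 0 or -1 in reg depending on the carry flag, so
+ // EJ_jb  (CF == 1) is materialized as sbb + neg -> reg = CF, and
+ // EJ_jae (CF == 0) is materialized as sbb + inc -> reg = !CF,
+ // avoiding the setcc + zero-extend pair used by the general case below.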
+ if (jumpKind == EJ_jb)
+ {
+ inst_RV_RV(INS_SUBC, reg, reg);
+ inst_RV(INS_NEG, reg, TYP_INT);
+ regTracker.rsTrackRegTrash(reg);
+ }
+ else if (jumpKind == EJ_jae)
+ {
+ inst_RV_RV(INS_SUBC, reg, reg);
+ genIncRegBy(reg, 1, tree, TYP_INT);
+ regTracker.rsTrackRegTrash(reg);
+ }
+ else
+ {
+ inst_SET(jumpKind, reg);
+
+ regTracker.rsTrackRegTrash(reg);
+
+ if (treeType == TYP_INT)
+ {
+ // Set the higher bytes to 0
+ inst_RV_RV(ins_Move_Extend(TYP_UBYTE, true), reg, reg, TYP_UBYTE, emitTypeSize(TYP_UBYTE));
+ }
+ else
+ {
+ noway_assert(treeType == TYP_BYTE);
+ }
+ }
+#else
+ NYI("TARGET");
+#endif // _TARGET_XXX
+
+ genCodeForTree_DONE(tree, reg);
+}
+
+//------------------------------------------------------------------------
+// genCodeForCopyObj: Generate code for a CopyObj node
+//
+// Arguments:
+// tree - The CopyObj node we are going to generate code for.
+// destReg - The register mask for register(s), if any, that will be defined.
+//
+// Return Value:
+// None
+
+void CodeGen::genCodeForCopyObj(GenTreePtr tree, regMaskTP destReg)
+{
+ // If the value class doesn't have any fields that are GC refs or
+ // the target isn't on the GC-heap, we can merge it with CPBLK.
+ // GC fields cannot be copied directly; instead we will
+ // need to use a jit-helper for that.
+ assert(tree->gtOper == GT_ASG);
+ assert(tree->gtOp.gtOp1->gtOper == GT_OBJ);
+
+ GenTreeObj* cpObjOp = tree->gtOp.gtOp1->AsObj();
+ assert(cpObjOp->HasGCPtr());
+
+#ifdef _TARGET_ARM_
+ if (cpObjOp->IsVolatile())
+ {
+ // Emit a memory barrier instruction before the CopyBlk
+ instGen_MemoryBarrier();
+ }
+#endif
+ assert(tree->gtOp.gtOp2->OperIsIndir());
+ GenTreePtr srcObj = tree->gtOp.gtOp2->AsIndir()->Addr();
+ GenTreePtr dstObj = cpObjOp->Addr();
+
+ noway_assert(dstObj->gtType == TYP_BYREF || dstObj->gtType == TYP_I_IMPL);
+
+#ifdef DEBUG
+ CORINFO_CLASS_HANDLE clsHnd = (CORINFO_CLASS_HANDLE)cpObjOp->gtClass;
+ size_t debugBlkSize = roundUp(compiler->info.compCompHnd->getClassSize(clsHnd), TARGET_POINTER_SIZE);
+
+ // Since we round up, we are not handling the case where we have a non-pointer sized struct with GC pointers.
+ // The EE currently does not allow this. Let's assert it just to be safe.
+ noway_assert(compiler->info.compCompHnd->getClassSize(clsHnd) == debugBlkSize);
+#endif
+
+ size_t blkSize = cpObjOp->gtSlots * TARGET_POINTER_SIZE;
+ unsigned slots = cpObjOp->gtSlots;
+ BYTE* gcPtrs = cpObjOp->gtGcPtrs;
+ unsigned gcPtrCount = cpObjOp->gtGcPtrCount;
+ assert(blkSize == cpObjOp->gtBlkSize);
+
+ GenTreePtr treeFirst, treeSecond;
+ regNumber regFirst, regSecond;
+
+ // In what order do the object-ptrs have to be evaluated?
+
+ if (tree->gtFlags & GTF_REVERSE_OPS)
+ {
+ treeFirst = srcObj;
+ treeSecond = dstObj;
+#if CPU_USES_BLOCK_MOVE
+ regFirst = REG_ESI;
+ regSecond = REG_EDI;
+#else
+ regFirst = REG_ARG_1;
+ regSecond = REG_ARG_0;
+#endif
+ }
+ else
+ {
+ treeFirst = dstObj;
+ treeSecond = srcObj;
+#if CPU_USES_BLOCK_MOVE
+ regFirst = REG_EDI;
+ regSecond = REG_ESI;
+#else
+ regFirst = REG_ARG_0;
+ regSecond = REG_ARG_1;
+#endif
+ }
+
+ bool dstIsOnStack = (dstObj->gtOper == GT_ADDR && (dstObj->gtFlags & GTF_ADDR_ONSTACK));
+ bool srcIsOnStack = (srcObj->gtOper == GT_ADDR && (srcObj->gtFlags & GTF_ADDR_ONSTACK));
+ emitAttr srcType = (varTypeIsGC(srcObj) && !srcIsOnStack) ? EA_BYREF : EA_PTRSIZE;
+ emitAttr dstType = (varTypeIsGC(dstObj) && !dstIsOnStack) ? EA_BYREF : EA_PTRSIZE;
+
+#if CPU_USES_BLOCK_MOVE
+ // Materialize the trees in the order desired
+
+ genComputeReg(treeFirst, genRegMask(regFirst), RegSet::EXACT_REG, RegSet::KEEP_REG, true);
+ genComputeReg(treeSecond, genRegMask(regSecond), RegSet::EXACT_REG, RegSet::KEEP_REG, true);
+ genRecoverReg(treeFirst, genRegMask(regFirst), RegSet::KEEP_REG);
+
+ // Grab ECX because it will be trashed by the helper
+ //
+ regSet.rsGrabReg(RBM_ECX);
+
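+ // Copy one pointer-sized slot per iteration: non-GC slots (or any slot when
+ // the destination is on the stack) use "movsp", while GC-ref slots go through
+ // CORINFO_HELP_ASSIGN_BYREF, which performs the store with the required GC
+ // bookkeeping; either way ESI and EDI advance by a pointer size each time.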
+ while (blkSize >= TARGET_POINTER_SIZE)
+ {
+ if (*gcPtrs++ == TYPE_GC_NONE || dstIsOnStack)
+ {
+ // Note that we can use movsd even if it is a GC pointer being transferred
+ // because the value is not cached anywhere. If we did this in two moves,
+ // we would have to make certain we passed the appropriate GC info on to
+ // the emitter.
+ instGen(INS_movsp);
+ }
+ else
+ {
+ // This helper will act like a MOVSD
+ // -- inputs EDI and ESI are byrefs
+ // -- including incrementing of ESI and EDI by 4
+ // -- helper will trash ECX
+ //
+ regMaskTP argRegs = genRegMask(regFirst) | genRegMask(regSecond);
+ regSet.rsLockUsedReg(argRegs);
+ genEmitHelperCall(CORINFO_HELP_ASSIGN_BYREF,
+ 0, // argSize
+ EA_PTRSIZE); // retSize
+ regSet.rsUnlockUsedReg(argRegs);
+ }
+
+ blkSize -= TARGET_POINTER_SIZE;
+ }
+
+ // "movsd/movsq" as well as CPX_BYREF_ASG modify all three registers
+
+ regTracker.rsTrackRegTrash(REG_EDI);
+ regTracker.rsTrackRegTrash(REG_ESI);
+ regTracker.rsTrackRegTrash(REG_ECX);
+
+ gcInfo.gcMarkRegSetNpt(RBM_ESI | RBM_EDI);
+
+ /* The emitter won't record CORINFO_HELP_ASSIGN_BYREF in the GC tables as
+ it is an emitNoGChelper. However, we have to let the emitter know that
+ the GC liveness has changed. We do this by creating a new label.
+ */
+
+ noway_assert(emitter::emitNoGChelper(CORINFO_HELP_ASSIGN_BYREF));
+
+ genDefineTempLabel(&dummyBB);
+
+#else // !CPU_USES_BLOCK_MOVE
+
+#ifndef _TARGET_ARM_
+// Currently only the ARM implementation is provided
+#error "COPYBLK for non-ARM && non-CPU_USES_BLOCK_MOVE"
+#endif
+
+ // Materialize the trees in the order desired
+ bool helperUsed;
+ regNumber regDst;
+ regNumber regSrc;
+ regNumber regTemp;
+
+ if ((gcPtrCount > 0) && !dstIsOnStack)
+ {
+ genComputeReg(treeFirst, genRegMask(regFirst), RegSet::EXACT_REG, RegSet::KEEP_REG, true);
+ genComputeReg(treeSecond, genRegMask(regSecond), RegSet::EXACT_REG, RegSet::KEEP_REG, true);
+ genRecoverReg(treeFirst, genRegMask(regFirst), RegSet::KEEP_REG);
+
+ /* The helper is a Asm-routine that will trash R2,R3 and LR */
+ {
+ /* Spill any callee-saved registers which are being used */
+ regMaskTP spillRegs = RBM_CALLEE_TRASH_NOGC & regSet.rsMaskUsed;
+
+ if (spillRegs)
+ {
+ regSet.rsSpillRegs(spillRegs);
+ }
+ }
+
+ // Grab R2 (aka REG_TMP_1) because it will be trashed by the helper
+ // We will also use it as the temp register for our load/store sequences
+ //
+ assert(REG_R2 == REG_TMP_1);
+ regTemp = regSet.rsGrabReg(RBM_R2);
+ helperUsed = true;
+ }
+ else
+ {
+ genCompIntoFreeReg(treeFirst, (RBM_ALLINT & ~treeSecond->gtRsvdRegs), RegSet::KEEP_REG);
+ genCompIntoFreeReg(treeSecond, RBM_ALLINT, RegSet::KEEP_REG);
+ genRecoverReg(treeFirst, RBM_ALLINT, RegSet::KEEP_REG);
+
+ // Grab any temp register to use for our load/store sequences
+ //
+ regTemp = regSet.rsGrabReg(RBM_ALLINT);
+ helperUsed = false;
+ }
+ assert(dstObj->gtFlags & GTF_REG_VAL);
+ assert(srcObj->gtFlags & GTF_REG_VAL);
+
+ regDst = dstObj->gtRegNum;
+ regSrc = srcObj->gtRegNum;
+
+ assert(regDst != regTemp);
+ assert(regSrc != regTemp);
+
+ instruction loadIns = ins_Load(TYP_I_IMPL); // INS_ldr
+ instruction storeIns = ins_Store(TYP_I_IMPL); // INS_str
+
+ size_t offset = 0;
+ while (blkSize >= TARGET_POINTER_SIZE)
+ {
+ CorInfoGCType gcType;
+ CorInfoGCType gcTypeNext = TYPE_GC_NONE;
+ var_types type = TYP_I_IMPL;
+
+#if FEATURE_WRITE_BARRIER
+ gcType = (CorInfoGCType)(*gcPtrs++);
+ if (blkSize > TARGET_POINTER_SIZE)
+ gcTypeNext = (CorInfoGCType)(*gcPtrs);
+
+ if (gcType == TYPE_GC_REF)
+ type = TYP_REF;
+ else if (gcType == TYPE_GC_BYREF)
+ type = TYP_BYREF;
+
+ if (helperUsed)
+ {
+ assert(regDst == REG_ARG_0);
+ assert(regSrc == REG_ARG_1);
+ assert(regTemp == REG_R2);
+ }
+#else
+ gcType = TYPE_GC_NONE;
+#endif // FEATURE_WRITE_BARRIER
+
+ blkSize -= TARGET_POINTER_SIZE;
+
+ emitAttr opSize = emitTypeSize(type);
+
+ if (!helperUsed || (gcType == TYPE_GC_NONE))
+ {
+ getEmitter()->emitIns_R_R_I(loadIns, opSize, regTemp, regSrc, offset);
+ getEmitter()->emitIns_R_R_I(storeIns, opSize, regTemp, regDst, offset);
+ offset += TARGET_POINTER_SIZE;
+
+ if ((helperUsed && (gcTypeNext != TYPE_GC_NONE)) || ((offset >= 128) && (blkSize > 0)))
+ {
+ getEmitter()->emitIns_R_I(INS_add, srcType, regSrc, offset);
+ getEmitter()->emitIns_R_I(INS_add, dstType, regDst, offset);
+ offset = 0;
+ }
+ }
+ else
+ {
+ assert(offset == 0);
+
+ // The helper will act like this:
+ // -- inputs R0 and R1 are byrefs
+ // -- helper will perform copy from *R1 into *R0
+ // -- helper will perform post increment of R0 and R1 by 4
+ // -- helper will trash R2
+ // -- helper will trash R3
+ // -- calling the helper implicitly trashes LR
+ //
+ assert(helperUsed);
+ regMaskTP argRegs = genRegMask(regFirst) | genRegMask(regSecond);
+ regSet.rsLockUsedReg(argRegs);
+ genEmitHelperCall(CORINFO_HELP_ASSIGN_BYREF,
+ 0, // argSize
+ EA_PTRSIZE); // retSize
+
+ regSet.rsUnlockUsedReg(argRegs);
+ regTracker.rsTrackRegMaskTrash(RBM_CALLEE_TRASH_NOGC);
+ }
+ }
+
+ regTracker.rsTrackRegTrash(regDst);
+ regTracker.rsTrackRegTrash(regSrc);
+ regTracker.rsTrackRegTrash(regTemp);
+
+ gcInfo.gcMarkRegSetNpt(genRegMask(regDst) | genRegMask(regSrc));
+
+ /* The emitter won't record CORINFO_HELP_ASSIGN_BYREF in the GC tables as
+ it is an emitNoGChelper. However, we have to let the emitter know that
+ the GC liveness has changed. We do this by creating a new label.
+ */
+
+ noway_assert(emitter::emitNoGChelper(CORINFO_HELP_ASSIGN_BYREF));
+
+ genDefineTempLabel(&dummyBB);
+
+#endif // !CPU_USES_BLOCK_MOVE
+
+ assert(blkSize == 0);
+
+ genReleaseReg(dstObj);
+ genReleaseReg(srcObj);
+
+ genCodeForTree_DONE(tree, REG_NA);
+
+#ifdef _TARGET_ARM_
+ if (cpObjOp->IsVolatile())
+ {
+ // Emit a memory barrier instruction after the CopyBlk
+ instGen_MemoryBarrier();
+ }
+#endif
+}
+
+//------------------------------------------------------------------------
+// genCodeForBlkOp: Generate code for a block copy or init operation
+//
+// Arguments:
+// tree - The block assignment
+// destReg - The expected destination register
+//
+void CodeGen::genCodeForBlkOp(GenTreePtr tree, regMaskTP destReg)
+{
+ genTreeOps oper = tree->OperGet();
+ GenTreePtr dest = tree->gtOp.gtOp1;
+ GenTreePtr src = tree->gtGetOp2();
+ regMaskTP needReg = destReg;
+ regMaskTP regs = regSet.rsMaskUsed;
+ GenTreePtr opsPtr[3];
+ regMaskTP regsPtr[3];
+ GenTreePtr destPtr;
+ GenTreePtr srcPtrOrVal;
+
+ noway_assert(tree->OperIsBlkOp());
+
+ bool isCopyBlk = false;
+ bool isInitBlk = false;
+ bool hasGCpointer = false;
+ unsigned blockSize = dest->AsBlk()->gtBlkSize;
+ GenTreePtr sizeNode = nullptr;
+ bool sizeIsConst = true;
+ if (dest->gtOper == GT_DYN_BLK)
+ {
+ sizeNode = dest->AsDynBlk()->gtDynamicSize;
+ sizeIsConst = false;
+ }
+
+ if (tree->OperIsCopyBlkOp())
+ {
+ isCopyBlk = true;
+ if (dest->gtOper == GT_OBJ)
+ {
+ if (dest->AsObj()->gtGcPtrCount != 0)
+ {
+ genCodeForCopyObj(tree, destReg);
+ return;
+ }
+ }
+ }
+ else
+ {
+ isInitBlk = true;
+ }
+
+ // Ensure that we have an address in the CopyBlk case.
+ if (isCopyBlk)
+ {
+ // TODO-1stClassStructs: Allow a lclVar here.
+ assert(src->OperIsIndir());
+ srcPtrOrVal = src->AsIndir()->Addr();
+ }
+ else
+ {
+ srcPtrOrVal = src;
+ }
+
+#ifdef _TARGET_ARM_
+ if (dest->AsBlk()->IsVolatile())
+ {
+ // Emit a memory barrier instruction before the InitBlk/CopyBlk
+ instGen_MemoryBarrier();
+ }
+#endif
+ {
+ destPtr = dest->AsBlk()->Addr();
+ noway_assert(destPtr->TypeGet() == TYP_BYREF || varTypeIsIntegral(destPtr->TypeGet()));
+ noway_assert(
+ (isCopyBlk && (srcPtrOrVal->TypeGet() == TYP_BYREF || varTypeIsIntegral(srcPtrOrVal->TypeGet()))) ||
+ (isInitBlk && varTypeIsIntegral(srcPtrOrVal->TypeGet())));
+
+ noway_assert(destPtr && srcPtrOrVal);
+
+#if CPU_USES_BLOCK_MOVE
+ regs = isInitBlk ? RBM_EAX : RBM_ESI; // The register needed for the Val (InitBlk) or Src (CopyBlk) operand
+
+ /* Some special code for block moves/inits for constant sizes */
+
+ //
+ // Is this a fixed size COPYBLK?
+ // or a fixed size INITBLK with a constant init value?
+ //
+ if ((sizeIsConst) && (isCopyBlk || (srcPtrOrVal->IsCnsIntOrI())))
+ {
+ size_t length = blockSize;
+ size_t initVal = 0;
+ instruction ins_P, ins_PR, ins_B;
+
+ if (isInitBlk)
+ {
+ ins_P = INS_stosp;
+ ins_PR = INS_r_stosp;
+ ins_B = INS_stosb;
+
+ /* Properly extend the init constant from a U1 to a U4 */
+ initVal = 0xFF & ((unsigned)srcPtrOrVal->gtIntCon.gtIconVal);
+
+ /* If it is a non-zero value we have to replicate */
+ /* the byte value four times to form the DWORD */
+ /* Then we change this new value into the tree-node */
+
+ if (initVal)
+ {
+ initVal = initVal | (initVal << 8) | (initVal << 16) | (initVal << 24);
+#ifdef _TARGET_64BIT_
+ if (length > 4)
+ {
+ initVal = initVal | (initVal << 32);
+ srcPtrOrVal->gtType = TYP_LONG;
+ }
+ else
+ {
+ srcPtrOrVal->gtType = TYP_INT;
+ }
+#endif // _TARGET_64BIT_
+ }
+ srcPtrOrVal->gtIntCon.gtIconVal = initVal;
+ }
+ else
+ {
+ ins_P = INS_movsp;
+ ins_PR = INS_r_movsp;
+ ins_B = INS_movsb;
+ }
+
+ // Determine if we will be using SSE2
+ unsigned movqLenMin = 8;
+ unsigned movqLenMax = 24;
+
+ bool bWillUseSSE2 = false;
+ bool bWillUseOnlySSE2 = false;
+ bool bNeedEvaluateCnst = true; // If we only use SSE2, we will just load the constant there.
+
+#ifdef _TARGET_64BIT_
+
+// Until we get SSE2 instructions that move 16 bytes at a time instead of just 8
+// there is no point in wasting space on the bigger instructions
+
+#else // !_TARGET_64BIT_
+
+ if (compiler->opts.compCanUseSSE2)
+ {
+ unsigned curBBweight = compiler->compCurBB->getBBWeight(compiler);
+
+ /* Adjust for BB weight */
+ if (curBBweight == BB_ZERO_WEIGHT)
+ {
+ // Don't bother with this optimization in
+ // rarely run blocks
+ movqLenMax = movqLenMin = 0;
+ }
+ else if (curBBweight < BB_UNITY_WEIGHT)
+ {
+ // Be less aggressive when we are inside a conditional
+ movqLenMax = 16;
+ }
+ else if (curBBweight >= (BB_LOOP_WEIGHT * BB_UNITY_WEIGHT) / 2)
+ {
+ // Be more aggressive when we are inside a loop
+ movqLenMax = 48;
+ }
+
+ if ((compiler->compCodeOpt() == Compiler::FAST_CODE) || isInitBlk)
+ {
+ // Be more aggressive when optimizing for speed
+ // InitBlk uses fewer instructions
+ movqLenMax += 16;
+ }
+
+ if (compiler->compCodeOpt() != Compiler::SMALL_CODE && length >= movqLenMin && length <= movqLenMax)
+ {
+ bWillUseSSE2 = true;
+
+ if ((length % 8) == 0)
+ {
+ bWillUseOnlySSE2 = true;
+ if (isInitBlk && (initVal == 0))
+ {
+ bNeedEvaluateCnst = false;
+ noway_assert((srcPtrOrVal->OperGet() == GT_CNS_INT));
+ }
+ }
+ }
+ }
+
+#endif // !_TARGET_64BIT_
+
+ const bool bWillTrashRegSrc = (isCopyBlk && !bWillUseOnlySSE2);
+ /* Evaluate dest and src/val */
+
+ if (tree->gtFlags & GTF_REVERSE_OPS)
+ {
+ if (bNeedEvaluateCnst)
+ {
+ genComputeReg(srcPtrOrVal, regs, RegSet::EXACT_REG, RegSet::KEEP_REG, bWillTrashRegSrc);
+ }
+ genComputeReg(destPtr, RBM_EDI, RegSet::EXACT_REG, RegSet::KEEP_REG, !bWillUseOnlySSE2);
+ if (bNeedEvaluateCnst)
+ {
+ genRecoverReg(srcPtrOrVal, regs, RegSet::KEEP_REG);
+ }
+ }
+ else
+ {
+ genComputeReg(destPtr, RBM_EDI, RegSet::EXACT_REG, RegSet::KEEP_REG, !bWillUseOnlySSE2);
+ if (bNeedEvaluateCnst)
+ {
+ genComputeReg(srcPtrOrVal, regs, RegSet::EXACT_REG, RegSet::KEEP_REG, bWillTrashRegSrc);
+ }
+ genRecoverReg(destPtr, RBM_EDI, RegSet::KEEP_REG);
+ }
+
+ bool bTrashedESI = false;
+ bool bTrashedEDI = false;
+
+ if (bWillUseSSE2)
+ {
+ int blkDisp = 0;
+ regNumber xmmReg = REG_XMM0;
+
+ if (isInitBlk)
+ {
+ if (initVal)
+ {
+ getEmitter()->emitIns_R_R(INS_mov_i2xmm, EA_4BYTE, xmmReg, REG_EAX);
+ getEmitter()->emitIns_R_R(INS_punpckldq, EA_4BYTE, xmmReg, xmmReg);
+ }
+ else
+ {
+ getEmitter()->emitIns_R_R(INS_xorps, EA_8BYTE, xmmReg, xmmReg);
+ }
+ }
+
+ JITLOG_THIS(compiler, (LL_INFO100, "Using XMM instructions for %3d byte %s while compiling %s\n",
+ length, isInitBlk ? "initblk" : "copyblk", compiler->info.compFullName));
+
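+ // Process 8 bytes at a time through xmm0: a store for InitBlk, a load/store pair for CopyBlk.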
+ while (length > 7)
+ {
+ if (isInitBlk)
+ {
+ getEmitter()->emitIns_AR_R(INS_movq, EA_8BYTE, xmmReg, REG_EDI, blkDisp);
+ }
+ else
+ {
+ getEmitter()->emitIns_R_AR(INS_movq, EA_8BYTE, xmmReg, REG_ESI, blkDisp);
+ getEmitter()->emitIns_AR_R(INS_movq, EA_8BYTE, xmmReg, REG_EDI, blkDisp);
+ }
+ blkDisp += 8;
+ length -= 8;
+ }
+
+ if (length > 0)
+ {
+ noway_assert(bNeedEvaluateCnst);
+ noway_assert(!bWillUseOnlySSE2);
+
+ if (isCopyBlk)
+ {
+ inst_RV_IV(INS_add, REG_ESI, blkDisp, emitActualTypeSize(srcPtrOrVal->TypeGet()));
+ bTrashedESI = true;
+ }
+
+ inst_RV_IV(INS_add, REG_EDI, blkDisp, emitActualTypeSize(destPtr->TypeGet()));
+ bTrashedEDI = true;
+
+ if (length >= REGSIZE_BYTES)
+ {
+ instGen(ins_P);
+ length -= REGSIZE_BYTES;
+ }
+ }
+ }
+ else if (compiler->compCodeOpt() == Compiler::SMALL_CODE)
+ {
+ /* For small code, we can only use ins_DR to generate fast
+ and small code. We also can't use "rep movsb" because
+ we may not be atomically reading and writing the DWORD */
+
+ noway_assert(bNeedEvaluateCnst);
+
+ goto USE_DR;
+ }
+ else if (length <= 4 * REGSIZE_BYTES)
+ {
+ noway_assert(bNeedEvaluateCnst);
+
+ while (length >= REGSIZE_BYTES)
+ {
+ instGen(ins_P);
+ length -= REGSIZE_BYTES;
+ }
+
+ bTrashedEDI = true;
+ if (isCopyBlk)
+ bTrashedESI = true;
+ }
+ else
+ {
+ USE_DR:
+ noway_assert(bNeedEvaluateCnst);
+
+ /* set ECX to length/REGSIZE_BYTES (in pointer-sized words) */
+ genSetRegToIcon(REG_ECX, length / REGSIZE_BYTES, TYP_I_IMPL);
+
+ length &= (REGSIZE_BYTES - 1);
+
+ instGen(ins_PR);
+
+ regTracker.rsTrackRegTrash(REG_ECX);
+
+ bTrashedEDI = true;
+ if (isCopyBlk)
+ bTrashedESI = true;
+ }
+
+ /* Now take care of the remainder */
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef _TARGET_64BIT_
+ if (length > 4)
+ {
+ noway_assert(bNeedEvaluateCnst);
+ noway_assert(length < 8);
+
+ instGen((isInitBlk) ? INS_stosd : INS_movsd);
+ length -= 4;
+
+ bTrashedEDI = true;
+ if (isCopyBlk)
+ bTrashedESI = true;
+ }
+
+#endif // _TARGET_64BIT_
+
+ if (length)
+ {
+ noway_assert(bNeedEvaluateCnst);
+
+ while (length--)
+ {
+ instGen(ins_B);
+ }
+
+ bTrashedEDI = true;
+ if (isCopyBlk)
+ bTrashedESI = true;
+ }
+
+ noway_assert(bTrashedEDI == !bWillUseOnlySSE2);
+ if (bTrashedEDI)
+ regTracker.rsTrackRegTrash(REG_EDI);
+ if (bTrashedESI)
+ regTracker.rsTrackRegTrash(REG_ESI);
+ // else No need to trash EAX as it wasn't destroyed by the "rep stos"
+
+ genReleaseReg(destPtr);
+ if (bNeedEvaluateCnst)
+ genReleaseReg(srcPtrOrVal);
+ }
+ else
+ {
+ //
+ // This is a variable-sized COPYBLK/INITBLK,
+ // or a fixed size INITBLK with a variable init value,
+ //
+
+ // Determine the order in which the Dest, Val/Src, and Size should be calculated
+
+ compiler->fgOrderBlockOps(tree, RBM_EDI, regs, RBM_ECX, opsPtr, regsPtr); // OUT arguments
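+ // opsPtr[]/regsPtr[] now hold the dest, val/src, and size operands in evaluation order, paired with
+ // the register masks each operand must be computed into (EDI, EAX or ESI, and ECX respectively).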
+
+ noway_assert((isInitBlk && (regs == RBM_EAX)) || (isCopyBlk && (regs == RBM_ESI)));
+ genComputeReg(opsPtr[0], regsPtr[0], RegSet::EXACT_REG, RegSet::KEEP_REG, (regsPtr[0] != RBM_EAX));
+ genComputeReg(opsPtr[1], regsPtr[1], RegSet::EXACT_REG, RegSet::KEEP_REG, (regsPtr[1] != RBM_EAX));
+ if (opsPtr[2] != nullptr)
+ {
+ genComputeReg(opsPtr[2], regsPtr[2], RegSet::EXACT_REG, RegSet::KEEP_REG, (regsPtr[2] != RBM_EAX));
+ }
+ genRecoverReg(opsPtr[0], regsPtr[0], RegSet::KEEP_REG);
+ genRecoverReg(opsPtr[1], regsPtr[1], RegSet::KEEP_REG);
+
+ noway_assert((destPtr->gtFlags & GTF_REG_VAL) && // Dest
+ (destPtr->gtRegNum == REG_EDI));
+
+ noway_assert((srcPtrOrVal->gtFlags & GTF_REG_VAL) && // Val/Src
+ (genRegMask(srcPtrOrVal->gtRegNum) == regs));
+
+ if (sizeIsConst)
+ {
+ inst_RV_IV(INS_mov, REG_ECX, blockSize, EA_PTRSIZE);
+ }
+ else
+ {
+ noway_assert((sizeNode->gtFlags & GTF_REG_VAL) && // Size
+ (sizeNode->gtRegNum == REG_ECX));
+ }
+
+ if (isInitBlk)
+ instGen(INS_r_stosb);
+ else
+ instGen(INS_r_movsb);
+
+ regTracker.rsTrackRegTrash(REG_EDI);
+ regTracker.rsTrackRegTrash(REG_ECX);
+
+ if (isCopyBlk)
+ regTracker.rsTrackRegTrash(REG_ESI);
+ // else No need to trash EAX as it wasn't destroyed by the "rep stos"
+
+ genReleaseReg(opsPtr[0]);
+ genReleaseReg(opsPtr[1]);
+ if (opsPtr[2] != nullptr)
+ {
+ genReleaseReg(opsPtr[2]);
+ }
+ }
+
+#else // !CPU_USES_BLOCK_MOVE
+
+#ifndef _TARGET_ARM_
+// Currently only the ARM implementation is provided
+#error "COPYBLK/INITBLK non-ARM && non-CPU_USES_BLOCK_MOVE"
+#endif
+ //
+ // Is this a fixed size COPYBLK?
+ // or a fixed size INITBLK with a constant init value?
+ //
+ if (sizeIsConst && (isCopyBlk || (srcPtrOrVal->OperGet() == GT_CNS_INT)))
+ {
+ GenTreePtr dstOp = destPtr;
+ GenTreePtr srcOp = srcPtrOrVal;
+ unsigned length = blockSize;
+ unsigned fullStoreCount = length / TARGET_POINTER_SIZE;
+ unsigned initVal = 0;
+ bool useLoop = false;
+
+ if (isInitBlk)
+ {
+ /* Properly extend the init constant from a U1 to a U4 */
+ initVal = 0xFF & ((unsigned)srcOp->gtIntCon.gtIconVal);
+
+ /* If it is a non-zero value we have to replicate */
+ /* the byte value four times to form the DWORD */
+ /* Then we store this new value into the tree-node */
+
+ if (initVal != 0)
+ {
+ initVal = initVal | (initVal << 8) | (initVal << 16) | (initVal << 24);
+ srcPtrOrVal->gtIntCon.gtIconVal = initVal;
+ }
+ }
+
+ // Will we be using a loop to implement this INITBLK/COPYBLK?
+ if ((isCopyBlk && (fullStoreCount >= 8)) || (isInitBlk && (fullStoreCount >= 16)))
+ {
+ useLoop = true;
+ }
+
+ regMaskTP usedRegs;
+ regNumber regDst;
+ regNumber regSrc;
+ regNumber regTemp;
+
+ /* Evaluate dest and src/val */
+
+ if (tree->gtFlags & GTF_REVERSE_OPS)
+ {
+ genComputeReg(srcOp, (needReg & ~dstOp->gtRsvdRegs), RegSet::ANY_REG, RegSet::KEEP_REG, useLoop);
+ assert(srcOp->gtFlags & GTF_REG_VAL);
+
+ genComputeReg(dstOp, needReg, RegSet::ANY_REG, RegSet::KEEP_REG, useLoop);
+ assert(dstOp->gtFlags & GTF_REG_VAL);
+ regDst = dstOp->gtRegNum;
+
+ genRecoverReg(srcOp, needReg, RegSet::KEEP_REG);
+ regSrc = srcOp->gtRegNum;
+ }
+ else
+ {
+ genComputeReg(dstOp, (needReg & ~srcOp->gtRsvdRegs), RegSet::ANY_REG, RegSet::KEEP_REG, useLoop);
+ assert(dstOp->gtFlags & GTF_REG_VAL);
+
+ genComputeReg(srcOp, needReg, RegSet::ANY_REG, RegSet::KEEP_REG, useLoop);
+ assert(srcOp->gtFlags & GTF_REG_VAL);
+ regSrc = srcOp->gtRegNum;
+
+ genRecoverReg(dstOp, needReg, RegSet::KEEP_REG);
+ regDst = dstOp->gtRegNum;
+ }
+ assert(dstOp->gtFlags & GTF_REG_VAL);
+ assert(srcOp->gtFlags & GTF_REG_VAL);
+
+ regDst = dstOp->gtRegNum;
+ regSrc = srcOp->gtRegNum;
+ usedRegs = (genRegMask(regSrc) | genRegMask(regDst));
+ bool dstIsOnStack = (dstOp->gtOper == GT_ADDR && (dstOp->gtFlags & GTF_ADDR_ONSTACK));
+ emitAttr dstType = (varTypeIsGC(dstOp) && !dstIsOnStack) ? EA_BYREF : EA_PTRSIZE;
+ emitAttr srcType;
+
+ if (isCopyBlk)
+ {
+ // Prefer a low register, but avoid one of the ones we've already grabbed
+ regTemp = regSet.rsGrabReg(regSet.rsNarrowHint(regSet.rsRegMaskCanGrab() & ~usedRegs, RBM_LOW_REGS));
+ usedRegs |= genRegMask(regTemp);
+ bool srcIsOnStack = (srcOp->gtOper == GT_ADDR && (srcOp->gtFlags & GTF_ADDR_ONSTACK));
+ srcType = (varTypeIsGC(srcOp) && !srcIsOnStack) ? EA_BYREF : EA_PTRSIZE;
+ }
+ else
+ {
+ regTemp = REG_STK;
+ srcType = EA_PTRSIZE;
+ }
+
+ instruction loadIns = ins_Load(TYP_I_IMPL); // INS_ldr
+ instruction storeIns = ins_Store(TYP_I_IMPL); // INS_str
+
+ int finalOffset;
+
+ // Can we emit a small number of ldr/str instructions to implement this INITBLK/COPYBLK?
+ if (!useLoop)
+ {
+ for (unsigned i = 0; i < fullStoreCount; i++)
+ {
+ if (isCopyBlk)
+ {
+ getEmitter()->emitIns_R_R_I(loadIns, EA_4BYTE, regTemp, regSrc, i * TARGET_POINTER_SIZE);
+ getEmitter()->emitIns_R_R_I(storeIns, EA_4BYTE, regTemp, regDst, i * TARGET_POINTER_SIZE);
+ gcInfo.gcMarkRegSetNpt(genRegMask(regTemp));
+ regTracker.rsTrackRegTrash(regTemp);
+ }
+ else
+ {
+ getEmitter()->emitIns_R_R_I(storeIns, EA_4BYTE, regSrc, regDst, i * TARGET_POINTER_SIZE);
+ }
+ }
+
+ finalOffset = fullStoreCount * TARGET_POINTER_SIZE;
+ length -= finalOffset;
+ }
+ else // We will use a loop to implement this INITBLK/COPYBLK
+ {
+ unsigned pairStoreLoopCount = fullStoreCount / 2;
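+ // Each iteration of the loop below copies (or initializes) two pointer-sized words.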
+
+ // We need a second temp register for CopyBlk
+ regNumber regTemp2 = REG_STK;
+ if (isCopyBlk)
+ {
+ // Prefer a low register, but avoid one of the ones we've already grabbed
+ regTemp2 =
+ regSet.rsGrabReg(regSet.rsNarrowHint(regSet.rsRegMaskCanGrab() & ~usedRegs, RBM_LOW_REGS));
+ usedRegs |= genRegMask(regTemp2);
+ }
+
+ // Pick and initialize the loop counter register
+ regNumber regLoopIndex;
+ regLoopIndex =
+ regSet.rsGrabReg(regSet.rsNarrowHint(regSet.rsRegMaskCanGrab() & ~usedRegs, RBM_LOW_REGS));
+ genSetRegToIcon(regLoopIndex, pairStoreLoopCount, TYP_INT);
+
+ // Create and define the Basic Block for the loop top
+ BasicBlock* loopTopBlock = genCreateTempLabel();
+ genDefineTempLabel(loopTopBlock);
+
+ // The loop body
+ if (isCopyBlk)
+ {
+ getEmitter()->emitIns_R_R_I(loadIns, EA_4BYTE, regTemp, regSrc, 0);
+ getEmitter()->emitIns_R_R_I(loadIns, EA_4BYTE, regTemp2, regSrc, TARGET_POINTER_SIZE);
+ getEmitter()->emitIns_R_R_I(storeIns, EA_4BYTE, regTemp, regDst, 0);
+ getEmitter()->emitIns_R_R_I(storeIns, EA_4BYTE, regTemp2, regDst, TARGET_POINTER_SIZE);
+ getEmitter()->emitIns_R_I(INS_add, srcType, regSrc, 2 * TARGET_POINTER_SIZE);
+ gcInfo.gcMarkRegSetNpt(genRegMask(regTemp));
+ gcInfo.gcMarkRegSetNpt(genRegMask(regTemp2));
+ regTracker.rsTrackRegTrash(regSrc);
+ regTracker.rsTrackRegTrash(regTemp);
+ regTracker.rsTrackRegTrash(regTemp2);
+ }
+ else // isInitBlk
+ {
+ getEmitter()->emitIns_R_R_I(storeIns, EA_4BYTE, regSrc, regDst, 0);
+ getEmitter()->emitIns_R_R_I(storeIns, EA_4BYTE, regSrc, regDst, TARGET_POINTER_SIZE);
+ }
+
+ getEmitter()->emitIns_R_I(INS_add, dstType, regDst, 2 * TARGET_POINTER_SIZE);
+ regTracker.rsTrackRegTrash(regDst);
+ getEmitter()->emitIns_R_I(INS_sub, EA_4BYTE, regLoopIndex, 1, INS_FLAGS_SET);
+ emitJumpKind jmpGTS = genJumpKindForOper(GT_GT, CK_SIGNED);
+ inst_JMP(jmpGTS, loopTopBlock);
+
+ regTracker.rsTrackRegIntCns(regLoopIndex, 0);
+
+ length -= (pairStoreLoopCount * (2 * TARGET_POINTER_SIZE));
+
+ if (length & TARGET_POINTER_SIZE)
+ {
+ if (isCopyBlk)
+ {
+ getEmitter()->emitIns_R_R_I(loadIns, EA_4BYTE, regTemp, regSrc, 0);
+ getEmitter()->emitIns_R_R_I(storeIns, EA_4BYTE, regTemp, regDst, 0);
+ }
+ else
+ {
+ getEmitter()->emitIns_R_R_I(storeIns, EA_4BYTE, regSrc, regDst, 0);
+ }
+ finalOffset = TARGET_POINTER_SIZE;
+ length -= TARGET_POINTER_SIZE;
+ }
+ else
+ {
+ finalOffset = 0;
+ }
+ }
+
+ if (length & sizeof(short))
+ {
+ loadIns = ins_Load(TYP_USHORT); // INS_ldrh
+ storeIns = ins_Store(TYP_USHORT); // INS_strh
+
+ if (isCopyBlk)
+ {
+ getEmitter()->emitIns_R_R_I(loadIns, EA_2BYTE, regTemp, regSrc, finalOffset);
+ getEmitter()->emitIns_R_R_I(storeIns, EA_2BYTE, regTemp, regDst, finalOffset);
+ gcInfo.gcMarkRegSetNpt(genRegMask(regTemp));
+ regTracker.rsTrackRegTrash(regTemp);
+ }
+ else
+ {
+ getEmitter()->emitIns_R_R_I(storeIns, EA_2BYTE, regSrc, regDst, finalOffset);
+ }
+ length -= sizeof(short);
+ finalOffset += sizeof(short);
+ }
+
+ if (length & sizeof(char))
+ {
+ loadIns = ins_Load(TYP_UBYTE); // INS_ldrb
+ storeIns = ins_Store(TYP_UBYTE); // INS_strb
+
+ if (isCopyBlk)
+ {
+ getEmitter()->emitIns_R_R_I(loadIns, EA_1BYTE, regTemp, regSrc, finalOffset);
+ getEmitter()->emitIns_R_R_I(storeIns, EA_1BYTE, regTemp, regDst, finalOffset);
+ gcInfo.gcMarkRegSetNpt(genRegMask(regTemp));
+ regTracker.rsTrackRegTrash(regTemp);
+ }
+ else
+ {
+ getEmitter()->emitIns_R_R_I(storeIns, EA_1BYTE, regSrc, regDst, finalOffset);
+ }
+ length -= sizeof(char);
+ }
+ assert(length == 0);
+
+ genReleaseReg(dstOp);
+ genReleaseReg(srcOp);
+ }
+ else
+ {
+ //
+ // This is a variable-sized COPYBLK/INITBLK,
+ // or a fixed size INITBLK with a variable init value,
+ //
+
+ // Determine the order in which the Dest, Val/Src, and Size should be calculated
+
+ compiler->fgOrderBlockOps(tree, RBM_ARG_0, RBM_ARG_1, RBM_ARG_2, opsPtr, regsPtr); // OUT arguments
+
+ genComputeReg(opsPtr[0], regsPtr[0], RegSet::EXACT_REG, RegSet::KEEP_REG);
+ genComputeReg(opsPtr[1], regsPtr[1], RegSet::EXACT_REG, RegSet::KEEP_REG);
+ if (opsPtr[2] != nullptr)
+ {
+ genComputeReg(opsPtr[2], regsPtr[2], RegSet::EXACT_REG, RegSet::KEEP_REG);
+ }
+ genRecoverReg(opsPtr[0], regsPtr[0], RegSet::KEEP_REG);
+ genRecoverReg(opsPtr[1], regsPtr[1], RegSet::KEEP_REG);
+
+ noway_assert((destPtr->gtFlags & GTF_REG_VAL) && // Dest
+ (destPtr->gtRegNum == REG_ARG_0));
+
+ noway_assert((srcPtrOrVal->gtFlags & GTF_REG_VAL) && // Val/Src
+ (srcPtrOrVal->gtRegNum == REG_ARG_1));
+
+ if (sizeIsConst)
+ {
+ inst_RV_IV(INS_mov, REG_ARG_2, blockSize, EA_PTRSIZE);
+ }
+ else
+ {
+ noway_assert((sizeNode->gtFlags & GTF_REG_VAL) && // Size
+ (sizeNode->gtRegNum == REG_ARG_2));
+ }
+
+ regSet.rsLockUsedReg(RBM_ARG_0 | RBM_ARG_1 | RBM_ARG_2);
+
+ genEmitHelperCall(isCopyBlk ? CORINFO_HELP_MEMCPY
+ /* GT_INITBLK */
+ : CORINFO_HELP_MEMSET,
+ 0, EA_UNKNOWN);
+
+ regTracker.rsTrackRegMaskTrash(RBM_CALLEE_TRASH);
+
+ regSet.rsUnlockUsedReg(RBM_ARG_0 | RBM_ARG_1 | RBM_ARG_2);
+ genReleaseReg(opsPtr[0]);
+ genReleaseReg(opsPtr[1]);
+ if (opsPtr[2] != nullptr)
+ {
+ genReleaseReg(opsPtr[2]);
+ }
+ }
+
+ if (isCopyBlk && dest->AsBlk()->IsVolatile())
+ {
+ // Emit a memory barrier instruction after the CopyBlk
+ instGen_MemoryBarrier();
+ }
+#endif // !CPU_USES_BLOCK_MOVE
+ }
+}
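+ // Dummy basic block used only as a label target (via genDefineTempLabel) when we need to force the
+ // emitter to record a GC liveness boundary, e.g. after the CORINFO_HELP_ASSIGN_BYREF calls above.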
+BasicBlock dummyBB;
+
+#ifdef _PREFAST_
+#pragma warning(push)
+#pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
+#endif
+void CodeGen::genCodeForTreeSmpOp(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg)
+{
+ const genTreeOps oper = tree->OperGet();
+ const var_types treeType = tree->TypeGet();
+ GenTreePtr op1 = tree->gtOp.gtOp1;
+ GenTreePtr op2 = tree->gtGetOp2();
+ regNumber reg = DUMMY_INIT(REG_CORRUPT);
+ regMaskTP regs = regSet.rsMaskUsed;
+ regMaskTP needReg = destReg;
+ insFlags flags = tree->gtSetFlags() ? INS_FLAGS_SET : INS_FLAGS_DONT_CARE;
+ emitAttr size;
+ instruction ins;
+ regMaskTP addrReg;
+ GenTreePtr opsPtr[3];
+ regMaskTP regsPtr[3];
+
+#ifdef DEBUG
+ addrReg = 0xDEADCAFE;
+#endif
+
+ noway_assert(tree->OperKind() & GTK_SMPOP);
+
+ switch (oper)
+ {
+ case GT_ASG:
+ if (tree->OperIsBlkOp())
+ {
+ genCodeForBlkOp(tree, destReg);
+ }
+ else
+ {
+ genCodeForTreeSmpOpAsg(tree);
+ }
+ return;
+
+ case GT_ASG_LSH:
+ case GT_ASG_RSH:
+ case GT_ASG_RSZ:
+ genCodeForAsgShift(tree, destReg, bestReg);
+ return;
+
+ case GT_ASG_AND:
+ case GT_ASG_OR:
+ case GT_ASG_XOR:
+ case GT_ASG_ADD:
+ case GT_ASG_SUB:
+ genCodeForTreeSmpBinArithLogAsgOp(tree, destReg, bestReg);
+ return;
+
+ case GT_CHS:
+ addrReg = genMakeAddressable(op1, 0, RegSet::KEEP_REG, true);
+#ifdef _TARGET_XARCH_
+ // Note that the specialCase here occurs when the treeType specifies a byte sized operation
+ // and we decided to enregister the op1 LclVar in a non-byteable register (ESI or EDI)
+ //
+ bool specialCase;
+ specialCase = false;
+ if (op1->gtOper == GT_REG_VAR)
+ {
+ /* Get hold of the target register */
+
+ reg = op1->gtRegVar.gtRegNum;
+ if (varTypeIsByte(treeType) && !(genRegMask(reg) & RBM_BYTE_REGS))
+ {
+ regNumber byteReg = regSet.rsGrabReg(RBM_BYTE_REGS);
+
+ inst_RV_RV(INS_mov, byteReg, reg);
+ regTracker.rsTrackRegTrash(byteReg);
+
+ inst_RV(INS_NEG, byteReg, treeType, emitTypeSize(treeType));
+ var_types op1Type = op1->TypeGet();
+ instruction wideningIns = ins_Move_Extend(op1Type, true);
+ inst_RV_RV(wideningIns, reg, byteReg, op1Type, emitTypeSize(op1Type));
+ regTracker.rsTrackRegTrash(reg);
+ specialCase = true;
+ }
+ }
+
+ if (!specialCase)
+ {
+ inst_TT(INS_NEG, op1, 0, 0, emitTypeSize(treeType));
+ }
+#else // not _TARGET_XARCH_
+ if (op1->gtFlags & GTF_REG_VAL)
+ {
+ inst_TT_IV(INS_NEG, op1, 0, 0, emitTypeSize(treeType), flags);
+ }
+ else
+ {
+ // Fix 388382 ARM JitStress WP7
+ var_types op1Type = op1->TypeGet();
+ regNumber reg = regSet.rsPickFreeReg();
+ inst_RV_TT(ins_Load(op1Type), reg, op1, 0, emitTypeSize(op1Type));
+ regTracker.rsTrackRegTrash(reg);
+ inst_RV_IV(INS_NEG, reg, 0, emitTypeSize(treeType), flags);
+ inst_TT_RV(ins_Store(op1Type), op1, reg, 0, emitTypeSize(op1Type));
+ }
+#endif
+ if (op1->gtFlags & GTF_REG_VAL)
+ regTracker.rsTrackRegTrash(op1->gtRegNum);
+ genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);
+
+ genCodeForTreeSmpOpAsg_DONE_ASSG(tree, addrReg, tree->gtRegNum, /* ovfl */ false);
+ return;
+
+ case GT_AND:
+ case GT_OR:
+ case GT_XOR:
+ case GT_ADD:
+ case GT_SUB:
+ case GT_MUL:
+ genCodeForTreeSmpBinArithLogOp(tree, destReg, bestReg);
+ return;
+
+ case GT_UMOD:
+ genCodeForUnsignedMod(tree, destReg, bestReg);
+ return;
+
+ case GT_MOD:
+ genCodeForSignedMod(tree, destReg, bestReg);
+ return;
+
+ case GT_UDIV:
+ genCodeForUnsignedDiv(tree, destReg, bestReg);
+ return;
+
+ case GT_DIV:
+ genCodeForSignedDiv(tree, destReg, bestReg);
+ return;
+
+ case GT_LSH:
+ case GT_RSH:
+ case GT_RSZ:
+ genCodeForShift(tree, destReg, bestReg);
+ return;
+
+ case GT_NEG:
+ case GT_NOT:
+
+ /* Generate the operand into some register */
+
+ genCompIntoFreeReg(op1, needReg, RegSet::FREE_REG);
+ noway_assert(op1->gtFlags & GTF_REG_VAL);
+
+ reg = op1->gtRegNum;
+
+ /* Negate/reverse the value in the register */
+
+ inst_RV((oper == GT_NEG) ? INS_NEG : INS_NOT, reg, treeType);
+
+ /* The register is now trashed */
+
+ regTracker.rsTrackRegTrash(reg);
+
+ genCodeForTree_DONE(tree, reg);
+ return;
+
+ case GT_IND:
+ case GT_NULLCHECK: // At this point, explicit null checks are just like inds...
+
+ /* Make sure the operand is addressable */
+
+ addrReg = genMakeAddressable(tree, RBM_ALLINT, RegSet::KEEP_REG, true);
+
+ genDoneAddressable(tree, addrReg, RegSet::KEEP_REG);
+
+ /* Figure out the size of the value being loaded */
+
+ size = EA_ATTR(genTypeSize(tree->gtType));
+
+ /* Pick a register for the value */
+
+ if (needReg == RBM_ALLINT && bestReg == 0)
+ {
+ /* Absent a better suggestion, pick a useless register */
+
+ bestReg = regSet.rsExcludeHint(regSet.rsRegMaskFree(), ~regTracker.rsUselessRegs());
+ }
+
+ reg = regSet.rsPickReg(needReg, bestReg);
+
+ if (op1->IsCnsIntOrI() && op1->IsIconHandle(GTF_ICON_TLS_HDL))
+ {
+ noway_assert(size == EA_PTRSIZE);
+ getEmitter()->emitIns_R_C(ins_Load(TYP_I_IMPL), EA_PTRSIZE, reg, FLD_GLOBAL_FS,
+ (int)op1->gtIntCon.gtIconVal);
+ }
+ else
+ {
+ /* Generate "mov reg, [addr]" or "movsx/movzx reg, [addr]" */
+
+ inst_mov_RV_ST(reg, tree);
+ }
+
+#ifdef _TARGET_ARM_
+ if (tree->gtFlags & GTF_IND_VOLATILE)
+ {
+ // Emit a memory barrier instruction after the load
+ instGen_MemoryBarrier();
+ }
+#endif
+
+ /* Note the new contents of the register we used */
+
+ regTracker.rsTrackRegTrash(reg);
+
+#ifdef DEBUG
+ /* Update the live set of register variables */
+ if (compiler->opts.varNames)
+ genUpdateLife(tree);
+#endif
+
+ /* Now we can update the register pointer information */
+
+ // genDoneAddressable(tree, addrReg, RegSet::KEEP_REG);
+ gcInfo.gcMarkRegPtrVal(reg, treeType);
+
+ genCodeForTree_DONE_LIFE(tree, reg);
+ return;
+
+ case GT_CAST:
+
+ genCodeForNumericCast(tree, destReg, bestReg);
+ return;
+
+ case GT_JTRUE:
+
+ /* Is this a test of a relational operator? */
+
+ if (op1->OperIsCompare())
+ {
+ /* Generate the conditional jump */
+
+ genCondJump(op1);
+
+ genUpdateLife(tree);
+ return;
+ }
+
+#ifdef DEBUG
+ compiler->gtDispTree(tree);
+#endif
+ NO_WAY("ISSUE: can we ever have a jumpCC without a compare node?");
+ break;
+
+ case GT_SWITCH:
+ genCodeForSwitch(tree);
+ return;
+
+ case GT_RETFILT:
+ noway_assert(tree->gtType == TYP_VOID || op1 != 0);
+ if (op1 == 0) // endfinally
+ {
+ reg = REG_NA;
+
+#ifdef _TARGET_XARCH_
+ /* Return using a pop-jmp sequence. As the "try" block calls
+ the finally with a jmp, this leaves the x86 call-ret stack
+ balanced in the normal flow path. */
+
+ noway_assert(isFramePointerRequired());
+ inst_RV(INS_pop_hide, REG_EAX, TYP_I_IMPL);
+ inst_RV(INS_i_jmp, REG_EAX, TYP_I_IMPL);
+#elif defined(_TARGET_ARM_)
+// Nothing needed for ARM
+#else
+ NYI("TARGET");
+#endif
+ }
+ else // endfilter
+ {
+ genComputeReg(op1, RBM_INTRET, RegSet::EXACT_REG, RegSet::FREE_REG);
+ noway_assert(op1->gtFlags & GTF_REG_VAL);
+ noway_assert(op1->gtRegNum == REG_INTRET);
+ /* The return value has now been computed */
+ reg = op1->gtRegNum;
+
+ /* Return */
+ instGen_Return(0);
+ }
+
+ genCodeForTree_DONE(tree, reg);
+ return;
+
+ case GT_RETURN:
+
+ // TODO: this should be done AFTER we called exit mon so that
+ // we are sure that we don't have to keep 'this' alive
+
+ if (compiler->info.compCallUnmanaged && (compiler->compCurBB == compiler->genReturnBB))
+ {
+ /* either it's an "empty" statement or the return statement
+ of a synchronized method
+ */
+
+ genPInvokeMethodEpilog();
+ }
+
+ /* Is there a return value and/or an exit statement? */
+
+ if (op1)
+ {
+ if (op1->gtType == TYP_VOID)
+ {
+ // We're returning nothing, just generate the block (shared epilog calls).
+ genCodeForTree(op1, 0);
+ }
+#ifdef _TARGET_ARM_
+ else if (op1->gtType == TYP_STRUCT)
+ {
+ if (op1->gtOper == GT_CALL)
+ {
+ // We have a return call() because we failed to tail call.
+ // In any case, just generate the call and be done.
+ assert(compiler->IsHfa(op1));
+ genCodeForCall(op1, true);
+ genMarkTreeInReg(op1, REG_FLOATRET);
+ }
+ else
+ {
+ assert(op1->gtOper == GT_LCL_VAR);
+ assert(compiler->IsHfa(compiler->lvaGetStruct(op1->gtLclVarCommon.gtLclNum)));
+ genLoadIntoFltRetRegs(op1);
+ }
+ }
+ else if (op1->TypeGet() == TYP_FLOAT)
+ {
+ // This can only occur when we are returning a non-HFA struct
+ // that is composed of a single float field and we performed
+ // struct promotion and enregistered the float field.
+ //
+ genComputeReg(op1, 0, RegSet::ANY_REG, RegSet::FREE_REG);
+ getEmitter()->emitIns_R_R(INS_vmov_f2i, EA_4BYTE, REG_INTRET, op1->gtRegNum);
+ }
+#endif // _TARGET_ARM_
+ else
+ {
+ // we can now go through this code for compiler->genReturnBB. I've regularized all the code.
+
+ // noway_assert(compiler->compCurBB != compiler->genReturnBB);
+
+ noway_assert(op1->gtType != TYP_VOID);
+
+ /* Generate the return value into the return register */
+
+ genComputeReg(op1, RBM_INTRET, RegSet::EXACT_REG, RegSet::FREE_REG);
+
+ /* The result must now be in the return register */
+
+ noway_assert(op1->gtFlags & GTF_REG_VAL);
+ noway_assert(op1->gtRegNum == REG_INTRET);
+ }
+
+ /* The return value has now been computed */
+
+ reg = op1->gtRegNum;
+
+ genCodeForTree_DONE(tree, reg);
+ }
+
+#ifdef PROFILING_SUPPORTED
+ // The profiling hook does not trash registers, so it's safe to call after we emit the code for
+ // the GT_RETURN tree.
+
+ if (compiler->compCurBB == compiler->genReturnBB)
+ {
+ genProfilingLeaveCallback();
+ }
+#endif
+#ifdef DEBUG
+ if (compiler->opts.compStackCheckOnRet)
+ {
+ noway_assert(compiler->lvaReturnEspCheck != 0xCCCCCCCC &&
+ compiler->lvaTable[compiler->lvaReturnEspCheck].lvDoNotEnregister &&
+ compiler->lvaTable[compiler->lvaReturnEspCheck].lvOnFrame);
+ getEmitter()->emitIns_S_R(INS_cmp, EA_PTRSIZE, REG_SPBASE, compiler->lvaReturnEspCheck, 0);
+
+ BasicBlock* esp_check = genCreateTempLabel();
+ emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED);
+ inst_JMP(jmpEqual, esp_check);
+ getEmitter()->emitIns(INS_BREAKPOINT);
+ genDefineTempLabel(esp_check);
+ }
+#endif
+ return;
+
+ case GT_COMMA:
+
+ if (tree->gtFlags & GTF_REVERSE_OPS)
+ {
+ if (tree->gtType == TYP_VOID)
+ {
+ genEvalSideEffects(op2);
+ genUpdateLife(op2);
+ genEvalSideEffects(op1);
+ genUpdateLife(tree);
+ return;
+ }
+
+ // Generate op2
+ genCodeForTree(op2, needReg);
+ genUpdateLife(op2);
+
+ noway_assert(op2->gtFlags & GTF_REG_VAL);
+
+ regSet.rsMarkRegUsed(op2);
+
+ // Do side effects of op1
+ genEvalSideEffects(op1);
+
+ // Recover op2 if spilled
+ genRecoverReg(op2, RBM_NONE, RegSet::KEEP_REG);
+
+ regSet.rsMarkRegFree(genRegMask(op2->gtRegNum));
+
+ // set gc info if we need so
+ gcInfo.gcMarkRegPtrVal(op2->gtRegNum, treeType);
+
+ genUpdateLife(tree);
+ genCodeForTree_DONE(tree, op2->gtRegNum);
+
+ return;
+ }
+ else
+ {
+ noway_assert((tree->gtFlags & GTF_REVERSE_OPS) == 0);
+
+ /* Generate side effects of the first operand */
+
+ genEvalSideEffects(op1);
+ genUpdateLife(op1);
+
+ /* Is the value of the second operand used? */
+
+ if (tree->gtType == TYP_VOID)
+ {
+ /* The right operand produces no result. The morpher is
+ responsible for resetting the type of GT_COMMA nodes
+ to TYP_VOID if op2 isn't meant to yield a result. */
+
+ genEvalSideEffects(op2);
+ genUpdateLife(tree);
+ return;
+ }
+
+ /* Generate the second operand, i.e. the 'real' value */
+
+ genCodeForTree(op2, needReg);
+ noway_assert(op2->gtFlags & GTF_REG_VAL);
+
+ /* The result of 'op2' is also the final result */
+
+ reg = op2->gtRegNum;
+
+ /* Remember whether we set the flags */
+
+ tree->gtFlags |= (op2->gtFlags & GTF_ZSF_SET);
+
+ genCodeForTree_DONE(tree, reg);
+ return;
+ }
+
+ case GT_BOX:
+ genCodeForTree(op1, needReg);
+ noway_assert(op1->gtFlags & GTF_REG_VAL);
+
+ /* The result of 'op1' is also the final result */
+
+ reg = op1->gtRegNum;
+
+ /* Remember whether we set the flags */
+
+ tree->gtFlags |= (op1->gtFlags & GTF_ZSF_SET);
+
+ genCodeForTree_DONE(tree, reg);
+ return;
+
+ case GT_QMARK:
+
+ genCodeForQmark(tree, destReg, bestReg);
+ return;
+
+ case GT_NOP:
+
+#if OPT_BOOL_OPS
+ if (op1 == NULL)
+ return;
+#endif
+
+ /* Generate the operand into some register */
+
+ genCodeForTree(op1, needReg);
+
+ /* The result is the same as the operand */
+
+ reg = op1->gtRegNum;
+
+ genCodeForTree_DONE(tree, reg);
+ return;
+
+ case GT_INTRINSIC:
+
+ switch (tree->gtIntrinsic.gtIntrinsicId)
+ {
+ case CORINFO_INTRINSIC_Round:
+ {
+ noway_assert(tree->gtType == TYP_INT);
+
+#if FEATURE_STACK_FP_X87
+ genCodeForTreeFlt(op1);
+
+ /* Store the FP value into the temp */
+ TempDsc* temp = compiler->tmpGetTemp(TYP_INT);
+
+ FlatFPX87_MoveToTOS(&compCurFPState, op1->gtRegNum);
+ FlatFPX87_Kill(&compCurFPState, op1->gtRegNum);
+ inst_FS_ST(INS_fistp, EA_4BYTE, temp, 0);
+
+ reg = regSet.rsPickReg(needReg, bestReg);
+ regTracker.rsTrackRegTrash(reg);
+
+ inst_RV_ST(INS_mov, reg, temp, 0, TYP_INT);
+
+ compiler->tmpRlsTemp(temp);
+#else
+ genCodeForTreeFloat(tree, needReg, bestReg);
+ return;
+#endif
+ }
+ break;
+
+ default:
+ noway_assert(!"unexpected math intrinsic");
+ }
+
+ genCodeForTree_DONE(tree, reg);
+ return;
+
+ case GT_LCLHEAP:
+
+ reg = genLclHeap(op1);
+ genCodeForTree_DONE(tree, reg);
+ return;
+
+ case GT_EQ:
+ case GT_NE:
+ case GT_LT:
+ case GT_LE:
+ case GT_GE:
+ case GT_GT:
+ genCodeForRelop(tree, destReg, bestReg);
+ return;
+
+ case GT_ADDR:
+
+ genCodeForTreeSmpOp_GT_ADDR(tree, destReg, bestReg);
+ return;
+
+#ifdef _TARGET_XARCH_
+ case GT_LOCKADD:
+
+ // This is for a locked add operation. We know that the resulting value doesn't "go" anywhere.
+ // For reference, op1 is the location. op2 is the addend or the value.
+ if (op2->OperIsConst())
+ {
+ noway_assert(op2->TypeGet() == TYP_INT);
+ ssize_t cns = op2->gtIntCon.gtIconVal;
+
+ genComputeReg(op1, RBM_NONE, RegSet::ANY_REG, RegSet::KEEP_REG);
+ switch (cns)
+ {
+ case 1:
+ instGen(INS_lock);
+ instEmit_RM(INS_inc, op1, op1, 0);
+ break;
+ case -1:
+ instGen(INS_lock);
+ instEmit_RM(INS_dec, op1, op1, 0);
+ break;
+ default:
+ assert((int)cns == cns); // By test above for AMD64.
+ instGen(INS_lock);
+ inst_AT_IV(INS_add, EA_4BYTE, op1, (int)cns, 0);
+ break;
+ }
+ genReleaseReg(op1);
+ }
+ else
+ {
+ // non constant addend means it needs to go into a register.
+ ins = INS_add;
+ goto LockBinOpCommon;
+ }
+
+ genFlagsEqualToNone(); // We didn't compute a result into a register.
+ genUpdateLife(tree); // We didn't compute an operand into anything.
+ return;
+
+ case GT_XADD:
+ ins = INS_xadd;
+ goto LockBinOpCommon;
+ case GT_XCHG:
+ ins = INS_xchg;
+ goto LockBinOpCommon;
+ LockBinOpCommon:
+ {
+ // Compute the second operand into a register. xadd and xchg are r/m32, r32. So even if op2
+ // is a constant, it needs to be in a register. This should be the output register if
+ // possible.
+ //
+ // For reference, gtOp1 is the location. gtOp2 is the addend or the value.
+
+ GenTreePtr location = op1;
+ GenTreePtr value = op2;
+
+ // Again, a friendly reminder. IL calling convention is left to right.
+ if (tree->gtFlags & GTF_REVERSE_OPS)
+ {
+ // The atomic operations destroy this argument, so force it into a scratch register
+ reg = regSet.rsPickFreeReg();
+ genComputeReg(value, genRegMask(reg), RegSet::EXACT_REG, RegSet::KEEP_REG);
+
+ // Must evaluate location into a register
+ genCodeForTree(location, needReg, RBM_NONE);
+ assert(location->gtFlags & GTF_REG_VAL);
+ regSet.rsMarkRegUsed(location);
+ regSet.rsLockUsedReg(genRegMask(location->gtRegNum));
+ genRecoverReg(value, RBM_NONE, RegSet::KEEP_REG);
+ regSet.rsUnlockUsedReg(genRegMask(location->gtRegNum));
+
+ if (ins != INS_xchg)
+ {
+ // xchg implies the lock prefix, but xadd and add require it.
+ instGen(INS_lock);
+ }
+ instEmit_RM_RV(ins, EA_4BYTE, location, reg, 0);
+ genReleaseReg(value);
+ regTracker.rsTrackRegTrash(reg);
+ genReleaseReg(location);
+ }
+ else
+ {
+ regMaskTP addrReg;
+ if (genMakeIndAddrMode(location, tree, false, /* not for LEA */
+ needReg, RegSet::KEEP_REG, &addrReg))
+ {
+ genUpdateLife(location);
+
+ reg = regSet.rsPickFreeReg();
+ genComputeReg(value, genRegMask(reg), RegSet::EXACT_REG, RegSet::KEEP_REG);
+ addrReg = genKeepAddressable(location, addrReg, genRegMask(reg));
+
+ if (ins != INS_xchg)
+ {
+ // xchg implies the lock prefix, but xadd and add require it.
+ instGen(INS_lock);
+ }
+
+ // instEmit_RM_RV(ins, EA_4BYTE, location, reg, 0);
+ // inst_TT_RV(ins, location, reg);
+ sched_AM(ins, EA_4BYTE, reg, false, location, 0);
+
+ genReleaseReg(value);
+ regTracker.rsTrackRegTrash(reg);
+ genDoneAddressable(location, addrReg, RegSet::KEEP_REG);
+ }
+ else
+ {
+ // Must evaluate location into a register.
+ genCodeForTree(location, needReg, RBM_NONE);
+ assert(location->gtFlags & GTF_REG_VAL);
+ regSet.rsMarkRegUsed(location);
+
+ // xadd destroys this argument, so force it into a scratch register
+ reg = regSet.rsPickFreeReg();
+ genComputeReg(value, genRegMask(reg), RegSet::EXACT_REG, RegSet::KEEP_REG);
+ regSet.rsLockUsedReg(genRegMask(value->gtRegNum));
+ genRecoverReg(location, RBM_NONE, RegSet::KEEP_REG);
+ regSet.rsUnlockUsedReg(genRegMask(value->gtRegNum));
+
+ if (ins != INS_xchg)
+ {
+ // xchg implies the lock prefix, but xadd and add require it.
+ instGen(INS_lock);
+ }
+
+ instEmit_RM_RV(ins, EA_4BYTE, location, reg, 0);
+
+ genReleaseReg(value);
+ regTracker.rsTrackRegTrash(reg);
+ genReleaseReg(location);
+ }
+ }
+
+ // The flags are equal to the target of the tree (i.e. the result of the add), not to the
+ // result in the register. If tree is actually GT_IND->GT_ADDR->GT_LCL_VAR, we could use
+ // that information to set the flags. Doesn't seem like there is a good reason for that.
+ // Therefore, trash the flags.
+ genFlagsEqualToNone();
+
+ if (ins == INS_add)
+ {
+ // If the operator was add, then we were called from the GT_LOCKADD
+ // case. In that case we don't use the result, so we don't need to
+ // update anything.
+ genUpdateLife(tree);
+ }
+ else
+ {
+ genCodeForTree_DONE(tree, reg);
+ }
+ }
+ return;
+
+#else // !_TARGET_XARCH_
+
+ case GT_LOCKADD:
+ case GT_XADD:
+ case GT_XCHG:
+
+ NYI_ARM("LOCK instructions");
+#endif
+
+ case GT_ARR_LENGTH:
+ {
+ // Make the corresponding ind(a + c) node, and do codegen for that.
+ GenTreePtr addr = compiler->gtNewOperNode(GT_ADD, TYP_BYREF, tree->gtArrLen.ArrRef(),
+ compiler->gtNewIconNode(tree->AsArrLen()->ArrLenOffset()));
+ tree->SetOper(GT_IND);
+ tree->gtFlags |= GTF_IND_ARR_LEN; // Record that this node represents an array length expression.
+ assert(tree->TypeGet() == TYP_INT);
+ tree->gtOp.gtOp1 = addr;
+ genCodeForTree(tree, destReg, bestReg);
+ return;
+ }
+
+ case GT_OBJ:
+ // All GT_OBJ nodes must have been morphed prior to this.
+ noway_assert(!"Should not see a GT_OBJ node during CodeGen.");
+
+ default:
+#ifdef DEBUG
+ compiler->gtDispTree(tree);
+#endif
+ noway_assert(!"unexpected unary/binary operator");
+ } // end switch (oper)
+
+ unreached();
+}
+#ifdef _PREFAST_
+#pragma warning(pop) // End suppress PREFast warning about overly large function
+#endif
+
+regNumber CodeGen::genIntegerCast(GenTree* tree, regMaskTP needReg, regMaskTP bestReg)
+{
+ instruction ins;
+ emitAttr size;
+ bool unsv;
+ bool andv = false;
+ regNumber reg;
+ GenTreePtr op1 = tree->gtOp.gtOp1->gtEffectiveVal();
+ var_types dstType = tree->CastToType();
+ var_types srcType = op1->TypeGet();
+
+ if (genTypeSize(srcType) < genTypeSize(dstType))
+ {
+ // Widening cast
+
+ /* we need the source size */
+
+ size = EA_ATTR(genTypeSize(srcType));
+
+ noway_assert(size < EA_PTRSIZE);
+
+ unsv = varTypeIsUnsigned(srcType);
+ ins = ins_Move_Extend(srcType, op1->InReg());
+
+ /*
+ Special case: for a cast of byte to char we first
+ have to expand the byte (w/ sign extension), then
+ mask off the high bits.
+ Use 'movsx' followed by 'and'
+ */
+ if (!unsv && varTypeIsUnsigned(dstType) && genTypeSize(dstType) < EA_4BYTE)
+ {
+ noway_assert(genTypeSize(dstType) == EA_2BYTE && size == EA_1BYTE);
+ andv = true;
+ }
+ }
+ else
+ {
+ // Narrowing cast, or sign-changing cast
+
+ noway_assert(genTypeSize(srcType) >= genTypeSize(dstType));
+
+ size = EA_ATTR(genTypeSize(dstType));
+
+ unsv = varTypeIsUnsigned(dstType);
+ ins = ins_Move_Extend(dstType, op1->InReg());
+ }
+
+ noway_assert(size < EA_PTRSIZE);
+
+ // Set bestReg to the same register as op1 if op1 is a regVar and is available
+ if (op1->InReg())
+ {
+ regMaskTP op1RegMask = genRegMask(op1->gtRegNum);
+ if ((((op1RegMask & bestReg) != 0) || (bestReg == 0)) && ((op1RegMask & regSet.rsRegMaskFree()) != 0))
+ {
+ bestReg = op1RegMask;
+ }
+ }
+
+ /* Is the value sitting in a non-byte-addressable register? */
+
+ if (op1->InReg() && (size == EA_1BYTE) && !isByteReg(op1->gtRegNum))
+ {
+ if (unsv)
+ {
+ // for unsigned values we can AND, so it need not be a byte register
+
+ reg = regSet.rsPickReg(needReg, bestReg);
+
+ ins = INS_AND;
+ }
+ else
+ {
+ /* Move the value into a byte register */
+
+ reg = regSet.rsGrabReg(RBM_BYTE_REGS);
+ }
+
+ if (reg != op1->gtRegNum)
+ {
+ /* Move the value into that register */
+
+ regTracker.rsTrackRegCopy(reg, op1->gtRegNum);
+ inst_RV_RV(INS_mov, reg, op1->gtRegNum, srcType);
+
+ /* The value has a new home now */
+
+ op1->gtRegNum = reg;
+ }
+ }
+ else
+ {
+ /* Pick a register for the value (general case) */
+
+ reg = regSet.rsPickReg(needReg, bestReg);
+
+ // if we (might) need to set the flags and the value is in the same register
+ // and we have an unsigned value then use AND instead of MOVZX
+ if (tree->gtSetFlags() && unsv && op1->InReg() && (op1->gtRegNum == reg))
+ {
+#ifdef _TARGET_X86_
+ noway_assert(ins == INS_movzx);
+#endif
+ ins = INS_AND;
+ }
+ }
+
+ if (ins == INS_AND)
+ {
+ noway_assert(andv == false && unsv);
+
+ /* Generate "and reg, MASK */
+
+ insFlags flags = tree->gtSetFlags() ? INS_FLAGS_SET : INS_FLAGS_DONT_CARE;
+ inst_RV_IV(INS_AND, reg, (size == EA_1BYTE) ? 0xFF : 0xFFFF, EA_4BYTE, flags);
+
+ if (tree->gtSetFlags())
+ genFlagsEqualToReg(tree, reg);
+ }
+ else
+ {
+#ifdef _TARGET_XARCH_
+ noway_assert(ins == INS_movsx || ins == INS_movzx);
+#endif
+
+ /* Generate "movsx/movzx reg, [addr]" */
+
+ inst_RV_ST(ins, size, reg, op1);
+
+ /* Mask off high bits for cast from byte to char */
+
+ if (andv)
+ {
+#ifdef _TARGET_XARCH_
+ noway_assert(genTypeSize(dstType) == 2 && ins == INS_movsx);
+#endif
+ insFlags flags = tree->gtSetFlags() ? INS_FLAGS_SET : INS_FLAGS_DONT_CARE;
+ inst_RV_IV(INS_AND, reg, 0xFFFF, EA_4BYTE, flags);
+
+ if (tree->gtSetFlags())
+ genFlagsEqualToReg(tree, reg);
+ }
+ }
+
+ regTracker.rsTrackRegTrash(reg);
+ return reg;
+}
+
+void CodeGen::genCodeForNumericCast(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg)
+{
+ GenTreePtr op1 = tree->gtOp.gtOp1;
+ var_types dstType = tree->CastToType();
+ var_types baseType = TYP_INT;
+ regNumber reg = DUMMY_INIT(REG_CORRUPT);
+ regMaskTP needReg = destReg;
+ regMaskTP addrReg;
+ emitAttr size;
+ BOOL unsv;
+
+ /*
+ * Constant casts should have been folded earlier
+ * If not finite don't bother
+ * We don't do this optimization for debug code/no optimization
+ */
+
+ noway_assert((op1->gtOper != GT_CNS_INT && op1->gtOper != GT_CNS_LNG && op1->gtOper != GT_CNS_DBL) ||
+ tree->gtOverflow() || (op1->gtOper == GT_CNS_DBL && !_finite(op1->gtDblCon.gtDconVal)) ||
+ !compiler->opts.OptEnabled(CLFLG_CONSTANTFOLD));
+
+ noway_assert(dstType != TYP_VOID);
+
+ /* What type are we casting from? */
+
+ switch (op1->TypeGet())
+ {
+ case TYP_LONG:
+
+ /* Special case: the long is generated via the mod of long
+ with an int. This is really an int and need not be
+ converted to a reg pair. NOTE: the flag only indicates
+ that this is a cast to TYP_INT, it hasn't actually
+ verified the second operand of the MOD! */
+
+ if (((op1->gtOper == GT_MOD) || (op1->gtOper == GT_UMOD)) && (op1->gtFlags & GTF_MOD_INT_RESULT))
+ {
+
+ /* Verify that the op2 of the mod node is
+ 1) An integer tree, or
+ 2) A long constant that is small enough to fit in an integer
+ */
+
+ GenTreePtr modop2 = op1->gtOp.gtOp2;
+ if ((genActualType(modop2->gtType) == TYP_INT) ||
+ ((modop2->gtOper == GT_CNS_LNG) && (modop2->gtLngCon.gtLconVal == (int)modop2->gtLngCon.gtLconVal)))
+ {
+ genCodeForTree(op1, destReg, bestReg);
+
+#ifdef _TARGET_64BIT_
+ reg = op1->gtRegNum;
+#else // _TARGET_64BIT_
+ reg = genRegPairLo(op1->gtRegPair);
+#endif //_TARGET_64BIT_
+
+ genCodeForTree_DONE(tree, reg);
+ return;
+ }
+ }
+
+ /* Make the operand addressable. When gtOverflow() is true,
+ hold on to the addrReg as we will need it to access the higher dword */
+
+ op1 = genCodeForCommaTree(op1); // Strip off any commas (necessary, since we seem to generate code for op1
+ // twice!)
+ // See, e.g., the TYP_INT case below...
+
+ addrReg = genMakeAddressable2(op1, 0, tree->gtOverflow() ? RegSet::KEEP_REG : RegSet::FREE_REG, false);
+
+ /* Load the lower half of the value into some register */
+
+ if (op1->gtFlags & GTF_REG_VAL)
+ {
+ /* Can we simply use the low part of the value? */
+ reg = genRegPairLo(op1->gtRegPair);
+
+ if (tree->gtOverflow())
+ goto REG_OK;
+
+ regMaskTP loMask;
+ loMask = genRegMask(reg);
+ if (loMask & regSet.rsRegMaskFree())
+ bestReg = loMask;
+ }
+
+ // for cast overflow we need to preserve addrReg for testing the hiDword
+ // so we lock it to prevent regSet.rsPickReg from picking it.
+ if (tree->gtOverflow())
+ regSet.rsLockUsedReg(addrReg);
+
+ reg = regSet.rsPickReg(needReg, bestReg);
+
+ if (tree->gtOverflow())
+ regSet.rsUnlockUsedReg(addrReg);
+
+ noway_assert(genStillAddressable(op1));
+
+ REG_OK:
+ if (((op1->gtFlags & GTF_REG_VAL) == 0) || (reg != genRegPairLo(op1->gtRegPair)))
+ {
+ /* Generate "mov reg, [addr-mode]" */
+ inst_RV_TT(ins_Load(TYP_INT), reg, op1);
+ }
+
+ /* conv.ovf.i8i4, or conv.ovf.u8u4 */
+
+ if (tree->gtOverflow())
+ {
+ regNumber hiReg = (op1->gtFlags & GTF_REG_VAL) ? genRegPairHi(op1->gtRegPair) : REG_NA;
+
+ emitJumpKind jmpNotEqual = genJumpKindForOper(GT_NE, CK_SIGNED);
+ emitJumpKind jmpLTS = genJumpKindForOper(GT_LT, CK_SIGNED);
+
+ switch (dstType)
+ {
+ case TYP_INT:
+ // conv.ovf.i8.i4
+ /* Generate the following sequence
+
+ test loDWord, loDWord // set flags
+ jl neg
+ pos: test hiDWord, hiDWord // set flags
+ jne ovf
+ jmp done
+ neg: cmp hiDWord, 0xFFFFFFFF
+ jne ovf
+ done:
+
+ */
+
+ instGen_Compare_Reg_To_Zero(EA_4BYTE, reg);
+ if (tree->gtFlags & GTF_UNSIGNED) // conv.ovf.u8.i4 (i4 > 0 and upper bits 0)
+ {
+ genJumpToThrowHlpBlk(jmpLTS, SCK_OVERFLOW);
+ goto UPPER_BITS_ZERO;
+ }
+
+#if CPU_LOAD_STORE_ARCH
+ // This is tricky.
+ // We will generate code like
+ // if (...)
+ // {
+ // ...
+ // }
+ // else
+ // {
+ // ...
+ // }
+ // We load the tree op1 into registers when we generate code for the if clause.
+ // When we generate the else clause, we see the tree is already loaded into a register and start
+ // using it directly.
+ // However, at run time we may execute the else clause without going through the if clause.
+ //
+ genCodeForTree(op1, 0);
+#endif
+
+ BasicBlock* neg;
+ BasicBlock* done;
+
+ neg = genCreateTempLabel();
+ done = genCreateTempLabel();
+
+ // Is the loDWord positive or negative
+ inst_JMP(jmpLTS, neg);
+
+ // If loDWord is positive, hiDWord should be 0 (sign extended loDWord)
+
+ if (hiReg < REG_STK)
+ {
+ instGen_Compare_Reg_To_Zero(EA_4BYTE, hiReg);
+ }
+ else
+ {
+ inst_TT_IV(INS_cmp, op1, 0x00000000, 4);
+ }
+
+ genJumpToThrowHlpBlk(jmpNotEqual, SCK_OVERFLOW);
+ inst_JMP(EJ_jmp, done);
+
+ // If loDWord is negative, hiDWord should be -1 (sign extended loDWord)
+
+ genDefineTempLabel(neg);
+
+ if (hiReg < REG_STK)
+ {
+ inst_RV_IV(INS_cmp, hiReg, 0xFFFFFFFFL, EA_4BYTE);
+ }
+ else
+ {
+ inst_TT_IV(INS_cmp, op1, 0xFFFFFFFFL, 4);
+ }
+ genJumpToThrowHlpBlk(jmpNotEqual, SCK_OVERFLOW);
+
+ // Done
+
+ genDefineTempLabel(done);
+
+ break;
+
+ case TYP_UINT: // conv.ovf.u8u4
+ UPPER_BITS_ZERO:
+ // Just check that the upper DWord is 0
+
+ if (hiReg < REG_STK)
+ {
+ instGen_Compare_Reg_To_Zero(EA_4BYTE, hiReg); // set flags
+ }
+ else
+ {
+ inst_TT_IV(INS_cmp, op1, 0, 4);
+ }
+
+ genJumpToThrowHlpBlk(jmpNotEqual, SCK_OVERFLOW);
+ break;
+
+ default:
+ noway_assert(!"Unexpected dstType");
+ break;
+ }
+
+ genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);
+ }
+
+ regTracker.rsTrackRegTrash(reg);
+ genDoneAddressable(op1, addrReg, RegSet::FREE_REG);
+
+ genCodeForTree_DONE(tree, reg);
+ return;
+
+ case TYP_BOOL:
+ case TYP_BYTE:
+ case TYP_SHORT:
+ case TYP_CHAR:
+ case TYP_UBYTE:
+ break;
+
+ case TYP_UINT:
+ case TYP_INT:
+ break;
+
+#if FEATURE_STACK_FP_X87
+ case TYP_FLOAT:
+ NO_WAY("OPCAST from TYP_FLOAT should have been converted into a helper call");
+ break;
+
+ case TYP_DOUBLE:
+ if (compiler->opts.compCanUseSSE2)
+ {
+ // do the SSE2 based cast inline
+ // getting the fp operand
+
+ regMaskTP addrRegInt = 0;
+ regMaskTP addrRegFlt = 0;
+
+ // make the operand addressable
+ // We don't want to collapse constant doubles into floats, as the SSE2 instruction
+ // operates on doubles. Note that these (casts from constant doubles) usually get
+ // folded, but we don't do it for some cases (infinitys, etc). So essentially this
+ // shouldn't affect performance or size at all. We're fixing this for #336067
+ op1 = genMakeAddressableStackFP(op1, &addrRegInt, &addrRegFlt, false);
+ if (!addrRegFlt && !op1->IsRegVar())
+ {
+ // we have the address
+
+ inst_RV_TT(INS_movsdsse2, REG_XMM0, op1, 0, EA_8BYTE);
+ genDoneAddressableStackFP(op1, addrRegInt, addrRegFlt, RegSet::KEEP_REG);
+ genUpdateLife(op1);
+
+ reg = regSet.rsPickReg(needReg);
+ getEmitter()->emitIns_R_R(INS_cvttsd2si, EA_8BYTE, reg, REG_XMM0);
+
+ regTracker.rsTrackRegTrash(reg);
+ genCodeForTree_DONE(tree, reg);
+ }
+ else
+ {
+ // we will need to use a temp to get it into the xmm reg
+ var_types typeTemp = op1->TypeGet();
+ TempDsc* temp = compiler->tmpGetTemp(typeTemp);
+
+ size = EA_ATTR(genTypeSize(typeTemp));
+
+ if (addrRegFlt)
+ {
+ // On the fp stack; Take reg to top of stack
+
+ FlatFPX87_MoveToTOS(&compCurFPState, op1->gtRegNum);
+ }
+ else
+ {
+ // op1->IsRegVar()
+ // pick a register
+ reg = regSet.PickRegFloat();
+ if (!op1->IsRegVarDeath())
+ {
+ // Load it on the fp stack
+ genLoadStackFP(op1, reg);
+ }
+ else
+ {
+ // if it's dying, genLoadStackFP just renames it and then we move reg to TOS
+ genLoadStackFP(op1, reg);
+ FlatFPX87_MoveToTOS(&compCurFPState, reg);
+ }
+ }
+
+ // pop it off the fp stack
+ compCurFPState.Pop();
+
+ getEmitter()->emitIns_S(INS_fstp, size, temp->tdTempNum(), 0);
+ // pick a reg
+ reg = regSet.rsPickReg(needReg);
+
+ inst_RV_ST(INS_movsdsse2, REG_XMM0, temp, 0, TYP_DOUBLE, EA_8BYTE);
+ getEmitter()->emitIns_R_R(INS_cvttsd2si, EA_8BYTE, reg, REG_XMM0);
+
+ // done..release the temp
+ compiler->tmpRlsTemp(temp);
+
+ // the reg is now trashed
+ regTracker.rsTrackRegTrash(reg);
+ genDoneAddressableStackFP(op1, addrRegInt, addrRegFlt, RegSet::KEEP_REG);
+ genUpdateLife(op1);
+ genCodeForTree_DONE(tree, reg);
+ }
+ }
+#else
+ case TYP_FLOAT:
+ case TYP_DOUBLE:
+ genCodeForTreeFloat(tree, needReg, bestReg);
+#endif // FEATURE_STACK_FP_X87
+ return;
+
+ default:
+ noway_assert(!"unexpected cast type");
+ }
+
+ if (tree->gtOverflow())
+ {
+ /* Compute op1 into a register, and free the register */
+
+ genComputeReg(op1, destReg, RegSet::ANY_REG, RegSet::FREE_REG);
+ reg = op1->gtRegNum;
+
+ /* Do we need to compare the value, or just check masks */
+
+ ssize_t typeMin = DUMMY_INIT(~0), typeMax = DUMMY_INIT(0);
+ ssize_t typeMask;
+
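+ // typeMask holds the bits that must all be zero when only a mask test is needed; typeMin/typeMax
+ // bound the value when a signed range check is required.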
+ switch (dstType)
+ {
+ case TYP_BYTE:
+ typeMask = ssize_t((int)0xFFFFFF80);
+ typeMin = SCHAR_MIN;
+ typeMax = SCHAR_MAX;
+ unsv = (tree->gtFlags & GTF_UNSIGNED);
+ break;
+ case TYP_SHORT:
+ typeMask = ssize_t((int)0xFFFF8000);
+ typeMin = SHRT_MIN;
+ typeMax = SHRT_MAX;
+ unsv = (tree->gtFlags & GTF_UNSIGNED);
+ break;
+ case TYP_INT:
+ typeMask = ssize_t((int)0x80000000L);
+#ifdef _TARGET_64BIT_
+ unsv = (tree->gtFlags & GTF_UNSIGNED);
+ typeMin = INT_MIN;
+ typeMax = INT_MAX;
+#else // _TARGET_64BIT_
+ noway_assert((tree->gtFlags & GTF_UNSIGNED) != 0);
+ unsv = true;
+#endif // _TARGET_64BIT_
+ break;
+ case TYP_UBYTE:
+ unsv = true;
+ typeMask = ssize_t((int)0xFFFFFF00L);
+ break;
+ case TYP_CHAR:
+ unsv = true;
+ typeMask = ssize_t((int)0xFFFF0000L);
+ break;
+ case TYP_UINT:
+ unsv = true;
+#ifdef _TARGET_64BIT_
+ typeMask = 0xFFFFFFFF00000000LL;
+#else // _TARGET_64BIT_
+ typeMask = 0x80000000L;
+ noway_assert((tree->gtFlags & GTF_UNSIGNED) == 0);
+#endif // _TARGET_64BIT_
+ break;
+ default:
+ NO_WAY("Unknown type");
+ return;
+ }
+
+ // If we just have to check a mask.
+ // This must be conv.ovf.u4u1, conv.ovf.u4u2, conv.ovf.u4i4,
+ // or conv.i4u4
+
+ if (unsv)
+ {
+ inst_RV_IV(INS_TEST, reg, typeMask, emitActualTypeSize(baseType));
+ emitJumpKind jmpNotEqual = genJumpKindForOper(GT_NE, CK_SIGNED);
+ genJumpToThrowHlpBlk(jmpNotEqual, SCK_OVERFLOW);
+ }
+ else
+ {
+ // Check the value is in range.
+ // This must be conv.ovf.i4i1, etc.
+
+ // Compare with the MAX
+
+ noway_assert(typeMin != DUMMY_INIT(~0) && typeMax != DUMMY_INIT(0));
+
+ inst_RV_IV(INS_cmp, reg, typeMax, emitActualTypeSize(baseType));
+ emitJumpKind jmpGTS = genJumpKindForOper(GT_GT, CK_SIGNED);
+ genJumpToThrowHlpBlk(jmpGTS, SCK_OVERFLOW);
+
+ // Compare with the MIN
+
+ inst_RV_IV(INS_cmp, reg, typeMin, emitActualTypeSize(baseType));
+ emitJumpKind jmpLTS = genJumpKindForOper(GT_LT, CK_SIGNED);
+ genJumpToThrowHlpBlk(jmpLTS, SCK_OVERFLOW);
+ }
+
+ genCodeForTree_DONE(tree, reg);
+ return;
+ }
+
+ /* Make the operand addressable */
+
+ addrReg = genMakeAddressable(op1, needReg, RegSet::FREE_REG, true);
+
+ reg = genIntegerCast(tree, needReg, bestReg);
+
+ genDoneAddressable(op1, addrReg, RegSet::FREE_REG);
+
+ genCodeForTree_DONE(tree, reg);
+}
+
+/*****************************************************************************
+ *
+ * Generate code for a leaf node of type GT_ADDR
+ */
+
+void CodeGen::genCodeForTreeSmpOp_GT_ADDR(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg)
+{
+ genTreeOps oper = tree->OperGet();
+ const var_types treeType = tree->TypeGet();
+ GenTreePtr op1;
+ regNumber reg;
+ regMaskTP needReg = destReg;
+ regMaskTP addrReg;
+
+#ifdef DEBUG
+ reg = (regNumber)0xFEEFFAAF; // to detect uninitialized use
+ addrReg = 0xDEADCAFE;
+#endif
+
+    // We should get here for ldloca, ldarga, ldsflda, ldelema,
+ // or ldflda.
+ if (oper == GT_ARR_ELEM)
+ {
+ op1 = tree;
+ }
+ else
+ {
+ op1 = tree->gtOp.gtOp1;
+ }
+
+ // (tree=op1, needReg=0, keepReg=RegSet::FREE_REG, smallOK=true)
+ if (oper == GT_ARR_ELEM)
+ {
+ // To get the address of the array element,
+ // we first call genMakeAddrArrElem to make the element addressable.
+ // (That is, for example, we first emit code to calculate EBX, and EAX.)
+ // And then use lea to obtain the address.
+ // (That is, for example, we then emit
+ // lea EBX, bword ptr [EBX+4*EAX+36]
+ // to obtain the address of the array element.)
+ addrReg = genMakeAddrArrElem(op1, tree, RBM_NONE, RegSet::FREE_REG);
+ }
+ else
+ {
+ addrReg = genMakeAddressable(op1, 0, RegSet::FREE_REG, true);
+ }
+
+ noway_assert(treeType == TYP_BYREF || treeType == TYP_I_IMPL);
+
+ // We want to reuse one of the scratch registers that were used
+ // in forming the address mode as the target register for the lea.
+ // If bestReg is unset or if it is set to one of the registers used to
+ // form the address (i.e. addrReg), we calculate the scratch register
+ // to use as the target register for the LEA
+
+ bestReg = regSet.rsUseIfZero(bestReg, addrReg);
+ bestReg = regSet.rsNarrowHint(bestReg, addrReg);
+
+ /* Even if addrReg is regSet.rsRegMaskCanGrab(), regSet.rsPickReg() won't spill
+ it since keepReg==false.
+ If addrReg can't be grabbed, regSet.rsPickReg() won't touch it anyway.
+ So this is guaranteed not to spill addrReg */
+
+ reg = regSet.rsPickReg(needReg, bestReg);
+
+    // Slight workaround: force the inst routine to think that the
+    // value being loaded is an int (since that is what LEA will
+    // return); otherwise it would try to allocate two registers
+    // for a long, etc.
+ noway_assert(treeType == TYP_I_IMPL || treeType == TYP_BYREF);
+ op1->gtType = treeType;
+
+ inst_RV_TT(INS_lea, reg, op1, 0, (treeType == TYP_BYREF) ? EA_BYREF : EA_PTRSIZE);
+
+    // The LEA instruction above had better not have tried to put the
+    // value pointed to by 'op1' in a register; LEA needs a memory operand and will not work.
+ noway_assert(!(op1->gtFlags & GTF_REG_VAL));
+
+ genDoneAddressable(op1, addrReg, RegSet::FREE_REG);
+ // gcInfo.gcMarkRegSetNpt(genRegMask(reg));
+ noway_assert((gcInfo.gcRegGCrefSetCur & genRegMask(reg)) == 0);
+
+ regTracker.rsTrackRegTrash(reg); // reg does have foldable value in it
+ gcInfo.gcMarkRegPtrVal(reg, treeType);
+
+ genCodeForTree_DONE(tree, reg);
+}
+
+#ifdef _TARGET_ARM_
+
+/*****************************************************************************
+ *
+ * Move (load/store) between float ret regs and struct promoted variable.
+ *
+ * varDsc - The struct variable to be loaded from or stored into.
+ * isLoadIntoFlt - Perform a load operation if "true" or store if "false."
+ *
+ */
+void CodeGen::genLdStFltRetRegsPromotedVar(LclVarDsc* varDsc, bool isLoadIntoFlt)
+{
+ regNumber curReg = REG_FLOATRET;
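+    // The HFA lives in consecutive float registers starting at REG_FLOATRET (s0), with a double
+    // field taking two slots; curReg walks through those registers as we visit the promoted fields.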
+
+ unsigned lclLast = varDsc->lvFieldLclStart + varDsc->lvFieldCnt - 1;
+ for (unsigned lclNum = varDsc->lvFieldLclStart; lclNum <= lclLast; ++lclNum)
+ {
+ LclVarDsc* varDscFld = &compiler->lvaTable[lclNum];
+
+ // Is the struct field promoted and sitting in a register?
+ if (varDscFld->lvRegister)
+ {
+ // Move from the struct field into curReg if load
+ // else move into struct field from curReg if store
+ regNumber srcReg = (isLoadIntoFlt) ? varDscFld->lvRegNum : curReg;
+ regNumber dstReg = (isLoadIntoFlt) ? curReg : varDscFld->lvRegNum;
+ if (srcReg != dstReg)
+ {
+ inst_RV_RV(ins_Copy(varDscFld->TypeGet()), dstReg, srcReg, varDscFld->TypeGet());
+ regTracker.rsTrackRegCopy(dstReg, srcReg);
+ }
+ }
+ else
+ {
+ // This field is in memory, do a move between the field and float registers.
+ emitAttr size = (varDscFld->TypeGet() == TYP_DOUBLE) ? EA_8BYTE : EA_4BYTE;
+ if (isLoadIntoFlt)
+ {
+ getEmitter()->emitIns_R_S(ins_Load(varDscFld->TypeGet()), size, curReg, lclNum, 0);
+ regTracker.rsTrackRegTrash(curReg);
+ }
+ else
+ {
+ getEmitter()->emitIns_S_R(ins_Store(varDscFld->TypeGet()), size, curReg, lclNum, 0);
+ }
+ }
+
+ // Advance the current reg.
+ curReg = (varDscFld->TypeGet() == TYP_DOUBLE) ? REG_NEXT(REG_NEXT(curReg)) : REG_NEXT(curReg);
+ }
+}
+
+void CodeGen::genLoadIntoFltRetRegs(GenTreePtr tree)
+{
+ assert(tree->TypeGet() == TYP_STRUCT);
+ assert(tree->gtOper == GT_LCL_VAR);
+ LclVarDsc* varDsc = compiler->lvaTable + tree->gtLclVarCommon.gtLclNum;
+ int slots = varDsc->lvSize() / REGSIZE_BYTES;
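+    // The HFA return value occupies 'slots' consecutive float return registers starting at s0.
+    // For one or two slots a single float/double load into REG_FLOATRET suffices; otherwise we
+    // form the address of the local and use vldm to fill the registers.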
+ if (varDsc->lvPromoted)
+ {
+ genLdStFltRetRegsPromotedVar(varDsc, true);
+ }
+ else
+ {
+ if (slots <= 2)
+ {
+ // Use the load float/double instruction.
+ inst_RV_TT(ins_Load((slots == 1) ? TYP_FLOAT : TYP_DOUBLE), REG_FLOATRET, tree, 0,
+ (slots == 1) ? EA_4BYTE : EA_8BYTE);
+ }
+ else
+ {
+            // Use the load-multiple (vldm) instruction.
+ regNumber reg = regSet.rsPickReg(RBM_ALLINT);
+ inst_RV_TT(INS_lea, reg, tree, 0, EA_PTRSIZE);
+ regTracker.rsTrackRegTrash(reg);
+ getEmitter()->emitIns_R_R_I(INS_vldm, EA_4BYTE, REG_FLOATRET, reg, slots * REGSIZE_BYTES);
+ }
+ }
+ genMarkTreeInReg(tree, REG_FLOATRET);
+}
+
+void CodeGen::genStoreFromFltRetRegs(GenTreePtr tree)
+{
+ assert(tree->TypeGet() == TYP_STRUCT);
+ assert(tree->OperGet() == GT_ASG);
+
+ // LHS should be lcl var or fld.
+ GenTreePtr op1 = tree->gtOp.gtOp1;
+
+ // TODO: We had a bug where op1 was a GT_IND, the result of morphing a GT_BOX, and not properly
+ // handling multiple levels of inlined functions that return HFA on the right-hand-side.
+ // So, make the op1 check a noway_assert (that exists in non-debug builds) so we'll fall
+ // back to MinOpts with no inlining, if we don't have what we expect. We don't want to
+ // do the full IsHfa() check in non-debug, since that involves VM calls, so leave that
+ // as a regular assert().
+ noway_assert((op1->gtOper == GT_LCL_VAR) || (op1->gtOper == GT_LCL_FLD));
+ unsigned varNum = op1->gtLclVarCommon.gtLclNum;
+ assert(compiler->IsHfa(compiler->lvaGetStruct(varNum)));
+
+ // The RHS should be a call.
+ GenTreePtr op2 = tree->gtOp.gtOp2;
+ assert(op2->gtOper == GT_CALL);
+
+ // Generate code for call and copy the return registers into the local.
+ regMaskTP retMask = genCodeForCall(op2, true);
+
+    // The return mask should be contiguously set from s0 up to s3, or from d0 up to d3.
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef DEBUG
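+    // Shifting retMask down by REG_FLOATRET aligns the return-register bits at bit 0; a contiguous
+    // run of n set bits starting at bit 0 is 2^n - 1, so adding 1 must yield a power of two if the
+    // return registers are contiguous starting at s0/d0.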
+ regMaskTP mask = ((retMask >> REG_FLOATRET) + 1);
+ assert((mask & (mask - 1)) == 0);
+ assert(mask <= (1 << MAX_HFA_RET_SLOTS));
+ assert((retMask & (((regMaskTP)RBM_FLOATRET) - 1)) == 0);
+#endif
+
+ int slots = genCountBits(retMask & RBM_ALLFLOAT);
+
+ LclVarDsc* varDsc = &compiler->lvaTable[varNum];
+
+ if (varDsc->lvPromoted)
+ {
+ genLdStFltRetRegsPromotedVar(varDsc, false);
+ }
+ else
+ {
+ if (slots <= 2)
+ {
+ inst_TT_RV(ins_Store((slots == 1) ? TYP_FLOAT : TYP_DOUBLE), op1, REG_FLOATRET, 0,
+ (slots == 1) ? EA_4BYTE : EA_8BYTE);
+ }
+ else
+ {
+ regNumber reg = regSet.rsPickReg(RBM_ALLINT);
+ inst_RV_TT(INS_lea, reg, op1, 0, EA_PTRSIZE);
+ regTracker.rsTrackRegTrash(reg);
+ getEmitter()->emitIns_R_R_I(INS_vstm, EA_4BYTE, REG_FLOATRET, reg, slots * REGSIZE_BYTES);
+ }
+ }
+}
+
+#endif // _TARGET_ARM_
+
+/*****************************************************************************
+ *
+ * Generate code for a GT_ASG tree
+ */
+
+#ifdef _PREFAST_
+#pragma warning(push)
+#pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
+#endif
+void CodeGen::genCodeForTreeSmpOpAsg(GenTreePtr tree)
+{
+ noway_assert(tree->gtOper == GT_ASG);
+
+ GenTreePtr op1 = tree->gtOp.gtOp1;
+ GenTreePtr op2 = tree->gtOp.gtOp2;
+ regMaskTP needReg = RBM_ALLINT;
+ regMaskTP bestReg = RBM_CORRUPT;
+ regMaskTP addrReg = DUMMY_INIT(RBM_CORRUPT);
+ bool ovfl = false; // Do we need an overflow check
+ bool volat = false; // Is this a volatile store
+ regMaskTP regGC;
+ instruction ins;
+#ifdef DEBUGGING_SUPPORT
+ unsigned lclVarNum = compiler->lvaCount;
+ unsigned lclILoffs = DUMMY_INIT(0);
+#endif
+
+#ifdef _TARGET_ARM_
+ if (tree->gtType == TYP_STRUCT)
+ {
+ // We use copy block to assign structs, however to receive HFAs in registers
+ // from a CALL, we use assignment, var = (hfa) call();
+ assert(compiler->IsHfa(tree));
+ genStoreFromFltRetRegs(tree);
+ return;
+ }
+#endif
+
+#ifdef DEBUG
+ if (varTypeIsFloating(op1) != varTypeIsFloating(op2))
+ {
+ if (varTypeIsFloating(op1))
+ assert(!"Bad IL: Illegal assignment of integer into float!");
+ else
+ assert(!"Bad IL: Illegal assignment of float into integer!");
+ }
+#endif
+
+ if ((tree->gtFlags & GTF_REVERSE_OPS) == 0)
+ {
+ op1 = genCodeForCommaTree(op1); // Strip away any comma expressions.
+ }
+
+ /* Is the target a register or local variable? */
+ switch (op1->gtOper)
+ {
+ unsigned varNum;
+ LclVarDsc* varDsc;
+
+ case GT_LCL_VAR:
+ varNum = op1->gtLclVarCommon.gtLclNum;
+ noway_assert(varNum < compiler->lvaCount);
+ varDsc = compiler->lvaTable + varNum;
+
+#ifdef DEBUGGING_SUPPORT
+ /* For non-debuggable code, every definition of a lcl-var has
+ * to be checked to see if we need to open a new scope for it.
+ * Remember the local var info to call siCheckVarScope
+ * AFTER code generation of the assignment.
+ */
+ if (compiler->opts.compScopeInfo && !compiler->opts.compDbgCode && (compiler->info.compVarScopesCount > 0))
+ {
+ lclVarNum = varNum;
+ lclILoffs = op1->gtLclVar.gtLclILoffs;
+ }
+#endif
+
+ /* Check against dead store ? (with min opts we may have dead stores) */
+
+ noway_assert(!varDsc->lvTracked || compiler->opts.MinOpts() || !(op1->gtFlags & GTF_VAR_DEATH));
+
+ /* Does this variable live in a register? */
+
+ if (genMarkLclVar(op1))
+ goto REG_VAR2;
+
+ break;
+
+ REG_VAR2:
+
+ /* Get hold of the target register */
+
+ regNumber op1Reg;
+
+ op1Reg = op1->gtRegVar.gtRegNum;
+
+#ifdef DEBUG
+ /* Compute the RHS (hopefully) into the variable's register.
+ For debuggable code, op1Reg may already be part of regSet.rsMaskVars,
+ as variables are kept alive everywhere. So we have to be
+ careful if we want to compute the value directly into
+ the variable's register. */
+
+ bool needToUpdateRegSetCheckLevel;
+ needToUpdateRegSetCheckLevel = false;
+#endif
+
+ // We should only be accessing lvVarIndex if varDsc is tracked.
+ assert(varDsc->lvTracked);
+
+ if (VarSetOps::IsMember(compiler, genUpdateLiveSetForward(op2), varDsc->lvVarIndex))
+ {
+ noway_assert(compiler->opts.compDbgCode);
+
+ /* The predictor might expect us to generate op2 directly
+ into the var's register. However, since the variable is
+ already alive, first kill it and its register. */
+
+ if (rpCanAsgOperWithoutReg(op2, true))
+ {
+ genUpdateLife(VarSetOps::RemoveElem(compiler, compiler->compCurLife, varDsc->lvVarIndex));
+ needReg = regSet.rsNarrowHint(needReg, genRegMask(op1Reg));
+#ifdef DEBUG
+ needToUpdateRegSetCheckLevel = true;
+#endif
+ }
+ }
+ else
+ {
+ needReg = regSet.rsNarrowHint(needReg, genRegMask(op1Reg));
+ }
+
+#ifdef DEBUG
+
+ /* Special cases: op2 is a GT_CNS_INT */
+
+ if (op2->gtOper == GT_CNS_INT && !(op1->gtFlags & GTF_VAR_DEATH))
+ {
+ /* Save the old life status */
+
+ VarSetOps::Assign(compiler, genTempOldLife, compiler->compCurLife);
+ VarSetOps::AddElemD(compiler, compiler->compCurLife, varDsc->lvVarIndex);
+
+ /* Set a flag to avoid printing the message
+ and remember that life was changed. */
+
+ genTempLiveChg = false;
+ }
+#endif
+
+#ifdef DEBUG
+ if (needToUpdateRegSetCheckLevel)
+ compiler->compRegSetCheckLevel++;
+#endif
+ genCodeForTree(op2, needReg, genRegMask(op1Reg));
+#ifdef DEBUG
+ if (needToUpdateRegSetCheckLevel)
+ compiler->compRegSetCheckLevel--;
+ noway_assert(compiler->compRegSetCheckLevel >= 0);
+#endif
+ noway_assert(op2->gtFlags & GTF_REG_VAL);
+
+ /* Make sure the value ends up in the right place ... */
+
+ if (op2->gtRegNum != op1Reg)
+ {
+ /* Make sure the target of the store is available */
+
+ if (regSet.rsMaskUsed & genRegMask(op1Reg))
+ regSet.rsSpillReg(op1Reg);
+
+#ifdef _TARGET_ARM_
+ if (op1->TypeGet() == TYP_FLOAT)
+ {
+ // This can only occur when we are returning a non-HFA struct
+ // that is composed of a single float field.
+ //
+ inst_RV_RV(INS_vmov_i2f, op1Reg, op2->gtRegNum, op1->TypeGet());
+ }
+ else
+#endif // _TARGET_ARM_
+ {
+ inst_RV_RV(INS_mov, op1Reg, op2->gtRegNum, op1->TypeGet());
+ }
+
+ /* The value has been transferred to 'op1Reg' */
+
+ regTracker.rsTrackRegCopy(op1Reg, op2->gtRegNum);
+
+ if ((genRegMask(op2->gtRegNum) & regSet.rsMaskUsed) == 0)
+ gcInfo.gcMarkRegSetNpt(genRegMask(op2->gtRegNum));
+
+ gcInfo.gcMarkRegPtrVal(op1Reg, tree->TypeGet());
+ }
+ else
+ {
+ // First we need to remove it from the original reg set mask (or else trigger an
+ // assert when we add it to the other reg set mask).
+ gcInfo.gcMarkRegSetNpt(genRegMask(op1Reg));
+ gcInfo.gcMarkRegPtrVal(op1Reg, tree->TypeGet());
+
+ // The emitter has logic that tracks the GCness of registers and asserts if you
+ // try to do bad things to a GC pointer (like lose its GCness).
+
+                // An explicit cast of a GC pointer to an int (which is legal if the
+                // pointer is pinned) is encoded as an assignment of a GC source
+                // to an integer variable. Unfortunately if the source was the last
+ // use, and the source register gets reused by the destination, no
+ // code gets emitted (That is where we are at right now). The emitter
+ // thinks the register is a GC pointer (it did not see the cast).
+ // This causes asserts, as well as bad GC info since we will continue
+ // to report the register as a GC pointer even if we do arithmetic
+ // with it. So force the emitter to see the change in the type
+ // of variable by placing a label.
+ // We only have to do this check at this point because in the
+ // CAST morphing, we create a temp and assignment whenever we
+ // have a cast that loses its GCness.
+
+ if (varTypeGCtype(op2->TypeGet()) != varTypeGCtype(op1->TypeGet()))
+ {
+ void* label = getEmitter()->emitAddLabel(gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur,
+ gcInfo.gcRegByrefSetCur);
+ }
+ }
+
+ addrReg = 0;
+
+ genCodeForTreeSmpOpAsg_DONE_ASSG(tree, addrReg, op1Reg, ovfl);
+ goto LExit;
+
+ case GT_LCL_FLD:
+
+ // We only use GT_LCL_FLD for lvDoNotEnregister vars, so we don't have
+ // to worry about it being enregistered.
+ noway_assert(compiler->lvaTable[op1->gtLclFld.gtLclNum].lvRegister == 0);
+ break;
+
+ case GT_CLS_VAR:
+
+ __fallthrough;
+
+ case GT_IND:
+ case GT_NULLCHECK:
+
+ assert((op1->OperGet() == GT_CLS_VAR) || (op1->OperGet() == GT_IND));
+
+ if (op1->gtFlags & GTF_IND_VOLATILE)
+ {
+ volat = true;
+ }
+
+ break;
+
+ default:
+ break;
+ }
+
+ /* Is the value being assigned a simple one? */
+
+ noway_assert(op2);
+ switch (op2->gtOper)
+ {
+ case GT_LCL_VAR:
+
+ if (!genMarkLclVar(op2))
+ goto SMALL_ASG;
+
+ __fallthrough;
+
+ case GT_REG_VAR:
+
+ /* Is the target a byte/short/char value? */
+
+ if (varTypeIsSmall(op1->TypeGet()))
+ goto SMALL_ASG;
+
+ if (tree->gtFlags & GTF_REVERSE_OPS)
+ goto SMALL_ASG;
+
+ /* Make the target addressable */
+
+ op1 = genCodeForCommaTree(op1); // Strip away comma expressions.
+
+ addrReg = genMakeAddressable(op1, needReg, RegSet::KEEP_REG, true);
+
+ /* Does the write barrier helper do the assignment? */
+
+ regGC = WriteBarrier(op1, op2, addrReg);
+
+ // Was assignment done by the WriteBarrier
+ if (regGC == RBM_NONE)
+ {
+#ifdef _TARGET_ARM_
+ if (volat)
+ {
+ // Emit a memory barrier instruction before the store
+ instGen_MemoryBarrier();
+ }
+#endif
+
+ /* Move the value into the target */
+
+ inst_TT_RV(ins_Store(op1->TypeGet()), op1, op2->gtRegVar.gtRegNum);
+
+ // This is done in WriteBarrier when (regGC != RBM_NONE)
+
+ /* Free up anything that was tied up by the LHS */
+ genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);
+ }
+
+ /* Free up the RHS */
+ genUpdateLife(op2);
+
+ /* Remember that we've also touched the op2 register */
+
+ addrReg |= genRegMask(op2->gtRegVar.gtRegNum);
+ break;
+
+ case GT_CNS_INT:
+
+ ssize_t ival;
+ ival = op2->gtIntCon.gtIconVal;
+ emitAttr size;
+ size = emitTypeSize(tree->TypeGet());
+
+ ins = ins_Store(op1->TypeGet());
+
+ // If we are storing a constant into a local variable
+ // we extend the size of the store here
+ // this normally takes place in CodeGen::inst_TT_IV on x86.
+ //
+ if ((op1->gtOper == GT_LCL_VAR) && (size < EA_4BYTE))
+ {
+ unsigned varNum = op1->gtLclVarCommon.gtLclNum;
+ LclVarDsc* varDsc = compiler->lvaTable + varNum;
+
+ // Fix the immediate by sign extending if needed
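+                // For example, a TYP_BYTE constant 0x80 becomes 0xFFFFFF80 (-128), and a
+                // TYP_SHORT constant 0x8000 becomes 0xFFFF8000 (-32768).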
+ if (!varTypeIsUnsigned(varDsc->TypeGet()))
+ {
+ if (size == EA_1BYTE)
+ {
+ if ((ival & 0x7f) != ival)
+ ival = ival | 0xffffff00;
+ }
+ else
+ {
+ assert(size == EA_2BYTE);
+ if ((ival & 0x7fff) != ival)
+ ival = ival | 0xffff0000;
+ }
+ }
+
+ // A local stack slot is at least 4 bytes in size, regardless of
+ // what the local var is typed as, so auto-promote it here
+ // unless it is a field of a promoted struct
+ if (!varDsc->lvIsStructField)
+ {
+ size = EA_SET_SIZE(size, EA_4BYTE);
+ ins = ins_Store(TYP_INT);
+ }
+ }
+
+ /* Make the target addressable */
+
+ addrReg = genMakeAddressable(op1, needReg, RegSet::KEEP_REG, true);
+
+#ifdef _TARGET_ARM_
+ if (volat)
+ {
+ // Emit a memory barrier instruction before the store
+ instGen_MemoryBarrier();
+ }
+#endif
+
+ /* Move the value into the target */
+
+ noway_assert(op1->gtOper != GT_REG_VAR);
+ if (compiler->opts.compReloc && op2->IsIconHandle())
+ {
+ /* The constant is actually a handle that may need relocation
+ applied to it. genComputeReg will do the right thing (see
+ code in genCodeForTreeConst), so we'll just call it to load
+ the constant into a register. */
+
+ genComputeReg(op2, needReg & ~addrReg, RegSet::ANY_REG, RegSet::KEEP_REG);
+ addrReg = genKeepAddressable(op1, addrReg, genRegMask(op2->gtRegNum));
+ noway_assert(op2->gtFlags & GTF_REG_VAL);
+ inst_TT_RV(ins, op1, op2->gtRegNum);
+ genReleaseReg(op2);
+ }
+ else
+ {
+ regSet.rsLockUsedReg(addrReg);
+
+#if REDUNDANT_LOAD
+ bool copyIconFromReg = true;
+ regNumber iconReg = REG_NA;
+
+#ifdef _TARGET_ARM_
+ // Only if the constant can't be encoded in a small instruction,
+ // look for another register to copy the value from. (Assumes
+ // target is a small register.)
+ if ((op1->gtFlags & GTF_REG_VAL) && !isRegPairType(tree->gtType) &&
+ arm_Valid_Imm_For_Small_Mov(op1->gtRegNum, ival, INS_FLAGS_DONT_CARE))
+ {
+ copyIconFromReg = false;
+ }
+#endif // _TARGET_ARM_
+
+ if (copyIconFromReg)
+ {
+ iconReg = regTracker.rsIconIsInReg(ival);
+ if (iconReg == REG_NA)
+ copyIconFromReg = false;
+ }
+
+ if (copyIconFromReg && (isByteReg(iconReg) || (genTypeSize(tree->TypeGet()) == EA_PTRSIZE) ||
+ (genTypeSize(tree->TypeGet()) == EA_4BYTE)))
+ {
+ /* Move the value into the target */
+
+ inst_TT_RV(ins, op1, iconReg, 0, size);
+ }
+ else
+#endif // REDUNDANT_LOAD
+ {
+ inst_TT_IV(ins, op1, ival, 0, size);
+ }
+
+ regSet.rsUnlockUsedReg(addrReg);
+ }
+
+ /* Free up anything that was tied up by the LHS */
+
+ genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);
+ break;
+
+ default:
+
+ SMALL_ASG:
+
+ bool isWriteBarrier = false;
+ regMaskTP needRegOp1 = RBM_ALLINT;
+ RegSet::ExactReg mustReg = RegSet::ANY_REG; // set to RegSet::EXACT_REG for op1 and NOGC helpers
+
+ /* Is the LHS more complex than the RHS? */
+
+ if (tree->gtFlags & GTF_REVERSE_OPS)
+ {
+ /* Is the target a byte/short/char value? */
+
+ if (varTypeIsSmall(op1->TypeGet()))
+ {
+ noway_assert(op1->gtOper != GT_LCL_VAR || (op1->gtFlags & GTF_VAR_CAST) ||
+ // TODO: Why does this have to be true?
+ compiler->lvaTable[op1->gtLclVarCommon.gtLclNum].lvIsStructField ||
+ compiler->lvaTable[op1->gtLclVarCommon.gtLclNum].lvNormalizeOnLoad());
+
+ if (op2->gtOper == GT_CAST && !op2->gtOverflow())
+ {
+ /* Special case: cast to small type */
+
+ if (op2->CastToType() >= op1->gtType)
+ {
+ /* Make sure the cast operand is not > int */
+
+ if (op2->CastFromType() <= TYP_INT)
+ {
+ /* Cast via a non-smaller type */
+
+ op2 = op2->gtCast.CastOp();
+ }
+ }
+ }
+
+ if (op2->gtOper == GT_AND && op2->gtOp.gtOp2->gtOper == GT_CNS_INT)
+ {
+ unsigned mask;
+ switch (op1->gtType)
+ {
+ case TYP_BYTE:
+ mask = 0x000000FF;
+ break;
+ case TYP_SHORT:
+ mask = 0x0000FFFF;
+ break;
+ case TYP_CHAR:
+ mask = 0x0000FFFF;
+ break;
+ default:
+ goto SIMPLE_SMALL;
+ }
+
+ if (unsigned(op2->gtOp.gtOp2->gtIntCon.gtIconVal) == mask)
+ {
+ /* Redundant AND */
+
+ op2 = op2->gtOp.gtOp1;
+ }
+ }
+
+ /* Must get the new value into a byte register */
+
+ SIMPLE_SMALL:
+ if (varTypeIsByte(op1->TypeGet()))
+ genComputeReg(op2, RBM_BYTE_REGS, RegSet::EXACT_REG, RegSet::KEEP_REG);
+ else
+ goto NOT_SMALL;
+ }
+ else
+ {
+ NOT_SMALL:
+ /* Generate the RHS into a register */
+
+ isWriteBarrier = gcInfo.gcIsWriteBarrierAsgNode(tree);
+ if (isWriteBarrier)
+ {
+#if NOGC_WRITE_BARRIERS
+ // Exclude the REG_WRITE_BARRIER from op2's needReg mask
+ needReg = Target::exclude_WriteBarrierReg(needReg);
+ mustReg = RegSet::EXACT_REG;
+#else // !NOGC_WRITE_BARRIERS
+ // This code should be generic across architectures.
+
+ // For the standard JIT Helper calls
+ // op1 goes into REG_ARG_0 and
+ // op2 goes into REG_ARG_1
+ //
+ needRegOp1 = RBM_ARG_0;
+ needReg = RBM_ARG_1;
+#endif // !NOGC_WRITE_BARRIERS
+ }
+ genComputeReg(op2, needReg, mustReg, RegSet::KEEP_REG);
+ }
+
+ noway_assert(op2->gtFlags & GTF_REG_VAL);
+
+ /* Make the target addressable */
+
+ op1 = genCodeForCommaTree(op1); // Strip off any comma expressions.
+ addrReg = genMakeAddressable(op1, needRegOp1, RegSet::KEEP_REG, true);
+
+ /* Make sure the RHS register hasn't been spilled;
+ keep the register marked as "used", otherwise
+ we might get the pointer lifetimes wrong.
+ */
+
+ if (varTypeIsByte(op1->TypeGet()))
+ needReg = regSet.rsNarrowHint(RBM_BYTE_REGS, needReg);
+
+ genRecoverReg(op2, needReg, RegSet::KEEP_REG);
+ noway_assert(op2->gtFlags & GTF_REG_VAL);
+
+                /* Lock the RHS temporarily (we can only lock registers that are already in use) */
+
+ regSet.rsLockUsedReg(genRegMask(op2->gtRegNum));
+
+ /* Make sure the LHS is still addressable */
+
+ addrReg = genKeepAddressable(op1, addrReg);
+
+                /* We can unlock the RHS register (again, only already-used registers) */
+
+ regSet.rsUnlockUsedReg(genRegMask(op2->gtRegNum));
+
+ /* Does the write barrier helper do the assignment? */
+
+ regGC = WriteBarrier(op1, op2, addrReg);
+
+ if (regGC != 0)
+ {
+ // Yes, assignment done by the WriteBarrier
+ noway_assert(isWriteBarrier);
+ }
+ else
+ {
+#ifdef _TARGET_ARM_
+ if (volat)
+ {
+ // Emit a memory barrier instruction before the store
+ instGen_MemoryBarrier();
+ }
+#endif
+
+ /* Move the value into the target */
+
+ inst_TT_RV(ins_Store(op1->TypeGet()), op1, op2->gtRegNum);
+ }
+
+#ifdef DEBUG
+ /* Update the current liveness info */
+ if (compiler->opts.varNames)
+ genUpdateLife(tree);
+#endif
+
+ // If op2 register is still in use, free it. (Might not be in use, if
+ // a full-call write barrier was done, and the register was a caller-saved
+ // register.)
+ regMaskTP op2RM = genRegMask(op2->gtRegNum);
+ if (op2RM & regSet.rsMaskUsed)
+ regSet.rsMarkRegFree(genRegMask(op2->gtRegNum));
+
+ // This is done in WriteBarrier when (regGC != 0)
+ if (regGC == 0)
+ {
+ /* Free up anything that was tied up by the LHS */
+ genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);
+ }
+ }
+ else
+ {
+ /* Make the target addressable */
+
+ isWriteBarrier = gcInfo.gcIsWriteBarrierAsgNode(tree);
+
+ if (isWriteBarrier)
+ {
+#if NOGC_WRITE_BARRIERS
+ /* Try to avoid RBM_TMP_0 */
+ needRegOp1 = regSet.rsNarrowHint(needRegOp1, ~RBM_TMP_0);
+ mustReg = RegSet::EXACT_REG; // For op2
+#else // !NOGC_WRITE_BARRIERS
+ // This code should be generic across architectures.
+
+ // For the standard JIT Helper calls
+ // op1 goes into REG_ARG_0 and
+ // op2 goes into REG_ARG_1
+ //
+ needRegOp1 = RBM_ARG_0;
+ needReg = RBM_ARG_1;
+ mustReg = RegSet::EXACT_REG; // For op2
+#endif // !NOGC_WRITE_BARRIERS
+ }
+
+ needRegOp1 = regSet.rsNarrowHint(needRegOp1, ~op2->gtRsvdRegs);
+
+ op1 = genCodeForCommaTree(op1); // Strip away any comma expression.
+
+ addrReg = genMakeAddressable(op1, needRegOp1, RegSet::KEEP_REG, true);
+
+#if CPU_HAS_BYTE_REGS
+ /* Is the target a byte value? */
+ if (varTypeIsByte(op1->TypeGet()))
+ {
+ /* Must get the new value into a byte register */
+ needReg = regSet.rsNarrowHint(RBM_BYTE_REGS, needReg);
+ mustReg = RegSet::EXACT_REG;
+
+ if (op2->gtType >= op1->gtType)
+ op2->gtFlags |= GTF_SMALL_OK;
+ }
+#endif
+
+#if NOGC_WRITE_BARRIERS
+ /* For WriteBarrier we can't use REG_WRITE_BARRIER */
+ if (isWriteBarrier)
+ needReg = Target::exclude_WriteBarrierReg(needReg);
+
+ /* Also avoid using the previously computed addrReg(s) */
+ bestReg = regSet.rsNarrowHint(needReg, ~addrReg);
+
+ /* If we have a reg available to grab then use bestReg */
+ if (bestReg & regSet.rsRegMaskCanGrab())
+ needReg = bestReg;
+
+ mustReg = RegSet::EXACT_REG;
+#endif
+
+ /* Generate the RHS into a register */
+ genComputeReg(op2, needReg, mustReg, RegSet::KEEP_REG);
+ noway_assert(op2->gtFlags & GTF_REG_VAL);
+
+ /* Make sure the target is still addressable */
+ addrReg = genKeepAddressable(op1, addrReg, genRegMask(op2->gtRegNum));
+ noway_assert(op2->gtFlags & GTF_REG_VAL);
+
+ /* Does the write barrier helper do the assignment? */
+
+ regGC = WriteBarrier(op1, op2, addrReg);
+
+ if (regGC != 0)
+ {
+ // Yes, assignment done by the WriteBarrier
+ noway_assert(isWriteBarrier);
+ }
+ else
+ {
+ assert(!isWriteBarrier);
+
+#ifdef _TARGET_ARM_
+ if (volat)
+ {
+ // Emit a memory barrier instruction before the store
+ instGen_MemoryBarrier();
+ }
+#endif
+
+ /* Move the value into the target */
+
+ inst_TT_RV(ins_Store(op1->TypeGet()), op1, op2->gtRegNum);
+ }
+
+ /* The new value is no longer needed */
+
+ genReleaseReg(op2);
+
+#ifdef DEBUG
+ /* Update the current liveness info */
+ if (compiler->opts.varNames)
+ genUpdateLife(tree);
+#endif
+
+ // This is done in WriteBarrier when (regGC != 0)
+ if (regGC == 0)
+ {
+ /* Free up anything that was tied up by the LHS */
+ genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);
+ }
+ }
+
+ addrReg = RBM_NONE;
+ break;
+ }
+
+ noway_assert(addrReg != DUMMY_INIT(RBM_CORRUPT));
+ genCodeForTreeSmpOpAsg_DONE_ASSG(tree, addrReg, REG_NA, ovfl);
+
+LExit:
+#ifdef DEBUGGING_SUPPORT
+ /* For non-debuggable code, every definition of a lcl-var has
+ * to be checked to see if we need to open a new scope for it.
+ */
+ if (lclVarNum < compiler->lvaCount)
+ siCheckVarScope(lclVarNum, lclILoffs);
+#endif
+}
+#ifdef _PREFAST_
+#pragma warning(pop)
+#endif
+
+/*****************************************************************************
+ *
+ * Generate code to complete the assignment operation
+ */
+
+void CodeGen::genCodeForTreeSmpOpAsg_DONE_ASSG(GenTreePtr tree, regMaskTP addrReg, regNumber reg, bool ovfl)
+{
+ const var_types treeType = tree->TypeGet();
+ GenTreePtr op1 = tree->gtOp.gtOp1;
+ GenTreePtr op2 = tree->gtOp.gtOp2;
+ noway_assert(op2);
+
+ if (op1->gtOper == GT_LCL_VAR || op1->gtOper == GT_REG_VAR)
+ genUpdateLife(op1);
+ genUpdateLife(tree);
+
+#if REDUNDANT_LOAD
+
+ if (op1->gtOper == GT_LCL_VAR)
+ regTracker.rsTrashLcl(op1->gtLclVarCommon.gtLclNum);
+
+ /* Have we just assigned a value that is in a register? */
+
+ if ((op2->gtFlags & GTF_REG_VAL) && tree->gtOper == GT_ASG)
+ {
+ regTracker.rsTrackRegAssign(op1, op2);
+ }
+
+#endif
+
+ noway_assert(addrReg != 0xDEADCAFE);
+
+ gcInfo.gcMarkRegSetNpt(addrReg);
+
+ if (ovfl)
+ {
+ noway_assert(tree->gtOper == GT_ASG_ADD || tree->gtOper == GT_ASG_SUB);
+
+ /* If GTF_REG_VAL is not set, and it is a small type, then
+ we must have loaded it up from memory, done the increment,
+ checked for overflow, and then stored it back to memory */
+
+ bool ovfCheckDone = (genTypeSize(op1->TypeGet()) < sizeof(int)) && !(op1->gtFlags & GTF_REG_VAL);
+
+ if (!ovfCheckDone)
+ {
+ // For small sizes, reg should be set as we sign/zero extend it.
+
+ noway_assert(genIsValidReg(reg) || genTypeSize(treeType) == sizeof(int));
+
+ /* Currently we don't morph x=x+y into x+=y in try blocks
+ * if we need overflow check, as x+y may throw an exception.
+ * We can do it if x is not live on entry to the catch block.
+ */
+ noway_assert(!compiler->compCurBB->hasTryIndex());
+
+ genCheckOverflow(tree);
+ }
+ }
+}
+
+/*****************************************************************************
+ *
+ * Generate code for a special op tree
+ */
+
+void CodeGen::genCodeForTreeSpecialOp(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg)
+{
+ genTreeOps oper = tree->OperGet();
+ regNumber reg = DUMMY_INIT(REG_CORRUPT);
+ regMaskTP regs = regSet.rsMaskUsed;
+
+ noway_assert((tree->OperKind() & (GTK_CONST | GTK_LEAF | GTK_SMPOP)) == 0);
+
+ switch (oper)
+ {
+ case GT_CALL:
+ regs = genCodeForCall(tree, true);
+
+ /* If the result is in a register, make sure it ends up in the right place */
+
+ if (regs != RBM_NONE)
+ {
+ genMarkTreeInReg(tree, genRegNumFromMask(regs));
+ }
+
+ genUpdateLife(tree);
+ return;
+
+ case GT_FIELD:
+ NO_WAY("should not see this operator in this phase");
+ break;
+
+ case GT_ARR_BOUNDS_CHECK:
+ {
+#ifdef FEATURE_ENABLE_NO_RANGE_CHECKS
+ // MUST NEVER CHECK-IN WITH THIS ENABLED.
+ // This is just for convenience in doing performance investigations and requires x86ret builds
+ if (!JitConfig.JitNoRngChk())
+#endif
+ genRangeCheck(tree);
+ }
+ return;
+
+ case GT_ARR_ELEM:
+ genCodeForTreeSmpOp_GT_ADDR(tree, destReg, bestReg);
+ return;
+
+ case GT_CMPXCHG:
+ {
+#if defined(_TARGET_XARCH_)
+ // cmpxchg does not have an [r/m32], imm32 encoding, so we need a register for the value operand
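+            // The sequence we end up emitting is roughly:
+            //     lock
+            //     cmpxchg dword ptr [location], valueReg
+            // with the comparand computed into EAX beforehand (cmpxchg uses EAX implicitly).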
+
+ // Since this is a "call", evaluate the operands from right to left. Don't worry about spilling
+ // right now, just get the trees evaluated.
+
+            // As a friendly reminder: IL args are evaluated left to right.
+
+ GenTreePtr location = tree->gtCmpXchg.gtOpLocation; // arg1
+ GenTreePtr value = tree->gtCmpXchg.gtOpValue; // arg2
+ GenTreePtr comparand = tree->gtCmpXchg.gtOpComparand; // arg3
+ regMaskTP addrReg;
+
+ bool isAddr = genMakeIndAddrMode(location, tree, false, /* not for LEA */
+ RBM_ALLINT, RegSet::KEEP_REG, &addrReg);
+
+ if (!isAddr)
+ {
+ genCodeForTree(location, RBM_NONE, RBM_NONE);
+                assert(location->gtFlags & GTF_REG_VAL);
+ addrReg = genRegMask(location->gtRegNum);
+ regSet.rsMarkRegUsed(location);
+ }
+
+ // We must have a reg for the Value, but it doesn't really matter which register.
+
+            // Try to avoid EAX and the address register if possible.
+ genComputeReg(value, regSet.rsNarrowHint(RBM_ALLINT, RBM_EAX | addrReg), RegSet::ANY_REG, RegSet::KEEP_REG);
+
+#ifdef DEBUG
+ // cmpxchg uses EAX as an implicit operand to hold the comparand
+ // We're going to destroy EAX in this operation, so we better not be keeping
+ // anything important in it.
+ if (RBM_EAX & regSet.rsMaskVars)
+ {
+ // We have a variable enregistered in EAX. Make sure it goes dead in this tree.
+ for (unsigned varNum = 0; varNum < compiler->lvaCount; ++varNum)
+ {
+ const LclVarDsc& varDesc = compiler->lvaTable[varNum];
+ if (!varDesc.lvIsRegCandidate())
+ continue;
+ if (!varDesc.lvRegister)
+ continue;
+ if (isFloatRegType(varDesc.lvType))
+ continue;
+ if (varDesc.lvRegNum != REG_EAX)
+ continue;
+ // We may need to check lvOtherReg.
+
+ // If the variable isn't going dead during this tree, we've just trashed a local with
+ // cmpxchg.
+ noway_assert(genContainsVarDeath(value->gtNext, comparand->gtNext, varNum));
+
+ break;
+ }
+ }
+#endif
+ genComputeReg(comparand, RBM_EAX, RegSet::EXACT_REG, RegSet::KEEP_REG);
+
+ // By this point we've evaluated everything. However the odds are that we've spilled something by
+ // now. Let's recover all the registers and force them to stay.
+
+ // Well, we just computed comparand, so it's still in EAX.
+ noway_assert(comparand->gtRegNum == REG_EAX);
+ regSet.rsLockUsedReg(RBM_EAX);
+
+ // Stick it anywhere other than EAX.
+ genRecoverReg(value, ~RBM_EAX, RegSet::KEEP_REG);
+ reg = value->gtRegNum;
+ noway_assert(reg != REG_EAX);
+ regSet.rsLockUsedReg(genRegMask(reg));
+
+ if (isAddr)
+ {
+ addrReg = genKeepAddressable(/*location*/ tree, addrReg, 0 /*avoidMask*/);
+ }
+ else
+ {
+ genRecoverReg(location, ~(RBM_EAX | genRegMask(reg)), RegSet::KEEP_REG);
+ }
+
+ regSet.rsUnlockUsedReg(genRegMask(reg));
+ regSet.rsUnlockUsedReg(RBM_EAX);
+
+ instGen(INS_lock);
+ if (isAddr)
+ {
+ sched_AM(INS_cmpxchg, EA_4BYTE, reg, false, location, 0);
+ genDoneAddressable(location, addrReg, RegSet::KEEP_REG);
+ }
+ else
+ {
+ instEmit_RM_RV(INS_cmpxchg, EA_4BYTE, location, reg, 0);
+ genReleaseReg(location);
+ }
+
+ genReleaseReg(value);
+ genReleaseReg(comparand);
+
+ // EAX and the value register are both trashed at this point.
+ regTracker.rsTrackRegTrash(REG_EAX);
+ regTracker.rsTrackRegTrash(reg);
+
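+            // Either way cmpxchg leaves the value originally at the location in EAX (on success EAX
+            // still holds the matching comparand; on failure it is reloaded from memory), so EAX is
+            // the result of the GT_CMPXCHG node.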
+ reg = REG_EAX;
+
+ genFlagsEqualToNone();
+ break;
+#else // not defined(_TARGET_XARCH_)
+ NYI("GT_CMPXCHG codegen");
+ break;
+#endif
+ }
+
+ default:
+#ifdef DEBUG
+ compiler->gtDispTree(tree);
+#endif
+ noway_assert(!"unexpected operator");
+ NO_WAY("unexpected operator");
+ }
+
+ noway_assert(reg != DUMMY_INIT(REG_CORRUPT));
+ genCodeForTree_DONE(tree, reg);
+}
+
+/*****************************************************************************
+ *
+ * Generate code for the given tree. tree->gtRegNum will be set to the
+ * register where the tree lives.
+ *
+ * If 'destReg' is non-zero, we'll do our best to compute the value into a
+ * register that is in that register set.
+ * Use genComputeReg() if you need the tree in a specific register.
+ * Use genCompIntoFreeReg() if the register needs to be written to. Otherwise,
+ * the register can only be used for read, but not for write.
+ * Use genMakeAddressable() if you only need the tree to be accessible
+ * using a complex addressing mode, and do not necessarily need the tree
+ * materialized in a register.
+ *
+ * The GCness of the register will be properly set in gcInfo.gcRegGCrefSetCur/gcInfo.gcRegByrefSetCur.
+ *
+ * The register will not be marked as used. Use regSet.rsMarkRegUsed() if the
+ * register will not be consumed right away and could possibly be spilled.
+ */
+
+void CodeGen::genCodeForTree(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg)
+{
+#if 0
+ if (compiler->verbose)
+ {
+ printf("Generating code for tree ");
+ Compiler::printTreeID(tree);
+ printf(" destReg = 0x%x bestReg = 0x%x\n", destReg, bestReg);
+ }
+ genStressRegs(tree);
+#endif
+
+ noway_assert(tree);
+ noway_assert(tree->gtOper != GT_STMT);
+ assert(tree->IsNodeProperlySized());
+
+    // When assigning to an enregistered local variable we receive
+    // a hint that we should target the register that is used to
+    // hold the enregistered local variable.
+    // When receiving this hint both the destReg and bestReg masks are set
+    // to the register that is used by the enregistered local variable.
+    //
+    // However it is possible for a different local variable that targets
+    // the same register to become alive (and later die)
+    // as we descend the expression tree.
+    //
+    // To handle such cases we will remove any registers that are alive
+    // from both the destReg and bestReg masks.
+ //
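+    // For example, the hint may be RBM_ESI because the assignment target lives in ESI; if a
+    // different local that also maps to ESI is live while we evaluate this subtree, we must
+    // drop ESI from both masks rather than clobber it.
+    //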
+ regMaskTP liveMask = genLiveMask(tree);
+
+ // This removes any registers used to hold enregistered locals
+ // from the destReg and bestReg masks.
+ // After this either mask could become 0
+ //
+ destReg &= ~liveMask;
+ bestReg &= ~liveMask;
+
+ /* 'destReg' of 0 really means 'any' */
+
+ destReg = regSet.rsUseIfZero(destReg, RBM_ALL(tree->TypeGet()));
+
+ if (destReg != RBM_ALL(tree->TypeGet()))
+ bestReg = regSet.rsUseIfZero(bestReg, destReg);
+
+ // Long, float, and double have their own codegen functions
+ switch (tree->TypeGet())
+ {
+
+ case TYP_LONG:
+#if !CPU_HAS_FP_SUPPORT
+ case TYP_DOUBLE:
+#endif
+ genCodeForTreeLng(tree, destReg, /*avoidReg*/ RBM_NONE);
+ return;
+
+#if CPU_HAS_FP_SUPPORT
+ case TYP_FLOAT:
+ case TYP_DOUBLE:
+
+ // For comma nodes, we'll get back here for the last node in the comma list.
+ if (tree->gtOper != GT_COMMA)
+ {
+ genCodeForTreeFlt(tree, RBM_ALLFLOAT, RBM_ALLFLOAT & (destReg | bestReg));
+ return;
+ }
+ break;
+#endif
+
+#ifdef DEBUG
+ case TYP_UINT:
+ case TYP_ULONG:
+ noway_assert(!"These types are only used as markers in GT_CAST nodes");
+ break;
+#endif
+
+ default:
+ break;
+ }
+
+ /* Is the value already in a register? */
+
+ if (tree->gtFlags & GTF_REG_VAL)
+ {
+ genCodeForTree_REG_VAR1(tree);
+ return;
+ }
+
+ /* We better not have a spilled value here */
+
+ noway_assert((tree->gtFlags & GTF_SPILLED) == 0);
+
+ /* Figure out what kind of a node we have */
+
+ unsigned kind = tree->OperKind();
+
+ if (kind & GTK_CONST)
+ {
+ /* Handle constant nodes */
+
+ genCodeForTreeConst(tree, destReg, bestReg);
+ }
+ else if (kind & GTK_LEAF)
+ {
+ /* Handle leaf nodes */
+
+ genCodeForTreeLeaf(tree, destReg, bestReg);
+ }
+ else if (kind & GTK_SMPOP)
+ {
+ /* Handle 'simple' unary/binary operators */
+
+ genCodeForTreeSmpOp(tree, destReg, bestReg);
+ }
+ else
+ {
+ /* Handle special operators */
+
+ genCodeForTreeSpecialOp(tree, destReg, bestReg);
+ }
+}
+
+/*****************************************************************************
+ *
+ * Generate code for all the basic blocks in the function.
+ */
+
+#ifdef _PREFAST_
+#pragma warning(push)
+#pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
+#endif
+void CodeGen::genCodeForBBlist()
+{
+ unsigned varNum;
+ LclVarDsc* varDsc;
+
+ unsigned savedStkLvl;
+
+#ifdef DEBUG
+ genInterruptibleUsed = true;
+ unsigned stmtNum = 0;
+ unsigned totalCostEx = 0;
+ unsigned totalCostSz = 0;
+
+ // You have to be careful if you create basic blocks from now on
+ compiler->fgSafeBasicBlockCreation = false;
+
+    // This stress mode is not compatible with fully interruptible GC
+ if (genInterruptible && compiler->opts.compStackCheckOnCall)
+ {
+ compiler->opts.compStackCheckOnCall = false;
+ }
+
+    // This stress mode is not compatible with fully interruptible GC
+ if (genInterruptible && compiler->opts.compStackCheckOnRet)
+ {
+ compiler->opts.compStackCheckOnRet = false;
+ }
+#endif
+
+    // Prepare the blocks for exception handling codegen: mark the blocks that need labels.
+ genPrepForEHCodegen();
+
+ assert(!compiler->fgFirstBBScratch ||
+ compiler->fgFirstBB == compiler->fgFirstBBScratch); // compiler->fgFirstBBScratch has to be first.
+
+ /* Initialize the spill tracking logic */
+
+ regSet.rsSpillBeg();
+
+#ifdef DEBUGGING_SUPPORT
+ /* Initialize the line# tracking logic */
+
+ if (compiler->opts.compScopeInfo)
+ {
+ siInit();
+ }
+#endif
+
+#ifdef _TARGET_X86_
+ if (compiler->compTailCallUsed)
+ {
+ noway_assert(isFramePointerUsed());
+ regSet.rsSetRegsModified(RBM_INT_CALLEE_SAVED & ~RBM_FPBASE);
+ }
+#endif
+
+ if (compiler->opts.compDbgEnC)
+ {
+ noway_assert(isFramePointerUsed());
+ regSet.rsSetRegsModified(RBM_INT_CALLEE_SAVED & ~RBM_FPBASE);
+ }
+
+ /* If we have any pinvoke calls, we might potentially trash everything */
+
+ if (compiler->info.compCallUnmanaged)
+ {
+ noway_assert(isFramePointerUsed()); // Setup of Pinvoke frame currently requires an EBP style frame
+ regSet.rsSetRegsModified(RBM_INT_CALLEE_SAVED & ~RBM_FPBASE);
+ }
+
+ /* Initialize the pointer tracking code */
+
+ gcInfo.gcRegPtrSetInit();
+ gcInfo.gcVarPtrSetInit();
+
+ /* If any arguments live in registers, mark those regs as such */
+
+ for (varNum = 0, varDsc = compiler->lvaTable; varNum < compiler->lvaCount; varNum++, varDsc++)
+ {
+ /* Is this variable a parameter assigned to a register? */
+
+ if (!varDsc->lvIsParam || !varDsc->lvRegister)
+ continue;
+
+ /* Is the argument live on entry to the method? */
+
+ if (!VarSetOps::IsMember(compiler, compiler->fgFirstBB->bbLiveIn, varDsc->lvVarIndex))
+ continue;
+
+#if CPU_HAS_FP_SUPPORT
+ /* Is this a floating-point argument? */
+
+ if (varDsc->IsFloatRegType())
+ continue;
+
+ noway_assert(!varTypeIsFloating(varDsc->TypeGet()));
+#endif
+
+ /* Mark the register as holding the variable */
+
+ if (isRegPairType(varDsc->lvType))
+ {
+ regTracker.rsTrackRegLclVarLng(varDsc->lvRegNum, varNum, true);
+
+ if (varDsc->lvOtherReg != REG_STK)
+ regTracker.rsTrackRegLclVarLng(varDsc->lvOtherReg, varNum, false);
+ }
+ else
+ {
+ regTracker.rsTrackRegLclVar(varDsc->lvRegNum, varNum);
+ }
+ }
+
+ unsigned finallyNesting = 0;
+
+ // Make sure a set is allocated for compiler->compCurLife (in the long case), so we can set it to empty without
+ // allocation at the start of each basic block.
+ VarSetOps::AssignNoCopy(compiler, compiler->compCurLife, VarSetOps::MakeEmpty(compiler));
+
+ /*-------------------------------------------------------------------------
+ *
+ * Walk the basic blocks and generate code for each one
+ *
+ */
+
+ BasicBlock* block;
+ BasicBlock* lblk; /* previous block */
+
+ for (lblk = NULL, block = compiler->fgFirstBB; block != NULL; lblk = block, block = block->bbNext)
+ {
+#ifdef DEBUG
+ if (compiler->verbose)
+ {
+ printf("\n=============== Generating ");
+ block->dspBlockHeader(compiler, true, true);
+ compiler->fgDispBBLiveness(block);
+ }
+#endif // DEBUG
+
+ VARSET_TP VARSET_INIT_NOCOPY(liveSet, VarSetOps::UninitVal());
+
+ regMaskTP gcrefRegs = 0;
+ regMaskTP byrefRegs = 0;
+
+ /* Does any other block jump to this point ? */
+
+ if (block->bbFlags & BBF_JMP_TARGET)
+ {
+ /* Someone may jump here, so trash all regs */
+
+ regTracker.rsTrackRegClr();
+
+ genFlagsEqualToNone();
+ }
+ else
+ {
+ /* No jump, but pointers always need to get trashed for proper GC tracking */
+
+ regTracker.rsTrackRegClrPtr();
+ }
+
+ /* No registers are used or locked on entry to a basic block */
+
+ regSet.rsMaskUsed = RBM_NONE;
+ regSet.rsMaskMult = RBM_NONE;
+ regSet.rsMaskLock = RBM_NONE;
+
+        // If we need to reserve registers so that they are not used
+        // by CodeGen in this BasicBlock, we do so here.
+        // On ARM, when we have large frame offsets for locals, we
+        // will have RBM_R10 in the regSet.rsMaskResvd set;
+        // additionally, if a localloc or alloca is used, RBM_R9 is in
+        // the regSet.rsMaskResvd set, and we lock these registers here.
+ //
+ if (regSet.rsMaskResvd != RBM_NONE)
+ {
+ regSet.rsLockReg(regSet.rsMaskResvd);
+ regSet.rsSetRegsModified(regSet.rsMaskResvd);
+ }
+
+ /* Figure out which registers hold variables on entry to this block */
+
+ regMaskTP specialUseMask = regSet.rsMaskResvd;
+
+ specialUseMask |= doubleAlignOrFramePointerUsed() ? RBM_SPBASE | RBM_FPBASE : RBM_SPBASE;
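+        // specialUseMask now holds the reserved registers plus SP (and FP when a frame pointer or
+        // double alignment is used); none of these may hold an enregistered local (asserted below).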
+ regSet.ClearMaskVars();
+ VarSetOps::ClearD(compiler, compiler->compCurLife);
+ VarSetOps::Assign(compiler, liveSet, block->bbLiveIn);
+
+#if FEATURE_STACK_FP_X87
+ VarSetOps::AssignNoCopy(compiler, genFPregVars,
+ VarSetOps::Intersection(compiler, liveSet, compiler->optAllFPregVars));
+ genFPregCnt = VarSetOps::Count(compiler, genFPregVars);
+ genFPdeadRegCnt = 0;
+#endif
+ gcInfo.gcResetForBB();
+
+ genUpdateLife(liveSet); // This updates regSet.rsMaskVars with bits from any enregistered LclVars
+#if FEATURE_STACK_FP_X87
+ VarSetOps::IntersectionD(compiler, liveSet, compiler->optAllNonFPvars);
+#endif
+
+ // We should never enregister variables in any of the specialUseMask registers
+ noway_assert((specialUseMask & regSet.rsMaskVars) == 0);
+
+ VARSET_ITER_INIT(compiler, iter, liveSet, varIndex);
+ while (iter.NextElem(compiler, &varIndex))
+ {
+ varNum = compiler->lvaTrackedToVarNum[varIndex];
+ varDsc = compiler->lvaTable + varNum;
+ assert(varDsc->lvTracked);
+            /* Ignore the variable if it's not in a reg */
+
+ if (!varDsc->lvRegister)
+ continue;
+ if (isFloatRegType(varDsc->lvType))
+ continue;
+
+ /* Get hold of the index and the bitmask for the variable */
+ regNumber regNum = varDsc->lvRegNum;
+ regMaskTP regMask = genRegMask(regNum);
+
+ regSet.AddMaskVars(regMask);
+
+ if (varDsc->lvType == TYP_REF)
+ gcrefRegs |= regMask;
+ else if (varDsc->lvType == TYP_BYREF)
+ byrefRegs |= regMask;
+
+ /* Mark the register holding the variable as such */
+
+ if (varTypeIsMultiReg(varDsc))
+ {
+ regTracker.rsTrackRegLclVarLng(regNum, varNum, true);
+ if (varDsc->lvOtherReg != REG_STK)
+ {
+ regTracker.rsTrackRegLclVarLng(varDsc->lvOtherReg, varNum, false);
+ regMask |= genRegMask(varDsc->lvOtherReg);
+ }
+ }
+ else
+ {
+ regTracker.rsTrackRegLclVar(regNum, varNum);
+ }
+ }
+
+ gcInfo.gcPtrArgCnt = 0;
+
+#if FEATURE_STACK_FP_X87
+
+ regSet.rsMaskUsedFloat = regSet.rsMaskRegVarFloat = regSet.rsMaskLockedFloat = RBM_NONE;
+
+ memset(regSet.genUsedRegsFloat, 0, sizeof(regSet.genUsedRegsFloat));
+ memset(regSet.genRegVarsFloat, 0, sizeof(regSet.genRegVarsFloat));
+
+ // Setup fp state on block entry
+ genSetupStateStackFP(block);
+
+#ifdef DEBUG
+ if (compiler->verbose)
+ {
+ JitDumpFPState();
+ }
+#endif // DEBUG
+#endif // FEATURE_STACK_FP_X87
+
+ /* Make sure we keep track of what pointers are live */
+
+ noway_assert((gcrefRegs & byrefRegs) == 0); // Something can't be both a gcref and a byref
+ gcInfo.gcRegGCrefSetCur = gcrefRegs;
+ gcInfo.gcRegByrefSetCur = byrefRegs;
+
+ /* Blocks with handlerGetsXcptnObj()==true use GT_CATCH_ARG to
+ represent the exception object (TYP_REF).
+ We mark REG_EXCEPTION_OBJECT as holding a GC object on entry
+ to the block, it will be the first thing evaluated
+ (thanks to GTF_ORDER_SIDEEFF).
+ */
+
+ if (handlerGetsXcptnObj(block->bbCatchTyp))
+ {
+ GenTreePtr firstStmt = block->FirstNonPhiDef();
+ if (firstStmt != NULL)
+ {
+ GenTreePtr firstTree = firstStmt->gtStmt.gtStmtExpr;
+ if (compiler->gtHasCatchArg(firstTree))
+ {
+ gcInfo.gcRegGCrefSetCur |= RBM_EXCEPTION_OBJECT;
+ }
+ }
+ }
+
+ /* Start a new code output block */
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#if FEATURE_EH_FUNCLETS
+#if defined(_TARGET_ARM_)
+ // If this block is the target of a finally return, we need to add a preceding NOP, in the same EH region,
+ // so the unwinder doesn't get confused by our "movw lr, xxx; movt lr, xxx; b Lyyy" calling convention that
+ // calls the funclet during non-exceptional control flow.
+ if (block->bbFlags & BBF_FINALLY_TARGET)
+ {
+ assert(block->bbFlags & BBF_JMP_TARGET);
+
+#ifdef DEBUG
+ if (compiler->verbose)
+ {
+ printf("\nEmitting finally target NOP predecessor for BB%02u\n", block->bbNum);
+ }
+#endif
+ // Create a label that we'll use for computing the start of an EH region, if this block is
+ // at the beginning of such a region. If we used the existing bbEmitCookie as is for
+ // determining the EH regions, then this NOP would end up outside of the region, if this
+ // block starts an EH region. If we pointed the existing bbEmitCookie here, then the NOP
+ // would be executed, which we would prefer not to do.
+
+ block->bbUnwindNopEmitCookie =
+ getEmitter()->emitAddLabel(gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur);
+
+ instGen(INS_nop);
+ }
+#endif // defined(_TARGET_ARM_)
+
+ genUpdateCurrentFunclet(block);
+#endif // FEATURE_EH_FUNCLETS
+
+#ifdef _TARGET_XARCH_
+ if (genAlignLoops && block->bbFlags & BBF_LOOP_HEAD)
+ {
+ getEmitter()->emitLoopAlign();
+ }
+#endif
+
+#ifdef DEBUG
+ if (compiler->opts.dspCode)
+ printf("\n L_M%03u_BB%02u:\n", Compiler::s_compMethodsCount, block->bbNum);
+#endif
+
+ block->bbEmitCookie = NULL;
+
+ if (block->bbFlags & (BBF_JMP_TARGET | BBF_HAS_LABEL))
+ {
+ /* Mark a label and update the current set of live GC refs */
+
+ block->bbEmitCookie =
+ getEmitter()->emitAddLabel(gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur,
+#if FEATURE_EH_FUNCLETS && defined(_TARGET_ARM_)
+ /*isFinally*/ block->bbFlags & BBF_FINALLY_TARGET
+#else
+ FALSE
+#endif
+ );
+ }
+
+ if (block == compiler->fgFirstColdBlock)
+ {
+#ifdef DEBUG
+ if (compiler->verbose)
+ {
+ printf("\nThis is the start of the cold region of the method\n");
+ }
+#endif
+ // We should never have a block that falls through into the Cold section
+ noway_assert(!lblk->bbFallsThrough());
+
+ // We require the block that starts the Cold section to have a label
+ noway_assert(block->bbEmitCookie);
+ getEmitter()->emitSetFirstColdIGCookie(block->bbEmitCookie);
+ }
+
+ /* Both stacks are always empty on entry to a basic block */
+
+ genStackLevel = 0;
+#if FEATURE_STACK_FP_X87
+ genResetFPstkLevel();
+#endif // FEATURE_STACK_FP_X87
+
+#if !FEATURE_FIXED_OUT_ARGS
+ /* Check for inserted throw blocks and adjust genStackLevel */
+
+ if (!isFramePointerUsed() && compiler->fgIsThrowHlpBlk(block))
+ {
+ noway_assert(block->bbFlags & BBF_JMP_TARGET);
+
+ genStackLevel = compiler->fgThrowHlpBlkStkLevel(block) * sizeof(int);
+
+ if (genStackLevel)
+ {
+#ifdef _TARGET_X86_
+ getEmitter()->emitMarkStackLvl(genStackLevel);
+ inst_RV_IV(INS_add, REG_SPBASE, genStackLevel, EA_PTRSIZE);
+ genStackLevel = 0;
+#else // _TARGET_X86_
+ NYI("Need emitMarkStackLvl()");
+#endif // _TARGET_X86_
+ }
+ }
+#endif // !FEATURE_FIXED_OUT_ARGS
+
+ savedStkLvl = genStackLevel;
+
+ /* Tell everyone which basic block we're working on */
+
+ compiler->compCurBB = block;
+
+#ifdef DEBUGGING_SUPPORT
+ siBeginBlock(block);
+
+ // BBF_INTERNAL blocks don't correspond to any single IL instruction.
+ if (compiler->opts.compDbgInfo && (block->bbFlags & BBF_INTERNAL) && block != compiler->fgFirstBB)
+ genIPmappingAdd((IL_OFFSETX)ICorDebugInfo::NO_MAPPING, true);
+
+ bool firstMapping = true;
+#endif // DEBUGGING_SUPPORT
+
+ /*---------------------------------------------------------------------
+ *
+ * Generate code for each statement-tree in the block
+ *
+ */
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#if FEATURE_EH_FUNCLETS
+ if (block->bbFlags & BBF_FUNCLET_BEG)
+ {
+ genReserveFuncletProlog(block);
+ }
+#endif // FEATURE_EH_FUNCLETS
+
+ for (GenTreePtr stmt = block->FirstNonPhiDef(); stmt; stmt = stmt->gtNext)
+ {
+ noway_assert(stmt->gtOper == GT_STMT);
+
+#if defined(DEBUGGING_SUPPORT)
+
+ /* Do we have a new IL-offset ? */
+
+ if (stmt->gtStmt.gtStmtILoffsx != BAD_IL_OFFSET)
+ {
+ /* Create and append a new IP-mapping entry */
+                genIPmappingAdd(stmt->gtStmt.gtStmtILoffsx, firstMapping);
+ firstMapping = false;
+ }
+
+#endif // DEBUGGING_SUPPORT
+
+#ifdef DEBUG
+ if (stmt->gtStmt.gtStmtLastILoffs != BAD_IL_OFFSET)
+ {
+ noway_assert(stmt->gtStmt.gtStmtLastILoffs <= compiler->info.compILCodeSize);
+ if (compiler->opts.dspCode && compiler->opts.dspInstrs)
+ {
+ while (genCurDispOffset <= stmt->gtStmt.gtStmtLastILoffs)
+ {
+ genCurDispOffset += dumpSingleInstr(compiler->info.compCode, genCurDispOffset, "> ");
+ }
+ }
+ }
+#endif // DEBUG
+
+ /* Get hold of the statement tree */
+ GenTreePtr tree = stmt->gtStmt.gtStmtExpr;
+
+#ifdef DEBUG
+ stmtNum++;
+ if (compiler->verbose)
+ {
+ printf("\nGenerating BB%02u, stmt %u\t\t", block->bbNum, stmtNum);
+ printf("Holding variables: ");
+ dspRegMask(regSet.rsMaskVars);
+ printf("\n\n");
+ compiler->gtDispTree(compiler->opts.compDbgInfo ? stmt : tree);
+ printf("\n");
+#if FEATURE_STACK_FP_X87
+ JitDumpFPState();
+#endif
+
+ printf("Execution Order:\n");
+ for (GenTreePtr treeNode = stmt->gtStmt.gtStmtList; treeNode != NULL; treeNode = treeNode->gtNext)
+ {
+ compiler->gtDispTree(treeNode, 0, NULL, true);
+ }
+ printf("\n");
+ }
+ totalCostEx += (stmt->gtCostEx * block->getBBWeight(compiler));
+ totalCostSz += stmt->gtCostSz;
+#endif // DEBUG
+
+ compiler->compCurStmt = stmt;
+
+ compiler->compCurLifeTree = NULL;
+ switch (tree->gtOper)
+ {
+ case GT_CALL:
+                    // Managed retval under a managed debugger - we need to make sure that the returned ref-type is
+                    // reported as alive, even though it is not used within the caller, for the managed debugger's
+                    // sake. So consider the return value of the method as used if generating debuggable code.
+ genCodeForCall(tree, compiler->opts.MinOpts() || compiler->opts.compDbgCode);
+ genUpdateLife(tree);
+ gcInfo.gcMarkRegSetNpt(RBM_INTRET);
+ break;
+
+ case GT_IND:
+ case GT_NULLCHECK:
+
+ // Just do the side effects
+ genEvalSideEffects(tree);
+ break;
+
+ default:
+ /* Generate code for the tree */
+
+ genCodeForTree(tree, 0);
+ break;
+ }
+
+ regSet.rsSpillChk();
+
+ /* The value of the tree isn't used, unless it's a return stmt */
+
+ if (tree->gtOper != GT_RETURN)
+ gcInfo.gcMarkRegPtrVal(tree);
+
+#if FEATURE_STACK_FP_X87
+ genEndOfStatement();
+#endif
+
+#ifdef DEBUG
+ /* Make sure we didn't bungle pointer register tracking */
+
+ regMaskTP ptrRegs = (gcInfo.gcRegGCrefSetCur | gcInfo.gcRegByrefSetCur);
+ regMaskTP nonVarPtrRegs = ptrRegs & ~regSet.rsMaskVars;
+
+ // If return is a GC-type, clear it. Note that if a common
+ // epilog is generated (compiler->genReturnBB) it has a void return
+ // even though we might return a ref. We can't use the compRetType
+ // as the determiner because something we are tracking as a byref
+            // might be used as a return value of an int function (which is legal)
+ if (tree->gtOper == GT_RETURN && (varTypeIsGC(compiler->info.compRetType) ||
+ (tree->gtOp.gtOp1 != 0 && varTypeIsGC(tree->gtOp.gtOp1->TypeGet()))))
+ {
+ nonVarPtrRegs &= ~RBM_INTRET;
+ }
+
+ // When profiling, the first statement in a catch block will be the
+ // harmless "inc" instruction (does not interfere with the exception
+ // object).
+
+ if ((compiler->opts.eeFlags & CORJIT_FLG_BBINSTR) && (stmt == block->bbTreeList) &&
+ (block->bbCatchTyp && handlerGetsXcptnObj(block->bbCatchTyp)))
+ {
+ nonVarPtrRegs &= ~RBM_EXCEPTION_OBJECT;
+ }
+
+ if (nonVarPtrRegs)
+ {
+ printf("Regset after tree=");
+ Compiler::printTreeID(tree);
+ printf(" BB%02u gcr=", block->bbNum);
+ printRegMaskInt(gcInfo.gcRegGCrefSetCur & ~regSet.rsMaskVars);
+ compiler->getEmitter()->emitDispRegSet(gcInfo.gcRegGCrefSetCur & ~regSet.rsMaskVars);
+ printf(", byr=");
+ printRegMaskInt(gcInfo.gcRegByrefSetCur & ~regSet.rsMaskVars);
+ compiler->getEmitter()->emitDispRegSet(gcInfo.gcRegByrefSetCur & ~regSet.rsMaskVars);
+ printf(", regVars=");
+ printRegMaskInt(regSet.rsMaskVars);
+ compiler->getEmitter()->emitDispRegSet(regSet.rsMaskVars);
+ printf("\n");
+ }
+
+ noway_assert(nonVarPtrRegs == 0);
+#endif // DEBUG
+
+ noway_assert(stmt->gtOper == GT_STMT);
+
+#ifdef DEBUGGING_SUPPORT
+ genEnsureCodeEmitted(stmt->gtStmt.gtStmtILoffsx);
+#endif
+
+ } //-------- END-FOR each statement-tree of the current block ---------
+
+#ifdef DEBUGGING_SUPPORT
+
+ if (compiler->opts.compScopeInfo && (compiler->info.compVarScopesCount > 0))
+ {
+ siEndBlock(block);
+
+ /* Is this the last block, and are there any open scopes left ? */
+
+ bool isLastBlockProcessed = (block->bbNext == NULL);
+ if (block->isBBCallAlwaysPair())
+ {
+ isLastBlockProcessed = (block->bbNext->bbNext == NULL);
+ }
+
+ if (isLastBlockProcessed && siOpenScopeList.scNext)
+ {
+ /* This assert no longer holds, because we may insert a throw
+ block to demarcate the end of a try or finally region when they
+ are at the end of the method. It would be nice if we could fix
+ our code so that this throw block will no longer be necessary. */
+
+ // noway_assert(block->bbCodeOffsEnd != compiler->info.compILCodeSize);
+
+ siCloseAllOpenScopes();
+ }
+ }
+
+#endif // DEBUGGING_SUPPORT
+
+ genStackLevel -= savedStkLvl;
+
+ gcInfo.gcMarkRegSetNpt(gcrefRegs | byrefRegs);
+
+ if (!VarSetOps::Equal(compiler, compiler->compCurLife, block->bbLiveOut))
+ compiler->genChangeLife(block->bbLiveOut DEBUGARG(NULL));
+
+ /* Both stacks should always be empty on exit from a basic block */
+
+ noway_assert(genStackLevel == 0);
+#if FEATURE_STACK_FP_X87
+ noway_assert(genGetFPstkLevel() == 0);
+
+ // Do the FPState matching that may have to be done
+ genCodeForEndBlockTransitionStackFP(block);
+#endif
+
+ noway_assert(genFullPtrRegMap == false || gcInfo.gcPtrArgCnt == 0);
+
+ /* Do we need to generate a jump or return? */
+
+ switch (block->bbJumpKind)
+ {
+ case BBJ_ALWAYS:
+ inst_JMP(EJ_jmp, block->bbJumpDest);
+ break;
+
+ case BBJ_RETURN:
+ genExitCode(block);
+ break;
+
+ case BBJ_THROW:
+ // If we have a throw at the end of a function or funclet, we need to emit another instruction
+ // afterwards to help the OS unwinder determine the correct context during unwind.
+ // We insert an unexecuted breakpoint instruction in several situations
+ // following a throw instruction:
+ // 1. If the throw is the last instruction of the function or funclet. This helps
+ // the OS unwinder determine the correct context during an unwind from the
+ // thrown exception.
+            //    2. If this is the last block of the hot section.
+ // 3. If the subsequent block is a special throw block.
+ if ((block->bbNext == NULL)
+#if FEATURE_EH_FUNCLETS
+ || (block->bbNext->bbFlags & BBF_FUNCLET_BEG)
+#endif // FEATURE_EH_FUNCLETS
+ || (!isFramePointerUsed() && compiler->fgIsThrowHlpBlk(block->bbNext)) ||
+ block->bbNext == compiler->fgFirstColdBlock)
+ {
+ instGen(INS_BREAKPOINT); // This should never get executed
+ }
+
+ break;
+
+ case BBJ_CALLFINALLY:
+
+#if defined(_TARGET_X86_)
+
+ /* If we are about to invoke a finally locally from a try block,
+ we have to set the hidden slot corresponding to the finally's
+ nesting level. When invoked in response to an exception, the
+ EE usually does it.
+
+ We must have : BBJ_CALLFINALLY followed by a BBJ_ALWAYS.
+
+ This code depends on this order not being messed up.
+ We will emit :
+ mov [ebp-(n+1)],0
+ mov [ebp- n ],0xFC
+ push &step
+ jmp finallyBlock
+
+ step: mov [ebp- n ],0
+ jmp leaveTarget
+ leaveTarget:
+ */
+
+ noway_assert(isFramePointerUsed());
+
+ // Get the nesting level which contains the finally
+ compiler->fgGetNestingLevel(block, &finallyNesting);
+
+ // The last slot is reserved for ICodeManager::FixContext(ppEndRegion)
+ unsigned filterEndOffsetSlotOffs;
+ filterEndOffsetSlotOffs =
+ (unsigned)(compiler->lvaLclSize(compiler->lvaShadowSPslotsVar) - (sizeof(void*)));
+
+ unsigned curNestingSlotOffs;
+ curNestingSlotOffs = (unsigned)(filterEndOffsetSlotOffs - ((finallyNesting + 1) * sizeof(void*)));
+
+ // Zero out the slot for the next nesting level
+ instGen_Store_Imm_Into_Lcl(TYP_I_IMPL, EA_PTRSIZE, 0, compiler->lvaShadowSPslotsVar,
+ curNestingSlotOffs - sizeof(void*));
+
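+                // Mark the slot for the current nesting level as a locally invoked finally
+                // (the "mov [ebp- n ],0xFC" from the sequence above).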
+ instGen_Store_Imm_Into_Lcl(TYP_I_IMPL, EA_PTRSIZE, LCL_FINALLY_MARK, compiler->lvaShadowSPslotsVar,
+ curNestingSlotOffs);
+
+ // Now push the address of where the finally funclet should
+ // return to directly.
+ if (!(block->bbFlags & BBF_RETLESS_CALL))
+ {
+ assert(block->isBBCallAlwaysPair());
+ getEmitter()->emitIns_J(INS_push_hide, block->bbNext->bbJumpDest);
+ }
+ else
+ {
+                // The EE expects a DWORD, so we give it 0
+ inst_IV(INS_push_hide, 0);
+ }
+
+ // Jump to the finally BB
+ inst_JMP(EJ_jmp, block->bbJumpDest);
+
+#elif defined(_TARGET_ARM_)
+
+ // Now set REG_LR to the address of where the finally funclet should
+ // return to directly.
+
+ BasicBlock* bbFinallyRet;
+ bbFinallyRet = NULL;
+
+ // We don't have retless calls, since we use the BBJ_ALWAYS to point at a NOP pad where
+ // we would have otherwise created retless calls.
+ assert(block->isBBCallAlwaysPair());
+
+ assert(block->bbNext != NULL);
+ assert(block->bbNext->bbJumpKind == BBJ_ALWAYS);
+ assert(block->bbNext->bbJumpDest != NULL);
+ assert(block->bbNext->bbJumpDest->bbFlags & BBF_FINALLY_TARGET);
+
+ bbFinallyRet = block->bbNext->bbJumpDest;
+ bbFinallyRet->bbFlags |= BBF_JMP_TARGET;
+
+#if 0
+        // We don't know the address of the finally funclet yet. But adr requires that the offset
+        // to the finally funclet from the current IP be within 4095 bytes. So this code is disabled
+        // for now.
+ getEmitter()->emitIns_J_R (INS_adr,
+ EA_4BYTE,
+ bbFinallyRet,
+ REG_LR);
+#else // 0
+ // Load the address where the finally funclet should return into LR.
+ // The funclet prolog/epilog will do "push {lr}" / "pop {pc}" to do
+ // the return.
+ getEmitter()->emitIns_R_L(INS_movw, EA_4BYTE_DSP_RELOC, bbFinallyRet, REG_LR);
+ getEmitter()->emitIns_R_L(INS_movt, EA_4BYTE_DSP_RELOC, bbFinallyRet, REG_LR);
+ regTracker.rsTrackRegTrash(REG_LR);
+#endif // 0
+
+ // Jump to the finally BB
+ inst_JMP(EJ_jmp, block->bbJumpDest);
+#else
+ NYI("TARGET");
+#endif
+
+ // The BBJ_ALWAYS is used because the BBJ_CALLFINALLY can't point to the
+ // jump target using bbJumpDest - that is already used to point
+ // to the finally block. So just skip past the BBJ_ALWAYS unless the
+ // block is RETLESS.
+ if (!(block->bbFlags & BBF_RETLESS_CALL))
+ {
+ assert(block->isBBCallAlwaysPair());
+
+ lblk = block;
+ block = block->bbNext;
+ }
+ break;
+
+#ifdef _TARGET_ARM_
+
+ case BBJ_EHCATCHRET:
+ // set r0 to the address the VM should return to after the catch
+ getEmitter()->emitIns_R_L(INS_movw, EA_4BYTE_DSP_RELOC, block->bbJumpDest, REG_R0);
+ getEmitter()->emitIns_R_L(INS_movt, EA_4BYTE_DSP_RELOC, block->bbJumpDest, REG_R0);
+ regTracker.rsTrackRegTrash(REG_R0);
+
+ __fallthrough;
+
+ case BBJ_EHFINALLYRET:
+ case BBJ_EHFILTERRET:
+ genReserveFuncletEpilog(block);
+ break;
+
+#else // _TARGET_ARM_
+
+ case BBJ_EHFINALLYRET:
+ case BBJ_EHFILTERRET:
+ case BBJ_EHCATCHRET:
+ break;
+
+#endif // _TARGET_ARM_
+
+ case BBJ_NONE:
+ case BBJ_COND:
+ case BBJ_SWITCH:
+ break;
+
+ default:
+ noway_assert(!"Unexpected bbJumpKind");
+ break;
+ }
+
+#ifdef DEBUG
+ compiler->compCurBB = 0;
+#endif
+
+ } //------------------ END-FOR each block of the method -------------------
+
+ /* Nothing is live at this point */
+ genUpdateLife(VarSetOps::MakeEmpty(compiler));
+
+ /* Finalize the spill tracking logic */
+
+ regSet.rsSpillEnd();
+
+ /* Finalize the temp tracking logic */
+
+ compiler->tmpEnd();
+
+#ifdef DEBUG
+ if (compiler->verbose)
+ {
+ printf("\n# ");
+ printf("totalCostEx = %6d, totalCostSz = %5d ", totalCostEx, totalCostSz);
+ printf("%s\n", compiler->info.compFullName);
+ }
+#endif
+}
+#ifdef _PREFAST_
+#pragma warning(pop)
+#endif
+
+/*****************************************************************************
+ *
+ * Generate code for a long operation.
+ * needReg is a recommendation of which registers to use for the tree.
+ * For partially enregistered longs, the tree will be marked as GTF_REG_VAL
+ * without loading the stack part into a register. Note that only leaf
+ * nodes (or if gtEffectiveVal() == leaf node) may be marked as partially
+ * enregistered so that we can know the memory location of the other half.
+ */
+
+#ifdef _PREFAST_
+#pragma warning(push)
+#pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
+#endif
+void CodeGen::genCodeForTreeLng(GenTreePtr tree, regMaskTP needReg, regMaskTP avoidReg)
+{
+ genTreeOps oper;
+ unsigned kind;
+
+ regPairNo regPair = DUMMY_INIT(REG_PAIR_CORRUPT);
+ regMaskTP addrReg;
+ regNumber regLo;
+ regNumber regHi;
+
+ noway_assert(tree);
+ noway_assert(tree->gtOper != GT_STMT);
+ noway_assert(genActualType(tree->gtType) == TYP_LONG);
+
+ /* Figure out what kind of a node we have */
+
+ oper = tree->OperGet();
+ kind = tree->OperKind();
+
+ if (tree->gtFlags & GTF_REG_VAL)
+ {
+ REG_VAR_LONG:
+ regPair = tree->gtRegPair;
+
+ gcInfo.gcMarkRegSetNpt(genRegPairMask(regPair));
+
+ goto DONE;
+ }
+
+ /* Is this a constant node? */
+
+ if (kind & GTK_CONST)
+ {
+ __int64 lval;
+
+ /* Pick a register pair for the value */
+
+ regPair = regSet.rsPickRegPair(needReg);
+
+ /* Load the value into the registers */
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#if !CPU_HAS_FP_SUPPORT
+ if (oper == GT_CNS_DBL)
+ {
+ noway_assert(sizeof(__int64) == sizeof(double));
+
+ noway_assert(sizeof(tree->gtLngCon.gtLconVal) == sizeof(tree->gtDblCon.gtDconVal));
+
+ lval = *(__int64*)(&tree->gtDblCon.gtDconVal);
+ }
+ else
+#endif
+ {
+ noway_assert(oper == GT_CNS_LNG);
+
+ lval = tree->gtLngCon.gtLconVal;
+ }
+
+ genSetRegToIcon(genRegPairLo(regPair), int(lval));
+ genSetRegToIcon(genRegPairHi(regPair), int(lval >> 32));
+ goto DONE;
+ }
+
+ /* Is this a leaf node? */
+
+ if (kind & GTK_LEAF)
+ {
+ switch (oper)
+ {
+ case GT_LCL_VAR:
+
+#if REDUNDANT_LOAD
+
+                    /* This case has to handle an int64 LCL_VAR that may both be enregistered
+                     * and have a cached copy of itself in a different set of registers.
+ * We want to return the registers that have the most in common
+ * with the needReg mask
+ */
+
+ /* Does the var have a copy of itself in the cached registers?
+ * And are these cached registers both free?
+ * If so use these registers if they match any needReg.
+ */
+
+ regPair = regTracker.rsLclIsInRegPair(tree->gtLclVarCommon.gtLclNum);
+
+ if ((regPair != REG_PAIR_NONE) && ((regSet.rsRegMaskFree() & needReg) == needReg) &&
+ ((genRegPairMask(regPair) & needReg) != RBM_NONE))
+ {
+ goto DONE;
+ }
+
+ /* Does the variable live in a register?
+ * If so use these registers.
+ */
+ if (genMarkLclVar(tree))
+ goto REG_VAR_LONG;
+
+ /* If tree is not an enregistered variable then
+                     * be sure to use any cached register that contains
+ * a copy of this local variable
+ */
+ if (regPair != REG_PAIR_NONE)
+ {
+ goto DONE;
+ }
+#endif
+ goto MEM_LEAF;
+
+ case GT_LCL_FLD:
+
+ // We only use GT_LCL_FLD for lvDoNotEnregister vars, so we don't have
+ // to worry about it being enregistered.
+ noway_assert(compiler->lvaTable[tree->gtLclFld.gtLclNum].lvRegister == 0);
+ goto MEM_LEAF;
+
+ case GT_CLS_VAR:
+ MEM_LEAF:
+
+ /* Pick a register pair for the value */
+
+ regPair = regSet.rsPickRegPair(needReg);
+
+ /* Load the value into the registers */
+
+ instruction loadIns;
+
+ loadIns = ins_Load(TYP_INT); // INS_ldr
+ regLo = genRegPairLo(regPair);
+ regHi = genRegPairHi(regPair);
+
+#if CPU_LOAD_STORE_ARCH
+ {
+ regNumber regAddr = regSet.rsGrabReg(RBM_ALLINT);
+ inst_RV_TT(INS_lea, regAddr, tree, 0);
+ regTracker.rsTrackRegTrash(regAddr);
+
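+                    // The load order matters: if the address register aliases one of the
+                    // destination registers, load into the non-aliasing register first so the
+                    // address is not clobbered before both halves have been read.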
+ if (regLo != regAddr)
+ {
+ // assert(regLo != regAddr); // forced by if statement
+ getEmitter()->emitIns_R_R_I(loadIns, EA_4BYTE, regLo, regAddr, 0);
+ getEmitter()->emitIns_R_R_I(loadIns, EA_4BYTE, regHi, regAddr, 4);
+ }
+ else
+ {
+ // assert(regHi != regAddr); // implied by regpair property and the if statement
+ getEmitter()->emitIns_R_R_I(loadIns, EA_4BYTE, regHi, regAddr, 4);
+ getEmitter()->emitIns_R_R_I(loadIns, EA_4BYTE, regLo, regAddr, 0);
+ }
+ }
+#else
+ inst_RV_TT(loadIns, regLo, tree, 0);
+ inst_RV_TT(loadIns, regHi, tree, 4);
+#endif
+
+#ifdef _TARGET_ARM_
+ if ((oper == GT_CLS_VAR) && (tree->gtFlags & GTF_IND_VOLATILE))
+ {
+ // Emit a memory barrier instruction after the load
+ instGen_MemoryBarrier();
+ }
+#endif
+
+ regTracker.rsTrackRegTrash(regLo);
+ regTracker.rsTrackRegTrash(regHi);
+
+ goto DONE;
+
+ default:
+#ifdef DEBUG
+ compiler->gtDispTree(tree);
+#endif
+ noway_assert(!"unexpected leaf");
+ }
+ }
+
+ /* Is it a 'simple' unary/binary operator? */
+
+ if (kind & GTK_SMPOP)
+ {
+ instruction insLo;
+ instruction insHi;
+ bool doLo;
+ bool doHi;
+ bool setCarry = false;
+ int helper;
+
+ GenTreePtr op1 = tree->gtOp.gtOp1;
+ GenTreePtr op2 = tree->gtGetOp2();
+
+ switch (oper)
+ {
+ case GT_ASG:
+ {
+#ifdef DEBUGGING_SUPPORT
+ unsigned lclVarNum = compiler->lvaCount;
+ unsigned lclVarILoffs = DUMMY_INIT(0);
+#endif
+
+ /* Is the target a local ? */
+
+ if (op1->gtOper == GT_LCL_VAR)
+ {
+ unsigned varNum = op1->gtLclVarCommon.gtLclNum;
+ LclVarDsc* varDsc;
+
+ noway_assert(varNum < compiler->lvaCount);
+ varDsc = compiler->lvaTable + varNum;
+
+                    // No dead stores (though with min opts we may have dead stores)
+ noway_assert(!varDsc->lvTracked || compiler->opts.MinOpts() || !(op1->gtFlags & GTF_VAR_DEATH));
+
+#ifdef DEBUGGING_SUPPORT
+ /* For non-debuggable code, every definition of a lcl-var has
+ * to be checked to see if we need to open a new scope for it.
+ * Remember the local var info to call siCheckVarScope
+ * AFTER codegen of the assignment.
+ */
+ if (compiler->opts.compScopeInfo && !compiler->opts.compDbgCode &&
+ (compiler->info.compVarScopesCount > 0))
+ {
+ lclVarNum = varNum;
+ lclVarILoffs = op1->gtLclVar.gtLclILoffs;
+ }
+#endif
+
+ /* Has the variable been assigned to a register (pair) ? */
+
+ if (genMarkLclVar(op1))
+ {
+ noway_assert(op1->gtFlags & GTF_REG_VAL);
+ regPair = op1->gtRegPair;
+ regLo = genRegPairLo(regPair);
+ regHi = genRegPairHi(regPair);
+ noway_assert(regLo != regHi);
+
+ /* Is the value being assigned a constant? */
+
+ if (op2->gtOper == GT_CNS_LNG)
+ {
+ /* Move the value into the target */
+
+ genMakeRegPairAvailable(regPair);
+
+ instruction ins;
+ if (regLo == REG_STK)
+ {
+ ins = ins_Store(TYP_INT);
+ }
+ else
+ {
+                                // Always do the stack store first (if it needs to grab a register,
+                                // it can't clobber regLo this way)
+ if (regHi == REG_STK)
+ {
+ inst_TT_IV(ins_Store(TYP_INT), op1, (int)(op2->gtLngCon.gtLconVal >> 32), 4);
+ }
+ ins = INS_mov;
+ }
+ inst_TT_IV(ins, op1, (int)(op2->gtLngCon.gtLconVal), 0);
+
+ // The REG_STK case has already been handled
+ if (regHi != REG_STK)
+ {
+ ins = INS_mov;
+ inst_TT_IV(ins, op1, (int)(op2->gtLngCon.gtLconVal >> 32), 4);
+ }
+
+ goto DONE_ASSG_REGS;
+ }
+
+ /* Compute the RHS into desired register pair */
+
+ if (regHi != REG_STK)
+ {
+ genComputeRegPair(op2, regPair, avoidReg, RegSet::KEEP_REG);
+ noway_assert(op2->gtFlags & GTF_REG_VAL);
+ noway_assert(op2->gtRegPair == regPair);
+ }
+ else
+ {
+ regPairNo curPair;
+ regNumber curLo;
+ regNumber curHi;
+
+ genComputeRegPair(op2, REG_PAIR_NONE, avoidReg, RegSet::KEEP_REG);
+
+ noway_assert(op2->gtFlags & GTF_REG_VAL);
+
+ curPair = op2->gtRegPair;
+ curLo = genRegPairLo(curPair);
+ curHi = genRegPairHi(curPair);
+
+ /* move high first, target is on stack */
+ inst_TT_RV(ins_Store(TYP_INT), op1, curHi, 4);
+
+ if (regLo != curLo)
+ {
+ if ((regSet.rsMaskUsed & genRegMask(regLo)) && (regLo != curHi))
+ regSet.rsSpillReg(regLo);
+ inst_RV_RV(INS_mov, regLo, curLo, TYP_LONG);
+ regTracker.rsTrackRegCopy(regLo, curLo);
+ }
+ }
+
+ genReleaseRegPair(op2);
+ goto DONE_ASSG_REGS;
+ }
+ }
+
+ /* Is the value being assigned a constant? */
+
+ if (op2->gtOper == GT_CNS_LNG)
+ {
+ /* Make the target addressable */
+
+ addrReg = genMakeAddressable(op1, needReg, RegSet::KEEP_REG);
+
+ /* Move the value into the target */
+
+ inst_TT_IV(ins_Store(TYP_INT), op1, (int)(op2->gtLngCon.gtLconVal), 0);
+ inst_TT_IV(ins_Store(TYP_INT), op1, (int)(op2->gtLngCon.gtLconVal >> 32), 4);
+
+ genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);
+
+ goto LAsgExit;
+ }
+
+#if 0
+            /* Catch a case where we can avoid generating op reg, mem. Better pairing
+ * from
+ * mov regHi, mem
+ * op regHi, reg
+ *
+ * To avoid problems with order of evaluation, only do this if op2 is
+ * a non-enregistered local variable
+ */
+
+ if (GenTree::OperIsCommutative(oper) &&
+ op1->gtOper == GT_LCL_VAR &&
+ op2->gtOper == GT_LCL_VAR)
+ {
+ regPair = regTracker.rsLclIsInRegPair(op2->gtLclVarCommon.gtLclNum);
+
+ /* Is op2 a non-enregistered local variable? */
+ if (regPair == REG_PAIR_NONE)
+ {
+ regPair = regTracker.rsLclIsInRegPair(op1->gtLclVarCommon.gtLclNum);
+
+ /* Is op1 an enregistered local variable? */
+ if (regPair != REG_PAIR_NONE)
+ {
+ /* Swap the operands */
+ GenTreePtr op = op1;
+ op1 = op2;
+ op2 = op;
+ }
+ }
+ }
+#endif
+
+ /* Eliminate worthless assignment "lcl = lcl" */
+
+ if (op2->gtOper == GT_LCL_VAR && op1->gtOper == GT_LCL_VAR &&
+ op2->gtLclVarCommon.gtLclNum == op1->gtLclVarCommon.gtLclNum)
+ {
+ genUpdateLife(op2);
+ goto LAsgExit;
+ }
+
+ if (op2->gtOper == GT_CAST && TYP_ULONG == op2->CastToType() && op2->CastFromType() <= TYP_INT &&
+ // op1,op2 need to be materialized in the correct order.
+ (tree->gtFlags & GTF_REVERSE_OPS))
+ {
+ /* Generate the small RHS into a register pair */
+
+ GenTreePtr smallOpr = op2->gtOp.gtOp1;
+
+ genComputeReg(smallOpr, 0, RegSet::ANY_REG, RegSet::KEEP_REG);
+
+ /* Make the target addressable */
+
+ addrReg = genMakeAddressable(op1, 0, RegSet::KEEP_REG, true);
+
+ /* Make sure everything is still addressable */
+
+ genRecoverReg(smallOpr, 0, RegSet::KEEP_REG);
+ noway_assert(smallOpr->gtFlags & GTF_REG_VAL);
+ regHi = smallOpr->gtRegNum;
+ addrReg = genKeepAddressable(op1, addrReg, genRegMask(regHi));
+
+ // conv.ovf.u8 could overflow if the original number was negative
+ if (op2->gtOverflow())
+ {
+ noway_assert((op2->gtFlags & GTF_UNSIGNED) ==
+ 0); // conv.ovf.u8.un should be bashed to conv.u8.un
+ instGen_Compare_Reg_To_Zero(EA_4BYTE, regHi); // set flags
+ emitJumpKind jmpLTS = genJumpKindForOper(GT_LT, CK_SIGNED);
+ genJumpToThrowHlpBlk(jmpLTS, SCK_OVERFLOW);
+ }
+
+ /* Move the value into the target */
+
+ inst_TT_RV(ins_Store(TYP_INT), op1, regHi, 0);
+ inst_TT_IV(ins_Store(TYP_INT), op1, 0, 4); // Store 0 in hi-word
+
+ /* Free up anything that was tied up by either side */
+
+ genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);
+ genReleaseReg(smallOpr);
+
+#if REDUNDANT_LOAD
+ if (op1->gtOper == GT_LCL_VAR)
+ {
+ /* clear this local from reg table */
+ regTracker.rsTrashLclLong(op1->gtLclVarCommon.gtLclNum);
+
+ /* mark RHS registers as containing the local var */
+ regTracker.rsTrackRegLclVarLng(regHi, op1->gtLclVarCommon.gtLclNum, true);
+ }
+#endif
+ goto LAsgExit;
+ }
+
+ /* Is the LHS more complex than the RHS? */
+
+ if (tree->gtFlags & GTF_REVERSE_OPS)
+ {
+ /* Generate the RHS into a register pair */
+
+ genComputeRegPair(op2, REG_PAIR_NONE, avoidReg | op1->gtUsedRegs, RegSet::KEEP_REG);
+ noway_assert(op2->gtFlags & GTF_REG_VAL);
+
+ /* Make the target addressable */
+ op1 = genCodeForCommaTree(op1);
+ addrReg = genMakeAddressable(op1, 0, RegSet::KEEP_REG);
+
+ /* Make sure the RHS register hasn't been spilled */
+
+ genRecoverRegPair(op2, REG_PAIR_NONE, RegSet::KEEP_REG);
+ }
+ else
+ {
+ /* Make the target addressable */
+
+ op1 = genCodeForCommaTree(op1);
+ addrReg = genMakeAddressable(op1, RBM_ALLINT & ~op2->gtRsvdRegs, RegSet::KEEP_REG, true);
+
+ /* Generate the RHS into a register pair */
+
+ genComputeRegPair(op2, REG_PAIR_NONE, avoidReg, RegSet::KEEP_REG, false);
+ }
+
+ /* Lock 'op2' and make sure 'op1' is still addressable */
+
+ noway_assert(op2->gtFlags & GTF_REG_VAL);
+ regPair = op2->gtRegPair;
+
+ addrReg = genKeepAddressable(op1, addrReg, genRegPairMask(regPair));
+
+ /* Move the value into the target */
+
+ inst_TT_RV(ins_Store(TYP_INT), op1, genRegPairLo(regPair), 0);
+ inst_TT_RV(ins_Store(TYP_INT), op1, genRegPairHi(regPair), 4);
+
+ /* Free up anything that was tied up by either side */
+
+ genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);
+ genReleaseRegPair(op2);
+
+ DONE_ASSG_REGS:
+
+#if REDUNDANT_LOAD
+
+ if (op1->gtOper == GT_LCL_VAR)
+ {
+ /* Clear this local from reg table */
+
+ regTracker.rsTrashLclLong(op1->gtLclVarCommon.gtLclNum);
+
+ if ((op2->gtFlags & GTF_REG_VAL) &&
+ /* constant has precedence over local */
+ // rsRegValues[op2->gtRegNum].rvdKind != RV_INT_CNS &&
+ tree->gtOper == GT_ASG)
+ {
+ regNumber regNo;
+
+ /* mark RHS registers as containing the local var */
+
+ regNo = genRegPairLo(op2->gtRegPair);
+ if (regNo != REG_STK)
+ regTracker.rsTrackRegLclVarLng(regNo, op1->gtLclVarCommon.gtLclNum, true);
+
+ regNo = genRegPairHi(op2->gtRegPair);
+ if (regNo != REG_STK)
+ {
+ /* For partially enregistered longs, we might have
+ stomped on op2's hiReg */
+ if (!(op1->gtFlags & GTF_REG_VAL) || regNo != genRegPairLo(op1->gtRegPair))
+ {
+ regTracker.rsTrackRegLclVarLng(regNo, op1->gtLclVarCommon.gtLclNum, false);
+ }
+ }
+ }
+ }
+#endif
+
+ LAsgExit:
+
+ genUpdateLife(op1);
+ genUpdateLife(tree);
+
+#ifdef DEBUGGING_SUPPORT
+ /* For non-debuggable code, every definition of a lcl-var has
+ * to be checked to see if we need to open a new scope for it.
+ */
+ if (lclVarNum < compiler->lvaCount)
+ siCheckVarScope(lclVarNum, lclVarILoffs);
+#endif
+ }
+ return;
+
+ case GT_SUB:
+ insLo = INS_sub;
+ insHi = INS_SUBC;
+ setCarry = true;
+ goto BINOP_OVF;
+ case GT_ADD:
+ insLo = INS_add;
+ insHi = INS_ADDC;
+ setCarry = true;
+ goto BINOP_OVF;
+
+ bool ovfl;
+
+ BINOP_OVF:
+ ovfl = tree->gtOverflow();
+ goto _BINOP;
+
+ case GT_AND:
+ insLo = insHi = INS_AND;
+ goto BINOP;
+ case GT_OR:
+ insLo = insHi = INS_OR;
+ goto BINOP;
+ case GT_XOR:
+ insLo = insHi = INS_XOR;
+ goto BINOP;
+
+ BINOP:
+ ovfl = false;
+ goto _BINOP;
+
+ _BINOP:
+
+ /* The following makes an assumption about gtSetEvalOrder(this) */
+
+ noway_assert((tree->gtFlags & GTF_REVERSE_OPS) == 0);
+
+ /* Special case: check for "(long(intval) << 32) | longval" */
+
+ if (oper == GT_OR && op1->gtOper == GT_LSH)
+ {
+ GenTreePtr lshLHS = op1->gtOp.gtOp1;
+ GenTreePtr lshRHS = op1->gtOp.gtOp2;
+
+ if (lshLHS->gtOper == GT_CAST && lshRHS->gtOper == GT_CNS_INT && lshRHS->gtIntCon.gtIconVal == 32 &&
+ genTypeSize(TYP_INT) == genTypeSize(lshLHS->CastFromType()))
+ {
+
+ /* Throw away the cast of the shift operand. */
+
+ op1 = lshLHS->gtCast.CastOp();
+
+ /* Special case: check op2 for "ulong(intval)" */
+ if ((op2->gtOper == GT_CAST) && (op2->CastToType() == TYP_ULONG) &&
+ genTypeSize(TYP_INT) == genTypeSize(op2->CastFromType()))
+ {
+ /* Throw away the cast of the second operand. */
+
+ op2 = op2->gtCast.CastOp();
+ goto SIMPLE_OR_LONG;
+ }
+ /* Special case: check op2 for "long(intval) & 0xFFFFFFFF" */
+ else if (op2->gtOper == GT_AND)
+ {
+ GenTreePtr andLHS;
+ andLHS = op2->gtOp.gtOp1;
+ GenTreePtr andRHS;
+ andRHS = op2->gtOp.gtOp2;
+
+ if (andLHS->gtOper == GT_CAST && andRHS->gtOper == GT_CNS_LNG &&
+ andRHS->gtLngCon.gtLconVal == 0x00000000FFFFFFFF &&
+ genTypeSize(TYP_INT) == genTypeSize(andLHS->CastFromType()))
+ {
+ /* Throw away the cast of the second operand. */
+
+ op2 = andLHS->gtCast.CastOp();
+
+ SIMPLE_OR_LONG:
+ // Load the high DWORD, ie. op1
+
+ genCodeForTree(op1, needReg & ~op2->gtRsvdRegs);
+
+ noway_assert(op1->gtFlags & GTF_REG_VAL);
+ regHi = op1->gtRegNum;
+ regSet.rsMarkRegUsed(op1);
+
+ // Load the low DWORD, ie. op2
+
+ genCodeForTree(op2, needReg & ~genRegMask(regHi));
+
+ noway_assert(op2->gtFlags & GTF_REG_VAL);
+ regLo = op2->gtRegNum;
+
+ /* Make sure regHi is still around. Also, force
+ regLo to be excluded in case regLo==regHi */
+
+ genRecoverReg(op1, ~genRegMask(regLo), RegSet::FREE_REG);
+ regHi = op1->gtRegNum;
+
+ regPair = gen2regs2pair(regLo, regHi);
+ goto DONE;
+ }
+ }
+
+ /* Generate the following sequence:
+ Prepare op1 (discarding shift)
+ Compute op2 into some regpair
+ OR regpairhi, op1
+ */
+
+ /* First, make op1 addressable */
+
+ /* tempReg must avoid both needReg, op2->RsvdRegs and regSet.rsMaskResvd.
+
+ It appears incorrect to exclude needReg as we are not ensuring that the reg pair into
+ which the long value is computed is from needReg. But at this point the safest fix is
+ to exclude regSet.rsMaskResvd.
+
+                       Note that needReg could be the set of free registers (excluding reserved ones). If we don't
+                       exclude regSet.rsMaskResvd, the expression below would try to choose a reg from the
+                       reserved set, which is bound to fail. To prevent that we avoid regSet.rsMaskResvd.
+ */
+ regMaskTP tempReg = RBM_ALLINT & ~needReg & ~op2->gtRsvdRegs & ~avoidReg & ~regSet.rsMaskResvd;
+
+ addrReg = genMakeAddressable(op1, tempReg, RegSet::KEEP_REG);
+
+ genCompIntoFreeRegPair(op2, avoidReg, RegSet::KEEP_REG);
+
+ noway_assert(op2->gtFlags & GTF_REG_VAL);
+ regPair = op2->gtRegPair;
+ regHi = genRegPairHi(regPair);
+
+ /* The operand might have interfered with the address */
+
+ addrReg = genKeepAddressable(op1, addrReg, genRegPairMask(regPair));
+
+ /* Now compute the result */
+
+ inst_RV_TT(insHi, regHi, op1, 0);
+
+ regTracker.rsTrackRegTrash(regHi);
+
+ /* Free up anything that was tied up by the LHS */
+
+ genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);
+
+ /* The result is where the second operand is sitting */
+
+ genRecoverRegPair(op2, REG_PAIR_NONE, RegSet::FREE_REG);
+
+ regPair = op2->gtRegPair;
+ goto DONE;
+ }
+ }
+
+ /* Special case: check for "longval | (long(intval) << 32)" */
+
+ if (oper == GT_OR && op2->gtOper == GT_LSH)
+ {
+ GenTreePtr lshLHS = op2->gtOp.gtOp1;
+ GenTreePtr lshRHS = op2->gtOp.gtOp2;
+
+ if (lshLHS->gtOper == GT_CAST && lshRHS->gtOper == GT_CNS_INT && lshRHS->gtIntCon.gtIconVal == 32 &&
+ genTypeSize(TYP_INT) == genTypeSize(lshLHS->CastFromType()))
+
+ {
+ /* We throw away the cast of the shift operand. */
+
+ op2 = lshLHS->gtCast.CastOp();
+
+ /* Special case: check op1 for "long(intval) & 0xFFFFFFFF" */
+
+ if (op1->gtOper == GT_AND)
+ {
+ GenTreePtr andLHS = op1->gtOp.gtOp1;
+ GenTreePtr andRHS = op1->gtOp.gtOp2;
+
+ if (andLHS->gtOper == GT_CAST && andRHS->gtOper == GT_CNS_LNG &&
+ andRHS->gtLngCon.gtLconVal == 0x00000000FFFFFFFF &&
+ genTypeSize(TYP_INT) == genTypeSize(andLHS->CastFromType()))
+ {
+ /* Throw away the cast of the first operand. */
+
+ op1 = andLHS->gtCast.CastOp();
+
+ // Load the low DWORD, ie. op1
+
+ genCodeForTree(op1, needReg & ~op2->gtRsvdRegs);
+
+ noway_assert(op1->gtFlags & GTF_REG_VAL);
+ regLo = op1->gtRegNum;
+ regSet.rsMarkRegUsed(op1);
+
+ // Load the high DWORD, ie. op2
+
+ genCodeForTree(op2, needReg & ~genRegMask(regLo));
+
+ noway_assert(op2->gtFlags & GTF_REG_VAL);
+ regHi = op2->gtRegNum;
+
+ /* Make sure regLo is still around. Also, force
+ regHi to be excluded in case regLo==regHi */
+
+ genRecoverReg(op1, ~genRegMask(regHi), RegSet::FREE_REG);
+ regLo = op1->gtRegNum;
+
+ regPair = gen2regs2pair(regLo, regHi);
+ goto DONE;
+ }
+ }
+
+ /* Generate the following sequence:
+ Compute op1 into some regpair
+ Make op2 (ignoring shift) addressable
+ OR regPairHi, op2
+ */
+
+ // First, generate the first operand into some register
+
+ genCompIntoFreeRegPair(op1, avoidReg | op2->gtRsvdRegs, RegSet::KEEP_REG);
+ noway_assert(op1->gtFlags & GTF_REG_VAL);
+
+ /* Make the second operand addressable */
+
+ addrReg = genMakeAddressable(op2, needReg, RegSet::KEEP_REG);
+
+ /* Make sure the result is in a free register pair */
+
+ genRecoverRegPair(op1, REG_PAIR_NONE, RegSet::KEEP_REG);
+ regPair = op1->gtRegPair;
+ regHi = genRegPairHi(regPair);
+
+ /* The operand might have interfered with the address */
+
+ addrReg = genKeepAddressable(op2, addrReg, genRegPairMask(regPair));
+
+ /* Compute the new value */
+
+ inst_RV_TT(insHi, regHi, op2, 0);
+
+ /* The value in the high register has been trashed */
+
+ regTracker.rsTrackRegTrash(regHi);
+
+ goto DONE_OR;
+ }
+ }
+
+ /* Generate the first operand into registers */
+
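+            // If the caller asked for a specific register pair (exactly two bits in needReg) that is
+            // currently free, that op2 does not reserve, and the tree contains no assignment, compute
+            // op1 directly into that pair; otherwise let any free pair be picked.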
+ if ((genCountBits(needReg) == 2) && ((regSet.rsRegMaskFree() & needReg) == needReg) &&
+ ((op2->gtRsvdRegs & needReg) == RBM_NONE) && (!(tree->gtFlags & GTF_ASG)))
+ {
+ regPair = regSet.rsPickRegPair(needReg);
+ genComputeRegPair(op1, regPair, avoidReg | op2->gtRsvdRegs, RegSet::KEEP_REG);
+ }
+ else
+ {
+ genCompIntoFreeRegPair(op1, avoidReg | op2->gtRsvdRegs, RegSet::KEEP_REG);
+ }
+ noway_assert(op1->gtFlags & GTF_REG_VAL);
+ regMaskTP op1Mask;
+ regPair = op1->gtRegPair;
+ op1Mask = genRegPairMask(regPair);
+
+ /* Make the second operand addressable */
+ regMaskTP needReg2;
+ needReg2 = regSet.rsNarrowHint(needReg, ~op1Mask);
+ addrReg = genMakeAddressable(op2, needReg2, RegSet::KEEP_REG);
+
+ // TODO: If 'op1' got spilled and 'op2' happens to be
+ // TODO: in a register, and we have add/mul/and/or/xor,
+ // TODO: reverse the operands since we can perform the
+ // TODO: operation directly with the spill temp, e.g.
+ // TODO: 'add regHi, [temp]'.
+
+ /* Make sure the result is in a free register pair */
+
+ genRecoverRegPair(op1, REG_PAIR_NONE, RegSet::KEEP_REG);
+ regPair = op1->gtRegPair;
+ op1Mask = genRegPairMask(regPair);
+
+ regLo = genRegPairLo(regPair);
+ regHi = genRegPairHi(regPair);
+
+ /* Make sure that we don't spill regLo/regHi below */
+ regSet.rsLockUsedReg(op1Mask);
+
+ /* The operand might have interfered with the address */
+
+ addrReg = genKeepAddressable(op2, addrReg);
+
+ /* The value in the register pair is about to be trashed */
+
+ regTracker.rsTrackRegTrash(regLo);
+ regTracker.rsTrackRegTrash(regHi);
+
+ /* Compute the new value */
+
+ doLo = true;
+ doHi = true;
+
+ if (op2->gtOper == GT_CNS_LNG)
+ {
+ __int64 icon = op2->gtLngCon.gtLconVal;
+
+ /* Check for "(op1 AND -1)" and "(op1 [X]OR 0)" */
+
+ switch (oper)
+ {
+ case GT_AND:
+ if ((int)(icon) == -1)
+ doLo = false;
+ if ((int)(icon >> 32) == -1)
+ doHi = false;
+
+ if (!(icon & I64(0x00000000FFFFFFFF)))
+ {
+ genSetRegToIcon(regLo, 0);
+ doLo = false;
+ }
+
+ if (!(icon & I64(0xFFFFFFFF00000000)))
+ {
+ /* Just to always set low first*/
+
+ if (doLo)
+ {
+ inst_RV_TT(insLo, regLo, op2, 0);
+ doLo = false;
+ }
+ genSetRegToIcon(regHi, 0);
+ doHi = false;
+ }
+
+ break;
+
+ case GT_OR:
+ case GT_XOR:
+ if (!(icon & I64(0x00000000FFFFFFFF)))
+ doLo = false;
+ if (!(icon & I64(0xFFFFFFFF00000000)))
+ doHi = false;
+ break;
+ default:
+ break;
+ }
+ }
+
+ // Fix 383813 X86/ARM ILGEN
+ // Fix 383793 ARM ILGEN
+ // Fix 383911 ARM ILGEN
+ regMaskTP newMask;
+ newMask = addrReg & ~op1Mask;
+ regSet.rsLockUsedReg(newMask);
+
+ if (doLo)
+ {
+ insFlags flagsLo = setCarry ? INS_FLAGS_SET : INS_FLAGS_DONT_CARE;
+ inst_RV_TT(insLo, regLo, op2, 0, EA_4BYTE, flagsLo);
+ }
+ if (doHi)
+ {
+ insFlags flagsHi = ovfl ? INS_FLAGS_SET : INS_FLAGS_DONT_CARE;
+ inst_RV_TT(insHi, regHi, op2, 4, EA_4BYTE, flagsHi);
+ }
+
+ regSet.rsUnlockUsedReg(newMask);
+ regSet.rsUnlockUsedReg(op1Mask);
+
+ DONE_OR:
+
+                /* Free up anything that was tied up by the second operand */
+
+ genDoneAddressable(op2, addrReg, RegSet::KEEP_REG);
+
+ /* The result is where the first operand is sitting */
+
+ genRecoverRegPair(op1, REG_PAIR_NONE, RegSet::FREE_REG);
+
+ regPair = op1->gtRegPair;
+
+ if (ovfl)
+ genCheckOverflow(tree);
+
+ goto DONE;
+
+ case GT_UMOD:
+
+ regPair = genCodeForLongModInt(tree, needReg);
+ goto DONE;
+
+ case GT_MUL:
+
+ /* Special case: both operands promoted from int */
+
+ assert(tree->gtIsValid64RsltMul());
+
+ /* Change to an integer multiply temporarily */
+
+ tree->gtType = TYP_INT;
+
+ noway_assert(op1->gtOper == GT_CAST && op2->gtOper == GT_CAST);
+ tree->gtOp.gtOp1 = op1->gtCast.CastOp();
+ tree->gtOp.gtOp2 = op2->gtCast.CastOp();
+
+ assert(tree->gtFlags & GTF_MUL_64RSLT);
+
+#if defined(_TARGET_X86_)
+ // imul on x86 requires EDX:EAX
+ genComputeReg(tree, (RBM_EAX | RBM_EDX), RegSet::EXACT_REG, RegSet::FREE_REG);
+ noway_assert(tree->gtFlags & GTF_REG_VAL);
+ noway_assert(tree->gtRegNum == REG_EAX); // Also REG_EDX is setup with hi 32-bits
+#elif defined(_TARGET_ARM_)
+ genComputeReg(tree, needReg, RegSet::ANY_REG, RegSet::FREE_REG);
+ noway_assert(tree->gtFlags & GTF_REG_VAL);
+#else
+ assert(!"Unsupported target for 64-bit multiply codegen");
+#endif
+
+ /* Restore gtType, op1 and op2 from the change above */
+
+ tree->gtType = TYP_LONG;
+ tree->gtOp.gtOp1 = op1;
+ tree->gtOp.gtOp2 = op2;
+
+#if defined(_TARGET_X86_)
+ /* The result is now in EDX:EAX */
+ regPair = REG_PAIR_EAXEDX;
+#elif defined(_TARGET_ARM_)
+ regPair = tree->gtRegPair;
+#endif
+ goto DONE;
+
+ case GT_LSH:
+ helper = CORINFO_HELP_LLSH;
+ goto SHIFT;
+ case GT_RSH:
+ helper = CORINFO_HELP_LRSH;
+ goto SHIFT;
+ case GT_RSZ:
+ helper = CORINFO_HELP_LRSZ;
+ goto SHIFT;
+
+ SHIFT:
+
+ noway_assert(op1->gtType == TYP_LONG);
+ noway_assert(genActualType(op2->gtType) == TYP_INT);
+
+ /* Is the second operand a constant? */
+
+ if (op2->gtOper == GT_CNS_INT)
+ {
+ unsigned int count = op2->gtIntCon.gtIconVal;
+
+ /* Compute the left operand into a free register pair */
+
+ genCompIntoFreeRegPair(op1, avoidReg | op2->gtRsvdRegs, RegSet::FREE_REG);
+ noway_assert(op1->gtFlags & GTF_REG_VAL);
+
+ regPair = op1->gtRegPair;
+ regLo = genRegPairLo(regPair);
+ regHi = genRegPairHi(regPair);
+
+ /* Assume the value in the register pair is trashed. In some cases, though,
+ a register might be set to zero, and we can use that information to improve
+ some code generation.
+ */
+
+ regTracker.rsTrackRegTrash(regLo);
+ regTracker.rsTrackRegTrash(regHi);
+
+ /* Generate the appropriate shift instructions */
+
+ switch (oper)
+ {
+ case GT_LSH:
+ if (count == 0)
+ {
+ // regHi, regLo are correct
+ }
+ else if (count < 32)
+ {
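+                                // Net effect for 0 < count < 32:
+                                //   regHi = (regHi << count) | (regLo >> (32 - count))
+                                //   regLo = regLo << count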
+#if defined(_TARGET_XARCH_)
+ inst_RV_RV_IV(INS_shld, EA_4BYTE, regHi, regLo, count);
+#elif defined(_TARGET_ARM_)
+ inst_RV_SH(INS_SHIFT_LEFT_LOGICAL, EA_4BYTE, regHi, count);
+ getEmitter()->emitIns_R_R_R_I(INS_OR, EA_4BYTE, regHi, regHi, regLo, 32 - count,
+ INS_FLAGS_DONT_CARE, INS_OPTS_LSR);
+#else // _TARGET_*
+ NYI("INS_shld");
+#endif // _TARGET_*
+ inst_RV_SH(INS_SHIFT_LEFT_LOGICAL, EA_4BYTE, regLo, count);
+ }
+ else // count >= 32
+ {
+ assert(count >= 32);
+ if (count < 64)
+ {
+#if defined(_TARGET_ARM_)
+ if (count == 32)
+ {
+ // mov low dword into high dword (i.e. shift left by 32-bits)
+ inst_RV_RV(INS_mov, regHi, regLo);
+ }
+ else
+ {
+ assert(count > 32 && count < 64);
+ getEmitter()->emitIns_R_R_I(INS_SHIFT_LEFT_LOGICAL, EA_4BYTE, regHi, regLo,
+ count - 32);
+ }
+#else // _TARGET_*
+ // mov low dword into high dword (i.e. shift left by 32-bits)
+ inst_RV_RV(INS_mov, regHi, regLo);
+ if (count > 32)
+ {
+ // Shift high dword left by count - 32
+ inst_RV_SH(INS_SHIFT_LEFT_LOGICAL, EA_4BYTE, regHi, count - 32);
+ }
+#endif // _TARGET_*
+ }
+ else // count >= 64
+ {
+ assert(count >= 64);
+ genSetRegToIcon(regHi, 0);
+ }
+ genSetRegToIcon(regLo, 0);
+ }
+ break;
+
+ case GT_RSH:
+ if (count == 0)
+ {
+ // regHi, regLo are correct
+ }
+ else if (count < 32)
+ {
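+                            // Net effect for 0 < count < 32:
+                            //   regLo = (regHi << (32 - count)) | (regLo >> count)
+                            //   regHi = regHi >> count   (arithmetic shift, preserving the sign)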
+#if defined(_TARGET_XARCH_)
+ inst_RV_RV_IV(INS_shrd, EA_4BYTE, regLo, regHi, count);
+#elif defined(_TARGET_ARM_)
+ inst_RV_SH(INS_SHIFT_RIGHT_LOGICAL, EA_4BYTE, regLo, count);
+ getEmitter()->emitIns_R_R_R_I(INS_OR, EA_4BYTE, regLo, regLo, regHi, 32 - count,
+ INS_FLAGS_DONT_CARE, INS_OPTS_LSL);
+#else // _TARGET_*
+ NYI("INS_shrd");
+#endif // _TARGET_*
+ inst_RV_SH(INS_SHIFT_RIGHT_ARITHM, EA_4BYTE, regHi, count);
+ }
+ else // count >= 32
+ {
+ assert(count >= 32);
+ if (count < 64)
+ {
+#if defined(_TARGET_ARM_)
+ if (count == 32)
+ {
+ // mov high dword into low dword (i.e. shift right by 32-bits)
+ inst_RV_RV(INS_mov, regLo, regHi);
+ }
+ else
+ {
+ assert(count > 32 && count < 64);
+ getEmitter()->emitIns_R_R_I(INS_SHIFT_RIGHT_ARITHM, EA_4BYTE, regLo, regHi,
+ count - 32);
+ }
+#else // _TARGET_*
+ // mov high dword into low dword (i.e. shift right by 32-bits)
+ inst_RV_RV(INS_mov, regLo, regHi);
+ if (count > 32)
+ {
+ // Shift low dword right by count - 32
+ inst_RV_SH(INS_SHIFT_RIGHT_ARITHM, EA_4BYTE, regLo, count - 32);
+ }
+#endif // _TARGET_*
+ }
+
+ // Propagate sign bit in high dword
+ inst_RV_SH(INS_SHIFT_RIGHT_ARITHM, EA_4BYTE, regHi, 31);
+
+ if (count >= 64)
+ {
+ // Propagate the sign from the high dword
+ inst_RV_RV(INS_mov, regLo, regHi, TYP_INT);
+ }
+ }
+ break;
+
+ case GT_RSZ:
+ if (count == 0)
+ {
+ // regHi, regLo are correct
+ }
+ else if (count < 32)
+ {
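+                            // Net effect for 0 < count < 32:
+                            //   regLo = (regHi << (32 - count)) | (regLo >> count)
+                            //   regHi = regHi >> count   (logical shift, filling with zeroes)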
+#if defined(_TARGET_XARCH_)
+ inst_RV_RV_IV(INS_shrd, EA_4BYTE, regLo, regHi, count);
+#elif defined(_TARGET_ARM_)
+ inst_RV_SH(INS_SHIFT_RIGHT_LOGICAL, EA_4BYTE, regLo, count);
+ getEmitter()->emitIns_R_R_R_I(INS_OR, EA_4BYTE, regLo, regLo, regHi, 32 - count,
+ INS_FLAGS_DONT_CARE, INS_OPTS_LSL);
+#else // _TARGET_*
+ NYI("INS_shrd");
+#endif // _TARGET_*
+ inst_RV_SH(INS_SHIFT_RIGHT_LOGICAL, EA_4BYTE, regHi, count);
+ }
+ else // count >= 32
+ {
+ assert(count >= 32);
+ if (count < 64)
+ {
+#if defined(_TARGET_ARM_)
+ if (count == 32)
+ {
+ // mov high dword into low dword (i.e. shift right by 32-bits)
+ inst_RV_RV(INS_mov, regLo, regHi);
+ }
+ else
+ {
+ assert(count > 32 && count < 64);
+ getEmitter()->emitIns_R_R_I(INS_SHIFT_RIGHT_LOGICAL, EA_4BYTE, regLo, regHi,
+ count - 32);
+ }
+#else // _TARGET_*
+ // mov high dword into low dword (i.e. shift right by 32-bits)
+ inst_RV_RV(INS_mov, regLo, regHi);
+ if (count > 32)
+ {
+ // Shift low dword right by count - 32
+ inst_RV_SH(INS_SHIFT_RIGHT_LOGICAL, EA_4BYTE, regLo, count - 32);
+ }
+#endif // _TARGET_*
+ }
+ else // count >= 64
+ {
+ assert(count >= 64);
+ genSetRegToIcon(regLo, 0);
+ }
+ genSetRegToIcon(regHi, 0);
+ }
+ break;
+
+ default:
+ noway_assert(!"Illegal oper for long shift");
+ break;
+ }
+
+ goto DONE_SHF;
+ }
+
+ /* Which operand are we supposed to compute first? */
+
+ assert((RBM_SHIFT_LNG & RBM_LNGARG_0) == 0);
+
+ if (tree->gtFlags & GTF_REVERSE_OPS)
+ {
+ /* The second operand can't be a constant */
+
+ noway_assert(op2->gtOper != GT_CNS_INT);
+
+ /* Load the shift count, hopefully into RBM_SHIFT */
+ RegSet::ExactReg exactReg;
+ if ((RBM_SHIFT_LNG & op1->gtRsvdRegs) == 0)
+ exactReg = RegSet::EXACT_REG;
+ else
+ exactReg = RegSet::ANY_REG;
+ genComputeReg(op2, RBM_SHIFT_LNG, exactReg, RegSet::KEEP_REG);
+
+ /* Compute the left operand into REG_LNGARG_0 */
+
+ genComputeRegPair(op1, REG_LNGARG_0, avoidReg, RegSet::KEEP_REG, false);
+ noway_assert(op1->gtFlags & GTF_REG_VAL);
+
+ /* Lock op1 so that it doesn't get trashed */
+
+ regSet.rsLockUsedReg(RBM_LNGARG_0);
+
+ /* Make sure the shift count wasn't displaced */
+
+ genRecoverReg(op2, RBM_SHIFT_LNG, RegSet::KEEP_REG);
+
+ /* Lock op2 */
+
+ regSet.rsLockUsedReg(RBM_SHIFT_LNG);
+ }
+ else
+ {
+ /* Compute the left operand into REG_LNGARG_0 */
+
+ genComputeRegPair(op1, REG_LNGARG_0, avoidReg, RegSet::KEEP_REG, false);
+ noway_assert(op1->gtFlags & GTF_REG_VAL);
+
+ /* Compute the shift count into RBM_SHIFT */
+
+ genComputeReg(op2, RBM_SHIFT_LNG, RegSet::EXACT_REG, RegSet::KEEP_REG);
+
+ /* Lock op2 */
+
+ regSet.rsLockUsedReg(RBM_SHIFT_LNG);
+
+ /* Make sure the value hasn't been displaced */
+
+ genRecoverRegPair(op1, REG_LNGARG_0, RegSet::KEEP_REG);
+
+ /* Lock op1 so that it doesn't get trashed */
+
+ regSet.rsLockUsedReg(RBM_LNGARG_0);
+ }
+
+#ifndef _TARGET_X86_
+ /* The generic helper is a C-routine and so it follows the full ABI */
+ {
+                    /* Spill any caller-saved (callee-trash) registers which are being used */
+ regMaskTP spillRegs = RBM_CALLEE_TRASH & regSet.rsMaskUsed;
+
+ /* But do not spill our argument registers. */
+ spillRegs &= ~(RBM_LNGARG_0 | RBM_SHIFT_LNG);
+
+ if (spillRegs)
+ {
+ regSet.rsSpillRegs(spillRegs);
+ }
+ }
+#endif // !_TARGET_X86_
+
+ /* Perform the shift by calling a helper function */
+
+ noway_assert(op1->gtRegPair == REG_LNGARG_0);
+ noway_assert(op2->gtRegNum == REG_SHIFT_LNG);
+ noway_assert((regSet.rsMaskLock & (RBM_LNGARG_0 | RBM_SHIFT_LNG)) == (RBM_LNGARG_0 | RBM_SHIFT_LNG));
+
+ genEmitHelperCall(helper,
+ 0, // argSize
+ EA_8BYTE); // retSize
+
+#ifdef _TARGET_X86_
+ /* The value in the register pair is trashed */
+
+ regTracker.rsTrackRegTrash(genRegPairLo(REG_LNGARG_0));
+ regTracker.rsTrackRegTrash(genRegPairHi(REG_LNGARG_0));
+#else // _TARGET_X86_
+ /* The generic helper is a C-routine and so it follows the full ABI */
+ regTracker.rsTrackRegMaskTrash(RBM_CALLEE_TRASH);
+#endif // _TARGET_X86_
+
+ /* Release both operands */
+
+ regSet.rsUnlockUsedReg(RBM_LNGARG_0 | RBM_SHIFT_LNG);
+ genReleaseRegPair(op1);
+ genReleaseReg(op2);
+
+ DONE_SHF:
+
+ noway_assert(op1->gtFlags & GTF_REG_VAL);
+ regPair = op1->gtRegPair;
+ goto DONE;
+
+ case GT_NEG:
+ case GT_NOT:
+
+ /* Generate the operand into some register pair */
+
+ genCompIntoFreeRegPair(op1, avoidReg, RegSet::FREE_REG);
+ noway_assert(op1->gtFlags & GTF_REG_VAL);
+
+ regPair = op1->gtRegPair;
+
+ /* Figure out which registers the value is in */
+
+ regLo = genRegPairLo(regPair);
+ regHi = genRegPairHi(regPair);
+
+ /* The value in the register pair is about to be trashed */
+
+ regTracker.rsTrackRegTrash(regLo);
+ regTracker.rsTrackRegTrash(regHi);
+
+ /* Unary "neg": negate the value in the register pair */
+ if (oper == GT_NEG)
+ {
+#ifdef _TARGET_ARM_
+
+ // ARM doesn't have an opcode that sets the carry bit like
+ // x86, so we can't use neg/addc/neg. Instead we use subtract
+ // with carry. Too bad this uses an extra register.
+
+ // Lock regLo and regHi so we don't pick them, and then pick
+ // a third register to be our 0.
+ regMaskTP regPairMask = genRegMask(regLo) | genRegMask(regHi);
+ regSet.rsLockReg(regPairMask);
+ regMaskTP regBest = RBM_ALLINT & ~avoidReg;
+ regNumber regZero = genGetRegSetToIcon(0, regBest);
+ regSet.rsUnlockReg(regPairMask);
+
+ inst_RV_IV(INS_rsb, regLo, 0, EA_4BYTE, INS_FLAGS_SET);
+ getEmitter()->emitIns_R_R_R_I(INS_sbc, EA_4BYTE, regHi, regZero, regHi, 0);
+
+#elif defined(_TARGET_XARCH_)
+
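+                    // Two's-complement negation of the 64-bit pair: 'neg regLo' sets the carry
+                    // flag iff regLo was non-zero, 'adc regHi, 0' adds that borrow into regHi,
+                    // and the final 'neg regHi' yields regHi = -(regHi + borrow).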
+ inst_RV(INS_NEG, regLo, TYP_LONG);
+ inst_RV_IV(INS_ADDC, regHi, 0, emitActualTypeSize(TYP_LONG));
+ inst_RV(INS_NEG, regHi, TYP_LONG);
+#else
+ NYI("GT_NEG on TYP_LONG");
+#endif
+ }
+ else
+ {
+ /* Unary "not": flip all the bits in the register pair */
+
+ inst_RV(INS_NOT, regLo, TYP_LONG);
+ inst_RV(INS_NOT, regHi, TYP_LONG);
+ }
+
+ goto DONE;
+
+#if LONG_ASG_OPS
+
+ case GT_ASG_OR:
+ insLo = insHi = INS_OR;
+ goto ASG_OPR;
+ case GT_ASG_XOR:
+ insLo = insHi = INS_XOR;
+ goto ASG_OPR;
+ case GT_ASG_AND:
+ insLo = insHi = INS_AND;
+ goto ASG_OPR;
+ case GT_ASG_SUB:
+ insLo = INS_sub;
+ insHi = INS_SUBC;
+ goto ASG_OPR;
+ case GT_ASG_ADD:
+ insLo = INS_add;
+ insHi = INS_ADDC;
+ goto ASG_OPR;
+
+ ASG_OPR:
+
+ if (op2->gtOper == GT_CNS_LNG)
+ {
+ __int64 lval = op2->gtLngCon.gtLconVal;
+
+ /* Make the target addressable */
+
+ addrReg = genMakeAddressable(op1, needReg, RegSet::FREE_REG);
+
+ /* Optimize some special cases */
+
+ doLo = doHi = true;
+
+ /* Check for "(op1 AND -1)" and "(op1 [X]OR 0)" */
+
+ switch (oper)
+ {
+ case GT_ASG_AND:
+ if ((int)(lval) == -1)
+ doLo = false;
+ if ((int)(lval >> 32) == -1)
+ doHi = false;
+ break;
+
+ case GT_ASG_OR:
+ case GT_ASG_XOR:
+ if (!(lval & 0x00000000FFFFFFFF))
+ doLo = false;
+ if (!(lval & 0xFFFFFFFF00000000))
+ doHi = false;
+ break;
+ }
+
+ if (doLo)
+ inst_TT_IV(insLo, op1, (int)(lval), 0);
+ if (doHi)
+ inst_TT_IV(insHi, op1, (int)(lval >> 32), 4);
+
+ bool isArith = (oper == GT_ASG_ADD || oper == GT_ASG_SUB);
+ if (doLo || doHi)
+ tree->gtFlags |= GTF_ZSF_SET;
+
+ genDoneAddressable(op1, addrReg, RegSet::FREE_REG);
+ goto DONE_ASSG_REGS;
+ }
+
+ /* TODO: allow non-const long assignment operators */
+
+ noway_assert(!"non-const long asgop NYI");
+
+#endif // LONG_ASG_OPS
+
+ case GT_IND:
+ case GT_NULLCHECK:
+ {
+ regMaskTP tmpMask;
+ int hiFirst;
+
+ regMaskTP availMask = RBM_ALLINT & ~needReg;
+
+ /* Make sure the operand is addressable */
+
+ addrReg = genMakeAddressable(tree, availMask, RegSet::FREE_REG);
+
+ GenTreePtr addr = oper == GT_IND ? op1 : tree;
+
+ /* Pick a register for the value */
+
+ regPair = regSet.rsPickRegPair(needReg);
+ tmpMask = genRegPairMask(regPair);
+
+ /* Is there any overlap between the register pair and the address? */
+
+ hiFirst = FALSE;
+
+ if (tmpMask & addrReg)
+ {
+ /* Does one or both of the target registers overlap? */
+
+ if ((tmpMask & addrReg) != tmpMask)
+ {
+ /* Only one register overlaps */
+
+ noway_assert(genMaxOneBit(tmpMask & addrReg) == TRUE);
+
+ /* If the low register overlaps, load the upper half first */
+
+ if (addrReg & genRegMask(genRegPairLo(regPair)))
+ hiFirst = TRUE;
+ }
+ else
+ {
+ regMaskTP regFree;
+
+ /* The register completely overlaps with the address */
+
+ noway_assert(genMaxOneBit(tmpMask & addrReg) == FALSE);
+
+ /* Can we pick another pair easily? */
+
+ regFree = regSet.rsRegMaskFree() & ~addrReg;
+ if (needReg)
+ regFree &= needReg;
+
+ /* More than one free register available? */
+
+ if (regFree && !genMaxOneBit(regFree))
+ {
+ regPair = regSet.rsPickRegPair(regFree);
+ tmpMask = genRegPairMask(regPair);
+ }
+ else
+ {
+ // printf("Overlap: needReg = %08X\n", needReg);
+
+ // Reg-prediction won't allow this
+ noway_assert((regSet.rsMaskVars & addrReg) == 0);
+
+ // Grab one fresh reg, and use any one of addrReg
+
+ if (regFree) // Try to follow 'needReg'
+ regLo = regSet.rsGrabReg(regFree);
+ else // Pick any reg besides addrReg
+ regLo = regSet.rsGrabReg(RBM_ALLINT & ~addrReg);
+
+ unsigned regBit = 0x1;
+ regNumber regNo;
+
+ for (regNo = REG_INT_FIRST; regNo <= REG_INT_LAST; regNo = REG_NEXT(regNo), regBit <<= 1)
+ {
+ // Found one of addrReg. Use it.
+ if (regBit & addrReg)
+ break;
+ }
+ noway_assert(genIsValidReg(regNo)); // Should have found regNo
+
+ regPair = gen2regs2pair(regLo, regNo);
+ tmpMask = genRegPairMask(regPair);
+ }
+ }
+ }
+
+ /* Make sure the value is still addressable */
+
+ noway_assert(genStillAddressable(tree));
+
+ /* Figure out which registers the value is in */
+
+ regLo = genRegPairLo(regPair);
+ regHi = genRegPairHi(regPair);
+
+ /* The value in the register pair is about to be trashed */
+
+ regTracker.rsTrackRegTrash(regLo);
+ regTracker.rsTrackRegTrash(regHi);
+
+ /* Load the target registers from where the value is */
+
+ if (hiFirst)
+ {
+ inst_RV_AT(ins_Load(TYP_INT), EA_4BYTE, TYP_INT, regHi, addr, 4);
+ regSet.rsLockReg(genRegMask(regHi));
+ inst_RV_AT(ins_Load(TYP_INT), EA_4BYTE, TYP_INT, regLo, addr, 0);
+ regSet.rsUnlockReg(genRegMask(regHi));
+ }
+ else
+ {
+ inst_RV_AT(ins_Load(TYP_INT), EA_4BYTE, TYP_INT, regLo, addr, 0);
+ regSet.rsLockReg(genRegMask(regLo));
+ inst_RV_AT(ins_Load(TYP_INT), EA_4BYTE, TYP_INT, regHi, addr, 4);
+ regSet.rsUnlockReg(genRegMask(regLo));
+ }
+
+#ifdef _TARGET_ARM_
+ if (tree->gtFlags & GTF_IND_VOLATILE)
+ {
+ // Emit a memory barrier instruction after the load
+ instGen_MemoryBarrier();
+ }
+#endif
+
+ genUpdateLife(tree);
+ genDoneAddressable(tree, addrReg, RegSet::FREE_REG);
+ }
+ goto DONE;
+
+ case GT_CAST:
+
+ /* What are we casting from? */
+
+ switch (op1->gtType)
+ {
+ case TYP_BOOL:
+ case TYP_BYTE:
+ case TYP_CHAR:
+ case TYP_SHORT:
+ case TYP_INT:
+ case TYP_UBYTE:
+ case TYP_BYREF:
+ {
+ regMaskTP hiRegMask;
+ regMaskTP loRegMask;
+
+ // For an unsigned cast we don't need to sign-extend the 32 bit value
+ if (tree->gtFlags & GTF_UNSIGNED)
+ {
+                            // Does needReg have exactly two bits on, and thus
+                            // specify the exact register pair that we want to use?
+ if (!genMaxOneBit(needReg))
+ {
+ regPair = regSet.rsFindRegPairNo(needReg);
+ if (needReg != genRegPairMask(regPair))
+ goto ANY_FREE_REG_UNSIGNED;
+ loRegMask = genRegMask(genRegPairLo(regPair));
+ if ((loRegMask & regSet.rsRegMaskCanGrab()) == 0)
+ goto ANY_FREE_REG_UNSIGNED;
+ hiRegMask = genRegMask(genRegPairHi(regPair));
+ }
+ else
+ {
+ ANY_FREE_REG_UNSIGNED:
+ loRegMask = needReg;
+ hiRegMask = needReg;
+ }
+
+ genComputeReg(op1, loRegMask, RegSet::ANY_REG, RegSet::KEEP_REG);
+ noway_assert(op1->gtFlags & GTF_REG_VAL);
+
+ regLo = op1->gtRegNum;
+ loRegMask = genRegMask(regLo);
+ regSet.rsLockUsedReg(loRegMask);
+ regHi = regSet.rsPickReg(hiRegMask);
+ regSet.rsUnlockUsedReg(loRegMask);
+
+ regPair = gen2regs2pair(regLo, regHi);
+
+ // Move 0 to the higher word of the ULong
+ genSetRegToIcon(regHi, 0, TYP_INT);
+
+ /* We can now free up the operand */
+ genReleaseReg(op1);
+
+ goto DONE;
+ }
+#ifdef _TARGET_XARCH_
+ /* Cast of 'int' to 'long' --> Use cdq if EAX,EDX are available
+ and we need the result to be in those registers.
+ cdq is smaller so we use it for SMALL_CODE
+ */
+
+ if ((needReg & (RBM_EAX | RBM_EDX)) == (RBM_EAX | RBM_EDX) &&
+ (regSet.rsRegMaskFree() & RBM_EDX))
+ {
+ genCodeForTree(op1, RBM_EAX);
+ regSet.rsMarkRegUsed(op1);
+
+ /* If we have to spill EDX, might as well use the faster
+ sar as the spill will increase code size anyway */
+
+ if (op1->gtRegNum != REG_EAX || !(regSet.rsRegMaskFree() & RBM_EDX))
+ {
+ hiRegMask = regSet.rsRegMaskFree();
+ goto USE_SAR_FOR_CAST;
+ }
+
+ regSet.rsGrabReg(RBM_EDX);
+ regTracker.rsTrackRegTrash(REG_EDX);
+
+ /* Convert the int in EAX into a long in EDX:EAX */
+
+ instGen(INS_cdq);
+
+ /* The result is in EDX:EAX */
+
+ regPair = REG_PAIR_EAXEDX;
+ }
+ else
+#endif
+ {
+ /* use the sar instruction to sign-extend a 32-bit integer */
+
+                                // Does needReg have exactly two bits on, and thus
+                                // specify the exact register pair that we want to use?
+ if (!genMaxOneBit(needReg))
+ {
+ regPair = regSet.rsFindRegPairNo(needReg);
+ if ((regPair == REG_PAIR_NONE) || (needReg != genRegPairMask(regPair)))
+ goto ANY_FREE_REG_SIGNED;
+ loRegMask = genRegMask(genRegPairLo(regPair));
+ if ((loRegMask & regSet.rsRegMaskCanGrab()) == 0)
+ goto ANY_FREE_REG_SIGNED;
+ hiRegMask = genRegMask(genRegPairHi(regPair));
+ }
+ else
+ {
+ ANY_FREE_REG_SIGNED:
+ loRegMask = needReg;
+ hiRegMask = RBM_NONE;
+ }
+
+ genComputeReg(op1, loRegMask, RegSet::ANY_REG, RegSet::KEEP_REG);
+#ifdef _TARGET_XARCH_
+ USE_SAR_FOR_CAST:
+#endif
+ noway_assert(op1->gtFlags & GTF_REG_VAL);
+
+ regLo = op1->gtRegNum;
+ loRegMask = genRegMask(regLo);
+ regSet.rsLockUsedReg(loRegMask);
+ regHi = regSet.rsPickReg(hiRegMask);
+ regSet.rsUnlockUsedReg(loRegMask);
+
+ regPair = gen2regs2pair(regLo, regHi);
+
+#ifdef _TARGET_ARM_
+ /* Copy the lo32 bits from regLo to regHi and sign-extend it */
+ // Use one instruction instead of two
+ getEmitter()->emitIns_R_R_I(INS_SHIFT_RIGHT_ARITHM, EA_4BYTE, regHi, regLo, 31);
+#else
+ /* Copy the lo32 bits from regLo to regHi and sign-extend it */
+ inst_RV_RV(INS_mov, regHi, regLo, TYP_INT);
+ inst_RV_SH(INS_SHIFT_RIGHT_ARITHM, EA_4BYTE, regHi, 31);
+#endif
+
+ /* The value in the upper register is trashed */
+
+ regTracker.rsTrackRegTrash(regHi);
+ }
+
+ /* We can now free up the operand */
+ genReleaseReg(op1);
+
+ // conv.ovf.u8 could overflow if the original number was negative
+ if (tree->gtOverflow() && TYP_ULONG == tree->CastToType())
+ {
+ regNumber hiReg = genRegPairHi(regPair);
+ instGen_Compare_Reg_To_Zero(EA_4BYTE, hiReg); // set flags
+ emitJumpKind jmpLTS = genJumpKindForOper(GT_LT, CK_SIGNED);
+ genJumpToThrowHlpBlk(jmpLTS, SCK_OVERFLOW);
+ }
+ }
+ goto DONE;
+
+ case TYP_FLOAT:
+ case TYP_DOUBLE:
+
+#if 0
+ /* Load the FP value onto the coprocessor stack */
+
+ genCodeForTreeFlt(op1);
+
+ /* Allocate a temp for the long value */
+
+ temp = compiler->tmpGetTemp(TYP_LONG);
+
+ /* Store the FP value into the temp */
+
+ inst_FS_ST(INS_fistpl, sizeof(int), temp, 0);
+ genFPstkLevel--;
+
+ /* Pick a register pair for the value */
+
+ regPair = regSet.rsPickRegPair(needReg);
+
+ /* Figure out which registers the value is in */
+
+ regLo = genRegPairLo(regPair);
+ regHi = genRegPairHi(regPair);
+
+ /* The value in the register pair is about to be trashed */
+
+ regTracker.rsTrackRegTrash(regLo);
+ regTracker.rsTrackRegTrash(regHi);
+
+ /* Load the converted value into the registers */
+
+ inst_RV_ST(INS_mov, EA_4BYTE, regLo, temp, 0);
+ inst_RV_ST(INS_mov, EA_4BYTE, regHi, temp, 4);
+
+ /* We no longer need the temp */
+
+ compiler->tmpRlsTemp(temp);
+ goto DONE;
+#else
+ NO_WAY("Cast from TYP_FLOAT or TYP_DOUBLE supposed to be done via a helper call");
+ break;
+#endif
+ case TYP_LONG:
+ case TYP_ULONG:
+ {
+ noway_assert(tree->gtOverflow()); // conv.ovf.u8 or conv.ovf.i8
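+                    // Either conversion overflows exactly when the sign bit of the high dword is
+                    // set: a negative long cannot convert to ulong, and a ulong greater than
+                    // 0x7FFFFFFFFFFFFFFF cannot convert to long.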
+
+ genComputeRegPair(op1, REG_PAIR_NONE, RBM_ALLINT & ~needReg, RegSet::FREE_REG);
+ regPair = op1->gtRegPair;
+
+                    // Do we need to set the sign flag, or can we check whether it is
+                    // already set and skip this test if so?
+
+ if (op1->gtFlags & GTF_REG_VAL)
+ {
+ regNumber hiReg = genRegPairHi(op1->gtRegPair);
+ noway_assert(hiReg != REG_STK);
+ instGen_Compare_Reg_To_Zero(EA_4BYTE, hiReg); // set flags
+ }
+ else
+ {
+ inst_TT_IV(INS_cmp, op1, 0, sizeof(int));
+ }
+
+ emitJumpKind jmpLTS = genJumpKindForOper(GT_LT, CK_SIGNED);
+ genJumpToThrowHlpBlk(jmpLTS, SCK_OVERFLOW);
+ }
+ goto DONE;
+
+ default:
+#ifdef DEBUG
+ compiler->gtDispTree(tree);
+#endif
+ NO_WAY("unexpected cast to long");
+ }
+ break;
+
+ case GT_RETURN:
+
+ /* TODO:
+ * This code is cloned from the regular processing of GT_RETURN values. We have to remember to
+ * call genPInvokeMethodEpilog anywhere that we have a GT_RETURN statement. We should really
+ * generate trees for the PInvoke prolog and epilog so we can remove these special cases.
+ */
+
+ // TODO: this should be done AFTER we called exit mon so that
+ // we are sure that we don't have to keep 'this' alive
+
+ if (compiler->info.compCallUnmanaged && (compiler->compCurBB == compiler->genReturnBB))
+ {
+ /* either it's an "empty" statement or the return statement
+ of a synchronized method
+ */
+
+ genPInvokeMethodEpilog();
+ }
+
+#if CPU_LONG_USES_REGPAIR
+ /* There must be a long return value */
+
+ noway_assert(op1);
+
+ /* Evaluate the return value into EDX:EAX */
+
+ genEvalIntoFreeRegPair(op1, REG_LNGRET, avoidReg);
+
+ noway_assert(op1->gtFlags & GTF_REG_VAL);
+ noway_assert(op1->gtRegPair == REG_LNGRET);
+
+#else
+ NYI("64-bit return");
+#endif
+
+#ifdef PROFILING_SUPPORTED
+ // The profiling hook does not trash registers, so it's safe to call after we emit the code for
+ // the GT_RETURN tree.
+
+ if (compiler->compCurBB == compiler->genReturnBB)
+ {
+ genProfilingLeaveCallback();
+ }
+#endif
+ return;
+
+ case GT_QMARK:
+ noway_assert(!"inliner-generated ?: for longs NYI");
+ NO_WAY("inliner-generated ?: for longs NYI");
+ break;
+
+ case GT_COMMA:
+
+ if (tree->gtFlags & GTF_REVERSE_OPS)
+ {
+ // Generate op2
+ genCodeForTreeLng(op2, needReg, avoidReg);
+ genUpdateLife(op2);
+
+ noway_assert(op2->gtFlags & GTF_REG_VAL);
+
+ regSet.rsMarkRegPairUsed(op2);
+
+ // Do side effects of op1
+ genEvalSideEffects(op1);
+
+ // Recover op2 if spilled
+ genRecoverRegPair(op2, REG_PAIR_NONE, RegSet::KEEP_REG);
+
+ genReleaseRegPair(op2);
+
+ genUpdateLife(tree);
+
+ regPair = op2->gtRegPair;
+ }
+ else
+ {
+ noway_assert((tree->gtFlags & GTF_REVERSE_OPS) == 0);
+
+ /* Generate side effects of the first operand */
+
+ genEvalSideEffects(op1);
+ genUpdateLife(op1);
+
+ /* Is the value of the second operand used? */
+
+ if (tree->gtType == TYP_VOID)
+ {
+ /* The right operand produces no result */
+
+ genEvalSideEffects(op2);
+ genUpdateLife(tree);
+ return;
+ }
+
+ /* Generate the second operand, i.e. the 'real' value */
+
+ genCodeForTreeLng(op2, needReg, avoidReg);
+
+ /* The result of 'op2' is also the final result */
+
+ regPair = op2->gtRegPair;
+ }
+
+ goto DONE;
+
+ case GT_BOX:
+ {
+ /* Generate the operand, i.e. the 'real' value */
+
+ genCodeForTreeLng(op1, needReg, avoidReg);
+
+ /* The result of 'op1' is also the final result */
+
+ regPair = op1->gtRegPair;
+ }
+
+ goto DONE;
+
+ case GT_NOP:
+ if (op1 == NULL)
+ return;
+
+ genCodeForTreeLng(op1, needReg, avoidReg);
+ regPair = op1->gtRegPair;
+ goto DONE;
+
+ default:
+ break;
+ }
+
+#ifdef DEBUG
+ compiler->gtDispTree(tree);
+#endif
+ noway_assert(!"unexpected 64-bit operator");
+ }
+
+ /* See what kind of a special operator we have here */
+
+ switch (oper)
+ {
+ regMaskTP retMask;
+ case GT_CALL:
+ retMask = genCodeForCall(tree, true);
+ if (retMask == RBM_NONE)
+ regPair = REG_PAIR_NONE;
+ else
+ regPair = regSet.rsFindRegPairNo(retMask);
+ break;
+
+ default:
+#ifdef DEBUG
+ compiler->gtDispTree(tree);
+#endif
+ NO_WAY("unexpected long operator");
+ }
+
+DONE:
+
+ genUpdateLife(tree);
+
+ /* Here we've computed the value of 'tree' into 'regPair' */
+
+ noway_assert(regPair != DUMMY_INIT(REG_PAIR_CORRUPT));
+
+ genMarkTreeInRegPair(tree, regPair);
+}
+#ifdef _PREFAST_
+#pragma warning(pop)
+#endif
+
+/*****************************************************************************
+ *
+ * Generate code for a mod of a long by an int.
+ */
+
+regPairNo CodeGen::genCodeForLongModInt(GenTreePtr tree, regMaskTP needReg)
+{
+#ifdef _TARGET_X86_
+
+ regPairNo regPair;
+ regMaskTP addrReg;
+
+ genTreeOps oper = tree->OperGet();
+ GenTreePtr op1 = tree->gtOp.gtOp1;
+ GenTreePtr op2 = tree->gtOp.gtOp2;
+
+ /* Codegen only for Unsigned MOD */
+ noway_assert(oper == GT_UMOD);
+
+ /* op2 must be a long constant in the range 2 to 0x3fffffff */
+
+ noway_assert((op2->gtOper == GT_CNS_LNG) && (op2->gtLngCon.gtLconVal >= 2) &&
+ (op2->gtLngCon.gtLconVal <= 0x3fffffff));
+ int val = (int)op2->gtLngCon.gtLconVal;
+
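+    // The divisor is known to fit in 31 bits (2..0x3fffffff), so op2 can be retyped as a
+    // 32-bit int constant and the modulus computed with 32-bit unsigned divides.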
+ op2->ChangeOperConst(GT_CNS_INT); // it's effectively an integer constant
+
+ op2->gtType = TYP_INT;
+ op2->gtIntCon.gtIconVal = val;
+
+ /* Which operand are we supposed to compute first? */
+
+ if (tree->gtFlags & GTF_REVERSE_OPS)
+ {
+ /* Compute the second operand into a scratch register, other
+ than EAX or EDX */
+
+ needReg = regSet.rsMustExclude(needReg, RBM_PAIR_TMP);
+
+ /* Special case: if op2 is a local var we are done */
+
+ if (op2->gtOper == GT_LCL_VAR || op2->gtOper == GT_LCL_FLD || op2->gtOper == GT_CLS_VAR)
+ {
+ addrReg = genMakeRvalueAddressable(op2, needReg, RegSet::KEEP_REG, false);
+ }
+ else
+ {
+ genComputeReg(op2, needReg, RegSet::ANY_REG, RegSet::KEEP_REG);
+
+ noway_assert(op2->gtFlags & GTF_REG_VAL);
+ addrReg = genRegMask(op2->gtRegNum);
+ }
+
+ /* Compute the first operand into EAX:EDX */
+
+ genComputeRegPair(op1, REG_PAIR_TMP, RBM_NONE, RegSet::KEEP_REG, true);
+ noway_assert(op1->gtFlags & GTF_REG_VAL);
+ noway_assert(op1->gtRegPair == REG_PAIR_TMP);
+
+ /* And recover the second argument while locking the first one */
+
+ addrReg = genKeepAddressable(op2, addrReg, RBM_PAIR_TMP);
+ }
+ else
+ {
+ /* Compute the first operand into EAX:EDX */
+
+ genComputeRegPair(op1, REG_PAIR_EAXEDX, RBM_NONE, RegSet::KEEP_REG, true);
+ noway_assert(op1->gtFlags & GTF_REG_VAL);
+ noway_assert(op1->gtRegPair == REG_PAIR_TMP);
+
+ /* Compute the second operand into a scratch register, other
+ than EAX or EDX */
+
+ needReg = regSet.rsMustExclude(needReg, RBM_PAIR_TMP);
+
+ /* Special case: if op2 is a local var we are done */
+
+ if (op2->gtOper == GT_LCL_VAR || op2->gtOper == GT_LCL_FLD || op2->gtOper == GT_CLS_VAR)
+ {
+ addrReg = genMakeRvalueAddressable(op2, needReg, RegSet::KEEP_REG, false);
+ }
+ else
+ {
+ genComputeReg(op2, needReg, RegSet::ANY_REG, RegSet::KEEP_REG);
+
+ noway_assert(op2->gtFlags & GTF_REG_VAL);
+ addrReg = genRegMask(op2->gtRegNum);
+ }
+
+ /* Recover the first argument */
+
+ genRecoverRegPair(op1, REG_PAIR_EAXEDX, RegSet::KEEP_REG);
+
+ /* And recover the second argument while locking the first one */
+
+ addrReg = genKeepAddressable(op2, addrReg, RBM_PAIR_TMP);
+ }
+
+ /* At this point, EAX:EDX contains the 64bit dividend and op2->gtRegNum
+ contains the 32bit divisor. We want to generate the following code:
+
+ ==========================
+ Unsigned (GT_UMOD)
+
+ cmp edx, op2->gtRegNum
+ jb lab_no_overflow
+
+ mov temp, eax
+ mov eax, edx
+ xor edx, edx
+         div op2->gtRegNum
+ mov eax, temp
+
+ lab_no_overflow:
+         div op2->gtRegNum
+ ==========================
+ This works because (a * 2^32 + b) % c = ((a % c) * 2^32 + b) % c
+ */
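+    // Worked example (base-10 analogue of the identity above): (a*100 + b) % c with a = 12, b = 34,
+    // c = 7 gives 1234 % 7 == 2 and ((12 % 7)*100 + 34) % 7 == 534 % 7 == 2. Pre-reducing the high
+    // half this way leaves the remainder unchanged, and once EDX < divisor the 64/32 'div' below
+    // cannot overflow its 32-bit quotient.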
+
+ BasicBlock* lab_no_overflow = genCreateTempLabel();
+
+ // grab a temporary register other than eax, edx, and op2->gtRegNum
+
+ regNumber tempReg = regSet.rsGrabReg(RBM_ALLINT & ~(RBM_PAIR_TMP | genRegMask(op2->gtRegNum)));
+
+ // EAX and tempReg will be trashed by the mov instructions. Doing
+ // this early won't hurt, and might prevent confusion in genSetRegToIcon.
+
+ regTracker.rsTrackRegTrash(REG_PAIR_TMP_LO);
+ regTracker.rsTrackRegTrash(tempReg);
+
+ inst_RV_RV(INS_cmp, REG_PAIR_TMP_HI, op2->gtRegNum);
+ inst_JMP(EJ_jb, lab_no_overflow);
+
+ inst_RV_RV(INS_mov, tempReg, REG_PAIR_TMP_LO, TYP_INT);
+ inst_RV_RV(INS_mov, REG_PAIR_TMP_LO, REG_PAIR_TMP_HI, TYP_INT);
+ genSetRegToIcon(REG_PAIR_TMP_HI, 0, TYP_INT);
+ inst_TT(INS_UNSIGNED_DIVIDE, op2);
+ inst_RV_RV(INS_mov, REG_PAIR_TMP_LO, tempReg, TYP_INT);
+
+ // Jump point for no overflow divide
+
+ genDefineTempLabel(lab_no_overflow);
+
+ // Issue the divide instruction
+
+ inst_TT(INS_UNSIGNED_DIVIDE, op2);
+
+ /* EAX, EDX, tempReg and op2->gtRegNum are now trashed */
+
+ regTracker.rsTrackRegTrash(REG_PAIR_TMP_LO);
+ regTracker.rsTrackRegTrash(REG_PAIR_TMP_HI);
+ regTracker.rsTrackRegTrash(tempReg);
+ regTracker.rsTrackRegTrash(op2->gtRegNum);
+
+ if (tree->gtFlags & GTF_MOD_INT_RESULT)
+ {
+ /* We don't need to normalize the result, because the caller wants
+ an int (in edx) */
+
+ regPair = REG_PAIR_TMP_REVERSE;
+ }
+ else
+ {
+        /* The result is now in EDX; we have to normalize it, i.e. we have
+ to issue:
+ mov eax, edx; xor edx, edx (for UMOD)
+ */
+
+ inst_RV_RV(INS_mov, REG_PAIR_TMP_LO, REG_PAIR_TMP_HI, TYP_INT);
+
+ genSetRegToIcon(REG_PAIR_TMP_HI, 0, TYP_INT);
+
+ regPair = REG_PAIR_TMP;
+ }
+
+ genReleaseRegPair(op1);
+ genDoneAddressable(op2, addrReg, RegSet::KEEP_REG);
+
+ return regPair;
+
+#else // !_TARGET_X86_
+
+ NYI("codegen for LongModInt");
+
+ return REG_PAIR_NONE;
+
+#endif // !_TARGET_X86_
+}
+
+// Given a tree, return the number of registers that are currently
+// used to hold integer enregistered local variables.
+// Note that an enregistered TYP_LONG can take 1 or 2 registers.
+unsigned CodeGen::genRegCountForLiveIntEnregVars(GenTreePtr tree)
+{
+ unsigned regCount = 0;
+
+ VARSET_ITER_INIT(compiler, iter, compiler->compCurLife, varNum);
+ while (iter.NextElem(compiler, &varNum))
+ {
+ unsigned lclNum = compiler->lvaTrackedToVarNum[varNum];
+ LclVarDsc* varDsc = &compiler->lvaTable[lclNum];
+
+ if (varDsc->lvRegister && !varTypeIsFloating(varDsc->TypeGet()))
+ {
+ ++regCount;
+
+ if (varTypeIsLong(varDsc->TypeGet()))
+ {
+ // For enregistered LONG/ULONG, the lower half should always be in a register.
+ noway_assert(varDsc->lvRegNum != REG_STK);
+
+                // If the LONG/ULONG is NOT partially enregistered, then the higher half should be in a register as
+ // well.
+ if (varDsc->lvOtherReg != REG_STK)
+ {
+ ++regCount;
+ }
+ }
+ }
+ }
+
+ return regCount;
+}
+
+/*****************************************************************************/
+/*****************************************************************************/
+#if CPU_HAS_FP_SUPPORT
+/*****************************************************************************
+ *
+ * Generate code for a floating-point operation.
+ */
+
+void CodeGen::genCodeForTreeFlt(GenTreePtr tree,
+ regMaskTP needReg, /* = RBM_ALLFLOAT */
+ regMaskTP bestReg) /* = RBM_NONE */
+{
+ genCodeForTreeFloat(tree, needReg, bestReg);
+
+ if (tree->OperGet() == GT_RETURN)
+ {
+ // Make sure to get ALL THE EPILOG CODE
+
+ // TODO: this should be done AFTER we called exit mon so that
+ // we are sure that we don't have to keep 'this' alive
+
+ if (compiler->info.compCallUnmanaged && (compiler->compCurBB == compiler->genReturnBB))
+ {
+ /* either it's an "empty" statement or the return statement
+ of a synchronized method
+ */
+
+ genPInvokeMethodEpilog();
+ }
+
+#ifdef PROFILING_SUPPORTED
+ // The profiling hook does not trash registers, so it's safe to call after we emit the code for
+ // the GT_RETURN tree.
+
+ if (compiler->compCurBB == compiler->genReturnBB)
+ {
+ genProfilingLeaveCallback();
+ }
+#endif
+ }
+}
+
+/*****************************************************************************/
+#endif // CPU_HAS_FP_SUPPORT
+
+/*****************************************************************************
+ *
+ * Generate a table switch - the switch value (0-based) is in register 'reg'.
+ */
+
+void CodeGen::genTableSwitch(regNumber reg, unsigned jumpCnt, BasicBlock** jumpTab)
+{
+ unsigned jmpTabBase;
+
+ if (jumpCnt == 1)
+ {
+ // In debug code, we don't optimize away the trivial switch statements. So we can get here with a
+ // BBJ_SWITCH with only a default case. Therefore, don't generate the switch table.
+ noway_assert(compiler->opts.MinOpts() || compiler->opts.compDbgCode);
+ inst_JMP(EJ_jmp, jumpTab[0]);
+ return;
+ }
+
+ noway_assert(jumpCnt >= 2);
+
+ /* Is the number of cases right for a test and jump switch? */
+
+ const bool fFirstCaseFollows = (compiler->compCurBB->bbNext == jumpTab[0]);
+ const bool fDefaultFollows = (compiler->compCurBB->bbNext == jumpTab[jumpCnt - 1]);
+ const bool fHaveScratchReg = ((regSet.rsRegMaskFree() & genRegMask(reg)) != 0);
+
+ unsigned minSwitchTabJumpCnt = 2; // table is better than just 2 cmp/jcc
+
+ // This means really just a single cmp/jcc (aka a simple if/else)
+ if (fFirstCaseFollows || fDefaultFollows)
+ minSwitchTabJumpCnt++;
+
+#ifdef _TARGET_ARM_
+ // On the ARM for small switch tables we will
+ // generate a sequence of compare and branch instructions
+ // because the code to load the base of the switch
+ // table is huge and hideous due to the relocation... :(
+ //
+ minSwitchTabJumpCnt++;
+ if (fHaveScratchReg)
+ minSwitchTabJumpCnt++;
+
+#endif // _TARGET_ARM_
+
+ if (jumpCnt < minSwitchTabJumpCnt)
+ {
+ /* Does the first case label follow? */
+ emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED);
+
+ if (fFirstCaseFollows)
+ {
+ /* Check for the default case */
+ inst_RV_IV(INS_cmp, reg, jumpCnt - 1, EA_4BYTE);
+ emitJumpKind jmpGEU = genJumpKindForOper(GT_GE, CK_UNSIGNED);
+ inst_JMP(jmpGEU, jumpTab[jumpCnt - 1]);
+
+ /* No need to jump to the first case */
+
+ jumpCnt -= 2;
+ jumpTab += 1;
+
+            /* Generate a series of "sub reg, 1; je label" */
+
+ // Make sure that we can trash the register so
+ // that we can generate a series of compares and jumps
+ //
+ if ((jumpCnt > 0) && !fHaveScratchReg)
+ {
+ regNumber tmpReg = regSet.rsGrabReg(RBM_ALLINT);
+ inst_RV_RV(INS_mov, tmpReg, reg);
+ regTracker.rsTrackRegTrash(tmpReg);
+ reg = tmpReg;
+ }
+
+ while (jumpCnt > 0)
+ {
+ inst_RV_IV(INS_sub, reg, 1, EA_4BYTE, INS_FLAGS_SET);
+ inst_JMP(jmpEqual, *jumpTab++);
+ jumpCnt--;
+ }
+ }
+ else
+ {
+ /* Check for case0 first */
+ instGen_Compare_Reg_To_Zero(EA_4BYTE, reg); // set flags
+ inst_JMP(jmpEqual, *jumpTab);
+
+ /* No need to jump to the first case or the default */
+
+ jumpCnt -= 2;
+ jumpTab += 1;
+
+            /* Generate a series of "sub reg, 1; je label" */
+
+ // Make sure that we can trash the register so
+ // that we can generate a series of compares and jumps
+ //
+ if ((jumpCnt > 0) && !fHaveScratchReg)
+ {
+ regNumber tmpReg = regSet.rsGrabReg(RBM_ALLINT);
+ inst_RV_RV(INS_mov, tmpReg, reg);
+ regTracker.rsTrackRegTrash(tmpReg);
+ reg = tmpReg;
+ }
+
+ while (jumpCnt > 0)
+ {
+ inst_RV_IV(INS_sub, reg, 1, EA_4BYTE, INS_FLAGS_SET);
+ inst_JMP(jmpEqual, *jumpTab++);
+ jumpCnt--;
+ }
+
+ if (!fDefaultFollows)
+ {
+ inst_JMP(EJ_jmp, *jumpTab);
+ }
+ }
+
+ if ((fFirstCaseFollows || fDefaultFollows) &&
+ compiler->fgInDifferentRegions(compiler->compCurBB, compiler->compCurBB->bbNext))
+ {
+ inst_JMP(EJ_jmp, compiler->compCurBB->bbNext);
+ }
+
+ return;
+ }
+
+ /* First take care of the default case */
+
+ inst_RV_IV(INS_cmp, reg, jumpCnt - 1, EA_4BYTE);
+ emitJumpKind jmpGEU = genJumpKindForOper(GT_GE, CK_UNSIGNED);
+ inst_JMP(jmpGEU, jumpTab[jumpCnt - 1]);
+
+ /* Generate the jump table contents */
+
+ jmpTabBase = getEmitter()->emitBBTableDataGenBeg(jumpCnt - 1, false);
+
+#ifdef DEBUG
+ if (compiler->opts.dspCode)
+ printf("\n J_M%03u_DS%02u LABEL DWORD\n", Compiler::s_compMethodsCount, jmpTabBase);
+#endif
+
+ for (unsigned index = 0; index < jumpCnt - 1; index++)
+ {
+ BasicBlock* target = jumpTab[index];
+
+ noway_assert(target->bbFlags & BBF_JMP_TARGET);
+
+#ifdef DEBUG
+ if (compiler->opts.dspCode)
+ printf(" DD L_M%03u_BB%02u\n", Compiler::s_compMethodsCount, target->bbNum);
+#endif
+
+ getEmitter()->emitDataGenData(index, target);
+ }
+
+ getEmitter()->emitDataGenEnd();
+
+#ifdef _TARGET_ARM_
+ // We need to load the address of the table into a register.
+ // The data section might get placed a long distance away, so we
+ // can't safely do a PC-relative ADR. :(
+ // Pick any register except the index register.
+ //
+ regNumber regTabBase = regSet.rsGrabReg(RBM_ALLINT & ~genRegMask(reg));
+ getEmitter()->emitIns_R_D(INS_movw, EA_HANDLE_CNS_RELOC, jmpTabBase, regTabBase);
+ getEmitter()->emitIns_R_D(INS_movt, EA_HANDLE_CNS_RELOC, jmpTabBase, regTabBase);
+ regTracker.rsTrackRegTrash(regTabBase);
+
+    // LDR PC, [regTableBase + reg * 4] (encoded as LDR PC, [regTableBase, reg, LSL 2])
+ getEmitter()->emitIns_R_ARX(INS_ldr, EA_PTRSIZE, REG_PC, regTabBase, reg, TARGET_POINTER_SIZE, 0);
+
+#else // !_TARGET_ARM_
+
+ getEmitter()->emitIns_IJ(EA_4BYTE_DSP_RELOC, reg, jmpTabBase);
+
+#endif
+}
+
+/*****************************************************************************
+ *
+ * Generate code for a switch statement.
+ */
+
+void CodeGen::genCodeForSwitch(GenTreePtr tree)
+{
+ unsigned jumpCnt;
+ BasicBlock** jumpTab;
+
+ GenTreePtr oper;
+ regNumber reg;
+
+ noway_assert(tree->gtOper == GT_SWITCH);
+ oper = tree->gtOp.gtOp1;
+ noway_assert(genActualTypeIsIntOrI(oper->gtType));
+
+ /* Get hold of the jump table */
+
+ noway_assert(compiler->compCurBB->bbJumpKind == BBJ_SWITCH);
+
+ jumpCnt = compiler->compCurBB->bbJumpSwt->bbsCount;
+ jumpTab = compiler->compCurBB->bbJumpSwt->bbsDstTab;
+
+ /* Compute the switch value into some register */
+
+ genCodeForTree(oper, 0);
+
+ /* Get hold of the register the value is in */
+
+ noway_assert(oper->gtFlags & GTF_REG_VAL);
+ reg = oper->gtRegNum;
+
+#if FEATURE_STACK_FP_X87
+ if (!compCurFPState.IsEmpty())
+ {
+ return genTableSwitchStackFP(reg, jumpCnt, jumpTab);
+ }
+ else
+#endif // FEATURE_STACK_FP_X87
+ {
+ return genTableSwitch(reg, jumpCnt, jumpTab);
+ }
+}
+
+/*****************************************************************************/
+/*****************************************************************************
+ * Emit a call to a helper function.
+ */
+
+// inline
+void CodeGen::genEmitHelperCall(unsigned helper, int argSize, emitAttr retSize)
+{
+ // Can we call the helper function directly
+
+ void *addr = NULL, **pAddr = NULL;
+
+#if defined(_TARGET_ARM_) && defined(DEBUG) && defined(PROFILING_SUPPORTED)
+ // Don't ask VM if it hasn't requested ELT hooks
+ if (!compiler->compProfilerHookNeeded && compiler->opts.compJitELTHookEnabled &&
+ (helper == CORINFO_HELP_PROF_FCN_ENTER || helper == CORINFO_HELP_PROF_FCN_LEAVE ||
+ helper == CORINFO_HELP_PROF_FCN_TAILCALL))
+ {
+ addr = compiler->compProfilerMethHnd;
+ }
+ else
+#endif
+ {
+ addr = compiler->compGetHelperFtn((CorInfoHelpFunc)helper, (void**)&pAddr);
+ }
+
+#ifdef _TARGET_ARM_
+ if (!addr || !arm_Valid_Imm_For_BL((ssize_t)addr))
+ {
+ // Load the address into a register and call through a register
+ regNumber indCallReg =
+ regSet.rsGrabReg(RBM_ALLINT); // Grab an available register to use for the CALL indirection
+ if (addr)
+ {
+ instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, indCallReg, (ssize_t)addr);
+ }
+ else
+ {
+ getEmitter()->emitIns_R_AI(INS_ldr, EA_PTR_DSP_RELOC, indCallReg, (ssize_t)pAddr);
+ regTracker.rsTrackRegTrash(indCallReg);
+ }
+
+ getEmitter()->emitIns_Call(emitter::EC_INDIR_R, compiler->eeFindHelper(helper),
+ INDEBUG_LDISASM_COMMA(nullptr) NULL, // addr
+ argSize, retSize, gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur,
+ gcInfo.gcRegByrefSetCur,
+ BAD_IL_OFFSET, // ilOffset
+ indCallReg, // ireg
+ REG_NA, 0, 0, // xreg, xmul, disp
+ false, // isJump
+ emitter::emitNoGChelper(helper),
+ (CorInfoHelpFunc)helper == CORINFO_HELP_PROF_FCN_LEAVE);
+ }
+ else
+ {
+ getEmitter()->emitIns_Call(emitter::EC_FUNC_TOKEN, compiler->eeFindHelper(helper),
+ INDEBUG_LDISASM_COMMA(nullptr) addr, argSize, retSize, gcInfo.gcVarPtrSetCur,
+ gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur, BAD_IL_OFFSET, REG_NA, REG_NA, 0,
+ 0, /* ilOffset, ireg, xreg, xmul, disp */
+ false, /* isJump */
+ emitter::emitNoGChelper(helper),
+ (CorInfoHelpFunc)helper == CORINFO_HELP_PROF_FCN_LEAVE);
+ }
+#else
+
+ {
+ emitter::EmitCallType callType = emitter::EC_FUNC_TOKEN;
+
+ if (!addr)
+ {
+ callType = emitter::EC_FUNC_TOKEN_INDIR;
+ addr = pAddr;
+ }
+
+ getEmitter()->emitIns_Call(callType, compiler->eeFindHelper(helper), INDEBUG_LDISASM_COMMA(nullptr) addr,
+ argSize, retSize, gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur,
+ gcInfo.gcRegByrefSetCur, BAD_IL_OFFSET, REG_NA, REG_NA, 0,
+ 0, /* ilOffset, ireg, xreg, xmul, disp */
+ false, /* isJump */
+ emitter::emitNoGChelper(helper));
+ }
+#endif
+
+ regTracker.rsTrashRegSet(RBM_CALLEE_TRASH);
+ regTracker.rsTrashRegsForGCInterruptability();
+}
+
+/*****************************************************************************
+ *
+ * Push the given registers.
+ * This function does not check if the register is marked as used, etc.
+ */
+
+regMaskTP CodeGen::genPushRegs(regMaskTP regs, regMaskTP* byrefRegs, regMaskTP* noRefRegs)
+{
+ *byrefRegs = RBM_NONE;
+ *noRefRegs = RBM_NONE;
+
+ // noway_assert((regs & regSet.rsRegMaskFree()) == regs); // Don't care. Caller is responsible for all this
+
+ if (regs == RBM_NONE)
+ return RBM_NONE;
+
+#if FEATURE_FIXED_OUT_ARGS
+
+ NYI("Don't call genPushRegs with real regs!");
+ return RBM_NONE;
+
+#else // FEATURE_FIXED_OUT_ARGS
+
+ noway_assert(genTypeStSz(TYP_REF) == genTypeStSz(TYP_I_IMPL));
+ noway_assert(genTypeStSz(TYP_BYREF) == genTypeStSz(TYP_I_IMPL));
+
+ regMaskTP pushedRegs = regs;
+
+ for (regNumber reg = REG_INT_FIRST; regs != RBM_NONE; reg = REG_NEXT(reg))
+ {
+ regMaskTP regBit = regMaskTP(1) << reg;
+
+ if ((regBit & regs) == RBM_NONE)
+ continue;
+
+ var_types type;
+ if (regBit & gcInfo.gcRegGCrefSetCur)
+ {
+ type = TYP_REF;
+ }
+ else if (regBit & gcInfo.gcRegByrefSetCur)
+ {
+ *byrefRegs |= regBit;
+ type = TYP_BYREF;
+ }
+ else if (noRefRegs != NULL)
+ {
+ *noRefRegs |= regBit;
+ type = TYP_I_IMPL;
+ }
+ else
+ {
+ continue;
+ }
+
+ inst_RV(INS_push, reg, type);
+
+ genSinglePush();
+ gcInfo.gcMarkRegSetNpt(regBit);
+
+ regs &= ~regBit;
+ }
+
+ return pushedRegs;
+
+#endif // FEATURE_FIXED_OUT_ARGS
+}
+
+/*****************************************************************************
+ *
+ * Pop the registers pushed by genPushRegs()
+ */
+
+void CodeGen::genPopRegs(regMaskTP regs, regMaskTP byrefRegs, regMaskTP noRefRegs)
+{
+ if (regs == RBM_NONE)
+ return;
+
+#if FEATURE_FIXED_OUT_ARGS
+
+ NYI("Don't call genPopRegs with real regs!");
+
+#else // FEATURE_FIXED_OUT_ARGS
+
+ noway_assert((regs & byrefRegs) == byrefRegs);
+ noway_assert((regs & noRefRegs) == noRefRegs);
+ // noway_assert((regs & regSet.rsRegMaskFree()) == regs); // Don't care. Caller is responsible for all this
+ noway_assert((regs & (gcInfo.gcRegGCrefSetCur | gcInfo.gcRegByrefSetCur)) == RBM_NONE);
+
+ noway_assert(genTypeStSz(TYP_REF) == genTypeStSz(TYP_INT));
+ noway_assert(genTypeStSz(TYP_BYREF) == genTypeStSz(TYP_INT));
+
+    // Walk the registers in the reverse order of genPushRegs()
+ for (regNumber reg = REG_INT_LAST; regs != RBM_NONE; reg = REG_PREV(reg))
+ {
+ regMaskTP regBit = regMaskTP(1) << reg;
+
+ if ((regBit & regs) == RBM_NONE)
+ continue;
+
+ var_types type;
+ if (regBit & byrefRegs)
+ {
+ type = TYP_BYREF;
+ }
+ else if (regBit & noRefRegs)
+ {
+ type = TYP_INT;
+ }
+ else
+ {
+ type = TYP_REF;
+ }
+
+ inst_RV(INS_pop, reg, type);
+ genSinglePop();
+
+ if (type != TYP_INT)
+ gcInfo.gcMarkRegPtrVal(reg, type);
+
+ regs &= ~regBit;
+ }
+
+#endif // FEATURE_FIXED_OUT_ARGS
+}
+
+/*****************************************************************************
+ *
+ * Push the given argument list, right to left; returns the total amount of
+ * stuff pushed.
+ */
+
+#if !FEATURE_FIXED_OUT_ARGS
+#ifdef _PREFAST_
+#pragma warning(push)
+#pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
+#endif
+size_t CodeGen::genPushArgList(GenTreePtr call)
+{
+ GenTreeArgList* regArgs = call->gtCall.gtCallLateArgs;
+ size_t size = 0;
+ regMaskTP addrReg;
+
+ GenTreeArgList* args;
+ // Create a local, artificial GenTreeArgList that includes the gtCallObjp, if that exists, as first argument,
+ // so we can iterate over this argument list more uniformly.
+ // Need to provide a temporary non-null first argument here: if we use this, we'll replace it.
+ GenTreeArgList firstForObjp(/*temp dummy arg*/ call, call->gtCall.gtCallArgs);
+ if (call->gtCall.gtCallObjp == NULL)
+ {
+ args = call->gtCall.gtCallArgs;
+ }
+ else
+ {
+ firstForObjp.Current() = call->gtCall.gtCallObjp;
+ args = &firstForObjp;
+ }
+
+ GenTreePtr curr;
+ var_types type;
+ size_t opsz;
+
+ for (; args; args = args->Rest())
+ {
+ addrReg = DUMMY_INIT(RBM_CORRUPT); // to detect uninitialized use
+
+ /* Get hold of the next argument value */
+ curr = args->Current();
+
+ if (curr->IsArgPlaceHolderNode())
+ {
+ assert(curr->gtFlags & GTF_LATE_ARG);
+
+ addrReg = 0;
+ continue;
+ }
+
+ // If we have a comma expression, eval the non-last, then deal with the last.
+ if (!(curr->gtFlags & GTF_LATE_ARG))
+ curr = genCodeForCommaTree(curr);
+
+ /* See what type of a value we're passing */
+ type = curr->TypeGet();
+
+ opsz = genTypeSize(genActualType(type));
+
+ switch (type)
+ {
+ case TYP_BOOL:
+ case TYP_BYTE:
+ case TYP_SHORT:
+ case TYP_CHAR:
+ case TYP_UBYTE:
+
+ /* Don't want to push a small value, make it a full word */
+
+ genCodeForTree(curr, 0);
+
+ __fallthrough; // now the value should be in a register ...
+
+ case TYP_INT:
+ case TYP_REF:
+ case TYP_BYREF:
+#if !CPU_HAS_FP_SUPPORT
+ case TYP_FLOAT:
+#endif
+
+ if (curr->gtFlags & GTF_LATE_ARG)
+ {
+ assert(curr->gtOper == GT_ASG);
+ /* one more argument will be passed in a register */
+ noway_assert(intRegState.rsCurRegArgNum < MAX_REG_ARG);
+
+ /* arg is passed in the register, nothing on the stack */
+
+ opsz = 0;
+ }
+
+ /* Is this value a handle? */
+
+ if (curr->gtOper == GT_CNS_INT && curr->IsIconHandle())
+ {
+ /* Emit a fixup for the push instruction */
+
+ inst_IV_handle(INS_push, curr->gtIntCon.gtIconVal);
+ genSinglePush();
+
+ addrReg = 0;
+ break;
+ }
+
+ /* Is the value a constant? */
+
+ if (curr->gtOper == GT_CNS_INT)
+ {
+
+#if REDUNDANT_LOAD
+ regNumber reg = regTracker.rsIconIsInReg(curr->gtIntCon.gtIconVal);
+
+ if (reg != REG_NA)
+ {
+ inst_RV(INS_push, reg, TYP_INT);
+ }
+ else
+#endif
+ {
+ inst_IV(INS_push, curr->gtIntCon.gtIconVal);
+ }
+
+ /* If the type is TYP_REF, then this must be a "null". So we can
+ treat it as a TYP_INT as we don't need to report it as a GC ptr */
+
+ noway_assert(curr->TypeGet() == TYP_INT ||
+ (varTypeIsGC(curr->TypeGet()) && curr->gtIntCon.gtIconVal == 0));
+
+ genSinglePush();
+
+ addrReg = 0;
+ break;
+ }
+
+ if (curr->gtFlags & GTF_LATE_ARG)
+ {
+ /* This must be a register arg temp assignment */
+
+ noway_assert(curr->gtOper == GT_ASG);
+
+ /* Evaluate it to the temp */
+
+ genCodeForTree(curr, 0);
+
+ /* Increment the current argument register counter */
+
+ intRegState.rsCurRegArgNum++;
+
+ addrReg = 0;
+ }
+ else
+ {
+ /* This is a 32-bit integer non-register argument */
+
+ addrReg = genMakeRvalueAddressable(curr, 0, RegSet::KEEP_REG, false);
+ inst_TT(INS_push, curr);
+ genSinglePush();
+ genDoneAddressable(curr, addrReg, RegSet::KEEP_REG);
+ }
+ break;
+
+ case TYP_LONG:
+#if !CPU_HAS_FP_SUPPORT
+ case TYP_DOUBLE:
+#endif
+
+ /* Is the value a constant? */
+
+ if (curr->gtOper == GT_CNS_LNG)
+ {
+ inst_IV(INS_push, (int)(curr->gtLngCon.gtLconVal >> 32));
+ genSinglePush();
+ inst_IV(INS_push, (int)(curr->gtLngCon.gtLconVal));
+ genSinglePush();
+
+ addrReg = 0;
+ }
+ else
+ {
+ addrReg = genMakeAddressable(curr, 0, RegSet::FREE_REG);
+
+ inst_TT(INS_push, curr, sizeof(int));
+ genSinglePush();
+ inst_TT(INS_push, curr);
+ genSinglePush();
+ }
+ break;
+
+#if CPU_HAS_FP_SUPPORT
+ case TYP_FLOAT:
+ case TYP_DOUBLE:
+#endif
+#if FEATURE_STACK_FP_X87
+ addrReg = genPushArgumentStackFP(curr);
+#else
+ NYI("FP codegen");
+ addrReg = 0;
+#endif
+ break;
+
+ case TYP_VOID:
+
+ /* Is this a nothing node, deferred register argument? */
+
+ if (curr->gtFlags & GTF_LATE_ARG)
+ {
+ GenTree* arg = curr;
+ if (arg->gtOper == GT_COMMA)
+ {
+ while (arg->gtOper == GT_COMMA)
+ {
+ GenTreePtr op1 = arg->gtOp.gtOp1;
+ genEvalSideEffects(op1);
+ genUpdateLife(op1);
+ arg = arg->gtOp.gtOp2;
+ }
+ if (!arg->IsNothingNode())
+ {
+ genEvalSideEffects(arg);
+ genUpdateLife(arg);
+ }
+ }
+
+ /* increment the register count and continue with the next argument */
+
+ intRegState.rsCurRegArgNum++;
+
+ noway_assert(opsz == 0);
+
+ addrReg = 0;
+ break;
+ }
+
+ __fallthrough;
+
+ case TYP_STRUCT:
+ {
+ GenTree* arg = curr;
+ while (arg->gtOper == GT_COMMA)
+ {
+ GenTreePtr op1 = arg->gtOp.gtOp1;
+ genEvalSideEffects(op1);
+ genUpdateLife(op1);
+ arg = arg->gtOp.gtOp2;
+ }
+
+ noway_assert(arg->gtOper == GT_OBJ || arg->gtOper == GT_MKREFANY || arg->gtOper == GT_IND);
+ noway_assert((arg->gtFlags & GTF_REVERSE_OPS) == 0);
+ noway_assert(addrReg == DUMMY_INIT(RBM_CORRUPT));
+
+ if (arg->gtOper == GT_MKREFANY)
+ {
+ GenTreePtr op1 = arg->gtOp.gtOp1;
+ GenTreePtr op2 = arg->gtOp.gtOp2;
+
+ addrReg = genMakeAddressable(op1, RBM_NONE, RegSet::KEEP_REG);
+
+ /* Is this value a handle? */
+ if (op2->gtOper == GT_CNS_INT && op2->IsIconHandle())
+ {
+ /* Emit a fixup for the push instruction */
+
+ inst_IV_handle(INS_push, op2->gtIntCon.gtIconVal);
+ genSinglePush();
+ }
+ else
+ {
+ regMaskTP addrReg2 = genMakeRvalueAddressable(op2, 0, RegSet::KEEP_REG, false);
+ inst_TT(INS_push, op2);
+ genSinglePush();
+ genDoneAddressable(op2, addrReg2, RegSet::KEEP_REG);
+ }
+ addrReg = genKeepAddressable(op1, addrReg);
+ inst_TT(INS_push, op1);
+ genSinglePush();
+ genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);
+
+ opsz = 2 * TARGET_POINTER_SIZE;
+ }
+ else
+ {
+ noway_assert(arg->gtOper == GT_OBJ);
+
+ if (arg->gtObj.gtOp1->gtOper == GT_ADDR && arg->gtObj.gtOp1->gtOp.gtOp1->gtOper == GT_LCL_VAR)
+ {
+ GenTreePtr structLocalTree = arg->gtObj.gtOp1->gtOp.gtOp1;
+ unsigned structLclNum = structLocalTree->gtLclVarCommon.gtLclNum;
+ LclVarDsc* varDsc = &compiler->lvaTable[structLclNum];
+
+ // As much as we would like this to be a noway_assert, we can't because
+                            // there are some weird casts out there, and backwards compatibility
+ // dictates we do *NOT* start rejecting them now. lvaGetPromotion and
+ // lvPromoted in general currently do not require the local to be
+ // TYP_STRUCT, so this assert is really more about how we wish the world
+                            // was than some JIT invariant.
+ assert((structLocalTree->TypeGet() == TYP_STRUCT) || compiler->compUnsafeCastUsed);
+
+ Compiler::lvaPromotionType promotionType = compiler->lvaGetPromotionType(varDsc);
+
+ if (varDsc->lvPromoted &&
+ promotionType ==
+ Compiler::PROMOTION_TYPE_INDEPENDENT) // Otherwise it is guaranteed to live on stack.
+ {
+ assert(!varDsc->lvAddrExposed); // Compiler::PROMOTION_TYPE_INDEPENDENT ==> not exposed.
+
+ addrReg = 0;
+
+ // Get the number of BYTES to copy to the stack
+ opsz = roundUp(compiler->info.compCompHnd->getClassSize(arg->gtObj.gtClass), sizeof(void*));
+ size_t bytesToBeCopied = opsz;
+
+ // postponedFields is true if we have any postponed fields
+ // Any field that does not start on a 4-byte boundary is a postponed field
+ // Such a field is required to be a short or a byte
+ //
+ // postponedRegKind records the kind of scratch register we will
+ // need to process the postponed fields
+ // RBM_NONE means that we don't need a register
+ //
+ // expectedAlignedOffset records the aligned offset that
+ // has to exist for a push to cover the postponed fields.
+ // Since all promoted structs have the tightly packed property
+ // we are guaranteed that we will have such a push
+ //
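+                                // Illustrative example (hypothetical layout): for a packed promoted struct
+                                // { int a; /*0*/ ubyte b; /*4*/ ubyte c; /*5*/ short d; /*6*/ }, 'a' and 'b'
+                                // are stack-aligned and get 4-byte pushes, while 'c' and 'd' are postponed
+                                // with expectedAlignedOffset == roundUp(5,4) - 4 == 4; the push covering
+                                // offset 4 (the one for 'b') reserves their bytes, and the second loop below
+                                // then stores them with byte/word stores relative to ESP.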
+ bool postponedFields = false;
+ regMaskTP postponedRegKind = RBM_NONE;
+ size_t expectedAlignedOffset = UINT_MAX;
+
+ VARSET_TP* deadVarBits = NULL;
+ compiler->GetPromotedStructDeathVars()->Lookup(structLocalTree, &deadVarBits);
+
+ // Reverse loop, starts pushing from the end of the struct (i.e. the highest field offset)
+ //
+ for (int varNum = varDsc->lvFieldLclStart + varDsc->lvFieldCnt - 1;
+ varNum >= (int)varDsc->lvFieldLclStart; varNum--)
+ {
+ LclVarDsc* fieldVarDsc = compiler->lvaTable + varNum;
+#ifdef DEBUG
+ if (fieldVarDsc->lvExactSize == 2 * sizeof(unsigned))
+ {
+ noway_assert(fieldVarDsc->lvFldOffset % (2 * sizeof(unsigned)) == 0);
+ noway_assert(fieldVarDsc->lvFldOffset + (2 * sizeof(unsigned)) == bytesToBeCopied);
+ }
+#endif
+ // Whenever we see a stack-aligned fieldVarDsc then we use 4-byte push instruction(s)
+ // For packed structs we will go back and store the unaligned bytes and shorts
+ // in the next loop
+ //
+ if (fieldVarDsc->lvStackAligned())
+ {
+ if (fieldVarDsc->lvExactSize != 2 * sizeof(unsigned) &&
+ fieldVarDsc->lvFldOffset + sizeof(void*) != bytesToBeCopied)
+ {
+                                        // Might need 4 bytes of padding for fields other than LONG and DOUBLE.
+                                        // Just push some junk (i.e. EAX) on the stack.
+ inst_RV(INS_push, REG_EAX, TYP_INT);
+ genSinglePush();
+
+ bytesToBeCopied -= sizeof(void*);
+ }
+
+ // If we have an expectedAlignedOffset make sure that this push instruction
+ // is what we expect to cover the postponedFields
+ //
+ if (expectedAlignedOffset != UINT_MAX)
+ {
+ // This push must be for a small field
+ noway_assert(fieldVarDsc->lvExactSize < 4);
+ // The fldOffset for this push should be equal to the expectedAlignedOffset
+ noway_assert(fieldVarDsc->lvFldOffset == expectedAlignedOffset);
+ expectedAlignedOffset = UINT_MAX;
+ }
+
+ // Push the "upper half" of LONG var first
+
+ if (isRegPairType(fieldVarDsc->lvType))
+ {
+ if (fieldVarDsc->lvOtherReg != REG_STK)
+ {
+ inst_RV(INS_push, fieldVarDsc->lvOtherReg, TYP_INT);
+ genSinglePush();
+
+ // Prepare the set of vars to be cleared from gcref/gcbyref set
+ // in case they become dead after genUpdateLife.
+ // genDoneAddressable() will remove dead gc vars by calling
+ // gcInfo.gcMarkRegSetNpt.
+ // Although it is not addrReg, we just borrow the name here.
+ addrReg |= genRegMask(fieldVarDsc->lvOtherReg);
+ }
+ else
+ {
+ getEmitter()->emitIns_S(INS_push, EA_4BYTE, varNum, sizeof(void*));
+ genSinglePush();
+ }
+
+ bytesToBeCopied -= sizeof(void*);
+ }
+
+ // Push the "upper half" of DOUBLE var if it is not enregistered.
+
+ if (fieldVarDsc->lvType == TYP_DOUBLE)
+ {
+ if (!fieldVarDsc->lvRegister)
+ {
+ getEmitter()->emitIns_S(INS_push, EA_4BYTE, varNum, sizeof(void*));
+ genSinglePush();
+ }
+
+ bytesToBeCopied -= sizeof(void*);
+ }
+
+ //
+ // Push the field local.
+ //
+
+ if (fieldVarDsc->lvRegister)
+ {
+ if (!varTypeIsFloating(genActualType(fieldVarDsc->TypeGet())))
+ {
+ inst_RV(INS_push, fieldVarDsc->lvRegNum,
+ genActualType(fieldVarDsc->TypeGet()));
+ genSinglePush();
+
+ // Prepare the set of vars to be cleared from gcref/gcbyref set
+ // in case they become dead after genUpdateLife.
+ // genDoneAddressable() will remove dead gc vars by calling
+ // gcInfo.gcMarkRegSetNpt.
+ // Although it is not addrReg, we just borrow the name here.
+ addrReg |= genRegMask(fieldVarDsc->lvRegNum);
+ }
+ else
+ {
+ // Must be TYP_FLOAT or TYP_DOUBLE
+ noway_assert(fieldVarDsc->lvRegNum != REG_FPNONE);
+
+ noway_assert(fieldVarDsc->lvExactSize == sizeof(unsigned) ||
+ fieldVarDsc->lvExactSize == 2 * sizeof(unsigned));
+
+ inst_RV_IV(INS_sub, REG_SPBASE, fieldVarDsc->lvExactSize, EA_PTRSIZE);
+
+ genSinglePush();
+ if (fieldVarDsc->lvExactSize == 2 * sizeof(unsigned))
+ {
+ genSinglePush();
+ }
+
+#if FEATURE_STACK_FP_X87
+ GenTree* fieldTree = new (compiler, GT_REG_VAR)
+ GenTreeLclVar(fieldVarDsc->lvType, varNum, BAD_IL_OFFSET);
+ fieldTree->gtOper = GT_REG_VAR;
+ fieldTree->gtRegNum = fieldVarDsc->lvRegNum;
+ fieldTree->gtRegVar.gtRegNum = fieldVarDsc->lvRegNum;
+ if ((arg->gtFlags & GTF_VAR_DEATH) != 0)
+ {
+ if (fieldVarDsc->lvTracked &&
+ (deadVarBits == NULL ||
+ VarSetOps::IsMember(compiler, *deadVarBits,
+ fieldVarDsc->lvVarIndex)))
+ {
+ fieldTree->gtFlags |= GTF_VAR_DEATH;
+ }
+ }
+ genCodeForTreeStackFP_Leaf(fieldTree);
+
+ // Take reg to top of stack
+
+ FlatFPX87_MoveToTOS(&compCurFPState, fieldTree->gtRegNum);
+
+ // Pop it off to stack
+ compCurFPState.Pop();
+
+ getEmitter()->emitIns_AR_R(INS_fstp, EA_ATTR(fieldVarDsc->lvExactSize),
+ REG_NA, REG_SPBASE, 0);
+#else
+ NYI_FLAT_FP_X87("FP codegen");
+#endif
+ }
+ }
+ else
+ {
+ getEmitter()->emitIns_S(INS_push,
+ (fieldVarDsc->TypeGet() == TYP_REF) ? EA_GCREF
+ : EA_4BYTE,
+ varNum, 0);
+ genSinglePush();
+ }
+
+ bytesToBeCopied -= sizeof(void*);
+ }
+ else // not stack aligned
+ {
+ noway_assert(fieldVarDsc->lvExactSize < 4);
+
+ // We will need to use a store byte or store word
+ // to set this unaligned location
+ postponedFields = true;
+
+ if (expectedAlignedOffset != UINT_MAX)
+ {
+ // This should never change until it is set back to UINT_MAX by an aligned
+ // offset
+ noway_assert(expectedAlignedOffset ==
+ roundUp(fieldVarDsc->lvFldOffset, sizeof(void*)) - sizeof(void*));
+ }
+
+ expectedAlignedOffset =
+ roundUp(fieldVarDsc->lvFldOffset, sizeof(void*)) - sizeof(void*);
+
+ noway_assert(expectedAlignedOffset < bytesToBeCopied);
+
+ if (fieldVarDsc->lvRegister)
+ {
+ // Do we need to use a byte-able register?
+ if (fieldVarDsc->lvExactSize == 1)
+ {
+                                        // Did we enregister fieldVarDsc in a non byte-able register?
+ if ((genRegMask(fieldVarDsc->lvRegNum) & RBM_BYTE_REGS) == 0)
+ {
+ // then we will need to grab a byte-able register
+ postponedRegKind = RBM_BYTE_REGS;
+ }
+ }
+ }
+ else // not enregistered
+ {
+ if (fieldVarDsc->lvExactSize == 1)
+ {
+ // We will need to grab a byte-able register
+ postponedRegKind = RBM_BYTE_REGS;
+ }
+ else
+ {
+ // We will need to grab any scratch register
+ if (postponedRegKind != RBM_BYTE_REGS)
+ postponedRegKind = RBM_ALLINT;
+ }
+ }
+ }
+ }
+
+ // Now we've pushed all of the aligned fields.
+ //
+ // We should have pushed bytes equal to the entire struct
+ noway_assert(bytesToBeCopied == 0);
+
+ // We should have seen a push that covers every postponed field
+ noway_assert(expectedAlignedOffset == UINT_MAX);
+
+ // Did we have any postponed fields?
+ if (postponedFields)
+ {
+ regNumber regNum = REG_STK; // means no register
+
+ // If we needed a scratch register then grab it here
+
+ if (postponedRegKind != RBM_NONE)
+ regNum = regSet.rsGrabReg(postponedRegKind);
+
+ // Forward loop, starts from the lowest field offset
+ //
+ for (unsigned varNum = varDsc->lvFieldLclStart;
+ varNum < varDsc->lvFieldLclStart + varDsc->lvFieldCnt; varNum++)
+ {
+ LclVarDsc* fieldVarDsc = compiler->lvaTable + varNum;
+
+ // All stack aligned fields have already been pushed
+ if (fieldVarDsc->lvStackAligned())
+ continue;
+
+ // We have a postponed field
+
+ // It must be a byte or a short
+ noway_assert(fieldVarDsc->lvExactSize < 4);
+
+ // Is the field enregistered?
+ if (fieldVarDsc->lvRegister)
+ {
+ // Frequently we can just use that register
+ regNumber tmpRegNum = fieldVarDsc->lvRegNum;
+
+ // Do we need to use a byte-able register?
+ if (fieldVarDsc->lvExactSize == 1)
+ {
+ // Did we enregister the field in a non byte-able register?
+ if ((genRegMask(tmpRegNum) & RBM_BYTE_REGS) == 0)
+ {
+ // then we will need to use the byte-able register 'regNum'
+ noway_assert((genRegMask(regNum) & RBM_BYTE_REGS) != 0);
+
+ // Copy the register that contains fieldVarDsc into 'regNum'
+ getEmitter()->emitIns_R_R(INS_mov, EA_4BYTE, regNum,
+ fieldVarDsc->lvRegNum);
+ regTracker.rsTrackRegLclVar(regNum, varNum);
+
+ // tmpRegNum is the register that we will extract the byte value from
+ tmpRegNum = regNum;
+ }
+ noway_assert((genRegMask(tmpRegNum) & RBM_BYTE_REGS) != 0);
+ }
+
+ getEmitter()->emitIns_AR_R(ins_Store(fieldVarDsc->TypeGet()),
+ (emitAttr)fieldVarDsc->lvExactSize, tmpRegNum,
+ REG_SPBASE, fieldVarDsc->lvFldOffset);
+ }
+ else // not enregistered
+ {
+                                    // We will copy the non-enregistered fieldVar into our scratch register 'regNum'
+
+ noway_assert(regNum != REG_STK);
+ getEmitter()->emitIns_R_S(ins_Load(fieldVarDsc->TypeGet()),
+ (emitAttr)fieldVarDsc->lvExactSize, regNum, varNum,
+ 0);
+
+ regTracker.rsTrackRegLclVar(regNum, varNum);
+
+ // Store the value (byte or short) into the stack
+
+ getEmitter()->emitIns_AR_R(ins_Store(fieldVarDsc->TypeGet()),
+ (emitAttr)fieldVarDsc->lvExactSize, regNum,
+ REG_SPBASE, fieldVarDsc->lvFldOffset);
+ }
+ }
+ }
+ genUpdateLife(structLocalTree);
+
+ break;
+ }
+ }
+
+ genCodeForTree(arg->gtObj.gtOp1, 0);
+ noway_assert(arg->gtObj.gtOp1->gtFlags & GTF_REG_VAL);
+ regNumber reg = arg->gtObj.gtOp1->gtRegNum;
+                // Get the number of BYTES to copy to the stack
+ opsz = roundUp(compiler->info.compCompHnd->getClassSize(arg->gtObj.gtClass), sizeof(void*));
+ unsigned slots = (unsigned)(opsz / sizeof(void*));
+
+ BYTE* gcLayout = new (compiler, CMK_Codegen) BYTE[slots];
+
+ compiler->info.compCompHnd->getClassGClayout(arg->gtObj.gtClass, gcLayout);
+
+ BOOL bNoneGC = TRUE;
+ for (int i = slots - 1; i >= 0; --i)
+ {
+ if (gcLayout[i] != TYPE_GC_NONE)
+ {
+ bNoneGC = FALSE;
+ break;
+ }
+ }
+
+ /* passing large structures using movq instead of pushes does not increase codesize very much */
+ unsigned movqLenMin = 8;
+ unsigned movqLenMax = 64;
+ unsigned curBBweight = compiler->compCurBB->getBBWeight(compiler);
+
+ if ((compiler->compCodeOpt() == Compiler::SMALL_CODE) || (curBBweight == BB_ZERO_WEIGHT))
+ {
+ // Don't bother with this optimization in
+ // rarely run blocks or when optimizing for size
+ movqLenMax = movqLenMin = 0;
+ }
+ else if (compiler->compCodeOpt() == Compiler::FAST_CODE)
+ {
+ // Be more aggressive when optimizing for speed
+ movqLenMax *= 2;
+ }
+
+ /* Adjust for BB weight */
+ if (curBBweight >= (BB_LOOP_WEIGHT * BB_UNITY_WEIGHT) / 2)
+ {
+ // Be more aggressive when we are inside a loop
+ movqLenMax *= 2;
+ }
+
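+                // For example, with the defaults above a GC-free 24-byte struct in an ordinary block
+                // (8 <= 24 <= 64) takes the movq path: one 'sub esp, 24' followed by three movq
+                // load/store pairs, instead of six 4-byte pushes.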
+ if (compiler->opts.compCanUseSSE2 && bNoneGC && (opsz >= movqLenMin) && (opsz <= movqLenMax))
+ {
+ JITLOG_THIS(compiler, (LL_INFO10000,
+ "Using XMM instructions to pass %3d byte valuetype while compiling %s\n",
+ opsz, compiler->info.compFullName));
+
+ int stkDisp = (int)(unsigned)opsz;
+ int curDisp = 0;
+ regNumber xmmReg = REG_XMM0;
+
+ if (opsz & 0x4)
+ {
+ stkDisp -= sizeof(void*);
+ getEmitter()->emitIns_AR_R(INS_push, EA_4BYTE, REG_NA, reg, stkDisp);
+ genSinglePush();
+ }
+
+ inst_RV_IV(INS_sub, REG_SPBASE, stkDisp, EA_PTRSIZE);
+ genStackLevel += stkDisp;
+
+ while (curDisp < stkDisp)
+ {
+ getEmitter()->emitIns_R_AR(INS_movq, EA_8BYTE, xmmReg, reg, curDisp);
+ getEmitter()->emitIns_AR_R(INS_movq, EA_8BYTE, xmmReg, REG_SPBASE, curDisp);
+ curDisp += 2 * sizeof(void*);
+ }
+ noway_assert(curDisp == stkDisp);
+ }
+ else
+ {
+ for (int i = slots - 1; i >= 0; --i)
+ {
+ emitAttr fieldSize;
+ if (gcLayout[i] == TYPE_GC_NONE)
+ fieldSize = EA_4BYTE;
+ else if (gcLayout[i] == TYPE_GC_REF)
+ fieldSize = EA_GCREF;
+ else
+ {
+ noway_assert(gcLayout[i] == TYPE_GC_BYREF);
+ fieldSize = EA_BYREF;
+ }
+ getEmitter()->emitIns_AR_R(INS_push, fieldSize, REG_NA, reg, i * sizeof(void*));
+ genSinglePush();
+ }
+ }
+ gcInfo.gcMarkRegSetNpt(genRegMask(reg)); // Kill the pointer in op1
+ }
+
+ addrReg = 0;
+ break;
+ }
+
+ default:
+ noway_assert(!"unhandled/unexpected arg type");
+ NO_WAY("unhandled/unexpected arg type");
+ }
+
+ /* Update the current set of live variables */
+
+ genUpdateLife(curr);
+
+ /* Update the current set of register pointers */
+
+ noway_assert(addrReg != DUMMY_INIT(RBM_CORRUPT));
+ genDoneAddressable(curr, addrReg, RegSet::FREE_REG);
+
+ /* Remember how much stuff we've pushed on the stack */
+
+ size += opsz;
+
+ /* Update the current argument stack offset */
+
+ /* Continue with the next argument, if any more are present */
+
+ } // while args
+
+ /* Move the deferred arguments to registers */
+
+ for (args = regArgs; args; args = args->Rest())
+ {
+ curr = args->Current();
+
+ assert(!curr->IsArgPlaceHolderNode()); // No place holders nodes are in the late args
+
+ fgArgTabEntryPtr curArgTabEntry = compiler->gtArgEntryByNode(call, curr);
+ assert(curArgTabEntry);
+ regNumber regNum = curArgTabEntry->regNum;
+
+ noway_assert(isRegParamType(curr->TypeGet()));
+ noway_assert(curr->gtType != TYP_VOID);
+
+ /* Evaluate the argument to a register [pair] */
+
+ if (genTypeSize(genActualType(curr->TypeGet())) == sizeof(int))
+ {
+ /* Check if this is the guess area for the resolve interface call
+ * Pass a size of EA_OFFSET*/
+ if (curr->gtOper == GT_CLS_VAR && compiler->eeGetJitDataOffs(curr->gtClsVar.gtClsVarHnd) >= 0)
+ {
+ getEmitter()->emitIns_R_C(ins_Load(TYP_INT), EA_OFFSET, regNum, curr->gtClsVar.gtClsVarHnd, 0);
+ regTracker.rsTrackRegTrash(regNum);
+
+ /* The value is now in the appropriate register */
+
+ genMarkTreeInReg(curr, regNum);
+ }
+ else
+ {
+ genComputeReg(curr, genRegMask(regNum), RegSet::EXACT_REG, RegSet::FREE_REG, false);
+ }
+
+ noway_assert(curr->gtRegNum == regNum);
+
+ /* If the register is already marked as used, it will become
+ multi-used. However, since it is a callee-trashed register,
+ we will have to spill it before the call anyway. So do it now */
+
+ if (regSet.rsMaskUsed & genRegMask(regNum))
+ {
+ noway_assert(genRegMask(regNum) & RBM_CALLEE_TRASH);
+ regSet.rsSpillReg(regNum);
+ }
+
+ /* Mark the register as 'used' */
+
+ regSet.rsMarkRegUsed(curr);
+ }
+ else
+ {
+ noway_assert(!"UNDONE: Passing a TYP_STRUCT in register arguments");
+ }
+ }
+
+ /* If any of the previously loaded arguments were spilled - reload them */
+
+ for (args = regArgs; args; args = args->Rest())
+ {
+ curr = args->Current();
+ assert(curr);
+
+ if (curr->gtFlags & GTF_SPILLED)
+ {
+ if (isRegPairType(curr->gtType))
+ {
+ regSet.rsUnspillRegPair(curr, genRegPairMask(curr->gtRegPair), RegSet::KEEP_REG);
+ }
+ else
+ {
+ regSet.rsUnspillReg(curr, genRegMask(curr->gtRegNum), RegSet::KEEP_REG);
+ }
+ }
+ }
+
+ /* Return the total size pushed */
+
+ return size;
+}
+#ifdef _PREFAST_
+#pragma warning(pop)
+#endif
+
+#else // FEATURE_FIXED_OUT_ARGS
+
+//
+// ARM and AMD64 use this method to pass the stack-based args
+//
+// returns size pushed (always zero)
+size_t CodeGen::genPushArgList(GenTreePtr call)
+{
+
+ GenTreeArgList* lateArgs = call->gtCall.gtCallLateArgs;
+ GenTreePtr curr;
+ var_types type;
+ int argSize;
+
+ GenTreeArgList* args;
+ // Create a local, artificial GenTreeArgList that includes the gtCallObjp, if that exists, as first argument,
+ // so we can iterate over this argument list more uniformly.
+ // Need to provide a temporary non-null first argument here: if we use this, we'll replace it.
+ GenTreeArgList objpArgList(/*temp dummy arg*/ call, call->gtCall.gtCallArgs);
+ if (call->gtCall.gtCallObjp == NULL)
+ {
+ args = call->gtCall.gtCallArgs;
+ }
+ else
+ {
+ objpArgList.Current() = call->gtCall.gtCallObjp;
+ args = &objpArgList;
+ }
+
+ for (; args; args = args->Rest())
+ {
+ /* Get hold of the next argument value */
+ curr = args->Current();
+
+ fgArgTabEntryPtr curArgTabEntry = compiler->gtArgEntryByNode(call, curr);
+ assert(curArgTabEntry);
+ regNumber regNum = curArgTabEntry->regNum;
+ int argOffset = curArgTabEntry->slotNum * TARGET_POINTER_SIZE;
+
+ /* See what type of a value we're passing */
+ type = curr->TypeGet();
+
+ if ((type == TYP_STRUCT) && (curr->gtOper == GT_ASG))
+ {
+ type = TYP_VOID;
+ }
+
+ // This holds the set of registers corresponding to enregistered promoted struct field variables
+ // that go dead after this use of the variable in the argument list.
+ regMaskTP deadFieldVarRegs = RBM_NONE;
+
+ argSize = TARGET_POINTER_SIZE; // The default size for an arg is one pointer-sized item
+
+ if (curr->IsArgPlaceHolderNode())
+ {
+ assert(curr->gtFlags & GTF_LATE_ARG);
+ goto DEFERRED;
+ }
+
+ if (varTypeIsSmall(type))
+ {
+            // Normalize 'type'; it represents the item that we will be storing in the Outgoing Args
+ type = TYP_I_IMPL;
+ }
+
+ switch (type)
+ {
+
+ case TYP_DOUBLE:
+ case TYP_LONG:
+
+#if defined(_TARGET_ARM_)
+
+ argSize = (TARGET_POINTER_SIZE * 2);
+
+ /* Is the value a constant? */
+
+ if (curr->gtOper == GT_CNS_LNG)
+ {
+ assert((curr->gtFlags & GTF_LATE_ARG) == 0);
+
+ int hiVal = (int)(curr->gtLngCon.gtLconVal >> 32);
+ int loVal = (int)(curr->gtLngCon.gtLconVal & 0xffffffff);
+
+ instGen_Store_Imm_Into_Lcl(TYP_INT, EA_4BYTE, loVal, compiler->lvaOutgoingArgSpaceVar, argOffset);
+
+ instGen_Store_Imm_Into_Lcl(TYP_INT, EA_4BYTE, hiVal, compiler->lvaOutgoingArgSpaceVar,
+ argOffset + 4);
+
+ break;
+ }
+ else
+ {
+ genCodeForTree(curr, 0);
+
+ if (curr->gtFlags & GTF_LATE_ARG)
+ {
+ // The arg was assigned into a temp and
+ // will be moved to the correct register or slot later
+
+ argSize = 0; // nothing is passed on the stack
+ }
+ else
+ {
+ // The arg is passed in the outgoing argument area of the stack frame
+ //
+ assert(curr->gtOper != GT_ASG); // GTF_LATE_ARG should be set if this is the case
+ assert(curr->gtFlags & GTF_REG_VAL); // should be enregistered after genCodeForTree(curr, 0)
+
+ if (type == TYP_LONG)
+ {
+ regNumber regLo = genRegPairLo(curr->gtRegPair);
+ regNumber regHi = genRegPairHi(curr->gtRegPair);
+
+ assert(regLo != REG_STK);
+ inst_SA_RV(ins_Store(TYP_INT), argOffset, regLo, TYP_INT);
+ if (regHi == REG_STK)
+ {
+ regHi = regSet.rsPickFreeReg();
+ inst_RV_TT(ins_Load(TYP_INT), regHi, curr, 4);
+ regTracker.rsTrackRegTrash(regHi);
+ }
+ inst_SA_RV(ins_Store(TYP_INT), argOffset + 4, regHi, TYP_INT);
+ }
+ else // (type == TYP_DOUBLE)
+ {
+ inst_SA_RV(ins_Store(type), argOffset, curr->gtRegNum, type);
+ }
+ }
+ }
+ break;
+
+#elif defined(_TARGET_64BIT_)
+ __fallthrough;
+#else
+#error "Unknown target for passing TYP_LONG argument using FIXED_ARGS"
+#endif
+
+ case TYP_REF:
+ case TYP_BYREF:
+
+ case TYP_FLOAT:
+ case TYP_INT:
+ /* Is the value a constant? */
+
+ if (curr->gtOper == GT_CNS_INT)
+ {
+ assert(!(curr->gtFlags & GTF_LATE_ARG));
+
+#if REDUNDANT_LOAD
+ regNumber reg = regTracker.rsIconIsInReg(curr->gtIntCon.gtIconVal);
+
+ if (reg != REG_NA)
+ {
+ inst_SA_RV(ins_Store(type), argOffset, reg, type);
+ }
+ else
+#endif
+ {
+ bool needReloc = compiler->opts.compReloc && curr->IsIconHandle();
+ emitAttr attr = needReloc ? EA_HANDLE_CNS_RELOC : emitTypeSize(type);
+ instGen_Store_Imm_Into_Lcl(type, attr, curr->gtIntCon.gtIconVal,
+ compiler->lvaOutgoingArgSpaceVar, argOffset);
+ }
+ break;
+ }
+
+ /* This is passed as a pointer-sized integer argument */
+
+ genCodeForTree(curr, 0);
+
+ // The arg has been evaluated now, but will be put in a register or pushed on the stack later.
+ if (curr->gtFlags & GTF_LATE_ARG)
+ {
+#ifdef _TARGET_ARM_
+ argSize = 0; // nothing is passed on the stack
+#endif
+ }
+ else
+ {
+ // The arg is passed in the outgoing argument area of the stack frame
+
+ assert(curr->gtOper != GT_ASG); // GTF_LATE_ARG should be set if this is the case
+ assert(curr->gtFlags & GTF_REG_VAL); // should be enregistered after genCodeForTree(curr, 0)
+ inst_SA_RV(ins_Store(type), argOffset, curr->gtRegNum, type);
+
+ if ((genRegMask(curr->gtRegNum) & regSet.rsMaskUsed) == 0)
+ gcInfo.gcMarkRegSetNpt(genRegMask(curr->gtRegNum));
+ }
+ break;
+
+ case TYP_VOID:
+ /* Is this a nothing node, deferred register argument? */
+
+ if (curr->gtFlags & GTF_LATE_ARG)
+ {
+ /* Handle side-effects */
+ DEFERRED:
+ if (curr->OperIsCopyBlkOp() || curr->OperGet() == GT_COMMA)
+ {
+#ifdef _TARGET_ARM_
+ {
+ GenTreePtr curArgNode = curArgTabEntry->node;
+ var_types curRegArgType = curArgNode->gtType;
+ assert(curRegArgType != TYP_UNDEF);
+
+ if (curRegArgType == TYP_STRUCT)
+ {
+ // If the RHS of the COPYBLK is a promoted struct local, then the use of that
+ // is an implicit use of all its field vars. If these are last uses, remember that,
+                            // so we can later update the GC info.
+ if (curr->OperIsCopyBlkOp())
+ deadFieldVarRegs |= genFindDeadFieldRegs(curr);
+ }
+ }
+#endif // _TARGET_ARM_
+
+ genCodeForTree(curr, 0);
+ }
+ else
+ {
+ assert(curr->IsArgPlaceHolderNode() || curr->IsNothingNode());
+ }
+
+#if defined(_TARGET_ARM_)
+ argSize = curArgTabEntry->numSlots * TARGET_POINTER_SIZE;
+#endif
+ }
+ else
+ {
+ for (GenTree* arg = curr; arg->gtOper == GT_COMMA; arg = arg->gtOp.gtOp2)
+ {
+ GenTreePtr op1 = arg->gtOp.gtOp1;
+
+ genEvalSideEffects(op1);
+ genUpdateLife(op1);
+ }
+ }
+ break;
+
+#ifdef _TARGET_ARM_
+
+ case TYP_STRUCT:
+ {
+ GenTree* arg = curr;
+ while (arg->gtOper == GT_COMMA)
+ {
+ GenTreePtr op1 = arg->gtOp.gtOp1;
+ genEvalSideEffects(op1);
+ genUpdateLife(op1);
+ arg = arg->gtOp.gtOp2;
+ }
+ noway_assert((arg->OperGet() == GT_OBJ) || (arg->OperGet() == GT_MKREFANY));
+
+ CORINFO_CLASS_HANDLE clsHnd;
+ unsigned argAlign;
+ unsigned slots;
+ BYTE* gcLayout = NULL;
+
+ // If the struct being passed is a OBJ of a local struct variable that is promoted (in the
+ // INDEPENDENT fashion, which doesn't require writes to be written through to the variable's
+ // home stack loc) "promotedStructLocalVarDesc" will be set to point to the local variable
+ // table entry for the promoted struct local. As we fill slots with the contents of a
+ // promoted struct, "bytesOfNextSlotOfCurPromotedStruct" will be the number of filled bytes
+ // that indicate another filled slot, and "nextPromotedStructFieldVar" will be the local
+ // variable number of the next field variable to be copied.
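+                // For example (hypothetical struct): for a promoted { int a; int b; int c; } passed in the
+                // outgoing arg area, slots are filled one at a time; after 'a' is copied,
+                // bytesOfNextSlotOfCurPromotedStruct advances from 4 to 8 and nextPromotedStructFieldVar
+                // moves on to the field local for 'b', and so on until every field local is consumed.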
+ LclVarDsc* promotedStructLocalVarDesc = NULL;
+ GenTreePtr structLocalTree = NULL;
+ unsigned bytesOfNextSlotOfCurPromotedStruct = TARGET_POINTER_SIZE; // Size of slot.
+ unsigned nextPromotedStructFieldVar = BAD_VAR_NUM;
+ unsigned promotedStructOffsetOfFirstStackSlot = 0;
+ unsigned argOffsetOfFirstStackSlot = UINT32_MAX; // Indicates uninitialized.
+
+ if (arg->OperGet() == GT_OBJ)
+ {
+ clsHnd = arg->gtObj.gtClass;
+ unsigned originalSize = compiler->info.compCompHnd->getClassSize(clsHnd);
+ argAlign =
+ roundUp(compiler->info.compCompHnd->getClassAlignmentRequirement(clsHnd), TARGET_POINTER_SIZE);
+ argSize = (unsigned)(roundUp(originalSize, TARGET_POINTER_SIZE));
+
+ slots = (unsigned)(argSize / TARGET_POINTER_SIZE);
+
+ gcLayout = new (compiler, CMK_Codegen) BYTE[slots];
+
+ compiler->info.compCompHnd->getClassGClayout(clsHnd, gcLayout);
+
+ // Are we loading a promoted struct local var?
+ if (arg->gtObj.gtOp1->gtOper == GT_ADDR && arg->gtObj.gtOp1->gtOp.gtOp1->gtOper == GT_LCL_VAR)
+ {
+ structLocalTree = arg->gtObj.gtOp1->gtOp.gtOp1;
+ unsigned structLclNum = structLocalTree->gtLclVarCommon.gtLclNum;
+ LclVarDsc* varDsc = &compiler->lvaTable[structLclNum];
+
+ // As much as we would like this to be a noway_assert, we can't because
+                    // there are some weird casts out there, and backwards compatibility
+ // dictates we do *NOT* start rejecting them now. lvaGetPromotion and
+ // lvPromoted in general currently do not require the local to be
+ // TYP_STRUCT, so this assert is really more about how we wish the world
+                    // was than some JIT invariant.
+ assert((structLocalTree->TypeGet() == TYP_STRUCT) || compiler->compUnsafeCastUsed);
+
+ Compiler::lvaPromotionType promotionType = compiler->lvaGetPromotionType(varDsc);
+
+ if (varDsc->lvPromoted &&
+ promotionType == Compiler::PROMOTION_TYPE_INDEPENDENT) // Otherwise it is guaranteed to live
+ // on stack.
+ {
+ assert(!varDsc->lvAddrExposed); // Compiler::PROMOTION_TYPE_INDEPENDENT ==> not exposed.
+ promotedStructLocalVarDesc = varDsc;
+ nextPromotedStructFieldVar = promotedStructLocalVarDesc->lvFieldLclStart;
+ }
+ }
+ }
+ else
+ {
+ noway_assert(arg->OperGet() == GT_MKREFANY);
+
+ clsHnd = NULL;
+ argAlign = TARGET_POINTER_SIZE;
+ argSize = 2 * TARGET_POINTER_SIZE;
+ slots = 2;
+ }
+
+ // Any TYP_STRUCT argument that is passed in registers must be moved over to the LateArg list
+ noway_assert(regNum == REG_STK);
+
+ // This code passes a TYP_STRUCT by value using the outgoing arg space var
+ //
+ if (arg->OperGet() == GT_OBJ)
+ {
+ regNumber regSrc = REG_STK;
+ regNumber regTmp = REG_STK; // This will get set below if the obj is not of a promoted struct local.
+ int cStackSlots = 0;
+
+ if (promotedStructLocalVarDesc == NULL)
+ {
+ genComputeReg(arg->gtObj.gtOp1, 0, RegSet::ANY_REG, RegSet::KEEP_REG);
+ noway_assert(arg->gtObj.gtOp1->gtFlags & GTF_REG_VAL);
+ regSrc = arg->gtObj.gtOp1->gtRegNum;
+ }
+
+                // The number of bytes to add to "argOffset" to get the arg offset of the current slot.
+ int extraArgOffset = 0;
+
+ for (unsigned i = 0; i < slots; i++)
+ {
+ emitAttr fieldSize;
+ if (gcLayout[i] == TYPE_GC_NONE)
+ fieldSize = EA_PTRSIZE;
+ else if (gcLayout[i] == TYPE_GC_REF)
+ fieldSize = EA_GCREF;
+ else
+ {
+ noway_assert(gcLayout[i] == TYPE_GC_BYREF);
+ fieldSize = EA_BYREF;
+ }
+
+ // Pass the argument using the lvaOutgoingArgSpaceVar
+
+ if (promotedStructLocalVarDesc != NULL)
+ {
+ if (argOffsetOfFirstStackSlot == UINT32_MAX)
+ argOffsetOfFirstStackSlot = argOffset;
+
+ regNumber maxRegArg = regNumber(MAX_REG_ARG);
+ bool filledExtraSlot = genFillSlotFromPromotedStruct(
+ arg, curArgTabEntry, promotedStructLocalVarDesc, fieldSize, &nextPromotedStructFieldVar,
+ &bytesOfNextSlotOfCurPromotedStruct,
+ /*pCurRegNum*/ &maxRegArg,
+ /*argOffset*/ argOffset + extraArgOffset,
+ /*fieldOffsetOfFirstStackSlot*/ promotedStructOffsetOfFirstStackSlot,
+ argOffsetOfFirstStackSlot, &deadFieldVarRegs, &regTmp);
+ extraArgOffset += TARGET_POINTER_SIZE;
+ // If we filled an extra slot with an 8-byte value, skip a slot.
+ if (filledExtraSlot)
+ {
+ i++;
+ cStackSlots++;
+ extraArgOffset += TARGET_POINTER_SIZE;
+ }
+ }
+ else
+ {
+ if (regTmp == REG_STK)
+ {
+ regTmp = regSet.rsPickFreeReg();
+ }
+
+ getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), fieldSize, regTmp, regSrc,
+ i * TARGET_POINTER_SIZE);
+
+ getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), fieldSize, regTmp,
+ compiler->lvaOutgoingArgSpaceVar,
+ argOffset + cStackSlots * TARGET_POINTER_SIZE);
+ regTracker.rsTrackRegTrash(regTmp);
+ }
+ cStackSlots++;
+ }
+
+ if (promotedStructLocalVarDesc == NULL)
+ {
+ regSet.rsMarkRegFree(genRegMask(regSrc));
+ }
+ if (structLocalTree != NULL)
+ genUpdateLife(structLocalTree);
+ }
+ else
+ {
+ assert(arg->OperGet() == GT_MKREFANY);
+ PushMkRefAnyArg(arg, curArgTabEntry, RBM_ALLINT);
+ argSize = (curArgTabEntry->numSlots * TARGET_POINTER_SIZE);
+ }
+ }
+ break;
+#endif // _TARGET_ARM_
+
+ default:
+ assert(!"unhandled/unexpected arg type");
+ NO_WAY("unhandled/unexpected arg type");
+ }
+
+ /* Update the current set of live variables */
+
+ genUpdateLife(curr);
+
+ // Now, if some copied field locals were enregistered, and they're now dead, update the set of
+ // register holding gc pointers.
+ if (deadFieldVarRegs != 0)
+ gcInfo.gcMarkRegSetNpt(deadFieldVarRegs);
+
+ /* Update the current argument stack offset */
+
+ argOffset += argSize;
+
+ /* Continue with the next argument, if any more are present */
+ } // while (args)
+
+ if (lateArgs)
+ {
+ SetupLateArgs(call);
+ }
+
+ /* Return the total size pushed */
+
+ return 0;
+}
+
+#ifdef _TARGET_ARM_
+bool CodeGen::genFillSlotFromPromotedStruct(GenTreePtr arg,
+ fgArgTabEntryPtr curArgTabEntry,
+ LclVarDsc* promotedStructLocalVarDesc,
+ emitAttr fieldSize,
+ unsigned* pNextPromotedStructFieldVar,
+ unsigned* pBytesOfNextSlotOfCurPromotedStruct,
+ regNumber* pCurRegNum,
+ int argOffset,
+ int fieldOffsetOfFirstStackSlot,
+ int argOffsetOfFirstStackSlot,
+ regMaskTP* deadFieldVarRegs,
+ regNumber* pRegTmp)
+{
+ unsigned nextPromotedStructFieldVar = *pNextPromotedStructFieldVar;
+ unsigned limitPromotedStructFieldVar =
+ promotedStructLocalVarDesc->lvFieldLclStart + promotedStructLocalVarDesc->lvFieldCnt;
+ unsigned bytesOfNextSlotOfCurPromotedStruct = *pBytesOfNextSlotOfCurPromotedStruct;
+
+ regNumber curRegNum = *pCurRegNum;
+ regNumber regTmp = *pRegTmp;
+ bool filledExtraSlot = false;
+
+ if (nextPromotedStructFieldVar == limitPromotedStructFieldVar)
+ {
+ // We've already finished; just return.
+ // We can reach this because the calling loop computes a # of slots based on the size of the struct.
+ // If the struct has padding at the end because of alignment (say, long/int), then we'll get a call for
+ // the fourth slot, even though we've copied all the fields.
+ return false;
+ }
+
+ LclVarDsc* fieldVarDsc = &compiler->lvaTable[nextPromotedStructFieldVar];
+
+ // Does this field fill an entire slot, and does it go at the start of the slot?
+ // If so, things are easier...
+
+ bool oneFieldFillsSlotFromStart =
+ (fieldVarDsc->lvFldOffset < bytesOfNextSlotOfCurPromotedStruct) // The field should start in the current slot...
+ && ((fieldVarDsc->lvFldOffset % 4) == 0) // at the start of the slot, and...
+ && (nextPromotedStructFieldVar + 1 ==
+ limitPromotedStructFieldVar // next field, if there is one, goes in the next slot.
+ || compiler->lvaTable[nextPromotedStructFieldVar + 1].lvFldOffset >= bytesOfNextSlotOfCurPromotedStruct);
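+    // For example (hypothetical layouts): in a promoted { int a; /*0*/ int b; /*4*/ } each field starts
+    // its own slot, so the condition holds for both fields; in { short s; /*0*/ short t; /*2*/ }, 's'
+    // starts at the slot boundary but 't' shares the slot, so the condition is false and the simpler
+    // single-field handling below is not used.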
+
+ // Compute the proper size.
+ if (fieldSize == EA_4BYTE) // Not a GC ref or byref.
+ {
+ switch (fieldVarDsc->lvExactSize)
+ {
+ case 1:
+ fieldSize = EA_1BYTE;
+ break;
+ case 2:
+ fieldSize = EA_2BYTE;
+ break;
+ case 8:
+ // An 8-byte field will be at an 8-byte-aligned offset unless explicit layout has been used,
+ // in which case we should not have promoted the struct variable.
+ noway_assert((fieldVarDsc->lvFldOffset % 8) == 0);
+
+ // If the current reg number is not aligned, align it, and return to the calling loop, which will
+ // consider that a filled slot and move on to the next argument register.
+ if (curRegNum != MAX_REG_ARG && ((curRegNum % 2) != 0))
+ {
+ // We must update the slot target, however!
+ bytesOfNextSlotOfCurPromotedStruct += 4;
+ *pBytesOfNextSlotOfCurPromotedStruct = bytesOfNextSlotOfCurPromotedStruct;
+ return false;
+ }
+ // Dest is an aligned pair of arg regs, if the struct type demands it.
+ noway_assert((curRegNum % 2) == 0);
+ // We leave the fieldSize as EA_4BYTE; but we must do 2 reg moves.
+ break;
+ default:
+ assert(fieldVarDsc->lvExactSize == 4);
+ break;
+ }
+ }
+ else
+ {
+ // If the gc layout said it's a GC ref or byref, then the field size must be 4.
+ noway_assert(fieldVarDsc->lvExactSize == 4);
+ }
+
+ // We may need the type of the field to influence instruction selection.
+    // If we have a TYP_LONG we can use TYP_I_IMPL and do two loads/stores.
+    // If the fieldVarDsc is an enregistered float we must use the field's exact type;
+    // however, if it is in memory we can use an integer type TYP_I_IMPL.
+ //
+ var_types fieldTypeForInstr = var_types(fieldVarDsc->lvType);
+ if ((fieldVarDsc->lvType == TYP_LONG) || (!fieldVarDsc->lvRegister && varTypeIsFloating(fieldTypeForInstr)))
+ {
+ fieldTypeForInstr = TYP_I_IMPL;
+ }
+
+ // If we have a HFA, then it is a much simpler deal -- HFAs are completely enregistered.
+ if (curArgTabEntry->isHfaRegArg)
+ {
+ assert(oneFieldFillsSlotFromStart);
+
+        // Is the field variable enregistered?
+ if (fieldVarDsc->lvRegister)
+ {
+ // Move the field var living in register to dst, if they are different registers.
+ regNumber srcReg = fieldVarDsc->lvRegNum;
+ regNumber dstReg = curRegNum;
+ if (srcReg != dstReg)
+ {
+ inst_RV_RV(ins_Copy(fieldVarDsc->TypeGet()), dstReg, srcReg, fieldVarDsc->TypeGet());
+ assert(genIsValidFloatReg(dstReg)); // we don't use register tracking for FP
+ }
+ }
+ else
+ {
+ // Move the field var living in stack to dst.
+ getEmitter()->emitIns_R_S(ins_Load(fieldVarDsc->TypeGet()),
+ fieldVarDsc->TypeGet() == TYP_DOUBLE ? EA_8BYTE : EA_4BYTE, curRegNum,
+ nextPromotedStructFieldVar, 0);
+ assert(genIsValidFloatReg(curRegNum)); // we don't use register tracking for FP
+ }
+
+ // Mark the arg as used and using reg val.
+ genMarkTreeInReg(arg, curRegNum);
+ regSet.SetUsedRegFloat(arg, true);
+
+ // Advance for double.
+ if (fieldVarDsc->TypeGet() == TYP_DOUBLE)
+ {
+ bytesOfNextSlotOfCurPromotedStruct += 4;
+ curRegNum = REG_NEXT(curRegNum);
+ arg->gtRegNum = curRegNum;
+ regSet.SetUsedRegFloat(arg, true);
+ filledExtraSlot = true;
+ }
+ arg->gtRegNum = curArgTabEntry->regNum;
+
+ // Advance.
+ bytesOfNextSlotOfCurPromotedStruct += 4;
+ nextPromotedStructFieldVar++;
+ }
+ else
+ {
+ if (oneFieldFillsSlotFromStart)
+ {
+ // If we write to the stack, offset in outgoing args at which we'll write.
+ int fieldArgOffset = argOffsetOfFirstStackSlot + fieldVarDsc->lvFldOffset - fieldOffsetOfFirstStackSlot;
+ assert(fieldArgOffset >= 0);
+
+ // Is the source a register or memory?
+ if (fieldVarDsc->lvRegister)
+ {
+ if (fieldTypeForInstr == TYP_DOUBLE)
+ {
+ fieldSize = EA_8BYTE;
+ }
+
+ // Are we writing to a register or to the stack?
+ if (curRegNum != MAX_REG_ARG)
+ {
+ // Source is register and Dest is register.
+
+ instruction insCopy = INS_mov;
+
+ if (varTypeIsFloating(fieldTypeForInstr))
+ {
+ if (fieldTypeForInstr == TYP_FLOAT)
+ {
+ insCopy = INS_vmov_f2i;
+ }
+ else
+ {
+ assert(fieldTypeForInstr == TYP_DOUBLE);
+ insCopy = INS_vmov_d2i;
+ }
+ }
+
+ // If the value being copied is a TYP_LONG (8 bytes), it may be in two registers. Record the second
+                    // register (which may become a tmp register, if it's held in the argument register that the first
+ // register to be copied will overwrite).
+ regNumber otherRegNum = REG_STK;
+ if (fieldVarDsc->lvType == TYP_LONG)
+ {
+ otherRegNum = fieldVarDsc->lvOtherReg;
+ // Are we about to overwrite?
+ if (otherRegNum == curRegNum)
+ {
+ if (regTmp == REG_STK)
+ {
+ regTmp = regSet.rsPickFreeReg();
+ }
+ // Copy the second register to the temp reg.
+ getEmitter()->emitIns_R_R(INS_mov, fieldSize, regTmp, otherRegNum);
+ regTracker.rsTrackRegCopy(regTmp, otherRegNum);
+ otherRegNum = regTmp;
+ }
+ }
+
+ if (fieldVarDsc->lvType == TYP_DOUBLE)
+ {
+ assert(curRegNum <= REG_R2);
+ getEmitter()->emitIns_R_R_R(insCopy, fieldSize, curRegNum, genRegArgNext(curRegNum),
+ fieldVarDsc->lvRegNum);
+ regTracker.rsTrackRegTrash(curRegNum);
+ regTracker.rsTrackRegTrash(genRegArgNext(curRegNum));
+ }
+ else
+ {
+ // Now do the first register.
+ // It might be the case that it's already in the desired register; if so do nothing.
+ if (curRegNum != fieldVarDsc->lvRegNum)
+ {
+ getEmitter()->emitIns_R_R(insCopy, fieldSize, curRegNum, fieldVarDsc->lvRegNum);
+ regTracker.rsTrackRegCopy(curRegNum, fieldVarDsc->lvRegNum);
+ }
+ }
+
+ // In either case, mark the arg register as used.
+ regSet.rsMarkArgRegUsedByPromotedFieldArg(arg, curRegNum, EA_IS_GCREF(fieldSize));
+
+ // Is there a second half of the value?
+ if (fieldVarDsc->lvExactSize == 8)
+ {
+ curRegNum = genRegArgNext(curRegNum);
+ // The second dest reg must also be an argument register.
+ noway_assert(curRegNum < MAX_REG_ARG);
+
+ // Now, if it's an 8-byte TYP_LONG, we have to do the second 4 bytes.
+ if (fieldVarDsc->lvType == TYP_LONG)
+ {
+ // Copy the second register into the next argument register
+
+                            // If it's a register variable for a TYP_LONG value, then otherRegNum should now
+                            // hold the second register, or indicate that the high half is on the stack (REG_STK).
+ if (otherRegNum == REG_STK)
+ {
+ // Apparently when we partially enregister, we allocate stack space for the full
+ // 8 bytes, and enregister the low half. Thus the final TARGET_POINTER_SIZE offset
+ // parameter, to get the high half.
+ getEmitter()->emitIns_R_S(ins_Load(fieldTypeForInstr), fieldSize, curRegNum,
+ nextPromotedStructFieldVar, TARGET_POINTER_SIZE);
+ regTracker.rsTrackRegTrash(curRegNum);
+ }
+ else
+ {
+ // The other half is in a register.
+ // Again, it might be the case that it's already in the desired register; if so do
+ // nothing.
+ if (curRegNum != otherRegNum)
+ {
+ getEmitter()->emitIns_R_R(INS_mov, fieldSize, curRegNum, otherRegNum);
+ regTracker.rsTrackRegCopy(curRegNum, otherRegNum);
+ }
+ }
+ }
+
+ // Also mark the 2nd arg register as used.
+ regSet.rsMarkArgRegUsedByPromotedFieldArg(arg, curRegNum, false);
+ // Record the fact that we filled in an extra register slot
+ filledExtraSlot = true;
+ }
+ }
+ else
+ {
+ // Source is register and Dest is memory (OutgoingArgSpace).
+
+ // Now write the srcReg into the right location in the outgoing argument list.
+ getEmitter()->emitIns_S_R(ins_Store(fieldTypeForInstr), fieldSize, fieldVarDsc->lvRegNum,
+ compiler->lvaOutgoingArgSpaceVar, fieldArgOffset);
+
+ if (fieldVarDsc->lvExactSize == 8)
+ {
+ // Now, if it's an 8-byte TYP_LONG, we have to do the second 4 bytes.
+ if (fieldVarDsc->lvType == TYP_LONG)
+ {
+ if (fieldVarDsc->lvOtherReg == REG_STK)
+ {
+ // Source is stack.
+ if (regTmp == REG_STK)
+ {
+ regTmp = regSet.rsPickFreeReg();
+ }
+ // Apparently if we partially enregister, we allocate stack space for the full
+ // 8 bytes, and enregister the low half. Thus the final TARGET_POINTER_SIZE offset
+ // parameter, to get the high half.
+ getEmitter()->emitIns_R_S(ins_Load(fieldTypeForInstr), fieldSize, regTmp,
+ nextPromotedStructFieldVar, TARGET_POINTER_SIZE);
+ regTracker.rsTrackRegTrash(regTmp);
+ getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), fieldSize, regTmp,
+ compiler->lvaOutgoingArgSpaceVar,
+ fieldArgOffset + TARGET_POINTER_SIZE);
+ }
+ else
+ {
+ getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), fieldSize, fieldVarDsc->lvOtherReg,
+ compiler->lvaOutgoingArgSpaceVar,
+ fieldArgOffset + TARGET_POINTER_SIZE);
+ }
+ }
+ // Record the fact that we filled in an extra register slot
+ filledExtraSlot = true;
+ }
+ }
+ assert(fieldVarDsc->lvTracked); // Must be tracked, since it's enregistered...
+ // If the fieldVar becomes dead, then declare the register not to contain a pointer value.
+ if (arg->gtFlags & GTF_VAR_DEATH)
+ {
+ *deadFieldVarRegs |= genRegMask(fieldVarDsc->lvRegNum);
+ // We don't bother with the second reg of a register pair, since if it has one,
+ // it obviously doesn't hold a pointer.
+ }
+ }
+ else
+ {
+ // Source is in memory.
+
+ if (curRegNum != MAX_REG_ARG)
+ {
+ // Dest is reg.
+ getEmitter()->emitIns_R_S(ins_Load(fieldTypeForInstr), fieldSize, curRegNum,
+ nextPromotedStructFieldVar, 0);
+ regTracker.rsTrackRegTrash(curRegNum);
+
+ regSet.rsMarkArgRegUsedByPromotedFieldArg(arg, curRegNum, EA_IS_GCREF(fieldSize));
+
+ if (fieldVarDsc->lvExactSize == 8)
+ {
+ noway_assert(fieldSize == EA_4BYTE);
+ curRegNum = genRegArgNext(curRegNum);
+ noway_assert(curRegNum < MAX_REG_ARG); // Because of 8-byte alignment.
+ getEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL), fieldSize, curRegNum,
+ nextPromotedStructFieldVar, TARGET_POINTER_SIZE);
+ regTracker.rsTrackRegTrash(curRegNum);
+ regSet.rsMarkArgRegUsedByPromotedFieldArg(arg, curRegNum, EA_IS_GCREF(fieldSize));
+                        // Record the fact that we filled in an extra register slot
+ filledExtraSlot = true;
+ }
+ }
+ else
+ {
+ // Dest is stack.
+ if (regTmp == REG_STK)
+ {
+ regTmp = regSet.rsPickFreeReg();
+ }
+ getEmitter()->emitIns_R_S(ins_Load(fieldTypeForInstr), fieldSize, regTmp,
+ nextPromotedStructFieldVar, 0);
+
+ // Now write regTmp into the right location in the outgoing argument list.
+ getEmitter()->emitIns_S_R(ins_Store(fieldTypeForInstr), fieldSize, regTmp,
+ compiler->lvaOutgoingArgSpaceVar, fieldArgOffset);
+ // We overwrote "regTmp", so erase any previous value we recorded that it contained.
+ regTracker.rsTrackRegTrash(regTmp);
+
+ if (fieldVarDsc->lvExactSize == 8)
+ {
+ getEmitter()->emitIns_R_S(ins_Load(fieldTypeForInstr), fieldSize, regTmp,
+ nextPromotedStructFieldVar, TARGET_POINTER_SIZE);
+
+ getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), fieldSize, regTmp,
+ compiler->lvaOutgoingArgSpaceVar,
+ fieldArgOffset + TARGET_POINTER_SIZE);
+ // Record the fact that we filled in an extra stack slot
+ filledExtraSlot = true;
+ }
+ }
+ }
+
+ // Bump up the following if we filled in an extra slot
+ if (filledExtraSlot)
+ bytesOfNextSlotOfCurPromotedStruct += 4;
+
+ // Go to the next field.
+ nextPromotedStructFieldVar++;
+ if (nextPromotedStructFieldVar == limitPromotedStructFieldVar)
+ {
+ fieldVarDsc = NULL;
+ }
+ else
+ {
+ // The next field should have the same parent variable, and we should have put the field vars in order
+ // sorted by offset.
+ assert(fieldVarDsc->lvIsStructField && compiler->lvaTable[nextPromotedStructFieldVar].lvIsStructField &&
+ fieldVarDsc->lvParentLcl == compiler->lvaTable[nextPromotedStructFieldVar].lvParentLcl &&
+ fieldVarDsc->lvFldOffset < compiler->lvaTable[nextPromotedStructFieldVar].lvFldOffset);
+ fieldVarDsc = &compiler->lvaTable[nextPromotedStructFieldVar];
+ }
+ bytesOfNextSlotOfCurPromotedStruct += 4;
+ }
+ else // oneFieldFillsSlotFromStart == false
+ {
+ // The current slot should contain more than one field.
+ // We'll construct a word in memory for the slot, then load it into a register.
+ // (Note that it *may* be possible for the fldOffset to be greater than the largest offset in the current
+ // slot, in which case we'll just skip this loop altogether.)
+ while (fieldVarDsc != NULL && fieldVarDsc->lvFldOffset < bytesOfNextSlotOfCurPromotedStruct)
+ {
+ // If it doesn't fill a slot, it can't overflow the slot (again, because we only promote structs
+ // whose fields have their natural alignment, and alignment == size on ARM).
+ noway_assert(fieldVarDsc->lvFldOffset + fieldVarDsc->lvExactSize <= bytesOfNextSlotOfCurPromotedStruct);
+
+ // If the argument goes to the stack, the offset in the outgoing arg area for the argument.
+ int fieldArgOffset = argOffsetOfFirstStackSlot + fieldVarDsc->lvFldOffset - fieldOffsetOfFirstStackSlot;
+ noway_assert(argOffset == INT32_MAX ||
+ (argOffset <= fieldArgOffset && fieldArgOffset < argOffset + TARGET_POINTER_SIZE));
+
+ if (fieldVarDsc->lvRegister)
+ {
+ if (curRegNum != MAX_REG_ARG)
+ {
+ noway_assert(compiler->lvaPromotedStructAssemblyScratchVar != BAD_VAR_NUM);
+
+ getEmitter()->emitIns_S_R(ins_Store(fieldTypeForInstr), fieldSize, fieldVarDsc->lvRegNum,
+ compiler->lvaPromotedStructAssemblyScratchVar,
+ fieldVarDsc->lvFldOffset % 4);
+ }
+ else
+ {
+ // Dest is stack; write directly.
+ getEmitter()->emitIns_S_R(ins_Store(fieldTypeForInstr), fieldSize, fieldVarDsc->lvRegNum,
+ compiler->lvaOutgoingArgSpaceVar, fieldArgOffset);
+ }
+ }
+ else
+ {
+ // Source is in memory.
+
+ // Make sure we have a temporary register to use...
+ if (regTmp == REG_STK)
+ {
+ regTmp = regSet.rsPickFreeReg();
+ }
+ getEmitter()->emitIns_R_S(ins_Load(fieldTypeForInstr), fieldSize, regTmp,
+ nextPromotedStructFieldVar, 0);
+ regTracker.rsTrackRegTrash(regTmp);
+
+ if (curRegNum != MAX_REG_ARG)
+ {
+ noway_assert(compiler->lvaPromotedStructAssemblyScratchVar != BAD_VAR_NUM);
+
+ getEmitter()->emitIns_S_R(ins_Store(fieldTypeForInstr), fieldSize, regTmp,
+ compiler->lvaPromotedStructAssemblyScratchVar,
+ fieldVarDsc->lvFldOffset % 4);
+ }
+ else
+ {
+ getEmitter()->emitIns_S_R(ins_Store(fieldTypeForInstr), fieldSize, regTmp,
+ compiler->lvaOutgoingArgSpaceVar, fieldArgOffset);
+ }
+ }
+ // Go to the next field.
+ nextPromotedStructFieldVar++;
+ if (nextPromotedStructFieldVar == limitPromotedStructFieldVar)
+ {
+ fieldVarDsc = NULL;
+ }
+ else
+ {
+ // The next field should have the same parent variable, and we should have put the field vars in
+ // order sorted by offset.
+ noway_assert(fieldVarDsc->lvIsStructField &&
+ compiler->lvaTable[nextPromotedStructFieldVar].lvIsStructField &&
+ fieldVarDsc->lvParentLcl ==
+ compiler->lvaTable[nextPromotedStructFieldVar].lvParentLcl &&
+ fieldVarDsc->lvFldOffset < compiler->lvaTable[nextPromotedStructFieldVar].lvFldOffset);
+ fieldVarDsc = &compiler->lvaTable[nextPromotedStructFieldVar];
+ }
+ }
+            // Now, if we were accumulating the slot's contents in the assembly scratch variable
+            // (lvaPromotedStructAssemblyScratchVar) in order to write it to an argument register, do so.
+ if (curRegNum != MAX_REG_ARG)
+ {
+ noway_assert(compiler->lvaPromotedStructAssemblyScratchVar != BAD_VAR_NUM);
+
+ getEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL), EA_4BYTE, curRegNum,
+ compiler->lvaPromotedStructAssemblyScratchVar, 0);
+ regTracker.rsTrackRegTrash(curRegNum);
+ regSet.rsMarkArgRegUsedByPromotedFieldArg(arg, curRegNum, EA_IS_GCREF(fieldSize));
+ }
+ // We've finished a slot; set the goal of the next slot.
+ bytesOfNextSlotOfCurPromotedStruct += 4;
+ }
+ }
+
+ // Write back the updates.
+ *pNextPromotedStructFieldVar = nextPromotedStructFieldVar;
+ *pBytesOfNextSlotOfCurPromotedStruct = bytesOfNextSlotOfCurPromotedStruct;
+ *pCurRegNum = curRegNum;
+ *pRegTmp = regTmp;
+
+ return filledExtraSlot;
+}
+#endif // _TARGET_ARM_
+
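+// genFindDeadFieldRegs: if the source of the copy-block 'cpBlk' is a promoted struct local whose
+// last use (death) is this copy, return the mask of registers holding its enregistered field vars;
+// otherwise return RBM_NONE.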
+regMaskTP CodeGen::genFindDeadFieldRegs(GenTreePtr cpBlk)
+{
+ noway_assert(cpBlk->OperIsCopyBlkOp()); // Precondition.
+ GenTreePtr rhs = cpBlk->gtOp.gtOp1;
+ regMaskTP res = 0;
+ if (rhs->OperIsIndir())
+ {
+ GenTree* addr = rhs->AsIndir()->Addr();
+ if (addr->gtOper == GT_ADDR)
+ {
+ rhs = addr->gtOp.gtOp1;
+ }
+ }
+ if (rhs->OperGet() == GT_LCL_VAR)
+ {
+ LclVarDsc* rhsDsc = &compiler->lvaTable[rhs->gtLclVarCommon.gtLclNum];
+ if (rhsDsc->lvPromoted)
+ {
+ // It is promoted; iterate over its field vars.
+ unsigned fieldVarNum = rhsDsc->lvFieldLclStart;
+ for (unsigned i = 0; i < rhsDsc->lvFieldCnt; i++, fieldVarNum++)
+ {
+ LclVarDsc* fieldVarDsc = &compiler->lvaTable[fieldVarNum];
+ // Did the variable go dead, and is it enregistered?
+ if (fieldVarDsc->lvRegister && (rhs->gtFlags & GTF_VAR_DEATH))
+ {
+ // Add the register number to the set of registers holding field vars that are going dead.
+ res |= genRegMask(fieldVarDsc->lvRegNum);
+ }
+ }
+ }
+ }
+ return res;
+}
+
+void CodeGen::SetupLateArgs(GenTreePtr call)
+{
+ GenTreeArgList* lateArgs;
+ GenTreePtr curr;
+
+ /* Generate the code to move the late arguments into registers */
+
+ for (lateArgs = call->gtCall.gtCallLateArgs; lateArgs; lateArgs = lateArgs->Rest())
+ {
+ curr = lateArgs->Current();
+ assert(curr);
+
+ fgArgTabEntryPtr curArgTabEntry = compiler->gtArgEntryByNode(call, curr);
+ assert(curArgTabEntry);
+ regNumber regNum = curArgTabEntry->regNum;
+ unsigned argOffset = curArgTabEntry->slotNum * TARGET_POINTER_SIZE;
+
+ assert(isRegParamType(curr->TypeGet()));
+ assert(curr->gtType != TYP_VOID);
+
+ /* If the register is already marked as used, it will become
+ multi-used. However, since it is a callee-trashed register,
+ we will have to spill it before the call anyway. So do it now */
+
+ {
+ // Remember which registers hold pointers. We will spill
+ // them, but the code that follows will fetch reg vars from
+            // the registers, so we need that GC info.
+ // Also regSet.rsSpillReg doesn't like to spill enregistered
+ // variables, but if this is their last use that is *exactly*
+ // what we need to do, so we have to temporarily pretend
+ // they are no longer live.
+ // You might ask why are they in regSet.rsMaskUsed and regSet.rsMaskVars
+ // when their last use is about to occur?
+ // It is because this is the second operand to be evaluated
+ // of some parent binary op, and the first operand is
+ // live across this tree, and thought it could re-use the
+            // variable's register (like a GT_REG_VAR). This probably
+ // is caused by RegAlloc assuming the first operand would
+ // evaluate into another register.
+ regMaskTP rsTemp = regSet.rsMaskVars & regSet.rsMaskUsed & RBM_CALLEE_TRASH;
+ regMaskTP gcRegSavedByref = gcInfo.gcRegByrefSetCur & rsTemp;
+ regMaskTP gcRegSavedGCRef = gcInfo.gcRegGCrefSetCur & rsTemp;
+ regSet.RemoveMaskVars(rsTemp);
+
+ regNumber regNum2 = regNum;
+ for (unsigned i = 0; i < curArgTabEntry->numRegs; i++)
+ {
+ if (regSet.rsMaskUsed & genRegMask(regNum2))
+ {
+ assert(genRegMask(regNum2) & RBM_CALLEE_TRASH);
+ regSet.rsSpillReg(regNum2);
+ }
+ regNum2 = genRegArgNext(regNum2);
+ assert(i + 1 == curArgTabEntry->numRegs || regNum2 != MAX_REG_ARG);
+ }
+
+ // Restore gc tracking masks.
+ gcInfo.gcRegByrefSetCur |= gcRegSavedByref;
+ gcInfo.gcRegGCrefSetCur |= gcRegSavedGCRef;
+
+ // Set maskvars back to normal
+ regSet.AddMaskVars(rsTemp);
+ }
+
+ /* Evaluate the argument to a register */
+
+ /* Check if this is the guess area for the resolve interface call
+         * Pass a size of EA_OFFSET */
+ if (curr->gtOper == GT_CLS_VAR && compiler->eeGetJitDataOffs(curr->gtClsVar.gtClsVarHnd) >= 0)
+ {
+ getEmitter()->emitIns_R_C(ins_Load(TYP_INT), EA_OFFSET, regNum, curr->gtClsVar.gtClsVarHnd, 0);
+ regTracker.rsTrackRegTrash(regNum);
+
+ /* The value is now in the appropriate register */
+
+ genMarkTreeInReg(curr, regNum);
+
+ regSet.rsMarkRegUsed(curr);
+ }
+#ifdef _TARGET_ARM_
+ else if (curr->gtType == TYP_STRUCT)
+ {
+ GenTree* arg = curr;
+ while (arg->gtOper == GT_COMMA)
+ {
+ GenTreePtr op1 = arg->gtOp.gtOp1;
+ genEvalSideEffects(op1);
+ genUpdateLife(op1);
+ arg = arg->gtOp.gtOp2;
+ }
+ noway_assert((arg->OperGet() == GT_OBJ) || (arg->OperGet() == GT_LCL_VAR) ||
+ (arg->OperGet() == GT_MKREFANY));
+
+ // This code passes a TYP_STRUCT by value using
+ // the argument registers first and
+ // then the lvaOutgoingArgSpaceVar area.
+ //
+
+ // We prefer to choose low registers here to reduce code bloat
+ regMaskTP regNeedMask = RBM_LOW_REGS;
+ unsigned firstStackSlot = 0;
+ unsigned argAlign = TARGET_POINTER_SIZE;
+ size_t originalSize = InferStructOpSizeAlign(arg, &argAlign);
+
+ unsigned slots = (unsigned)(roundUp(originalSize, TARGET_POINTER_SIZE) / TARGET_POINTER_SIZE);
+ assert(slots > 0);
+
+ if (regNum == REG_STK)
+ {
+ firstStackSlot = 0;
+ }
+ else
+ {
+ if (argAlign == (TARGET_POINTER_SIZE * 2))
+ {
+ assert((regNum & 1) == 0);
+ }
+
+ // firstStackSlot is an index of the first slot of the struct
+ // that is on the stack, in the range [0,slots]. If it is 'slots',
+ // then the entire struct is in registers. It is also equal to
+ // the number of slots of the struct that are passed in registers.
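+                // For example (illustration, with MAX_REG_ARG == 4): a 16-byte struct (4 slots)
+                // whose first register is r2 gets firstStackSlot == 2, so slots 0-1 go in r2/r3
+                // and slots 2-3 go to the outgoing argument area.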
+
+ if (curArgTabEntry->isHfaRegArg)
+ {
+                    // HFA arguments that we have decided to pass in registers must fit in the FP register space.
+ assert(regNum >= FIRST_FP_ARGREG && "HFA must go in FP register");
+ assert(regNum + slots - 1 <= LAST_FP_ARGREG &&
+ "HFA argument doesn't fit entirely in FP argument registers");
+ firstStackSlot = slots;
+ }
+ else if (regNum + slots > MAX_REG_ARG)
+ {
+ firstStackSlot = MAX_REG_ARG - regNum;
+ assert(firstStackSlot > 0);
+ }
+ else
+ {
+ firstStackSlot = slots;
+ }
+
+ if (curArgTabEntry->isHfaRegArg)
+ {
+ // Mask out the registers used by an HFA arg from the ones used to compute tree into.
+ for (unsigned i = regNum; i < regNum + slots; i++)
+ {
+ regNeedMask &= ~genRegMask(regNumber(i));
+ }
+ }
+ }
+
+ // This holds the set of registers corresponding to enregistered promoted struct field variables
+ // that go dead after this use of the variable in the argument list.
+ regMaskTP deadFieldVarRegs = RBM_NONE;
+
+ // If the struct being passed is an OBJ of a local struct variable that is promoted (in the
+ // INDEPENDENT fashion, which doesn't require writes to be written through to the variables
+ // home stack loc) "promotedStructLocalVarDesc" will be set to point to the local variable
+ // table entry for the promoted struct local. As we fill slots with the contents of a
+ // promoted struct, "bytesOfNextSlotOfCurPromotedStruct" will be the number of filled bytes
+ // that indicate another filled slot (if we have a 12-byte struct, it has 3 four byte slots; when we're
+ // working on the second slot, "bytesOfNextSlotOfCurPromotedStruct" will be 8, the point at which we're
+ // done), and "nextPromotedStructFieldVar" will be the local variable number of the next field variable
+ // to be copied.
+ LclVarDsc* promotedStructLocalVarDesc = NULL;
+ unsigned bytesOfNextSlotOfCurPromotedStruct = 0; // Size of slot.
+ unsigned nextPromotedStructFieldVar = BAD_VAR_NUM;
+ GenTreePtr structLocalTree = NULL;
+
+ BYTE* gcLayout = NULL;
+ regNumber regSrc = REG_NA;
+ if (arg->gtOper == GT_OBJ)
+ {
+ // Are we loading a promoted struct local var?
+ if (arg->gtObj.gtOp1->gtOper == GT_ADDR && arg->gtObj.gtOp1->gtOp.gtOp1->gtOper == GT_LCL_VAR)
+ {
+ structLocalTree = arg->gtObj.gtOp1->gtOp.gtOp1;
+ unsigned structLclNum = structLocalTree->gtLclVarCommon.gtLclNum;
+ LclVarDsc* varDsc = &compiler->lvaTable[structLclNum];
+
+ Compiler::lvaPromotionType promotionType = compiler->lvaGetPromotionType(varDsc);
+
+ if (varDsc->lvPromoted && promotionType == Compiler::PROMOTION_TYPE_INDEPENDENT) // Otherwise it is
+ // guaranteed to
+ // live on stack.
+ {
+ // Fix 388395 ARM JitStress WP7
+ noway_assert(structLocalTree->TypeGet() == TYP_STRUCT);
+
+ assert(!varDsc->lvAddrExposed); // Compiler::PROMOTION_TYPE_INDEPENDENT ==> not exposed.
+ promotedStructLocalVarDesc = varDsc;
+ nextPromotedStructFieldVar = promotedStructLocalVarDesc->lvFieldLclStart;
+ }
+ }
+
+ if (promotedStructLocalVarDesc == NULL)
+ {
+ // If it's not a promoted struct variable, set "regSrc" to the address
+ // of the struct local.
+ genComputeReg(arg->gtObj.gtOp1, regNeedMask, RegSet::EXACT_REG, RegSet::KEEP_REG);
+ noway_assert(arg->gtObj.gtOp1->gtFlags & GTF_REG_VAL);
+ regSrc = arg->gtObj.gtOp1->gtRegNum;
+ // Remove this register from the set of registers that we pick from, unless slots equals 1
+ if (slots > 1)
+ regNeedMask &= ~genRegMask(regSrc);
+ }
+
+ gcLayout = new (compiler, CMK_Codegen) BYTE[slots];
+ compiler->info.compCompHnd->getClassGClayout(arg->gtObj.gtClass, gcLayout);
+ }
+ else if (arg->gtOper == GT_LCL_VAR)
+ {
+ // Move the address of the LCL_VAR in arg into reg
+
+ unsigned varNum = arg->gtLclVarCommon.gtLclNum;
+
+ // Are we loading a promoted struct local var?
+ structLocalTree = arg;
+ unsigned structLclNum = structLocalTree->gtLclVarCommon.gtLclNum;
+ LclVarDsc* varDsc = &compiler->lvaTable[structLclNum];
+
+ noway_assert(structLocalTree->TypeGet() == TYP_STRUCT);
+
+ Compiler::lvaPromotionType promotionType = compiler->lvaGetPromotionType(varDsc);
+
+ if (varDsc->lvPromoted && promotionType == Compiler::PROMOTION_TYPE_INDEPENDENT) // Otherwise it is
+ // guaranteed to live
+ // on stack.
+ {
+ assert(!varDsc->lvAddrExposed); // Compiler::PROMOTION_TYPE_INDEPENDENT ==> not exposed.
+ promotedStructLocalVarDesc = varDsc;
+ nextPromotedStructFieldVar = promotedStructLocalVarDesc->lvFieldLclStart;
+ }
+
+ if (promotedStructLocalVarDesc == NULL)
+ {
+ regSrc = regSet.rsPickFreeReg(regNeedMask);
+ // Remove this register from the set of registers that we pick from, unless slots equals 1
+ if (slots > 1)
+ regNeedMask &= ~genRegMask(regSrc);
+
+ getEmitter()->emitIns_R_S(INS_lea, EA_PTRSIZE, regSrc, varNum, 0);
+ regTracker.rsTrackRegTrash(regSrc);
+ gcLayout = compiler->lvaGetGcLayout(varNum);
+ }
+ }
+ else if (arg->gtOper == GT_MKREFANY)
+ {
+ assert(slots == 2);
+ assert((firstStackSlot == 1) || (firstStackSlot == 2));
+ assert(argOffset == 0); // ???
+ PushMkRefAnyArg(arg, curArgTabEntry, regNeedMask);
+
+ // Adjust argOffset if part of this guy was pushed onto the stack
+ if (firstStackSlot < slots)
+ {
+ argOffset += TARGET_POINTER_SIZE;
+ }
+
+ // Skip the copy loop below because we have already placed the argument in the right place
+ slots = 0;
+ gcLayout = NULL;
+ }
+ else
+ {
+ assert(!"Unsupported TYP_STRUCT arg kind");
+ gcLayout = new (compiler, CMK_Codegen) BYTE[slots];
+ }
+
+ if (promotedStructLocalVarDesc != NULL)
+ {
+                // We must do the stack parts first, since those might need values
+ // from argument registers that will be overwritten in the portion of the
+ // loop that writes into the argument registers.
+ bytesOfNextSlotOfCurPromotedStruct = (firstStackSlot + 1) * TARGET_POINTER_SIZE;
+                // Now find the var number of the first field that starts in the first stack slot.
+ unsigned fieldVarLim =
+ promotedStructLocalVarDesc->lvFieldLclStart + promotedStructLocalVarDesc->lvFieldCnt;
+ while (compiler->lvaTable[nextPromotedStructFieldVar].lvFldOffset <
+ (firstStackSlot * TARGET_POINTER_SIZE) &&
+ nextPromotedStructFieldVar < fieldVarLim)
+ {
+ nextPromotedStructFieldVar++;
+ }
+                // We can reach the limit (meaning there is no field that goes even partly on the stack) only
+                // if the first stack slot is after the last slot.
+ assert(nextPromotedStructFieldVar < fieldVarLim || firstStackSlot >= slots);
+ }
+
+ if (slots > 0) // the mkref case may have set "slots" to zero.
+ {
+ // First pass the stack portion of the struct (if any)
+ //
+ int argOffsetOfFirstStackSlot = argOffset;
+ for (unsigned i = firstStackSlot; i < slots; i++)
+ {
+ emitAttr fieldSize;
+ if (gcLayout[i] == TYPE_GC_NONE)
+ fieldSize = EA_PTRSIZE;
+ else if (gcLayout[i] == TYPE_GC_REF)
+ fieldSize = EA_GCREF;
+ else
+ {
+ noway_assert(gcLayout[i] == TYPE_GC_BYREF);
+ fieldSize = EA_BYREF;
+ }
+
+ regNumber maxRegArg = regNumber(MAX_REG_ARG);
+ if (promotedStructLocalVarDesc != NULL)
+ {
+ regNumber regTmp = REG_STK;
+
+ bool filledExtraSlot =
+ genFillSlotFromPromotedStruct(arg, curArgTabEntry, promotedStructLocalVarDesc, fieldSize,
+ &nextPromotedStructFieldVar,
+ &bytesOfNextSlotOfCurPromotedStruct,
+ /*pCurRegNum*/ &maxRegArg, argOffset,
+ /*fieldOffsetOfFirstStackSlot*/ firstStackSlot *
+ TARGET_POINTER_SIZE,
+ argOffsetOfFirstStackSlot, &deadFieldVarRegs, &regTmp);
+ if (filledExtraSlot)
+ {
+ i++;
+ argOffset += TARGET_POINTER_SIZE;
+ }
+ }
+ else // (promotedStructLocalVarDesc == NULL)
+ {
+                        // When slots > 1, we perform multiple loads/stores, so regTmp cannot be equal to regSrc;
+                        // and although regSrc has been excluded from regNeedMask, regNeedMask is only a *hint*
+                        // to regSet.rsPickFreeReg, so we need to be a little more forceful.
+                        // Otherwise (slots == 1), just re-use the same register.
+ //
+ regNumber regTmp = regSrc;
+ if (slots != 1)
+ {
+ regMaskTP regSrcUsed;
+ regSet.rsLockReg(genRegMask(regSrc), &regSrcUsed);
+
+ regTmp = regSet.rsPickFreeReg(regNeedMask);
+
+ noway_assert(regTmp != regSrc);
+
+ regSet.rsUnlockReg(genRegMask(regSrc), regSrcUsed);
+ }
+
+ getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), fieldSize, regTmp, regSrc,
+ i * TARGET_POINTER_SIZE);
+
+ getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), fieldSize, regTmp,
+ compiler->lvaOutgoingArgSpaceVar, argOffset);
+ regTracker.rsTrackRegTrash(regTmp);
+ }
+ argOffset += TARGET_POINTER_SIZE;
+ }
+
+ // Now pass the register portion of the struct
+ //
+
+ bytesOfNextSlotOfCurPromotedStruct = TARGET_POINTER_SIZE;
+ if (promotedStructLocalVarDesc != NULL)
+ nextPromotedStructFieldVar = promotedStructLocalVarDesc->lvFieldLclStart;
+
+ // Create a nested loop here so that the first time thru the loop
+ // we setup all of the regArg registers except for possibly
+ // the one that would overwrite regSrc. Then in the final loop
+ // (if necessary) we just setup regArg/regSrc with the overwrite
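+                // For example (hypothetical assignment): if regSrc is r1 and the struct is passed in r0-r2,
+                // the first pass loads r0 and r2 and skips r1; the second pass then loads r1, so the source
+                // address is overwritten only after everything else has been copied out of it.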
+ //
+ bool overwriteRegSrc = false;
+ bool needOverwriteRegSrc = false;
+ do
+ {
+ if (needOverwriteRegSrc)
+ overwriteRegSrc = true;
+
+ for (unsigned i = 0; i < firstStackSlot; i++)
+ {
+ regNumber regArg = (regNumber)(regNum + i);
+
+ if (overwriteRegSrc == false)
+ {
+ if (regArg == regSrc)
+ {
+ needOverwriteRegSrc = true;
+ continue;
+ }
+ }
+ else
+ {
+ if (regArg != regSrc)
+ continue;
+ }
+
+ emitAttr fieldSize;
+ if (gcLayout[i] == TYPE_GC_NONE)
+ fieldSize = EA_PTRSIZE;
+ else if (gcLayout[i] == TYPE_GC_REF)
+ fieldSize = EA_GCREF;
+ else
+ {
+ noway_assert(gcLayout[i] == TYPE_GC_BYREF);
+ fieldSize = EA_BYREF;
+ }
+
+ regNumber regTmp = REG_STK;
+ if (promotedStructLocalVarDesc != NULL)
+ {
+ bool filledExtraSlot =
+ genFillSlotFromPromotedStruct(arg, curArgTabEntry, promotedStructLocalVarDesc,
+ fieldSize, &nextPromotedStructFieldVar,
+ &bytesOfNextSlotOfCurPromotedStruct,
+ /*pCurRegNum*/ &regArg,
+ /*argOffset*/ INT32_MAX,
+ /*fieldOffsetOfFirstStackSlot*/ INT32_MAX,
+ /*argOffsetOfFirstStackSlot*/ INT32_MAX,
+ &deadFieldVarRegs, &regTmp);
+ if (filledExtraSlot)
+ i++;
+ }
+ else
+ {
+ getEmitter()->emitIns_R_AR(ins_Load(curArgTabEntry->isHfaRegArg ? TYP_FLOAT : TYP_I_IMPL),
+ fieldSize, regArg, regSrc, i * TARGET_POINTER_SIZE);
+ }
+ regTracker.rsTrackRegTrash(regArg);
+ }
+ } while (needOverwriteRegSrc != overwriteRegSrc);
+ }
+
+ if ((arg->gtOper == GT_OBJ) && (promotedStructLocalVarDesc == NULL))
+ {
+ regSet.rsMarkRegFree(genRegMask(regSrc));
+ }
+
+ if (regNum != REG_STK && promotedStructLocalVarDesc == NULL) // If promoted, we already declared the regs
+ // used.
+ {
+ arg->gtFlags |= GTF_REG_VAL;
+ for (unsigned i = 1; i < firstStackSlot; i++)
+ {
+ arg->gtRegNum = (regNumber)(regNum + i);
+ curArgTabEntry->isHfaRegArg ? regSet.SetUsedRegFloat(arg, true) : regSet.rsMarkRegUsed(arg);
+ }
+ arg->gtRegNum = regNum;
+ curArgTabEntry->isHfaRegArg ? regSet.SetUsedRegFloat(arg, true) : regSet.rsMarkRegUsed(arg);
+ }
+
+ // If we're doing struct promotion, the liveness of the promoted field vars may change after this use,
+ // so update liveness.
+ genUpdateLife(arg);
+
+ // Now, if some copied field locals were enregistered, and they're now dead, update the set of
+ // register holding gc pointers.
+ if (deadFieldVarRegs != RBM_NONE)
+ gcInfo.gcMarkRegSetNpt(deadFieldVarRegs);
+ }
+ else if (curr->gtType == TYP_LONG || curr->gtType == TYP_ULONG)
+ {
+ if (curArgTabEntry->regNum == REG_STK)
+ {
+ // The arg is passed in the outgoing argument area of the stack frame
+ genCompIntoFreeRegPair(curr, RBM_NONE, RegSet::FREE_REG);
+ assert(curr->gtFlags & GTF_REG_VAL); // should be enregistered after genCompIntoFreeRegPair(curr, 0)
+
+ inst_SA_RV(ins_Store(TYP_INT), argOffset + 0, genRegPairLo(curr->gtRegPair), TYP_INT);
+ inst_SA_RV(ins_Store(TYP_INT), argOffset + 4, genRegPairHi(curr->gtRegPair), TYP_INT);
+ }
+ else
+ {
+ assert(regNum < REG_ARG_LAST);
+ regPairNo regPair = gen2regs2pair(regNum, REG_NEXT(regNum));
+ genComputeRegPair(curr, regPair, RBM_NONE, RegSet::FREE_REG, false);
+ assert(curr->gtRegPair == regPair);
+ regSet.rsMarkRegPairUsed(curr);
+ }
+ }
+#endif // _TARGET_ARM_
+ else if (curArgTabEntry->regNum == REG_STK)
+ {
+ // The arg is passed in the outgoing argument area of the stack frame
+ //
+ genCodeForTree(curr, 0);
+ assert(curr->gtFlags & GTF_REG_VAL); // should be enregistered after genCodeForTree(curr, 0)
+
+ inst_SA_RV(ins_Store(curr->gtType), argOffset, curr->gtRegNum, curr->gtType);
+
+ if ((genRegMask(curr->gtRegNum) & regSet.rsMaskUsed) == 0)
+ gcInfo.gcMarkRegSetNpt(genRegMask(curr->gtRegNum));
+ }
+ else
+ {
+ if (!varTypeIsFloating(curr->gtType))
+ {
+ genComputeReg(curr, genRegMask(regNum), RegSet::EXACT_REG, RegSet::FREE_REG, false);
+ assert(curr->gtRegNum == regNum);
+ regSet.rsMarkRegUsed(curr);
+ }
+ else // varTypeIsFloating(curr->gtType)
+ {
+ if (genIsValidFloatReg(regNum))
+ {
+ genComputeReg(curr, genRegMaskFloat(regNum, curr->gtType), RegSet::EXACT_REG, RegSet::FREE_REG,
+ false);
+ assert(curr->gtRegNum == regNum);
+ regSet.rsMarkRegUsed(curr);
+ }
+ else
+ {
+ genCodeForTree(curr, 0);
+ // If we are loading a floating point type into integer registers
+ // then it must be for varargs.
+                    // genCodeForTree will load it into a floating point register;
+                    // now copy it into the correct integer register(s).
+ if (curr->TypeGet() == TYP_FLOAT)
+ {
+ assert(genRegMask(regNum) & RBM_CALLEE_TRASH);
+ regSet.rsSpillRegIfUsed(regNum);
+#ifdef _TARGET_ARM_
+ getEmitter()->emitIns_R_R(INS_vmov_f2i, EA_4BYTE, regNum, curr->gtRegNum);
+#else
+#error "Unsupported target"
+#endif
+ regTracker.rsTrackRegTrash(regNum);
+
+ curr->gtType = TYP_INT; // Change this to TYP_INT in case we need to spill this register
+ curr->gtRegNum = regNum;
+ regSet.rsMarkRegUsed(curr);
+ }
+ else
+ {
+ assert(curr->TypeGet() == TYP_DOUBLE);
+ regNumber intRegNumLo = regNum;
+ curr->gtType = TYP_LONG; // Change this to TYP_LONG in case we spill this
+#ifdef _TARGET_ARM_
+ regNumber intRegNumHi = regNumber(intRegNumLo + 1);
+ assert(genRegMask(intRegNumHi) & RBM_CALLEE_TRASH);
+ assert(genRegMask(intRegNumLo) & RBM_CALLEE_TRASH);
+ regSet.rsSpillRegIfUsed(intRegNumHi);
+ regSet.rsSpillRegIfUsed(intRegNumLo);
+
+ getEmitter()->emitIns_R_R_R(INS_vmov_d2i, EA_8BYTE, intRegNumLo, intRegNumHi, curr->gtRegNum);
+ regTracker.rsTrackRegTrash(intRegNumLo);
+ regTracker.rsTrackRegTrash(intRegNumHi);
+ curr->gtRegPair = gen2regs2pair(intRegNumLo, intRegNumHi);
+ regSet.rsMarkRegPairUsed(curr);
+#else
+#error "Unsupported target"
+#endif
+ }
+ }
+ }
+ }
+ }
+
+ /* If any of the previously loaded arguments were spilled - reload them */
+
+ for (lateArgs = call->gtCall.gtCallLateArgs; lateArgs; lateArgs = lateArgs->Rest())
+ {
+ curr = lateArgs->Current();
+ assert(curr);
+
+ if (curr->gtFlags & GTF_SPILLED)
+ {
+ if (isRegPairType(curr->gtType))
+ {
+ regSet.rsUnspillRegPair(curr, genRegPairMask(curr->gtRegPair), RegSet::KEEP_REG);
+ }
+ else
+ {
+ regSet.rsUnspillReg(curr, genRegMask(curr->gtRegNum), RegSet::KEEP_REG);
+ }
+ }
+ }
+}
+
+#ifdef _TARGET_ARM_
+
+// 'Push' a single GT_MKREFANY argument onto a call's argument list
+// The argument is passed as described by the fgArgTabEntry
+// If any part of the struct is to be passed in a register, the
+// regNum value will be equal to the register used to pass the
+// first part of the struct.
+// If any part is to go onto the stack, we first generate the
+// value into a register specified by 'regNeedMask' and
+// then store it to the outgoing argument area.
+// When this method returns, both parts of the TypedReference have
+// been pushed onto the stack, but *no* registers have been marked
+// as 'in-use'; that is the responsibility of the caller.
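+// For example (hypothetical case): when regNum is REG_ARG_LAST, the first field (the pointer) is
+// computed directly into that register while regNum2 becomes REG_STK, so the second field (the
+// method table) is stored to the outgoing argument area instead.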
+//
+void CodeGen::PushMkRefAnyArg(GenTreePtr mkRefAnyTree, fgArgTabEntryPtr curArgTabEntry, regMaskTP regNeedMask)
+{
+ regNumber regNum = curArgTabEntry->regNum;
+ regNumber regNum2;
+ assert(mkRefAnyTree->gtOper == GT_MKREFANY);
+ regMaskTP arg1RegMask = 0;
+ int argOffset = curArgTabEntry->slotNum * TARGET_POINTER_SIZE;
+
+ // Construct the TypedReference directly into the argument list of the call by
+ // 'pushing' the first field of the typed reference: the pointer.
+ // Do this by directly generating it into the argument register or outgoing arg area of the stack.
+ // Mark it as used so we don't trash it while generating the second field.
+ //
+ if (regNum == REG_STK)
+ {
+ genComputeReg(mkRefAnyTree->gtOp.gtOp1, regNeedMask, RegSet::EXACT_REG, RegSet::FREE_REG);
+ noway_assert(mkRefAnyTree->gtOp.gtOp1->gtFlags & GTF_REG_VAL);
+ regNumber tmpReg1 = mkRefAnyTree->gtOp.gtOp1->gtRegNum;
+ inst_SA_RV(ins_Store(TYP_I_IMPL), argOffset, tmpReg1, TYP_I_IMPL);
+ regTracker.rsTrackRegTrash(tmpReg1);
+ argOffset += TARGET_POINTER_SIZE;
+ regNum2 = REG_STK;
+ }
+ else
+ {
+ assert(regNum <= REG_ARG_LAST);
+ arg1RegMask = genRegMask(regNum);
+ genComputeReg(mkRefAnyTree->gtOp.gtOp1, arg1RegMask, RegSet::EXACT_REG, RegSet::KEEP_REG);
+ regNum2 = (regNum == REG_ARG_LAST) ? REG_STK : genRegArgNext(regNum);
+ }
+
+ // Now 'push' the second field of the typed reference: the method table.
+ if (regNum2 == REG_STK)
+ {
+ genComputeReg(mkRefAnyTree->gtOp.gtOp2, regNeedMask, RegSet::EXACT_REG, RegSet::FREE_REG);
+ noway_assert(mkRefAnyTree->gtOp.gtOp2->gtFlags & GTF_REG_VAL);
+ regNumber tmpReg2 = mkRefAnyTree->gtOp.gtOp2->gtRegNum;
+ inst_SA_RV(ins_Store(TYP_I_IMPL), argOffset, tmpReg2, TYP_I_IMPL);
+ regTracker.rsTrackRegTrash(tmpReg2);
+ }
+ else
+ {
+ assert(regNum2 <= REG_ARG_LAST);
+ // We don't have to mark this register as being in use here because it will
+ // be done by the caller, and we don't want to double-count it.
+ genComputeReg(mkRefAnyTree->gtOp.gtOp2, genRegMask(regNum2), RegSet::EXACT_REG, RegSet::FREE_REG);
+ }
+
+    // Now that we are done generating the second part of the TypedReference, we can mark
+    // the first register as free.
+    // In the shared path, the caller will re-mark all registers used by this argument
+ // as being used, so we don't want to double-count this one.
+ if (arg1RegMask != 0)
+ {
+ GenTreePtr op1 = mkRefAnyTree->gtOp.gtOp1;
+ if (op1->gtFlags & GTF_SPILLED)
+ {
+ /* The register that we loaded arg1 into has been spilled -- reload it back into the correct arg register */
+
+ regSet.rsUnspillReg(op1, arg1RegMask, RegSet::FREE_REG);
+ }
+ else
+ {
+ regSet.rsMarkRegFree(arg1RegMask);
+ }
+ }
+}
+#endif // _TARGET_ARM_
+
+#endif // FEATURE_FIXED_OUT_ARGS
+
+regMaskTP CodeGen::genLoadIndirectCallTarget(GenTreePtr call)
+{
+ assert((gtCallTypes)call->gtCall.gtCallType == CT_INDIRECT);
+
+ regMaskTP fptrRegs;
+
+ /* Loading the indirect call target might cause one or more of the previously
+ loaded argument registers to be spilled. So, we save information about all
+ the argument registers, and unspill any of them that get spilled, after
+ the call target is loaded.
+ */
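+    // For example (hypothetical scenario): if r0-r2 already hold late arguments and evaluating
+    // gtCallAddr forces r1 to be spilled, the unspill loop below reloads r1 before the target is
+    // kept addressable across the argument registers.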
+ struct
+ {
+ GenTreePtr node;
+ union {
+ regNumber regNum;
+ regPairNo regPair;
+ };
+ } regArgTab[MAX_REG_ARG];
+
+ /* Record the previously loaded arguments, if any */
+
+ unsigned regIndex;
+ regMaskTP prefRegs = regSet.rsRegMaskFree();
+ regMaskTP argRegs = RBM_NONE;
+ for (regIndex = 0; regIndex < MAX_REG_ARG; regIndex++)
+ {
+ regMaskTP mask;
+ regNumber regNum = genMapRegArgNumToRegNum(regIndex, TYP_INT);
+ GenTreePtr argTree = regSet.rsUsedTree[regNum];
+ regArgTab[regIndex].node = argTree;
+ if ((argTree != NULL) && (argTree->gtType != TYP_STRUCT)) // We won't spill the struct
+ {
+ assert(argTree->gtFlags & GTF_REG_VAL);
+ if (isRegPairType(argTree->gtType))
+ {
+ regPairNo regPair = argTree->gtRegPair;
+ assert(regNum == genRegPairHi(regPair) || regNum == genRegPairLo(regPair));
+ regArgTab[regIndex].regPair = regPair;
+ mask = genRegPairMask(regPair);
+ }
+ else
+ {
+ assert(regNum == argTree->gtRegNum);
+ regArgTab[regIndex].regNum = regNum;
+ mask = genRegMask(regNum);
+ }
+ assert(!(prefRegs & mask));
+ argRegs |= mask;
+ }
+ }
+
+ /* Record the register(s) used for the indirect call func ptr */
+ fptrRegs = genMakeRvalueAddressable(call->gtCall.gtCallAddr, prefRegs, RegSet::KEEP_REG, false);
+
+ /* If any of the previously loaded arguments were spilled, reload them */
+
+ for (regIndex = 0; regIndex < MAX_REG_ARG; regIndex++)
+ {
+ GenTreePtr argTree = regArgTab[regIndex].node;
+ if ((argTree != NULL) && (argTree->gtFlags & GTF_SPILLED))
+ {
+ assert(argTree->gtType != TYP_STRUCT); // We currently don't support spilling structs in argument registers
+ if (isRegPairType(argTree->gtType))
+ {
+ regSet.rsUnspillRegPair(argTree, genRegPairMask(regArgTab[regIndex].regPair), RegSet::KEEP_REG);
+ }
+ else
+ {
+ regSet.rsUnspillReg(argTree, genRegMask(regArgTab[regIndex].regNum), RegSet::KEEP_REG);
+ }
+ }
+ }
+
+ /* Make sure the target is still addressable while avoiding the argument registers */
+
+ fptrRegs = genKeepAddressable(call->gtCall.gtCallAddr, fptrRegs, argRegs);
+
+ return fptrRegs;
+}
+
+/*****************************************************************************
+ *
+ * Generate code for a call. If the call returns a value in register(s), the
+ * register mask that describes where the result will be found is returned;
+ * otherwise, RBM_NONE is returned.
+ */
+
+#ifdef _PREFAST_
+#pragma warning(push)
+#pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
+#endif
+regMaskTP CodeGen::genCodeForCall(GenTreePtr call, bool valUsed)
+{
+ emitAttr retSize;
+ size_t argSize;
+ size_t args;
+ regMaskTP retVal;
+ emitter::EmitCallType emitCallType;
+
+ unsigned saveStackLvl;
+
+ BasicBlock* returnLabel = DUMMY_INIT(NULL);
+ LclVarDsc* frameListRoot = NULL;
+
+ unsigned savCurIntArgReg;
+ unsigned savCurFloatArgReg;
+
+ unsigned areg;
+
+ regMaskTP fptrRegs = RBM_NONE;
+ regMaskTP vptrMask = RBM_NONE;
+
+#ifdef DEBUG
+ unsigned stackLvl = getEmitter()->emitCurStackLvl;
+
+ if (compiler->verbose)
+ {
+ printf("\t\t\t\t\t\t\tBeg call ");
+ Compiler::printTreeID(call);
+ printf(" stack %02u [E=%02u]\n", genStackLevel, stackLvl);
+ }
+#endif
+
+ gtCallTypes callType = (gtCallTypes)call->gtCall.gtCallType;
+ IL_OFFSETX ilOffset = BAD_IL_OFFSET;
+
+ CORINFO_SIG_INFO* sigInfo = nullptr;
+
+#ifdef DEBUGGING_SUPPORT
+ if (compiler->opts.compDbgInfo && compiler->genCallSite2ILOffsetMap != NULL)
+ {
+ (void)compiler->genCallSite2ILOffsetMap->Lookup(call, &ilOffset);
+ }
+#endif
+
+ /* Make some sanity checks on the call node */
+
+ // This is a call
+ noway_assert(call->IsCall());
+ // "this" only makes sense for user functions
+ noway_assert(call->gtCall.gtCallObjp == 0 || callType == CT_USER_FUNC || callType == CT_INDIRECT);
+    // tailcalls won't be done for helpers or caller-pop args; also check that
+    // the global flag is set
+ noway_assert(!call->gtCall.IsTailCall() ||
+ (callType != CT_HELPER && !(call->gtFlags & GTF_CALL_POP_ARGS) && compiler->compTailCallUsed));
+
+#ifdef DEBUG
+ // Pass the call signature information down into the emitter so the emitter can associate
+ // native call sites with the signatures they were generated from.
+ if (callType != CT_HELPER)
+ {
+ sigInfo = call->gtCall.callSig;
+ }
+#endif // DEBUG
+
+ unsigned pseudoStackLvl = 0;
+
+ if (!isFramePointerUsed() && (genStackLevel != 0) && compiler->fgIsThrowHlpBlk(compiler->compCurBB))
+ {
+ noway_assert(compiler->compCurBB->bbTreeList->gtStmt.gtStmtExpr == call);
+
+ pseudoStackLvl = genStackLevel;
+
+ noway_assert(!"Blocks with non-empty stack on entry are NYI in the emitter "
+ "so fgAddCodeRef() should have set isFramePointerRequired()");
+ }
+
+ /* Mark the current stack level and list of pointer arguments */
+
+ saveStackLvl = genStackLevel;
+
+ /*-------------------------------------------------------------------------
+ * Set up the registers and arguments
+ */
+
+ /* We'll keep track of how much we've pushed on the stack */
+
+ argSize = 0;
+
+ /* We need to get a label for the return address with the proper stack depth. */
+ /* For the callee pops case (the default) that is before the args are pushed. */
+
+ if ((call->gtFlags & GTF_CALL_UNMANAGED) && !(call->gtFlags & GTF_CALL_POP_ARGS))
+ {
+ returnLabel = genCreateTempLabel();
+ }
+
+ /*
+ Make sure to save the current argument register status
+ in case we have nested calls.
+ */
+
+ noway_assert(intRegState.rsCurRegArgNum <= MAX_REG_ARG);
+ savCurIntArgReg = intRegState.rsCurRegArgNum;
+ savCurFloatArgReg = floatRegState.rsCurRegArgNum;
+ intRegState.rsCurRegArgNum = 0;
+ floatRegState.rsCurRegArgNum = 0;
+
+ /* Pass the arguments */
+
+ if ((call->gtCall.gtCallObjp != NULL) || (call->gtCall.gtCallArgs != NULL))
+ {
+ argSize += genPushArgList(call);
+ }
+
+ /* We need to get a label for the return address with the proper stack depth. */
+ /* For the caller pops case (cdecl) that is after the args are pushed. */
+
+ if (call->gtFlags & GTF_CALL_UNMANAGED)
+ {
+ if (call->gtFlags & GTF_CALL_POP_ARGS)
+ returnLabel = genCreateTempLabel();
+
+ /* Make sure that we now have a label */
+ noway_assert(returnLabel != DUMMY_INIT(NULL));
+ }
+
+ if (callType == CT_INDIRECT)
+ {
+ fptrRegs = genLoadIndirectCallTarget(call);
+ }
+
+ /* Make sure any callee-trashed registers are saved */
+
+ regMaskTP calleeTrashedRegs = RBM_NONE;
+
+#if GTF_CALL_REG_SAVE
+ if (call->gtFlags & GTF_CALL_REG_SAVE)
+ {
+ /* The return value reg(s) will definitely be trashed */
+
+ switch (call->gtType)
+ {
+ case TYP_INT:
+ case TYP_REF:
+ case TYP_BYREF:
+#if !CPU_HAS_FP_SUPPORT
+ case TYP_FLOAT:
+#endif
+ calleeTrashedRegs = RBM_INTRET;
+ break;
+
+ case TYP_LONG:
+#if !CPU_HAS_FP_SUPPORT
+ case TYP_DOUBLE:
+#endif
+ calleeTrashedRegs = RBM_LNGRET;
+ break;
+
+ case TYP_VOID:
+#if CPU_HAS_FP_SUPPORT
+ case TYP_FLOAT:
+ case TYP_DOUBLE:
+#endif
+ calleeTrashedRegs = 0;
+ break;
+
+ default:
+ noway_assert(!"unhandled/unexpected type");
+ }
+ }
+ else
+#endif
+ {
+ calleeTrashedRegs = RBM_CALLEE_TRASH;
+ }
+
+ /* Spill any callee-saved registers which are being used */
+
+ regMaskTP spillRegs = calleeTrashedRegs & regSet.rsMaskUsed;
+
+ /* We need to save all GC registers to the InlinedCallFrame.
+ Instead, just spill them to temps. */
+
+ if (call->gtFlags & GTF_CALL_UNMANAGED)
+ spillRegs |= (gcInfo.gcRegGCrefSetCur | gcInfo.gcRegByrefSetCur) & regSet.rsMaskUsed;
+
+ // Ignore fptrRegs as it is needed only to perform the indirect call
+
+ spillRegs &= ~fptrRegs;
+
+ /* Do not spill the argument registers.
+ Multi-use of RBM_ARG_REGS should be prevented by genPushArgList() */
+
+ noway_assert((regSet.rsMaskMult & call->gtCall.gtCallRegUsedMask) == 0);
+ spillRegs &= ~call->gtCall.gtCallRegUsedMask;
+
+ if (spillRegs)
+ {
+ regSet.rsSpillRegs(spillRegs);
+ }
+
+#if FEATURE_STACK_FP_X87
+ // Spill fp stack
+ SpillForCallStackFP();
+
+ if (call->gtType == TYP_FLOAT || call->gtType == TYP_DOUBLE)
+ {
+ // Pick up a reg
+ regNumber regReturn = regSet.PickRegFloat();
+
+ // Assign reg to tree
+ genMarkTreeInReg(call, regReturn);
+
+ // Mark as used
+ regSet.SetUsedRegFloat(call, true);
+
+ // Update fp state
+ compCurFPState.Push(regReturn);
+ }
+#else
+ SpillForCallRegisterFP(call->gtCall.gtCallRegUsedMask);
+#endif
+
+ /* If the method returns a GC ref, set size to EA_GCREF or EA_BYREF */
+
+ retSize = EA_PTRSIZE;
+
+ if (valUsed)
+ {
+ if (call->gtType == TYP_REF || call->gtType == TYP_ARRAY)
+ {
+ retSize = EA_GCREF;
+ }
+ else if (call->gtType == TYP_BYREF)
+ {
+ retSize = EA_BYREF;
+ }
+ }
+
+ /*-------------------------------------------------------------------------
+ * For caller-pop calls, the GC info will report the arguments as pending
+       arguments, since the caller explicitly pops them. They should also be
+       reported as non-GC arguments, as they effectively go dead at the
+       call site (the callee owns them).
+ */
+
+ args = (call->gtFlags & GTF_CALL_POP_ARGS) ? -int(argSize) : argSize;
+
+#ifdef PROFILING_SUPPORTED
+
+ /*-------------------------------------------------------------------------
+ * Generate the profiling hooks for the call
+ */
+
+ /* Treat special cases first */
+
+ /* fire the event at the call site */
+ /* alas, right now I can only handle calls via a method handle */
+ if (compiler->compIsProfilerHookNeeded() && (callType == CT_USER_FUNC) && call->gtCall.IsTailCall())
+ {
+ unsigned saveStackLvl2 = genStackLevel;
+
+ //
+ // Push the profilerHandle
+ //
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef _TARGET_X86_
+ regMaskTP byrefPushedRegs;
+ regMaskTP norefPushedRegs;
+ regMaskTP pushedArgRegs = genPushRegs(call->gtCall.gtCallRegUsedMask, &byrefPushedRegs, &norefPushedRegs);
+
+ if (compiler->compProfilerMethHndIndirected)
+ {
+ getEmitter()->emitIns_AR_R(INS_push, EA_PTR_DSP_RELOC, REG_NA, REG_NA,
+ (ssize_t)compiler->compProfilerMethHnd);
+ }
+ else
+ {
+ inst_IV(INS_push, (size_t)compiler->compProfilerMethHnd);
+ }
+ genSinglePush();
+
+ genEmitHelperCall(CORINFO_HELP_PROF_FCN_TAILCALL,
+ sizeof(int) * 1, // argSize
+ EA_UNKNOWN); // retSize
+
+ //
+ // Adjust the number of stack slots used by this managed method if necessary.
+ //
+ if (compiler->fgPtrArgCntMax < 1)
+ {
+ compiler->fgPtrArgCntMax = 1;
+ }
+
+ genPopRegs(pushedArgRegs, byrefPushedRegs, norefPushedRegs);
+#elif _TARGET_ARM_
+ // We need r0 (to pass profiler handle) and another register (call target) to emit a tailcall callback.
+ // To make r0 available, we add REG_PROFILER_TAIL_SCRATCH as an additional interference for tail prefixed calls.
+ // Here we grab a register to temporarily store r0 and revert it back after we have emitted callback.
+ //
+    // By the time we reach this point, argument registers are set up (by genPushArgList()), so we don't want
+    // to disturb them; hence the argument registers are locked here.
+ regMaskTP usedMask = RBM_NONE;
+ regSet.rsLockReg(RBM_ARG_REGS, &usedMask);
+
+ regNumber scratchReg = regSet.rsGrabReg(RBM_CALLEE_SAVED);
+ regSet.rsLockReg(genRegMask(scratchReg));
+
+ emitAttr attr = EA_UNKNOWN;
+ if (RBM_R0 & gcInfo.gcRegGCrefSetCur)
+ {
+ attr = EA_GCREF;
+ gcInfo.gcMarkRegSetGCref(scratchReg);
+ }
+ else if (RBM_R0 & gcInfo.gcRegByrefSetCur)
+ {
+ attr = EA_BYREF;
+ gcInfo.gcMarkRegSetByref(scratchReg);
+ }
+ else
+ {
+ attr = EA_4BYTE;
+ }
+
+ getEmitter()->emitIns_R_R(INS_mov, attr, scratchReg, REG_R0);
+ regTracker.rsTrackRegTrash(scratchReg);
+
+ if (compiler->compProfilerMethHndIndirected)
+ {
+ getEmitter()->emitIns_R_AI(INS_ldr, EA_PTR_DSP_RELOC, REG_R0, (ssize_t)compiler->compProfilerMethHnd);
+ regTracker.rsTrackRegTrash(REG_R0);
+ }
+ else
+ {
+ instGen_Set_Reg_To_Imm(EA_4BYTE, REG_R0, (ssize_t)compiler->compProfilerMethHnd);
+ }
+
+ genEmitHelperCall(CORINFO_HELP_PROF_FCN_TAILCALL,
+ 0, // argSize
+ EA_UNKNOWN); // retSize
+
+ // Restore back to the state that existed before profiler callback
+ gcInfo.gcMarkRegSetNpt(scratchReg);
+ getEmitter()->emitIns_R_R(INS_mov, attr, REG_R0, scratchReg);
+ regTracker.rsTrackRegTrash(REG_R0);
+ regSet.rsUnlockReg(genRegMask(scratchReg));
+ regSet.rsUnlockReg(RBM_ARG_REGS, usedMask);
+#else
+ NYI("Pushing the profilerHandle & caller's sp for the profiler callout and locking any registers");
+#endif //_TARGET_X86_
+
+ /* Restore the stack level */
+ genStackLevel = saveStackLvl2;
+ }
+
+#endif // PROFILING_SUPPORTED
+
+#ifdef DEBUG
+ /*-------------------------------------------------------------------------
+ * Generate an ESP check for the call
+ */
+
+ if (compiler->opts.compStackCheckOnCall
+#if defined(USE_TRANSITION_THUNKS) || defined(USE_DYNAMIC_STACK_ALIGN)
+ // check the stacks as frequently as possible
+ && !call->IsHelperCall()
+#else
+ && call->gtCall.gtCallType == CT_USER_FUNC
+#endif
+ )
+ {
+ noway_assert(compiler->lvaCallEspCheck != 0xCCCCCCCC &&
+ compiler->lvaTable[compiler->lvaCallEspCheck].lvDoNotEnregister &&
+ compiler->lvaTable[compiler->lvaCallEspCheck].lvOnFrame);
+ getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_SPBASE, compiler->lvaCallEspCheck, 0);
+ }
+#endif
+
+ /*-------------------------------------------------------------------------
+ * Generate the call
+ */
+
+ bool fPossibleSyncHelperCall = false;
+ CorInfoHelpFunc helperNum = CORINFO_HELP_UNDEF; /* only initialized to avoid compiler C4701 warning */
+
+ bool fTailCallTargetIsVSD = false;
+
+ bool fTailCall = (call->gtCall.gtCallMoreFlags & GTF_CALL_M_TAILCALL) != 0;
+
+ /* Check for Delegate.Invoke. If so, we inline it. We get the
+ target-object and target-function from the delegate-object, and do
+ an indirect call.
+ */
+
+ if ((call->gtCall.gtCallMoreFlags & GTF_CALL_M_DELEGATE_INV) && !fTailCall)
+ {
+ noway_assert(call->gtCall.gtCallType == CT_USER_FUNC);
+
+ assert((compiler->info.compCompHnd->getMethodAttribs(call->gtCall.gtCallMethHnd) &
+ (CORINFO_FLG_DELEGATE_INVOKE | CORINFO_FLG_FINAL)) ==
+ (CORINFO_FLG_DELEGATE_INVOKE | CORINFO_FLG_FINAL));
+
+ /* Find the offsets of the 'this' pointer and new target */
+
+ CORINFO_EE_INFO* pInfo;
+ unsigned instOffs; // offset of new 'this' pointer
+ unsigned firstTgtOffs; // offset of first target to invoke
+ const regNumber regThis = genGetThisArgReg(call);
+
+ pInfo = compiler->eeGetEEInfo();
+ instOffs = pInfo->offsetOfDelegateInstance;
+ firstTgtOffs = pInfo->offsetOfDelegateFirstTarget;
+
+#ifdef _TARGET_ARM_
+ if ((call->gtCall.gtCallMoreFlags & GTF_CALL_M_SECURE_DELEGATE_INV))
+ {
+ getEmitter()->emitIns_R_R_I(INS_add, EA_PTRSIZE, REG_VIRTUAL_STUB_PARAM, regThis,
+ pInfo->offsetOfSecureDelegateIndirectCell);
+ regTracker.rsTrackRegTrash(REG_VIRTUAL_STUB_PARAM);
+ }
+#endif // _TARGET_ARM_
+
+ // Grab an available register to use for the CALL indirection
+ regNumber indCallReg = regSet.rsGrabReg(RBM_ALLINT);
+
+ // Save the invoke-target-function in indCallReg
+ // 'mov indCallReg, dword ptr [regThis + firstTgtOffs]'
+ getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), EA_PTRSIZE, indCallReg, regThis, firstTgtOffs);
+ regTracker.rsTrackRegTrash(indCallReg);
+
+ /* Set new 'this' in REG_CALL_THIS - 'mov REG_CALL_THIS, dword ptr [regThis + instOffs]' */
+
+ getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), EA_GCREF, regThis, regThis, instOffs);
+ regTracker.rsTrackRegTrash(regThis);
+ noway_assert(instOffs < 127);
+
+ /* Call through indCallReg */
+
+ getEmitter()->emitIns_Call(emitter::EC_INDIR_R,
+ NULL, // methHnd
+ INDEBUG_LDISASM_COMMA(sigInfo) NULL, // addr
+ args, retSize, gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur,
+ gcInfo.gcRegByrefSetCur, ilOffset, indCallReg);
+ }
+ else
+
+ /*-------------------------------------------------------------------------
+ * Virtual and interface calls
+ */
+
+ switch (call->gtFlags & GTF_CALL_VIRT_KIND_MASK)
+ {
+ case GTF_CALL_VIRT_STUB:
+ {
+ regSet.rsSetRegsModified(RBM_VIRTUAL_STUB_PARAM);
+
+ // An x86 JIT which uses full stub dispatch must generate only
+ // the following stub dispatch calls:
+ //
+ // (1) isCallRelativeIndirect:
+ // call dword ptr [rel32] ; FF 15 ---rel32----
+ // (2) isCallRelative:
+ // call abc ; E8 ---rel32----
+ // (3) isCallRegisterIndirect:
+ // 3-byte nop ;
+ // call dword ptr [eax] ; FF 10
+ //
+ // THIS IS VERY TIGHTLY TIED TO THE PREDICATES IN
+ // vm\i386\cGenCpu.h, esp. isCallRegisterIndirect.
+
+ //
+ // Please do not insert any Random NOPs while constructing this VSD call
+ //
+ getEmitter()->emitDisableRandomNops();
+
+ if (!fTailCall)
+ {
+ // This is code to set up an indirect call to a stub address computed
+ // via dictionary lookup. However the dispatch stub receivers aren't set up
+ // to accept such calls at the moment.
+ if (callType == CT_INDIRECT)
+ {
+ regNumber indReg;
+
+ // -------------------------------------------------------------------------
+ // The importer decided we needed a stub call via a computed
+ // stub dispatch address, i.e. an address which came from a dictionary lookup.
+ // - The dictionary lookup produces an indirected address, suitable for call
+ // via "call [REG_VIRTUAL_STUB_PARAM]"
+ //
+ // This combination will only be generated for shared generic code and when
+ // stub dispatch is active.
+
+ // No need to null check the this pointer - the dispatch code will deal with this.
+
+ noway_assert(genStillAddressable(call->gtCall.gtCallAddr));
+
+ // Now put the address in REG_VIRTUAL_STUB_PARAM.
+ // This is typically a nop when the register used for
+ // the gtCallAddr is REG_VIRTUAL_STUB_PARAM
+ //
+ inst_RV_TT(INS_mov, REG_VIRTUAL_STUB_PARAM, call->gtCall.gtCallAddr);
+ regTracker.rsTrackRegTrash(REG_VIRTUAL_STUB_PARAM);
+
+#if defined(_TARGET_X86_)
+ // Emit enough bytes of nops so that this sequence can be distinguished
+ // from other virtual stub dispatch calls.
+ //
+ // NOTE: THIS IS VERY TIGHTLY TIED TO THE PREDICATES IN
+ // vm\i386\cGenCpu.h, esp. isCallRegisterIndirect.
+ //
+ getEmitter()->emitIns_Nop(3);
+
+ // Make the virtual stub call:
+ // call [REG_VIRTUAL_STUB_PARAM]
+ //
+ emitCallType = emitter::EC_INDIR_ARD;
+
+ indReg = REG_VIRTUAL_STUB_PARAM;
+ genDoneAddressable(call->gtCall.gtCallAddr, fptrRegs, RegSet::KEEP_REG);
+
+#elif CPU_LOAD_STORE_ARCH // ARM doesn't allow us to use an indirection for the call
+
+ genDoneAddressable(call->gtCall.gtCallAddr, fptrRegs, RegSet::KEEP_REG);
+
+ // Make the virtual stub call:
+ // ldr indReg, [REG_VIRTUAL_STUB_PARAM]
+ // call indReg
+ //
+ emitCallType = emitter::EC_INDIR_R;
+
+ // Now dereference [REG_VIRTUAL_STUB_PARAM] and put it in a new temp register 'indReg'
+ //
+ indReg = regSet.rsGrabReg(RBM_ALLINT & ~RBM_VIRTUAL_STUB_PARAM);
+ assert(call->gtCall.gtCallAddr->gtFlags & GTF_REG_VAL);
+ getEmitter()->emitIns_R_R_I(INS_ldr, EA_PTRSIZE, indReg, REG_VIRTUAL_STUB_PARAM, 0);
+ regTracker.rsTrackRegTrash(indReg);
+
+#else
+#error "Unknown target for VSD call"
+#endif
+
+ getEmitter()->emitIns_Call(emitCallType,
+ NULL, // methHnd
+ INDEBUG_LDISASM_COMMA(sigInfo) NULL, // addr
+ args, retSize, gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur,
+ gcInfo.gcRegByrefSetCur, ilOffset, indReg);
+ }
+ else
+ {
+ // -------------------------------------------------------------------------
+ // Check for a direct stub call.
+ //
+
+ // Get stub addr. This will return NULL if virtual call stubs are not active
+ void* stubAddr = NULL;
+
+ stubAddr = (void*)call->gtCall.gtStubCallStubAddr;
+
+ noway_assert(stubAddr != NULL);
+
+ // -------------------------------------------------------------------------
+ // Direct stub calls, though the stubAddr itself may still need to be
+                        // accessed via an indirection.
+ //
+
+ // No need to null check - the dispatch code will deal with null this.
+
+ emitter::EmitCallType callTypeStubAddr = emitter::EC_FUNC_ADDR;
+ void* addr = stubAddr;
+ int disp = 0;
+ regNumber callReg = REG_NA;
+
+ if (call->gtCall.gtCallMoreFlags & GTF_CALL_M_VIRTSTUB_REL_INDIRECT)
+ {
+#if CPU_LOAD_STORE_ARCH
+ callReg = regSet.rsGrabReg(RBM_VIRTUAL_STUB_PARAM);
+ noway_assert(callReg == REG_VIRTUAL_STUB_PARAM);
+
+ instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, REG_VIRTUAL_STUB_PARAM, (ssize_t)stubAddr);
+ // The stub will write-back to this register, so don't track it
+ regTracker.rsTrackRegTrash(REG_VIRTUAL_STUB_PARAM);
+ getEmitter()->emitIns_R_R_I(INS_ldr, EA_PTRSIZE, REG_JUMP_THUNK_PARAM,
+ REG_VIRTUAL_STUB_PARAM, 0);
+ regTracker.rsTrackRegTrash(REG_JUMP_THUNK_PARAM);
+ callTypeStubAddr = emitter::EC_INDIR_R;
+ getEmitter()->emitIns_Call(emitter::EC_INDIR_R,
+ NULL, // methHnd
+ INDEBUG_LDISASM_COMMA(sigInfo) NULL, // addr
+ args, retSize, gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur,
+ gcInfo.gcRegByrefSetCur, ilOffset, REG_JUMP_THUNK_PARAM);
+
+#else
+ // emit an indirect call
+ callTypeStubAddr = emitter::EC_INDIR_C;
+ addr = 0;
+ disp = (ssize_t)stubAddr;
+#endif
+ }
+#if CPU_LOAD_STORE_ARCH
+ if (callTypeStubAddr != emitter::EC_INDIR_R)
+#endif
+ {
+ getEmitter()->emitIns_Call(callTypeStubAddr, call->gtCall.gtCallMethHnd,
+ INDEBUG_LDISASM_COMMA(sigInfo) addr, args, retSize,
+ gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur,
+ gcInfo.gcRegByrefSetCur, ilOffset, callReg, REG_NA, 0, disp);
+ }
+ }
+ }
+ else // tailCall is true
+ {
+
+// Non-X86 tail calls materialize the null-check in fgMorphTailCall, when it
+// moves the this pointer out of its usual place and into the argument list.
+#ifdef _TARGET_X86_
+
+ // Generate "cmp ECX, [ECX]" to trap null pointers
+ const regNumber regThis = genGetThisArgReg(call);
+ getEmitter()->emitIns_AR_R(INS_cmp, EA_4BYTE, regThis, regThis, 0);
+
+#endif // _TARGET_X86_
+
+ if (callType == CT_INDIRECT)
+ {
+ noway_assert(genStillAddressable(call->gtCall.gtCallAddr));
+
+ // Now put the address in EAX.
+ inst_RV_TT(INS_mov, REG_TAILCALL_ADDR, call->gtCall.gtCallAddr);
+ regTracker.rsTrackRegTrash(REG_TAILCALL_ADDR);
+
+ genDoneAddressable(call->gtCall.gtCallAddr, fptrRegs, RegSet::KEEP_REG);
+ }
+ else
+ {
+ // importer/EE should guarantee the indirection
+ noway_assert(call->gtCall.gtCallMoreFlags & GTF_CALL_M_VIRTSTUB_REL_INDIRECT);
+
+ instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, REG_TAILCALL_ADDR,
+ ssize_t(call->gtCall.gtStubCallStubAddr));
+ }
+
+ fTailCallTargetIsVSD = true;
+ }
+
+ //
+ // OK to start inserting random NOPs again
+ //
+ getEmitter()->emitEnableRandomNops();
+ }
+ break;
+
+ case GTF_CALL_VIRT_VTABLE:
+ // stub dispatching is off or this is not a virtual call (could be a tailcall)
+ {
+ regNumber vptrReg;
+ unsigned vtabOffsOfIndirection;
+ unsigned vtabOffsAfterIndirection;
+
+ noway_assert(callType == CT_USER_FUNC);
+
+ vptrReg =
+ regSet.rsGrabReg(RBM_ALLINT); // Grab an available register to use for the CALL indirection
+ vptrMask = genRegMask(vptrReg);
+
+ /* The register no longer holds a live pointer value */
+ gcInfo.gcMarkRegSetNpt(vptrMask);
+
+ // MOV vptrReg, [REG_CALL_THIS + offs]
+ getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), EA_PTRSIZE, vptrReg, genGetThisArgReg(call),
+ VPTR_OFFS);
+ regTracker.rsTrackRegTrash(vptrReg);
+
+ noway_assert(vptrMask & ~call->gtCall.gtCallRegUsedMask);
+
+ /* Get hold of the vtable offset (note: this might be expensive) */
+
+ compiler->info.compCompHnd->getMethodVTableOffset(call->gtCall.gtCallMethHnd,
+ &vtabOffsOfIndirection,
+ &vtabOffsAfterIndirection);
+
+ /* Get the appropriate vtable chunk */
+
+ /* The register no longer holds a live pointer value */
+ gcInfo.gcMarkRegSetNpt(vptrMask);
+
+ // MOV vptrReg, [REG_CALL_IND_SCRATCH + vtabOffsOfIndirection]
+ getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), EA_PTRSIZE, vptrReg, vptrReg,
+ vtabOffsOfIndirection);
+
+ /* Call through the appropriate vtable slot */
+
+ if (fTailCall)
+ {
+ /* Load the function address: "[vptrReg+vtabOffs] -> reg_intret" */
+
+ getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), EA_PTRSIZE, REG_TAILCALL_ADDR, vptrReg,
+ vtabOffsAfterIndirection);
+ }
+ else
+ {
+#if CPU_LOAD_STORE_ARCH
+ getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), EA_PTRSIZE, vptrReg, vptrReg,
+ vtabOffsAfterIndirection);
+
+ getEmitter()->emitIns_Call(emitter::EC_INDIR_R, call->gtCall.gtCallMethHnd,
+ INDEBUG_LDISASM_COMMA(sigInfo) NULL, // addr
+ args, retSize, gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur,
+ gcInfo.gcRegByrefSetCur, ilOffset,
+ vptrReg); // ireg
+#else
+ getEmitter()->emitIns_Call(emitter::EC_FUNC_VIRTUAL, call->gtCall.gtCallMethHnd,
+ INDEBUG_LDISASM_COMMA(sigInfo) NULL, // addr
+ args, retSize, gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur,
+ gcInfo.gcRegByrefSetCur, ilOffset,
+ vptrReg, // ireg
+ REG_NA, // xreg
+ 0, // xmul
+ vtabOffsAfterIndirection); // disp
+#endif // CPU_LOAD_STORE_ARCH
+ }
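+
+                    // Illustrative sketch only (not emitted verbatim): on x86, assuming "this"
+                    // arrives in ECX, the non-tailcall path above amounts to roughly:
+                    //     mov  vptrReg, [ecx + VPTR_OFFS]                  ; fetch the method table
+                    //     mov  vptrReg, [vptrReg + vtabOffsOfIndirection]  ; fetch the vtable chunk
+                    //     call [vptrReg + vtabOffsAfterIndirection]        ; call through the slot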
+ }
+ break;
+
+ case GTF_CALL_NONVIRT:
+ {
+ //------------------------ Non-virtual/Indirect calls -------------------------
+ // Lots of cases follow
+ // - Direct P/Invoke calls
+ // - Indirect calls to P/Invoke functions via the P/Invoke stub
+ // - Direct Helper calls
+ // - Indirect Helper calls
+ // - Direct calls to known addresses
+ // - Direct calls where address is accessed by one or two indirections
+ // - Indirect calls to computed addresses
+ // - Tailcall versions of all of the above
+
+ CORINFO_METHOD_HANDLE methHnd = call->gtCall.gtCallMethHnd;
+
+ //------------------------------------------------------
+ // Non-virtual/Indirect calls: Insert a null check on the "this" pointer if needed
+ //
+ // For (final and private) functions which were called with
+ // invokevirtual, but which we call directly, we need to
+ // dereference the object pointer to make sure it's not NULL.
+ //
+
+ if (call->gtFlags & GTF_CALL_NULLCHECK)
+ {
+ /* Generate "cmp ECX, [ECX]" to trap null pointers */
+ const regNumber regThis = genGetThisArgReg(call);
+#if CPU_LOAD_STORE_ARCH
+ regNumber indReg =
+ regSet.rsGrabReg(RBM_ALLINT); // Grab an available register to use for the indirection
+ getEmitter()->emitIns_R_R_I(INS_ldr, EA_4BYTE, indReg, regThis, 0);
+ regTracker.rsTrackRegTrash(indReg);
+#else
+ getEmitter()->emitIns_AR_R(INS_cmp, EA_4BYTE, regThis, regThis, 0);
+#endif
+ }
+
+ if (call->gtFlags & GTF_CALL_UNMANAGED)
+ {
+ //------------------------------------------------------
+ // Non-virtual/Indirect calls: PInvoke calls.
+
+ noway_assert(compiler->info.compCallUnmanaged != 0);
+
+ /* args shouldn't be greater than 64K */
+
+ noway_assert((argSize & 0xffff0000) == 0);
+
+ /* Remember the varDsc for the callsite-epilog */
+
+ frameListRoot = &compiler->lvaTable[compiler->info.compLvFrameListRoot];
+
+ // exact codegen is required
+ getEmitter()->emitDisableRandomNops();
+
+ int nArgSize = 0;
+
+ regNumber indCallReg = REG_NA;
+
+ if (callType == CT_INDIRECT)
+ {
+ noway_assert(genStillAddressable(call->gtCall.gtCallAddr));
+
+ if (call->gtCall.gtCallAddr->gtFlags & GTF_REG_VAL)
+ indCallReg = call->gtCall.gtCallAddr->gtRegNum;
+
+ nArgSize = (call->gtFlags & GTF_CALL_POP_ARGS) ? 0 : (int)argSize;
+ methHnd = 0;
+ }
+ else
+ {
+ noway_assert(callType == CT_USER_FUNC);
+ }
+
+ regNumber tcbReg;
+ tcbReg = genPInvokeCallProlog(frameListRoot, nArgSize, methHnd, returnLabel);
+
+ void* addr = NULL;
+
+ if (callType == CT_INDIRECT)
+ {
+ /* Double check that the callee didn't use/trash the
+ registers holding the call target.
+ */
+ noway_assert(tcbReg != indCallReg);
+
+ if (indCallReg == REG_NA)
+ {
+ indCallReg = regSet.rsGrabReg(RBM_ALLINT); // Grab an available register to use for the CALL
+ // indirection
+
+ /* Please note that this even works with tcbReg == REG_EAX.
+ tcbReg contains an interesting value only if frameListRoot is
+ an enregistered local that stays alive across the call
+ (certainly not EAX). If frameListRoot has been moved into
+ EAX, we can trash it since it won't survive across the call
+                            anyway.
+ */
+
+ inst_RV_TT(INS_mov, indCallReg, call->gtCall.gtCallAddr);
+ regTracker.rsTrackRegTrash(indCallReg);
+ }
+
+ emitCallType = emitter::EC_INDIR_R;
+ }
+ else
+ {
+ noway_assert(callType == CT_USER_FUNC);
+
+ void* pAddr;
+ addr = compiler->info.compCompHnd->getAddressOfPInvokeFixup(methHnd, (void**)&pAddr);
+ if (addr != NULL)
+ {
+#if CPU_LOAD_STORE_ARCH
+ // Load the address into a register, indirect it and call through a register
+ indCallReg = regSet.rsGrabReg(RBM_ALLINT); // Grab an available register to use for the CALL
+ // indirection
+ instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, indCallReg, (ssize_t)addr);
+ getEmitter()->emitIns_R_R_I(INS_ldr, EA_PTRSIZE, indCallReg, indCallReg, 0);
+ regTracker.rsTrackRegTrash(indCallReg);
+ // Now make the call "call indCallReg"
+
+ getEmitter()->emitIns_Call(emitter::EC_INDIR_R,
+ methHnd, // methHnd
+ INDEBUG_LDISASM_COMMA(sigInfo) // sigInfo
+ NULL, // addr
+ args,
+ retSize, gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur,
+ gcInfo.gcRegByrefSetCur, ilOffset, indCallReg);
+
+ emitCallType = emitter::EC_INDIR_R;
+ break;
+#else
+ emitCallType = emitter::EC_FUNC_TOKEN_INDIR;
+ indCallReg = REG_NA;
+#endif
+ }
+ else
+ {
+ // Double-indirection. Load the address into a register
+ // and call indirectly through a register
+ indCallReg = regSet.rsGrabReg(RBM_ALLINT); // Grab an available register to use for the CALL
+ // indirection
+
+#if CPU_LOAD_STORE_ARCH
+ instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, indCallReg, (ssize_t)pAddr);
+ getEmitter()->emitIns_R_R_I(INS_ldr, EA_PTRSIZE, indCallReg, indCallReg, 0);
+ getEmitter()->emitIns_R_R_I(INS_ldr, EA_PTRSIZE, indCallReg, indCallReg, 0);
+ regTracker.rsTrackRegTrash(indCallReg);
+
+ emitCallType = emitter::EC_INDIR_R;
+
+#else
+ getEmitter()->emitIns_R_AI(INS_mov, EA_PTR_DSP_RELOC, indCallReg, (ssize_t)pAddr);
+ regTracker.rsTrackRegTrash(indCallReg);
+ emitCallType = emitter::EC_INDIR_ARD;
+
+#endif // CPU_LOAD_STORE_ARCH
+ }
+ }
+
+ getEmitter()->emitIns_Call(emitCallType, compiler->eeMarkNativeTarget(methHnd),
+ INDEBUG_LDISASM_COMMA(sigInfo) addr, args, retSize,
+ gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur,
+ ilOffset, indCallReg);
+
+ if (callType == CT_INDIRECT)
+ genDoneAddressable(call->gtCall.gtCallAddr, fptrRegs, RegSet::KEEP_REG);
+
+ getEmitter()->emitEnableRandomNops();
+
+ // Done with PInvoke calls
+ break;
+ }
+
+ if (callType == CT_INDIRECT)
+ {
+ noway_assert(genStillAddressable(call->gtCall.gtCallAddr));
+
+ if (call->gtCall.gtCallCookie)
+ {
+ //------------------------------------------------------
+ // Non-virtual indirect calls via the P/Invoke stub
+
+ GenTreePtr cookie = call->gtCall.gtCallCookie;
+ GenTreePtr target = call->gtCall.gtCallAddr;
+
+ noway_assert((call->gtFlags & GTF_CALL_POP_ARGS) == 0);
+
+ noway_assert(cookie->gtOper == GT_CNS_INT ||
+ cookie->gtOper == GT_IND && cookie->gtOp.gtOp1->gtOper == GT_CNS_INT);
+
+ noway_assert(args == argSize);
+
+#if defined(_TARGET_X86_)
+ /* load eax with the real target */
+
+ inst_RV_TT(INS_mov, REG_EAX, target);
+ regTracker.rsTrackRegTrash(REG_EAX);
+
+ if (cookie->gtOper == GT_CNS_INT)
+ inst_IV_handle(INS_push, cookie->gtIntCon.gtIconVal);
+ else
+ inst_TT(INS_push, cookie);
+
+ /* Keep track of ESP for EBP-less frames */
+ genSinglePush();
+
+ argSize += sizeof(void*);
+
+#elif defined(_TARGET_ARM_)
+
+ // Ensure that we spill these registers (if caller saved) in the prolog
+ regSet.rsSetRegsModified(RBM_PINVOKE_COOKIE_PARAM | RBM_PINVOKE_TARGET_PARAM);
+
+ // ARM: load r12 with the real target
+ // X64: load r10 with the real target
+ inst_RV_TT(INS_mov, REG_PINVOKE_TARGET_PARAM, target);
+ regTracker.rsTrackRegTrash(REG_PINVOKE_TARGET_PARAM);
+
+ // ARM: load r4 with the pinvoke VASigCookie
+ // X64: load r11 with the pinvoke VASigCookie
+ if (cookie->gtOper == GT_CNS_INT)
+ inst_RV_IV(INS_mov, REG_PINVOKE_COOKIE_PARAM, cookie->gtIntCon.gtIconVal,
+ EA_HANDLE_CNS_RELOC);
+ else
+ inst_RV_TT(INS_mov, REG_PINVOKE_COOKIE_PARAM, cookie);
+ regTracker.rsTrackRegTrash(REG_PINVOKE_COOKIE_PARAM);
+
+ noway_assert(args == argSize);
+
+ // Ensure that we don't trash any of these registers if we have to load
+ // the helper call target into a register to invoke it.
+ regMaskTP regsUsed;
+ regSet.rsLockReg(call->gtCall.gtCallRegUsedMask | RBM_PINVOKE_TARGET_PARAM |
+ RBM_PINVOKE_COOKIE_PARAM,
+ &regsUsed);
+#else
+ NYI("Non-virtual indirect calls via the P/Invoke stub");
+#endif
+
+ args = argSize;
+ noway_assert((size_t)(int)args == args);
+
+ genEmitHelperCall(CORINFO_HELP_PINVOKE_CALLI, (int)args, retSize);
+
+#if defined(_TARGET_ARM_)
+ regSet.rsUnlockReg(call->gtCall.gtCallRegUsedMask | RBM_PINVOKE_TARGET_PARAM |
+ RBM_PINVOKE_COOKIE_PARAM,
+ regsUsed);
+#endif
+
+#ifdef _TARGET_ARM_
+ // genEmitHelperCall doesn't record all registers a helper call would trash.
+ regTracker.rsTrackRegTrash(REG_PINVOKE_COOKIE_PARAM);
+#endif
+ }
+ else
+ {
+ //------------------------------------------------------
+ // Non-virtual indirect calls
+
+ if (fTailCall)
+ {
+ inst_RV_TT(INS_mov, REG_TAILCALL_ADDR, call->gtCall.gtCallAddr);
+ regTracker.rsTrackRegTrash(REG_TAILCALL_ADDR);
+ }
+ else
+ instEmit_indCall(call, args, retSize);
+ }
+
+ genDoneAddressable(call->gtCall.gtCallAddr, fptrRegs, RegSet::KEEP_REG);
+
+ // Done with indirect calls
+ break;
+ }
+
+ //------------------------------------------------------
+ // Non-virtual direct/indirect calls: Work out if the address of the
+ // call is known at JIT time (if not it is either an indirect call
+                    // or the address must be accessed via a single/double indirection)
+
+ noway_assert(callType == CT_USER_FUNC || callType == CT_HELPER);
+
+ void* addr;
+ InfoAccessType accessType;
+
+ helperNum = compiler->eeGetHelperNum(methHnd);
+
+ if (callType == CT_HELPER)
+ {
+ noway_assert(helperNum != CORINFO_HELP_UNDEF);
+
+ void* pAddr;
+ addr = compiler->compGetHelperFtn(helperNum, (void**)&pAddr);
+
+ accessType = IAT_VALUE;
+
+ if (!addr)
+ {
+ accessType = IAT_PVALUE;
+ addr = pAddr;
+ }
+ }
+ else
+ {
+ noway_assert(helperNum == CORINFO_HELP_UNDEF);
+
+ CORINFO_ACCESS_FLAGS aflags = CORINFO_ACCESS_ANY;
+
+ if (call->gtCall.gtCallMoreFlags & GTF_CALL_M_NONVIRT_SAME_THIS)
+ aflags = (CORINFO_ACCESS_FLAGS)(aflags | CORINFO_ACCESS_THIS);
+
+ if ((call->gtFlags & GTF_CALL_NULLCHECK) == 0)
+ aflags = (CORINFO_ACCESS_FLAGS)(aflags | CORINFO_ACCESS_NONNULL);
+
+ CORINFO_CONST_LOOKUP addrInfo;
+ compiler->info.compCompHnd->getFunctionEntryPoint(methHnd, &addrInfo, aflags);
+
+ accessType = addrInfo.accessType;
+ addr = addrInfo.addr;
+ }
+
+ if (fTailCall)
+ {
+ noway_assert(callType == CT_USER_FUNC);
+
+ switch (accessType)
+ {
+ case IAT_VALUE:
+ //------------------------------------------------------
+                                // Non-virtual direct calls to known addresses
+ //
+ instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, REG_TAILCALL_ADDR, (ssize_t)addr);
+ break;
+
+ case IAT_PVALUE:
+ //------------------------------------------------------
+ // Non-virtual direct calls to addresses accessed by
+ // a single indirection.
+ //
+ // For tailcalls we place the target address in REG_TAILCALL_ADDR
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#if CPU_LOAD_STORE_ARCH
+ {
+ regNumber indReg = REG_TAILCALL_ADDR;
+ instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, indReg, (ssize_t)addr);
+ getEmitter()->emitIns_R_R_I(INS_ldr, EA_4BYTE, indReg, indReg, 0);
+ regTracker.rsTrackRegTrash(indReg);
+ }
+#else
+ getEmitter()->emitIns_R_AI(INS_mov, EA_PTR_DSP_RELOC, REG_TAILCALL_ADDR, (ssize_t)addr);
+ regTracker.rsTrackRegTrash(REG_TAILCALL_ADDR);
+#endif
+ break;
+
+ case IAT_PPVALUE:
+ //------------------------------------------------------
+ // Non-virtual direct calls to addresses accessed by
+ // a double indirection.
+ //
+ // For tailcalls we place the target address in REG_TAILCALL_ADDR
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#if CPU_LOAD_STORE_ARCH
+ {
+ regNumber indReg = REG_TAILCALL_ADDR;
+ instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, indReg, (ssize_t)addr);
+ getEmitter()->emitIns_R_R_I(INS_ldr, EA_4BYTE, indReg, indReg, 0);
+ getEmitter()->emitIns_R_R_I(INS_ldr, EA_4BYTE, indReg, indReg, 0);
+ regTracker.rsTrackRegTrash(indReg);
+ }
+#else
+ getEmitter()->emitIns_R_AI(INS_mov, EA_PTR_DSP_RELOC, REG_TAILCALL_ADDR, (ssize_t)addr);
+ getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), EA_PTRSIZE, REG_TAILCALL_ADDR,
+ REG_TAILCALL_ADDR, 0);
+ regTracker.rsTrackRegTrash(REG_TAILCALL_ADDR);
+#endif
+ break;
+
+ default:
+ noway_assert(!"Bad accessType");
+ break;
+ }
+ }
+ else
+ {
+ switch (accessType)
+ {
+ regNumber indCallReg;
+
+ case IAT_VALUE:
+ //------------------------------------------------------
+                                // Non-virtual direct calls to known addresses
+ //
+ // The vast majority of calls end up here.... Wouldn't
+ // it be nice if they all did!
+ CLANG_FORMAT_COMMENT_ANCHOR;
+#ifdef _TARGET_ARM_
+ if (!arm_Valid_Imm_For_BL((ssize_t)addr))
+ {
+ // Load the address into a register and call through a register
+ indCallReg = regSet.rsGrabReg(RBM_ALLINT); // Grab an available register to use for the
+ // CALL indirection
+ instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, indCallReg, (ssize_t)addr);
+
+ getEmitter()->emitIns_Call(emitter::EC_INDIR_R, methHnd,
+ INDEBUG_LDISASM_COMMA(sigInfo) NULL, // addr
+ args, retSize, gcInfo.gcVarPtrSetCur,
+ gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur, ilOffset,
+ indCallReg, // ireg
+ REG_NA, 0, 0, // xreg, xmul, disp
+ false, // isJump
+ emitter::emitNoGChelper(helperNum));
+ }
+ else
+#endif
+ {
+ getEmitter()->emitIns_Call(emitter::EC_FUNC_TOKEN, methHnd,
+ INDEBUG_LDISASM_COMMA(sigInfo) addr, args, retSize,
+ gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur,
+ gcInfo.gcRegByrefSetCur, ilOffset, REG_NA, REG_NA, 0,
+ 0, /* ireg, xreg, xmul, disp */
+ false, /* isJump */
+ emitter::emitNoGChelper(helperNum));
+ }
+ break;
+
+ case IAT_PVALUE:
+ //------------------------------------------------------
+ // Non-virtual direct calls to addresses accessed by
+ // a single indirection.
+ //
+
+ // Load the address into a register, load indirect and call through a register
+ CLANG_FORMAT_COMMENT_ANCHOR;
+#if CPU_LOAD_STORE_ARCH
+ indCallReg = regSet.rsGrabReg(RBM_ALLINT); // Grab an available register to use for the CALL
+ // indirection
+
+ instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, indCallReg, (ssize_t)addr);
+ getEmitter()->emitIns_R_R_I(INS_ldr, EA_PTRSIZE, indCallReg, indCallReg, 0);
+ regTracker.rsTrackRegTrash(indCallReg);
+
+ emitCallType = emitter::EC_INDIR_R;
+ addr = NULL;
+
+#else
+ emitCallType = emitter::EC_FUNC_TOKEN_INDIR;
+ indCallReg = REG_NA;
+
+#endif // CPU_LOAD_STORE_ARCH
+
+ getEmitter()->emitIns_Call(emitCallType, methHnd, INDEBUG_LDISASM_COMMA(sigInfo) addr, args,
+ retSize, gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur,
+ gcInfo.gcRegByrefSetCur, ilOffset,
+ indCallReg, // ireg
+ REG_NA, 0, 0, // xreg, xmul, disp
+ false, /* isJump */
+ emitter::emitNoGChelper(helperNum));
+ break;
+
+ case IAT_PPVALUE:
+ {
+ //------------------------------------------------------
+ // Non-virtual direct calls to addresses accessed by
+ // a double indirection.
+ //
+ // Double-indirection. Load the address into a register
+ // and call indirectly through the register
+
+ noway_assert(helperNum == CORINFO_HELP_UNDEF);
+
+ // Grab an available register to use for the CALL indirection
+ indCallReg = regSet.rsGrabReg(RBM_ALLINT);
+
+#if CPU_LOAD_STORE_ARCH
+ instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, indCallReg, (ssize_t)addr);
+ getEmitter()->emitIns_R_R_I(INS_ldr, EA_PTRSIZE, indCallReg, indCallReg, 0);
+ getEmitter()->emitIns_R_R_I(INS_ldr, EA_PTRSIZE, indCallReg, indCallReg, 0);
+ regTracker.rsTrackRegTrash(indCallReg);
+
+ emitCallType = emitter::EC_INDIR_R;
+
+#else
+
+ getEmitter()->emitIns_R_AI(INS_mov, EA_PTR_DSP_RELOC, indCallReg, (ssize_t)addr);
+ regTracker.rsTrackRegTrash(indCallReg);
+
+ emitCallType = emitter::EC_INDIR_ARD;
+
+#endif // CPU_LOAD_STORE_ARCH
+
+ getEmitter()->emitIns_Call(emitCallType, methHnd,
+ INDEBUG_LDISASM_COMMA(sigInfo) NULL, // addr
+ args, retSize, gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur,
+ gcInfo.gcRegByrefSetCur, ilOffset,
+ indCallReg, // ireg
+ REG_NA, 0, 0, // xreg, xmul, disp
+ false, // isJump
+ emitter::emitNoGChelper(helperNum));
+ }
+ break;
+
+ default:
+ noway_assert(!"Bad accessType");
+ break;
+ }
+
+                                // Track the region protected by the monitor in synchronized methods
+ if ((helperNum != CORINFO_HELP_UNDEF) && (compiler->info.compFlags & CORINFO_FLG_SYNCH))
+ {
+ fPossibleSyncHelperCall = true;
+ }
+ }
+ }
+ break;
+
+ default:
+ noway_assert(!"strange call type");
+ break;
+ }
+
+ /*-------------------------------------------------------------------------
+ * For tailcalls, REG_INTRET contains the address of the target function,
+ * enregistered args are in the correct registers, and the stack arguments
+ * have been pushed on the stack. Now call the stub-sliding helper
+ */
+
+ if (fTailCall)
+ {
+
+ if (compiler->info.compCallUnmanaged)
+ genPInvokeMethodEpilog();
+
+#ifdef _TARGET_X86_
+ noway_assert(0 <= (ssize_t)args); // caller-pop args not supported for tailcall
+
+ // Push the count of the incoming stack arguments
+
+ unsigned nOldStkArgs =
+ (unsigned)((compiler->compArgSize - (intRegState.rsCalleeRegArgCount * sizeof(void*))) / sizeof(void*));
+ getEmitter()->emitIns_I(INS_push, EA_4BYTE, nOldStkArgs);
+ genSinglePush(); // Keep track of ESP for EBP-less frames
+ args += sizeof(void*);
+
+ // Push the count of the outgoing stack arguments
+
+ getEmitter()->emitIns_I(INS_push, EA_4BYTE, argSize / sizeof(void*));
+ genSinglePush(); // Keep track of ESP for EBP-less frames
+ args += sizeof(void*);
+
+ // Push info about the callee-saved registers to be restored
+ // For now, we always spill all registers if compiler->compTailCallUsed
+
+ DWORD calleeSavedRegInfo = 1 | // always restore EDI,ESI,EBX
+ (fTailCallTargetIsVSD ? 0x2 : 0x0); // Stub dispatch flag
+ getEmitter()->emitIns_I(INS_push, EA_4BYTE, calleeSavedRegInfo);
+ genSinglePush(); // Keep track of ESP for EBP-less frames
+ args += sizeof(void*);
+
+ // Push the address of the target function
+
+ getEmitter()->emitIns_R(INS_push, EA_4BYTE, REG_TAILCALL_ADDR);
+ genSinglePush(); // Keep track of ESP for EBP-less frames
+ args += sizeof(void*);
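+
+            // Rough sketch of the stack at this point (derived from the pushes above, x86 only),
+            // top of stack first: the target address, the callee-saved register info word, the
+            // outgoing stack arg count, the incoming stack arg count, and below those the
+            // outgoing call arguments that were pushed earlier. This is the layout handed to the
+            // CORINFO_HELP_TAILCALL helper called below.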
+
+#else // _TARGET_X86_
+
+ args = 0;
+ retSize = EA_UNKNOWN;
+
+#endif // _TARGET_X86_
+
+ if (compiler->getNeedsGSSecurityCookie())
+ {
+ genEmitGSCookieCheck(true);
+ }
+
+        // The TailCall helper does not poll for GC. An explicit GC poll
+        // should have been placed here when we morphed this into a tail call.
+ noway_assert(compiler->compCurBB->bbFlags & BBF_GC_SAFE_POINT);
+
+ // Now call the helper
+
+ genEmitHelperCall(CORINFO_HELP_TAILCALL, (int)args, retSize);
+ }
+
+ /*-------------------------------------------------------------------------
+ * Done with call.
+ * Trash registers, pop arguments if needed, etc
+ */
+
+ /* Mark the argument registers as free */
+
+ noway_assert(intRegState.rsCurRegArgNum <= MAX_REG_ARG);
+
+ for (areg = 0; areg < MAX_REG_ARG; areg++)
+ {
+ regMaskTP curArgMask = genMapArgNumToRegMask(areg, TYP_INT);
+
+ // Is this one of the used argument registers?
+ if ((curArgMask & call->gtCall.gtCallRegUsedMask) == 0)
+ continue;
+
+#ifdef _TARGET_ARM_
+ if (regSet.rsUsedTree[areg] == NULL)
+ {
+ noway_assert(areg % 2 == 1 &&
+ (((areg + 1) >= MAX_REG_ARG) || (regSet.rsUsedTree[areg + 1]->TypeGet() == TYP_STRUCT) ||
+ (genTypeStSz(regSet.rsUsedTree[areg + 1]->TypeGet()) == 2)));
+ continue;
+ }
+#endif
+
+ regSet.rsMarkRegFree(curArgMask);
+
+ // We keep regSet.rsMaskVars current during codegen, so we have to remove any
+ // that have been copied into arg regs.
+
+ regSet.RemoveMaskVars(curArgMask);
+ gcInfo.gcRegGCrefSetCur &= ~(curArgMask);
+ gcInfo.gcRegByrefSetCur &= ~(curArgMask);
+ }
+
+#if !FEATURE_STACK_FP_X87
+ //-------------------------------------------------------------------------
+ // free up the FP args
+
+ for (areg = 0; areg < MAX_FLOAT_REG_ARG; areg++)
+ {
+ regNumber argRegNum = genMapRegArgNumToRegNum(areg, TYP_FLOAT);
+ regMaskTP curArgMask = genMapArgNumToRegMask(areg, TYP_FLOAT);
+
+ // Is this one of the used argument registers?
+ if ((curArgMask & call->gtCall.gtCallRegUsedMask) == 0)
+ continue;
+
+ regSet.rsMaskUsed &= ~curArgMask;
+ regSet.rsUsedTree[argRegNum] = NULL;
+ }
+#endif // !FEATURE_STACK_FP_X87
+
+ /* restore the old argument register status */
+
+ intRegState.rsCurRegArgNum = savCurIntArgReg;
+ floatRegState.rsCurRegArgNum = savCurFloatArgReg;
+
+ noway_assert(intRegState.rsCurRegArgNum <= MAX_REG_ARG);
+
+ /* Mark all trashed registers as such */
+
+ if (calleeTrashedRegs)
+ regTracker.rsTrashRegSet(calleeTrashedRegs);
+
+ regTracker.rsTrashRegsForGCInterruptability();
+
+#ifdef DEBUG
+
+ if (!(call->gtFlags & GTF_CALL_POP_ARGS))
+ {
+ if (compiler->verbose)
+ {
+ printf("\t\t\t\t\t\t\tEnd call ");
+ Compiler::printTreeID(call);
+ printf(" stack %02u [E=%02u] argSize=%u\n", saveStackLvl, getEmitter()->emitCurStackLvl, argSize);
+ }
+ noway_assert(stackLvl == getEmitter()->emitCurStackLvl);
+ }
+
+#endif
+
+#if FEATURE_STACK_FP_X87
+ /* All float temps must be spilled around function calls */
+ if (call->gtType == TYP_FLOAT || call->gtType == TYP_DOUBLE)
+ {
+ noway_assert(compCurFPState.m_uStackSize == 1);
+ }
+ else
+ {
+ noway_assert(compCurFPState.m_uStackSize == 0);
+ }
+#else
+ if (call->gtType == TYP_FLOAT || call->gtType == TYP_DOUBLE)
+ {
+#ifdef _TARGET_ARM_
+ if (call->gtCall.IsVarargs() || compiler->opts.compUseSoftFP)
+ {
+            // The result of a vararg (or soft-FP) call is returned in r0/r1, but our callers
+            // expect the return value in s0/s1 because of the floating-point type. Do the move now.
+ if (call->gtType == TYP_FLOAT)
+ {
+ inst_RV_RV(INS_vmov_i2f, REG_FLOATRET, REG_INTRET, TYP_FLOAT, EA_4BYTE);
+ }
+ else
+ {
+ inst_RV_RV_RV(INS_vmov_i2d, REG_FLOATRET, REG_INTRET, REG_NEXT(REG_INTRET), EA_8BYTE);
+ }
+ }
+#endif
+ genMarkTreeInReg(call, REG_FLOATRET);
+ }
+#endif
+
+ /* The function will pop all arguments before returning */
+
+ genStackLevel = saveStackLvl;
+
+ /* No trashed registers may possibly hold a pointer at this point */
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef DEBUG
+
+ regMaskTP ptrRegs = (gcInfo.gcRegGCrefSetCur | gcInfo.gcRegByrefSetCur) & (calleeTrashedRegs & RBM_ALLINT) &
+ ~regSet.rsMaskVars & ~vptrMask;
+ if (ptrRegs)
+ {
+ // A reg may be dead already. The assertion is too strong.
+ LclVarDsc* varDsc;
+ unsigned varNum;
+
+ // use compiler->compCurLife
+ for (varNum = 0, varDsc = compiler->lvaTable; varNum < compiler->lvaCount && ptrRegs != 0; varNum++, varDsc++)
+ {
+ /* Ignore the variable if it's not tracked, not in a register, or a floating-point type */
+
+ if (!varDsc->lvTracked)
+ continue;
+ if (!varDsc->lvRegister)
+ continue;
+ if (varDsc->IsFloatRegType())
+ continue;
+
+ /* Get hold of the index and the bitmask for the variable */
+
+ unsigned varIndex = varDsc->lvVarIndex;
+
+ /* Is this variable live currently? */
+
+ if (!VarSetOps::IsMember(compiler, compiler->compCurLife, varIndex))
+ {
+ regNumber regNum = varDsc->lvRegNum;
+ regMaskTP regMask = genRegMask(regNum);
+
+ if (varDsc->lvType == TYP_REF || varDsc->lvType == TYP_BYREF)
+ ptrRegs &= ~regMask;
+ }
+ }
+ if (ptrRegs)
+ {
+ printf("Bad call handling for ");
+ Compiler::printTreeID(call);
+ printf("\n");
+ noway_assert(!"A callee trashed reg is holding a GC pointer");
+ }
+ }
+#endif
+
+#if defined(_TARGET_X86_)
+ //-------------------------------------------------------------------------
+    // Create a label for tracking the region protected by the monitor in synchronized methods.
+ // This needs to be here, rather than above where fPossibleSyncHelperCall is set,
+ // so the GC state vars have been updated before creating the label.
+
+ if (fPossibleSyncHelperCall)
+ {
+ switch (helperNum)
+ {
+ case CORINFO_HELP_MON_ENTER:
+ case CORINFO_HELP_MON_ENTER_STATIC:
+ noway_assert(compiler->syncStartEmitCookie == NULL);
+ compiler->syncStartEmitCookie =
+ getEmitter()->emitAddLabel(gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur);
+ noway_assert(compiler->syncStartEmitCookie != NULL);
+ break;
+ case CORINFO_HELP_MON_EXIT:
+ case CORINFO_HELP_MON_EXIT_STATIC:
+ noway_assert(compiler->syncEndEmitCookie == NULL);
+ compiler->syncEndEmitCookie =
+ getEmitter()->emitAddLabel(gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur);
+ noway_assert(compiler->syncEndEmitCookie != NULL);
+ break;
+ default:
+ break;
+ }
+ }
+#endif // _TARGET_X86_
+
+ if (call->gtFlags & GTF_CALL_UNMANAGED)
+ {
+ genDefineTempLabel(returnLabel);
+
+#ifdef _TARGET_X86_
+ if (getInlinePInvokeCheckEnabled())
+ {
+ noway_assert(compiler->lvaInlinedPInvokeFrameVar != BAD_VAR_NUM);
+ BasicBlock* esp_check;
+
+ CORINFO_EE_INFO* pInfo = compiler->eeGetEEInfo();
+ /* mov ecx, dword ptr [frame.callSiteTracker] */
+
+ getEmitter()->emitIns_R_S(INS_mov, EA_4BYTE, REG_ARG_0, compiler->lvaInlinedPInvokeFrameVar,
+ pInfo->inlinedCallFrameInfo.offsetOfCallSiteSP);
+ regTracker.rsTrackRegTrash(REG_ARG_0);
+
+ /* Generate the conditional jump */
+
+ if (!(call->gtFlags & GTF_CALL_POP_ARGS))
+ {
+ if (argSize)
+ {
+ getEmitter()->emitIns_R_I(INS_add, EA_PTRSIZE, REG_ARG_0, argSize);
+ }
+ }
+ /* cmp ecx, esp */
+
+ getEmitter()->emitIns_R_R(INS_cmp, EA_PTRSIZE, REG_ARG_0, REG_SPBASE);
+
+ esp_check = genCreateTempLabel();
+
+ emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED);
+ inst_JMP(jmpEqual, esp_check);
+
+ getEmitter()->emitIns(INS_BREAKPOINT);
+
+ /* genCondJump() closes the current emitter block */
+
+ genDefineTempLabel(esp_check);
+ }
+#endif
+ }
+
+ /* Are we supposed to pop the arguments? */
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#if defined(_TARGET_X86_)
+ if (call->gtFlags & GTF_CALL_UNMANAGED)
+ {
+ if ((compiler->opts.eeFlags & CORJIT_FLG_PINVOKE_RESTORE_ESP) ||
+ compiler->compStressCompile(Compiler::STRESS_PINVOKE_RESTORE_ESP, 50))
+ {
+ // P/Invoke signature mismatch resilience - restore ESP to pre-call value. We would ideally
+ // take care of the cdecl argument popping here as well but the stack depth tracking logic
+ // makes this very hard, i.e. it needs to "see" the actual pop.
+
+ CORINFO_EE_INFO* pInfo = compiler->eeGetEEInfo();
+
+ if (argSize == 0 || (call->gtFlags & GTF_CALL_POP_ARGS))
+ {
+ /* mov esp, dword ptr [frame.callSiteTracker] */
+ getEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL), EA_PTRSIZE, REG_SPBASE,
+ compiler->lvaInlinedPInvokeFrameVar,
+ pInfo->inlinedCallFrameInfo.offsetOfCallSiteSP);
+ }
+ else
+ {
+ /* mov ecx, dword ptr [frame.callSiteTracker] */
+ getEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL), EA_PTRSIZE, REG_ARG_0,
+ compiler->lvaInlinedPInvokeFrameVar,
+ pInfo->inlinedCallFrameInfo.offsetOfCallSiteSP);
+ regTracker.rsTrackRegTrash(REG_ARG_0);
+
+ /* lea esp, [ecx + argSize] */
+ getEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, REG_SPBASE, REG_ARG_0, (int)argSize);
+ }
+ }
+ }
+#endif // _TARGET_X86_
+
+ if (call->gtFlags & GTF_CALL_POP_ARGS)
+ {
+ noway_assert(args == (size_t) - (int)argSize);
+
+ if (argSize)
+ {
+ genAdjustSP(argSize);
+ }
+ }
+
+ if (pseudoStackLvl)
+ {
+ noway_assert(call->gtType == TYP_VOID);
+
+ /* Generate NOP */
+
+ instGen(INS_nop);
+ }
+
+ /* What does the function return? */
+
+ retVal = RBM_NONE;
+
+ switch (call->gtType)
+ {
+ case TYP_REF:
+ case TYP_ARRAY:
+ case TYP_BYREF:
+ gcInfo.gcMarkRegPtrVal(REG_INTRET, call->TypeGet());
+
+ __fallthrough;
+
+ case TYP_INT:
+#if !CPU_HAS_FP_SUPPORT
+ case TYP_FLOAT:
+#endif
+ retVal = RBM_INTRET;
+ break;
+
+#ifdef _TARGET_ARM_
+ case TYP_STRUCT:
+ {
+ assert(call->gtCall.gtRetClsHnd != NULL);
+ assert(compiler->IsHfa(call->gtCall.gtRetClsHnd));
+ int retSlots = compiler->GetHfaCount(call->gtCall.gtRetClsHnd);
+ assert(retSlots > 0 && retSlots <= MAX_HFA_RET_SLOTS);
+ assert(MAX_HFA_RET_SLOTS < sizeof(int) * 8);
+ retVal = ((1 << retSlots) - 1) << REG_FLOATRET;
+ }
+ break;
+#endif
+
+ case TYP_LONG:
+#if !CPU_HAS_FP_SUPPORT
+ case TYP_DOUBLE:
+#endif
+ retVal = RBM_LNGRET;
+ break;
+
+#if CPU_HAS_FP_SUPPORT
+ case TYP_FLOAT:
+ case TYP_DOUBLE:
+
+ break;
+#endif
+
+ case TYP_VOID:
+ break;
+
+ default:
+ noway_assert(!"unexpected/unhandled fn return type");
+ }
+
+ // We now have to generate the "call epilog" (if it was a call to unmanaged code).
+ /* if it is a call to unmanaged code, frameListRoot must be set */
+
+ noway_assert((call->gtFlags & GTF_CALL_UNMANAGED) == 0 || frameListRoot);
+
+ if (frameListRoot)
+ genPInvokeCallEpilog(frameListRoot, retVal);
+
+ if (frameListRoot && (call->gtCall.gtCallMoreFlags & GTF_CALL_M_FRAME_VAR_DEATH))
+ {
+ if (frameListRoot->lvRegister)
+ {
+ bool isBorn = false;
+ bool isDying = true;
+ genUpdateRegLife(frameListRoot, isBorn, isDying DEBUGARG(call));
+ }
+ }
+
+#ifdef DEBUG
+ if (compiler->opts.compStackCheckOnCall
+#if defined(USE_TRANSITION_THUNKS) || defined(USE_DYNAMIC_STACK_ALIGN)
+ // check the stack as frequently as possible
+ && !call->IsHelperCall()
+#else
+ && call->gtCall.gtCallType == CT_USER_FUNC
+#endif
+ )
+ {
+ noway_assert(compiler->lvaCallEspCheck != 0xCCCCCCCC &&
+ compiler->lvaTable[compiler->lvaCallEspCheck].lvDoNotEnregister &&
+ compiler->lvaTable[compiler->lvaCallEspCheck].lvOnFrame);
+ if (argSize > 0)
+ {
+ getEmitter()->emitIns_R_R(INS_mov, EA_4BYTE, REG_ARG_0, REG_SPBASE);
+ getEmitter()->emitIns_R_I(INS_sub, EA_4BYTE, REG_ARG_0, argSize);
+ getEmitter()->emitIns_S_R(INS_cmp, EA_4BYTE, REG_ARG_0, compiler->lvaCallEspCheck, 0);
+ regTracker.rsTrackRegTrash(REG_ARG_0);
+ }
+ else
+ getEmitter()->emitIns_S_R(INS_cmp, EA_4BYTE, REG_SPBASE, compiler->lvaCallEspCheck, 0);
+
+ BasicBlock* esp_check = genCreateTempLabel();
+ emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED);
+ inst_JMP(jmpEqual, esp_check);
+ getEmitter()->emitIns(INS_BREAKPOINT);
+ genDefineTempLabel(esp_check);
+ }
+#endif // DEBUG
+
+#if FEATURE_STACK_FP_X87
+ UnspillRegVarsStackFp();
+#endif // FEATURE_STACK_FP_X87
+
+ if (call->gtType == TYP_FLOAT || call->gtType == TYP_DOUBLE)
+ {
+ // Restore return node if necessary
+ if (call->gtFlags & GTF_SPILLED)
+ {
+ UnspillFloat(call);
+ }
+
+#if FEATURE_STACK_FP_X87
+ // Mark as free
+ regSet.SetUsedRegFloat(call, false);
+#endif
+ }
+
+#if FEATURE_STACK_FP_X87
+#ifdef DEBUG
+ if (compiler->verbose)
+ {
+ JitDumpFPState();
+ }
+#endif
+#endif
+
+ return retVal;
+}
+#ifdef _PREFAST_
+#pragma warning(pop)
+#endif
+
+/*****************************************************************************
+ *
+ * Create and record GC Info for the function.
+ */
+#ifdef JIT32_GCENCODER
+void*
+#else
+void
+#endif
+CodeGen::genCreateAndStoreGCInfo(unsigned codeSize, unsigned prologSize, unsigned epilogSize DEBUGARG(void* codePtr))
+{
+#ifdef JIT32_GCENCODER
+ return genCreateAndStoreGCInfoJIT32(codeSize, prologSize, epilogSize DEBUGARG(codePtr));
+#else
+ genCreateAndStoreGCInfoX64(codeSize, prologSize DEBUGARG(codePtr));
+#endif
+}
+
+#ifdef JIT32_GCENCODER
+void* CodeGen::genCreateAndStoreGCInfoJIT32(unsigned codeSize,
+ unsigned prologSize,
+ unsigned epilogSize DEBUGARG(void* codePtr))
+{
+ BYTE headerBuf[64];
+ InfoHdr header;
+
+ int s_cached;
+#ifdef DEBUG
+ size_t headerSize =
+#endif
+ compiler->compInfoBlkSize =
+ gcInfo.gcInfoBlockHdrSave(headerBuf, 0, codeSize, prologSize, epilogSize, &header, &s_cached);
+
+ size_t argTabOffset = 0;
+ size_t ptrMapSize = gcInfo.gcPtrTableSize(header, codeSize, &argTabOffset);
+
+#if DISPLAY_SIZES
+
+ if (genInterruptible)
+ {
+ gcHeaderISize += compiler->compInfoBlkSize;
+ gcPtrMapISize += ptrMapSize;
+ }
+ else
+ {
+ gcHeaderNSize += compiler->compInfoBlkSize;
+ gcPtrMapNSize += ptrMapSize;
+ }
+
+#endif // DISPLAY_SIZES
+
+ compiler->compInfoBlkSize += ptrMapSize;
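+
+    // At this point compInfoBlkSize should be headerSize + ptrMapSize, i.e. the block is laid
+    // out as [InfoHdr][pointer table]; the asserts following gcInfoBlockHdrSave and
+    // gcPtrTableSave below check exactly that.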
+
+ /* Allocate the info block for the method */
+
+ compiler->compInfoBlkAddr = (BYTE*)compiler->info.compCompHnd->allocGCInfo(compiler->compInfoBlkSize);
+
+#if 0 // VERBOSE_SIZES
+ // TODO-Review: 'dataSize', below, is not defined
+
+// if (compiler->compInfoBlkSize > codeSize && compiler->compInfoBlkSize > 100)
+ {
+ printf("[%7u VM, %7u+%7u/%7u x86 %03u/%03u%%] %s.%s\n",
+ compiler->info.compILCodeSize,
+ compiler->compInfoBlkSize,
+ codeSize + dataSize,
+ codeSize + dataSize - prologSize - epilogSize,
+ 100 * (codeSize + dataSize) / compiler->info.compILCodeSize,
+ 100 * (codeSize + dataSize + compiler->compInfoBlkSize) / compiler->info.compILCodeSize,
+ compiler->info.compClassName,
+ compiler->info.compMethodName);
+ }
+
+#endif
+
+ /* Fill in the info block and return it to the caller */
+
+ void* infoPtr = compiler->compInfoBlkAddr;
+
+ /* Create the method info block: header followed by GC tracking tables */
+
+ compiler->compInfoBlkAddr +=
+ gcInfo.gcInfoBlockHdrSave(compiler->compInfoBlkAddr, -1, codeSize, prologSize, epilogSize, &header, &s_cached);
+
+ assert(compiler->compInfoBlkAddr == (BYTE*)infoPtr + headerSize);
+ compiler->compInfoBlkAddr = gcInfo.gcPtrTableSave(compiler->compInfoBlkAddr, header, codeSize, &argTabOffset);
+ assert(compiler->compInfoBlkAddr == (BYTE*)infoPtr + headerSize + ptrMapSize);
+
+#ifdef DEBUG
+
+ if (0)
+ {
+ BYTE* temp = (BYTE*)infoPtr;
+ unsigned size = compiler->compInfoBlkAddr - temp;
+ BYTE* ptab = temp + headerSize;
+
+ noway_assert(size == headerSize + ptrMapSize);
+
+ printf("Method info block - header [%u bytes]:", headerSize);
+
+ for (unsigned i = 0; i < size; i++)
+ {
+ if (temp == ptab)
+ {
+ printf("\nMethod info block - ptrtab [%u bytes]:", ptrMapSize);
+ printf("\n %04X: %*c", i & ~0xF, 3 * (i & 0xF), ' ');
+ }
+ else
+ {
+ if (!(i % 16))
+ printf("\n %04X: ", i);
+ }
+
+ printf("%02X ", *temp++);
+ }
+
+ printf("\n");
+ }
+
+#endif // DEBUG
+
+#if DUMP_GC_TABLES
+
+ if (compiler->opts.dspGCtbls)
+ {
+ const BYTE* base = (BYTE*)infoPtr;
+ unsigned size;
+ unsigned methodSize;
+ InfoHdr dumpHeader;
+
+ printf("GC Info for method %s\n", compiler->info.compFullName);
+ printf("GC info size = %3u\n", compiler->compInfoBlkSize);
+
+ size = gcInfo.gcInfoBlockHdrDump(base, &dumpHeader, &methodSize);
+ // printf("size of header encoding is %3u\n", size);
+ printf("\n");
+
+ if (compiler->opts.dspGCtbls)
+ {
+ base += size;
+ size = gcInfo.gcDumpPtrTable(base, dumpHeader, methodSize);
+ // printf("size of pointer table is %3u\n", size);
+ printf("\n");
+ noway_assert(compiler->compInfoBlkAddr == (base + size));
+ }
+ }
+
+#ifdef DEBUG
+ if (jitOpts.testMask & 128)
+ {
+ for (unsigned offs = 0; offs < codeSize; offs++)
+ {
+ gcInfo.gcFindPtrsInFrame(infoPtr, codePtr, offs);
+ }
+ }
+#endif // DEBUG
+#endif // DUMP_GC_TABLES
+
+ /* Make sure we ended up generating the expected number of bytes */
+
+ noway_assert(compiler->compInfoBlkAddr == (BYTE*)infoPtr + compiler->compInfoBlkSize);
+
+ return infoPtr;
+}
+
+#else // JIT32_GCENCODER
+
+void CodeGen::genCreateAndStoreGCInfoX64(unsigned codeSize, unsigned prologSize DEBUGARG(void* codePtr))
+{
+ IAllocator* allowZeroAlloc = new (compiler, CMK_GC) AllowZeroAllocator(compiler->getAllocatorGC());
+ GcInfoEncoder* gcInfoEncoder = new (compiler, CMK_GC)
+ GcInfoEncoder(compiler->info.compCompHnd, compiler->info.compMethodInfo, allowZeroAlloc, NOMEM);
+ assert(gcInfoEncoder);
+
+ // Follow the code pattern of the x86 gc info encoder (genCreateAndStoreGCInfoJIT32).
+ gcInfo.gcInfoBlockHdrSave(gcInfoEncoder, codeSize, prologSize);
+
+ // First we figure out the encoder ID's for the stack slots and registers.
+ gcInfo.gcMakeRegPtrTable(gcInfoEncoder, codeSize, prologSize, GCInfo::MAKE_REG_PTR_MODE_ASSIGN_SLOTS);
+ // Now we've requested all the slots we'll need; "finalize" these (make more compact data structures for them).
+ gcInfoEncoder->FinalizeSlotIds();
+ // Now we can actually use those slot ID's to declare live ranges.
+ gcInfo.gcMakeRegPtrTable(gcInfoEncoder, codeSize, prologSize, GCInfo::MAKE_REG_PTR_MODE_DO_WORK);
+
+ gcInfoEncoder->Build();
+
+ // GC Encoder automatically puts the GC info in the right spot using ICorJitInfo::allocGCInfo(size_t)
+ // let's save the values anyway for debugging purposes
+ compiler->compInfoBlkAddr = gcInfoEncoder->Emit();
+ compiler->compInfoBlkSize = 0; // not exposed by the GCEncoder interface
+}
+#endif
+
+/*****************************************************************************
+ * For CEE_LOCALLOC
+ */
+
+regNumber CodeGen::genLclHeap(GenTreePtr size)
+{
+ noway_assert((genActualType(size->gtType) == TYP_INT) || (genActualType(size->gtType) == TYP_I_IMPL));
+
+ // regCnt is a register used to hold both
+ // the amount to stack alloc (either in bytes or pointer sized words)
+ // and the final stack alloc address to return as the result
+ //
+ regNumber regCnt = DUMMY_INIT(REG_CORRUPT);
+ var_types type = genActualType(size->gtType);
+ emitAttr easz = emitTypeSize(type);
+
+#ifdef DEBUG
+ // Verify ESP
+ if (compiler->opts.compStackCheckOnRet)
+ {
+ noway_assert(compiler->lvaReturnEspCheck != 0xCCCCCCCC &&
+ compiler->lvaTable[compiler->lvaReturnEspCheck].lvDoNotEnregister &&
+ compiler->lvaTable[compiler->lvaReturnEspCheck].lvOnFrame);
+ getEmitter()->emitIns_S_R(INS_cmp, EA_PTRSIZE, REG_SPBASE, compiler->lvaReturnEspCheck, 0);
+
+ BasicBlock* esp_check = genCreateTempLabel();
+ emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED);
+ inst_JMP(jmpEqual, esp_check);
+ getEmitter()->emitIns(INS_BREAKPOINT);
+ genDefineTempLabel(esp_check);
+ }
+#endif
+
+ noway_assert(isFramePointerUsed());
+ noway_assert(genStackLevel == 0); // Can't have anything on the stack
+
+ BasicBlock* endLabel = NULL;
+#if FEATURE_FIXED_OUT_ARGS
+ bool stackAdjusted = false;
+#endif
+
+ if (size->IsCnsIntOrI())
+ {
+#if FEATURE_FIXED_OUT_ARGS
+ // If we have an outgoing arg area then we must adjust the SP
+ // essentially popping off the outgoing arg area,
+ // We will restore it right before we return from this method
+ //
+ if (compiler->lvaOutgoingArgSpaceSize > 0)
+ {
+ assert((compiler->lvaOutgoingArgSpaceSize % STACK_ALIGN) ==
+ 0); // This must be true for the stack to remain aligned
+ inst_RV_IV(INS_add, REG_SPBASE, compiler->lvaOutgoingArgSpaceSize, EA_PTRSIZE);
+ stackAdjusted = true;
+ }
+#endif
+ size_t amount = size->gtIntCon.gtIconVal;
+
+ // Convert amount to be properly STACK_ALIGN and count of DWORD_PTRs
+ amount += (STACK_ALIGN - 1);
+ amount &= ~(STACK_ALIGN - 1);
+ amount >>= STACK_ALIGN_SHIFT; // amount is number of pointer-sized words to locAlloc
+ size->gtIntCon.gtIconVal = amount; // update the GT_CNS value in the node
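+
+            // Worked example (assuming a 32-bit target with STACK_ALIGN == 4 and
+            // STACK_ALIGN_SHIFT == 2): a request of 10 bytes becomes (10 + 3) & ~3 == 12,
+            // and 12 >> 2 == 3 pointer-sized words to allocate.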
+
+ /* If amount is zero then return null in RegCnt */
+ if (amount == 0)
+ {
+ regCnt = regSet.rsGrabReg(RBM_ALLINT);
+ instGen_Set_Reg_To_Zero(EA_PTRSIZE, regCnt);
+ goto DONE;
+ }
+
+            /* For small allocations we will generate up to six "push 0" instructions inline */
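+            // For instance (a sketch, assuming an x86 target), an 8-byte localloc - i.e.
+            // amount == 2 pointer-sized words - would expand to roughly:
+            //     push 0
+            //     push 0
+            //     mov  regCnt, esp   ; regCnt holds the address of the newly allocated block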
+ if (amount <= 6)
+ {
+ regCnt = regSet.rsGrabReg(RBM_ALLINT);
+#if CPU_LOAD_STORE_ARCH
+ regNumber regZero = regSet.rsGrabReg(RBM_ALLINT & ~genRegMask(regCnt));
+ // Set 'regZero' to zero
+ instGen_Set_Reg_To_Zero(EA_PTRSIZE, regZero);
+#endif
+
+ while (amount != 0)
+ {
+#if CPU_LOAD_STORE_ARCH
+ inst_IV(INS_push, (unsigned)genRegMask(regZero));
+#else
+ inst_IV(INS_push_hide, 0); // push_hide means don't track the stack
+#endif
+ amount--;
+ }
+
+ regTracker.rsTrackRegTrash(regCnt);
+ // --- move regCnt, ESP
+ inst_RV_RV(INS_mov, regCnt, REG_SPBASE, TYP_I_IMPL);
+ goto DONE;
+ }
+ else
+ {
+ if (!compiler->info.compInitMem)
+ {
+ // Re-bias amount to be number of bytes to adjust the SP
+ amount <<= STACK_ALIGN_SHIFT;
+ size->gtIntCon.gtIconVal = amount; // update the GT_CNS value in the node
+ if (amount < compiler->eeGetPageSize()) // must be < not <=
+ {
+ // Since the size is a page or less, simply adjust ESP
+
+ // ESP might already be in the guard page, must touch it BEFORE
+ // the alloc, not after.
+ regCnt = regSet.rsGrabReg(RBM_ALLINT);
+ inst_RV_RV(INS_mov, regCnt, REG_SPBASE, TYP_I_IMPL);
+#if CPU_LOAD_STORE_ARCH
+ regNumber regTmp = regSet.rsGrabReg(RBM_ALLINT & ~genRegMask(regCnt));
+ getEmitter()->emitIns_R_R_I(INS_ldr, EA_PTRSIZE, regTmp, REG_SPBASE, 0);
+ regTracker.rsTrackRegTrash(regTmp);
+#else
+ getEmitter()->emitIns_AR_R(INS_TEST, EA_4BYTE, REG_SPBASE, REG_SPBASE, 0);
+#endif
+ inst_RV_IV(INS_sub, regCnt, amount, EA_PTRSIZE);
+ inst_RV_RV(INS_mov, REG_SPBASE, regCnt, TYP_I_IMPL);
+ regTracker.rsTrackRegTrash(regCnt);
+ goto DONE;
+ }
+ }
+ }
+ }
+
+ // Compute the size of the block to allocate
+ genCompIntoFreeReg(size, 0, RegSet::KEEP_REG);
+ noway_assert(size->gtFlags & GTF_REG_VAL);
+ regCnt = size->gtRegNum;
+
+#if FEATURE_FIXED_OUT_ARGS
+ // If we have an outgoing arg area then we must adjust the SP
+ // essentially popping off the outgoing arg area,
+ // We will restore it right before we return from this method
+ //
+ if ((compiler->lvaOutgoingArgSpaceSize > 0) && !stackAdjusted)
+ {
+ assert((compiler->lvaOutgoingArgSpaceSize % STACK_ALIGN) ==
+ 0); // This must be true for the stack to remain aligned
+ inst_RV_IV(INS_add, REG_SPBASE, compiler->lvaOutgoingArgSpaceSize, EA_PTRSIZE);
+ stackAdjusted = true;
+ }
+#endif
+
+ // Perform alignment if we don't have a GT_CNS size
+ //
+ if (!size->IsCnsIntOrI())
+ {
+ endLabel = genCreateTempLabel();
+
+ // If 0 we bail out
+ instGen_Compare_Reg_To_Zero(easz, regCnt); // set flags
+ emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED);
+ inst_JMP(jmpEqual, endLabel);
+
+ // Align to STACK_ALIGN
+ inst_RV_IV(INS_add, regCnt, (STACK_ALIGN - 1), emitActualTypeSize(type));
+
+ if (compiler->info.compInitMem)
+ {
+#if ((STACK_ALIGN >> STACK_ALIGN_SHIFT) > 1)
+ // regCnt will be the number of pointer-sized words to locAlloc
+ // If the shift right won't do the 'and' do it here
+ inst_RV_IV(INS_AND, regCnt, ~(STACK_ALIGN - 1), emitActualTypeSize(type));
+#endif
+ // --- shr regCnt, 2 ---
+ inst_RV_SH(INS_SHIFT_RIGHT_LOGICAL, EA_PTRSIZE, regCnt, STACK_ALIGN_SHIFT);
+ }
+ else
+ {
+ // regCnt will be the total number of bytes to locAlloc
+
+ inst_RV_IV(INS_AND, regCnt, ~(STACK_ALIGN - 1), emitActualTypeSize(type));
+ }
+ }
+
+ BasicBlock* loop;
+ loop = genCreateTempLabel();
+
+ if (compiler->info.compInitMem)
+ {
+ // At this point 'regCnt' is set to the number of pointer-sized words to locAlloc
+
+ /* Since we have to zero out the allocated memory AND ensure that
+ ESP is always valid by tickling the pages, we will just push 0's
+ on the stack */
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#if defined(_TARGET_ARM_)
+ regNumber regZero1 = regSet.rsGrabReg(RBM_ALLINT & ~genRegMask(regCnt));
+ regNumber regZero2 = regSet.rsGrabReg(RBM_ALLINT & ~genRegMask(regCnt) & ~genRegMask(regZero1));
+ // Set 'regZero1' and 'regZero2' to zero
+ instGen_Set_Reg_To_Zero(EA_PTRSIZE, regZero1);
+ instGen_Set_Reg_To_Zero(EA_PTRSIZE, regZero2);
+#endif
+
+ // Loop:
+ genDefineTempLabel(loop);
+
+#if defined(_TARGET_X86_)
+
+ inst_IV(INS_push_hide, 0); // --- push 0
+ // Are we done?
+ inst_RV(INS_dec, regCnt, type);
+
+#elif defined(_TARGET_ARM_)
+
+ inst_IV(INS_push, (unsigned)(genRegMask(regZero1) | genRegMask(regZero2)));
+ // Are we done?
+ inst_RV_IV(INS_sub, regCnt, 2, emitActualTypeSize(type), INS_FLAGS_SET);
+
+#else
+ assert(!"Codegen missing");
+#endif // TARGETS
+
+ emitJumpKind jmpNotEqual = genJumpKindForOper(GT_NE, CK_SIGNED);
+ inst_JMP(jmpNotEqual, loop);
+
+ // Move the final value of ESP into regCnt
+ inst_RV_RV(INS_mov, regCnt, REG_SPBASE);
+ regTracker.rsTrackRegTrash(regCnt);
+ }
+ else
+ {
+ // At this point 'regCnt' is set to the total number of bytes to locAlloc
+
+ /* We don't need to zero out the allocated memory. However, we do have
+ to tickle the pages to ensure that ESP is always valid and is
+ in sync with the "stack guard page". Note that in the worst
+ case ESP is on the last byte of the guard page. Thus you must
+           touch ESP+0 first, not ESP+0x1000.
+
+ Another subtlety is that you don't want ESP to be exactly on the
+ boundary of the guard page because PUSH is predecrement, thus
+ call setup would not touch the guard page but just beyond it */
+
+ /* Note that we go through a few hoops so that ESP never points to
+ illegal pages at any time during the ticking process
+
+ neg REG
+ add REG, ESP // reg now holds ultimate ESP
+             jb loop                  // result is smaller than original ESP (no wrap around)
+ xor REG, REG, // Overflow, pick lowest possible number
+ loop:
+ test ESP, [ESP+0] // X86 - tickle the page
+ ldr REGH,[ESP+0] // ARM - tickle the page
+ mov REGH, ESP
+ sub REGH, PAGE_SIZE
+ mov ESP, REGH
+ cmp ESP, REG
+ jae loop
+
+ mov ESP, REG
+ end:
+ */
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef _TARGET_ARM_
+
+ inst_RV_RV_RV(INS_sub, regCnt, REG_SPBASE, regCnt, EA_4BYTE, INS_FLAGS_SET);
+ inst_JMP(EJ_hs, loop);
+#else
+ inst_RV(INS_NEG, regCnt, TYP_I_IMPL);
+ inst_RV_RV(INS_add, regCnt, REG_SPBASE, TYP_I_IMPL);
+ inst_JMP(EJ_jb, loop);
+#endif
+ regTracker.rsTrackRegTrash(regCnt);
+
+ instGen_Set_Reg_To_Zero(EA_PTRSIZE, regCnt);
+
+ genDefineTempLabel(loop);
+
+ // This is a workaround to avoid the emitter trying to track the
+ // decrement of the ESP - we do the subtraction in another reg
+ // instead of adjusting ESP directly.
+
+ regNumber regTemp = regSet.rsPickReg();
+
+        // Tickle the decremented value and then move it back into ESP.
+        // Note that this has to be done BEFORE the update of ESP, since
+        // ESP might already be on the guard page. It is OK to leave
+        // the final value of ESP on the guard page.
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#if CPU_LOAD_STORE_ARCH
+ getEmitter()->emitIns_R_R_I(INS_ldr, EA_4BYTE, regTemp, REG_SPBASE, 0);
+#else
+ getEmitter()->emitIns_AR_R(INS_TEST, EA_4BYTE, REG_SPBASE, REG_SPBASE, 0);
+#endif
+
+ inst_RV_RV(INS_mov, regTemp, REG_SPBASE, TYP_I_IMPL);
+ regTracker.rsTrackRegTrash(regTemp);
+
+ inst_RV_IV(INS_sub, regTemp, compiler->eeGetPageSize(), EA_PTRSIZE);
+ inst_RV_RV(INS_mov, REG_SPBASE, regTemp, TYP_I_IMPL);
+
+ genRecoverReg(size, RBM_ALLINT,
+ RegSet::KEEP_REG); // not purely the 'size' tree anymore; though it is derived from 'size'
+ noway_assert(size->gtFlags & GTF_REG_VAL);
+ regCnt = size->gtRegNum;
+ inst_RV_RV(INS_cmp, REG_SPBASE, regCnt, TYP_I_IMPL);
+ emitJumpKind jmpGEU = genJumpKindForOper(GT_GE, CK_UNSIGNED);
+ inst_JMP(jmpGEU, loop);
+
+ // Move the final value to ESP
+ inst_RV_RV(INS_mov, REG_SPBASE, regCnt);
+ }
+ regSet.rsMarkRegFree(genRegMask(regCnt));
+
+DONE:
+
+ noway_assert(regCnt != DUMMY_INIT(REG_CORRUPT));
+
+ if (endLabel != NULL)
+ genDefineTempLabel(endLabel);
+
+#if FEATURE_FIXED_OUT_ARGS
+ // If we have an outgoing arg area then we must readjust the SP
+ //
+ if (stackAdjusted)
+ {
+ assert(compiler->lvaOutgoingArgSpaceSize > 0);
+ assert((compiler->lvaOutgoingArgSpaceSize % STACK_ALIGN) ==
+ 0); // This must be true for the stack to remain aligned
+ inst_RV_IV(INS_sub, REG_SPBASE, compiler->lvaOutgoingArgSpaceSize, EA_PTRSIZE);
+ }
+#endif
+
+ /* Write the lvaShadowSPfirst stack frame slot */
+ noway_assert(compiler->lvaLocAllocSPvar != BAD_VAR_NUM);
+ getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_SPBASE, compiler->lvaLocAllocSPvar, 0);
+
+#if STACK_PROBES
+    // We don't think it is worth the codegen complexity to embed this
+    // in each of the customized allocas, even though it would be possible.
+ if (compiler->opts.compNeedStackProbes)
+ {
+ genGenerateStackProbe();
+ }
+#endif
+
+#ifdef DEBUG
+ // Update new ESP
+ if (compiler->opts.compStackCheckOnRet)
+ {
+ noway_assert(compiler->lvaReturnEspCheck != 0xCCCCCCCC &&
+ compiler->lvaTable[compiler->lvaReturnEspCheck].lvDoNotEnregister &&
+ compiler->lvaTable[compiler->lvaReturnEspCheck].lvOnFrame);
+ getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_SPBASE, compiler->lvaReturnEspCheck, 0);
+ }
+#endif
+
+ return regCnt;
+}
+
+/*****************************************************************************/
+#ifdef DEBUGGING_SUPPORT
+/*****************************************************************************
+ * genSetScopeInfo
+ *
+ * Called by the main genSetScopeInfo() for every scope info piece to be recorded.
+ */
+
+void CodeGen::genSetScopeInfo(unsigned which,
+ UNATIVE_OFFSET startOffs,
+ UNATIVE_OFFSET length,
+ unsigned varNum,
+ unsigned LVnum,
+ bool avail,
+ Compiler::siVarLoc& varLoc)
+{
+ /* We need to do some mapping while reporting back these variables */
+
+ unsigned ilVarNum = compiler->compMap2ILvarNum(varNum);
+ noway_assert((int)ilVarNum != ICorDebugInfo::UNKNOWN_ILNUM);
+
+#ifdef _TARGET_X86_
+ // Non-x86 platforms are allowed to access all arguments directly
+ // so we don't need this code.
+
+ // Is this a varargs function?
+
+ if (compiler->info.compIsVarArgs && varNum != compiler->lvaVarargsHandleArg &&
+ varNum < compiler->info.compArgsCount && !compiler->lvaTable[varNum].lvIsRegArg)
+ {
+ noway_assert(varLoc.vlType == Compiler::VLT_STK || varLoc.vlType == Compiler::VLT_STK2);
+
+ // All stack arguments (except the varargs handle) have to be
+ // accessed via the varargs cookie. Discard generated info,
+ // and just find its position relative to the varargs handle
+
+ PREFIX_ASSUME(compiler->lvaVarargsHandleArg < compiler->info.compArgsCount);
+ if (!compiler->lvaTable[compiler->lvaVarargsHandleArg].lvOnFrame)
+ {
+ noway_assert(!compiler->opts.compDbgCode);
+ return;
+ }
+
+ // Can't check compiler->lvaTable[varNum].lvOnFrame as we don't set it for
+ // arguments of vararg functions to avoid reporting them to GC.
+ noway_assert(!compiler->lvaTable[varNum].lvRegister);
+ unsigned cookieOffset = compiler->lvaTable[compiler->lvaVarargsHandleArg].lvStkOffs;
+ unsigned varOffset = compiler->lvaTable[varNum].lvStkOffs;
+
+ noway_assert(cookieOffset < varOffset);
+ unsigned offset = varOffset - cookieOffset;
+ unsigned stkArgSize = compiler->compArgSize - intRegState.rsCalleeRegArgCount * sizeof(void*);
+ noway_assert(offset < stkArgSize);
+ offset = stkArgSize - offset;
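+
+        // Hypothetical example: with cookieOffset == 8, varOffset == 16 and stkArgSize == 24,
+        // this computes offset == 24 - (16 - 8) == 16, which is the value reported below as
+        // the VLT_FIXED_VA offset.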
+
+ varLoc.vlType = Compiler::VLT_FIXED_VA;
+ varLoc.vlFixedVarArg.vlfvOffset = offset;
+ }
+
+#endif // _TARGET_X86_
+
+ VarName name = NULL;
+
+#ifdef DEBUG
+
+ for (unsigned scopeNum = 0; scopeNum < compiler->info.compVarScopesCount; scopeNum++)
+ {
+ if (LVnum == compiler->info.compVarScopes[scopeNum].vsdLVnum)
+ {
+ name = compiler->info.compVarScopes[scopeNum].vsdName;
+ }
+ }
+
+ // Hang on to this compiler->info.
+
+ TrnslLocalVarInfo& tlvi = genTrnslLocalVarInfo[which];
+
+ tlvi.tlviVarNum = ilVarNum;
+ tlvi.tlviLVnum = LVnum;
+ tlvi.tlviName = name;
+ tlvi.tlviStartPC = startOffs;
+ tlvi.tlviLength = length;
+ tlvi.tlviAvailable = avail;
+ tlvi.tlviVarLoc = varLoc;
+
+#endif // DEBUG
+
+ compiler->eeSetLVinfo(which, startOffs, length, ilVarNum, LVnum, name, avail, varLoc);
+}
+
+#endif // DEBUGGING_SUPPORT
+
+/*****************************************************************************
+ *
+ * Return true if the given register is free after the given tree is
+ * evaluated (i.e. the register is either not used at all, or it holds a
+ * register variable which is not live after the given node).
+ * This is only called by genCreateAddrMode, when tree is a GT_ADD, with one
+ * constant operand, and one that's in a register. Thus, the only thing we
+ * need to determine is whether the register holding the non-constant operand is dead.
+ */
+bool CodeGen::genRegTrashable(regNumber reg, GenTreePtr tree)
+{
+ regMaskTP vars;
+ regMaskTP mask = genRegMask(reg);
+
+ if (regSet.rsMaskUsed & mask)
+ return false;
+
+ assert(tree->gtOper == GT_ADD);
+ GenTreePtr regValTree = tree->gtOp.gtOp1;
+ if (!tree->gtOp.gtOp2->IsCnsIntOrI())
+ {
+ regValTree = tree->gtOp.gtOp2;
+ assert(tree->gtOp.gtOp1->IsCnsIntOrI());
+ }
+ assert(regValTree->gtFlags & GTF_REG_VAL);
+
+ /* At this point, the only way that the register will remain live
+ * is if it is itself a register variable that isn't dying.
+ */
+ assert(regValTree->gtRegNum == reg);
+ if (regValTree->IsRegVar() && !regValTree->IsRegVarDeath())
+ return false;
+ else
+ return true;
+}
+
+/*****************************************************************************/
+//
+// This method calculates the USE and DEF values for a statement.
+// It also calls fgSetRngChkTarget for the statement.
+//
+// We refactored this code out of fgPerBlockLocalVarLiveness
+// and added QMARK logic to it.
+//
+// NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE
+//
+// The usage of this method is very limited.
+// We should only call it for the first node in the statement or
+// for the node after the GTF_RELOP_QMARK node.
+//
+// NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE
+
+/*
+ Since a GT_QMARK tree can take two paths (i.e. the thenTree Path or the elseTree path),
+ when we calculate its fgCurDefSet and fgCurUseSet, we need to combine the results
+ from both trees.
+
+ Note that the GT_QMARK trees are threaded as shown below with nodes 1 to 11
+ linked by gtNext.
+
+ The algorithm we use is:
+    (1) We walk these nodes according to the evaluation order (i.e. from node 1 to node 11).
+ (2) When we see the GTF_RELOP_QMARK node, we know we are about to split the path.
+ We cache copies of current fgCurDefSet and fgCurUseSet.
+        (The fact that it recursively calls itself is for the nested QMARK case,
+ where we need to remember multiple copies of fgCurDefSet and fgCurUseSet.)
+ (3) We walk the thenTree.
+ (4) When we see GT_COLON node, we know that we just finished the thenTree.
+ We then make a copy of the current fgCurDefSet and fgCurUseSet,
+ restore them to the ones before the thenTree, and then continue walking
+ the elseTree.
+ (5) When we see the GT_QMARK node, we know we just finished the elseTree.
+ So we combine the results from the thenTree and elseTree and then return.
+
+
+ +--------------------+
+ | GT_QMARK 11|
+ +----------+---------+
+ |
+ *
+ / \
+ / \
+ / \
+ +---------------------+ +--------------------+
+ | GT_<cond> 3 | | GT_COLON 7 |
+ | w/ GTF_RELOP_QMARK | | w/ GTF_COLON_COND |
+ +----------+----------+ +---------+----------+
+ | |
+ * *
+ / \ / \
+ / \ / \
+ / \ / \
+ 2 1 thenTree 6 elseTree 10
+ x | |
+ / * *
+ +----------------+ / / \ / \
+ |prevExpr->gtNext+------/ / \ / \
+ +----------------+ / \ / \
+ 5 4 9 8
+
+
+*/
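+
+/*
+    A small worked example (illustrative only): for a statement such as "x = cond ? a : b",
+    the thenTree contributes a use of 'a' and the elseTree a use of 'b'. At the GT_QMARK node
+    the def sets of the two paths are intersected (a variable counts as defined only if both
+    paths define it) and the use sets are unioned - which is what the IntersectionD/UnionD
+    calls in fgLegacyPerStatementLocalVarLiveness below do.
+*/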
+
+GenTreePtr Compiler::fgLegacyPerStatementLocalVarLiveness(GenTreePtr startNode, // The node to start walking with.
+ GenTreePtr relopNode, // The node before the startNode.
+ // (It should either be NULL or
+ // a GTF_RELOP_QMARK node.)
+ GenTreePtr asgdLclVar)
+{
+ GenTreePtr tree;
+
+ VARSET_TP VARSET_INIT(this, defSet_BeforeSplit, fgCurDefSet); // Store the current fgCurDefSet and fgCurUseSet so
+    VARSET_TP VARSET_INIT(this, useSet_BeforeSplit, fgCurUseSet); // we can restore them before entering the elseTree.
+
+ bool heapUse_BeforeSplit = fgCurHeapUse;
+ bool heapDef_BeforeSplit = fgCurHeapDef;
+ bool heapHavoc_BeforeSplit = fgCurHeapHavoc;
+
+ VARSET_TP VARSET_INIT_NOCOPY(defSet_AfterThenTree, VarSetOps::MakeEmpty(this)); // These two variables will store
+ // the USE and DEF sets after
+ VARSET_TP VARSET_INIT_NOCOPY(useSet_AfterThenTree, VarSetOps::MakeEmpty(this)); // evaluating the thenTree.
+
+ bool heapUse_AfterThenTree = fgCurHeapUse;
+ bool heapDef_AfterThenTree = fgCurHeapDef;
+ bool heapHavoc_AfterThenTree = fgCurHeapHavoc;
+
+ // relopNode is either NULL or a GTF_RELOP_QMARK node.
+ assert(!relopNode || (relopNode->OperKind() & GTK_RELOP) && (relopNode->gtFlags & GTF_RELOP_QMARK));
+
+ // If relopNode is NULL, then the startNode must be the 1st node of the statement.
+ // If relopNode is non-NULL, then the startNode must be the node right after the GTF_RELOP_QMARK node.
+ assert((!relopNode && startNode == compCurStmt->gtStmt.gtStmtList) ||
+ (relopNode && startNode == relopNode->gtNext));
+
+ for (tree = startNode; tree; tree = tree->gtNext)
+ {
+ switch (tree->gtOper)
+ {
+
+ case GT_QMARK:
+
+ // This must be a GT_QMARK node whose GTF_RELOP_QMARK node is recursively calling us.
+ noway_assert(relopNode && tree->gtOp.gtOp1 == relopNode);
+
+ // By the time we see a GT_QMARK, we must have finished processing the elseTree.
+                // So it's time to combine the results
+                // from the thenTree and the elseTree, and then return.
+
+ VarSetOps::IntersectionD(this, fgCurDefSet, defSet_AfterThenTree);
+ VarSetOps::UnionD(this, fgCurUseSet, useSet_AfterThenTree);
+
+ fgCurHeapDef = fgCurHeapDef && heapDef_AfterThenTree;
+ fgCurHeapHavoc = fgCurHeapHavoc && heapHavoc_AfterThenTree;
+ fgCurHeapUse = fgCurHeapUse || heapUse_AfterThenTree;
+
+ // Return the GT_QMARK node itself so the caller can continue from there.
+ // NOTE: the caller will get to the next node by doing the "tree = tree->gtNext"
+ // in the "for" statement.
+ goto _return;
+
+ case GT_COLON:
+ // By the time we see GT_COLON, we must have just walked the thenTree.
+ // So we need to do two things here.
+ // (1) Save the current fgCurDefSet and fgCurUseSet so that later we can combine them
+ // with the result from the elseTree.
+                // (2) Restore fgCurDefSet and fgCurUseSet to the points before the thenTree was walked,
+                //     and then continue walking the elseTree.
+ VarSetOps::Assign(this, defSet_AfterThenTree, fgCurDefSet);
+ VarSetOps::Assign(this, useSet_AfterThenTree, fgCurUseSet);
+
+ heapDef_AfterThenTree = fgCurHeapDef;
+ heapHavoc_AfterThenTree = fgCurHeapHavoc;
+ heapUse_AfterThenTree = fgCurHeapUse;
+
+ VarSetOps::Assign(this, fgCurDefSet, defSet_BeforeSplit);
+ VarSetOps::Assign(this, fgCurUseSet, useSet_BeforeSplit);
+
+ fgCurHeapDef = heapDef_BeforeSplit;
+ fgCurHeapHavoc = heapHavoc_BeforeSplit;
+ fgCurHeapUse = heapUse_BeforeSplit;
+
+ break;
+
+ case GT_LCL_VAR:
+ case GT_LCL_FLD:
+ case GT_LCL_VAR_ADDR:
+ case GT_LCL_FLD_ADDR:
+ case GT_STORE_LCL_VAR:
+ case GT_STORE_LCL_FLD:
+ fgMarkUseDef(tree->AsLclVarCommon(), asgdLclVar);
+ break;
+
+ case GT_CLS_VAR:
+                // For Volatile indirection, first mutate the global heap;
+                // see comments in ValueNum.cpp (under case GT_CLS_VAR).
+                // This models Volatile reads as def-then-use of the heap,
+                // and allows for a CSE of a subsequent non-volatile read.
+ if ((tree->gtFlags & GTF_FLD_VOLATILE) != 0)
+ {
+ // For any Volatile indirection, we must handle it as a
+ // definition of the global heap
+ fgCurHeapDef = true;
+ }
+ // If the GT_CLS_VAR is the lhs of an assignment, we'll handle it as a heap def, when we get to
+ // assignment.
+ // Otherwise, we treat it as a use here.
+ if (!fgCurHeapDef && (tree->gtFlags & GTF_CLS_VAR_ASG_LHS) == 0)
+ {
+ fgCurHeapUse = true;
+ }
+ break;
+
+ case GT_IND:
+                // For Volatile indirection, first mutate the global heap;
+                // see comments in ValueNum.cpp (under case GT_CLS_VAR).
+                // This models Volatile reads as def-then-use of the heap,
+                // and allows for a CSE of a subsequent non-volatile read.
+ if ((tree->gtFlags & GTF_IND_VOLATILE) != 0)
+ {
+ // For any Volatile indirection, we must handle it as a
+ // definition of the global heap
+ fgCurHeapDef = true;
+ }
+
+ // If the GT_IND is the lhs of an assignment, we'll handle it
+ // as a heap def, when we get to assignment.
+ // Otherwise, we treat it as a use here.
+ if ((tree->gtFlags & GTF_IND_ASG_LHS) == 0)
+ {
+ GenTreeLclVarCommon* dummyLclVarTree = NULL;
+ bool dummyIsEntire = false;
+ GenTreePtr addrArg = tree->gtOp.gtOp1->gtEffectiveVal(/*commaOnly*/ true);
+ if (!addrArg->DefinesLocalAddr(this, /*width doesn't matter*/ 0, &dummyLclVarTree, &dummyIsEntire))
+ {
+ if (!fgCurHeapDef)
+ {
+ fgCurHeapUse = true;
+ }
+ }
+ else
+ {
+ // Defines a local addr
+ assert(dummyLclVarTree != nullptr);
+ fgMarkUseDef(dummyLclVarTree->AsLclVarCommon(), asgdLclVar);
+ }
+ }
+ break;
+
+ // These should have been morphed away to become GT_INDs:
+ case GT_FIELD:
+ case GT_INDEX:
+ unreached();
+ break;
+
+ // We'll assume these are use-then-defs of the heap.
+ case GT_LOCKADD:
+ case GT_XADD:
+ case GT_XCHG:
+ case GT_CMPXCHG:
+ if (!fgCurHeapDef)
+ {
+ fgCurHeapUse = true;
+ }
+ fgCurHeapDef = true;
+ fgCurHeapHavoc = true;
+ break;
+
+ case GT_MEMORYBARRIER:
+                // Similar to any Volatile indirection, we must handle this as a definition of the global heap
+ fgCurHeapDef = true;
+ break;
+
+ // For now, all calls read/write the heap, the latter in its entirety. Might tighten this case later.
+ case GT_CALL:
+ {
+ GenTreeCall* call = tree->AsCall();
+ bool modHeap = true;
+ if (call->gtCallType == CT_HELPER)
+ {
+ CorInfoHelpFunc helpFunc = eeGetHelperNum(call->gtCallMethHnd);
+
+ if (!s_helperCallProperties.MutatesHeap(helpFunc) && !s_helperCallProperties.MayRunCctor(helpFunc))
+ {
+ modHeap = false;
+ }
+ }
+ if (modHeap)
+ {
+ if (!fgCurHeapDef)
+ {
+ fgCurHeapUse = true;
+ }
+ fgCurHeapDef = true;
+ fgCurHeapHavoc = true;
+ }
+ }
+
+ // If this is a p/invoke unmanaged call or if this is a tail-call
+ // and we have an unmanaged p/invoke call in the method,
+ // then we're going to run the p/invoke epilog.
+ // So we mark the FrameRoot as used by this instruction.
+ // This ensures that the block->bbVarUse will contain
+                // the FrameRoot local var if it is a tracked variable.
+
+ if (tree->gtCall.IsUnmanaged() || (tree->gtCall.IsTailCall() && info.compCallUnmanaged))
+ {
+ /* Get the TCB local and mark it as used */
+
+ noway_assert(info.compLvFrameListRoot < lvaCount);
+
+ LclVarDsc* varDsc = &lvaTable[info.compLvFrameListRoot];
+
+ if (varDsc->lvTracked)
+ {
+ if (!VarSetOps::IsMember(this, fgCurDefSet, varDsc->lvVarIndex))
+ {
+ VarSetOps::AddElemD(this, fgCurUseSet, varDsc->lvVarIndex);
+ }
+ }
+ }
+
+ break;
+
+ default:
+
+ // Determine whether it defines a heap location.
+ if (tree->OperIsAssignment() || tree->OperIsBlkOp())
+ {
+ GenTreeLclVarCommon* dummyLclVarTree = NULL;
+ if (!tree->DefinesLocal(this, &dummyLclVarTree))
+ {
+ // If it doesn't define a local, then it might update the heap.
+ fgCurHeapDef = true;
+ }
+ }
+
+ // Are we seeing a GT_<cond> for a GT_QMARK node?
+ if ((tree->OperKind() & GTK_RELOP) && (tree->gtFlags & GTF_RELOP_QMARK))
+ {
+ // We are about to enter the parallel paths (i.e. the thenTree and the elseTree).
+ // Recursively call fgLegacyPerStatementLocalVarLiveness.
+                    // At the very beginning of fgLegacyPerStatementLocalVarLiveness, we will cache the values of the
+                    // current fgCurDefSet and fgCurUseSet into local variables defSet_BeforeSplit and useSet_BeforeSplit.
+ // The cached values will be used to restore fgCurDefSet and fgCurUseSet once we see the GT_COLON
+ // node.
+ tree = fgLegacyPerStatementLocalVarLiveness(tree->gtNext, tree, asgdLclVar);
+
+ // We must have been returned here after seeing a GT_QMARK node.
+ noway_assert(tree->gtOper == GT_QMARK);
+ }
+
+ break;
+ }
+ }
+
+_return:
+ return tree;
+}
+
+/*****************************************************************************/
+
+/*****************************************************************************
+ * Initialize the TCB local and the NDirect stub, afterwards "push"
+ * the hoisted NDirect stub.
+ *
+ * 'initRegs' is the set of registers which will be zeroed out by the prolog
+ * typically initRegs is zero
+ *
+ * The layout of the NDirect Inlined Call Frame is as follows:
+ * (see VM/frames.h and VM/JITInterface.cpp for more information)
+ *
+ * offset field name when set
+ * --------------------------------------------------------------
+ * +00h vptr for class InlinedCallFrame method prolog
+ * +04h m_Next method prolog
+ * +08h m_Datum call site
+ * +0ch m_pCallSiteTracker (callsite ESP) call site and zeroed in method prolog
+ * +10h m_pCallerReturnAddress call site
+ * +14h m_pCalleeSavedRegisters not set by JIT
+ * +18h JIT retval spill area (int) before call_gc
+ * +1ch JIT retval spill area (long) before call_gc
+ * +20h Saved value of EBP method prolog
+ */
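+
+// A rough C-style sketch of the same layout (illustrative only; the authoritative
+// definition is InlinedCallFrame in VM/frames.h, and the JIT addresses these fields
+// through pInfo->inlinedCallFrameInfo offsets rather than through a struct):
+//
+//     struct InlinedCallFrameSketch
+//     {
+//         void* vptr;                    // +00h  written in the method prolog
+//         void* m_Next;                  // +04h  link into the thread's Frame chain
+//         void* m_Datum;                 // +08h  call target / method token (call site)
+//         void* m_pCallSiteTracker;      // +0ch  call-site ESP
+//         void* m_pCallerReturnAddress;  // +10h  return address of the call site
+//         void* m_pCalleeSavedRegisters; // +14h  not set by the JIT
+//         int   retvalSpillInt;          // +18h  spill slot used around the stop-for-GC helper
+//         int   retvalSpillHigh;         // +1ch  second half, for long returns
+//         void* savedEBP;                // +20h  written in the method prolog
+//     };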
+
+regMaskTP CodeGen::genPInvokeMethodProlog(regMaskTP initRegs)
+{
+ assert(compiler->compGeneratingProlog);
+ noway_assert(!compiler->opts.ShouldUsePInvokeHelpers());
+ noway_assert(compiler->info.compCallUnmanaged);
+
+ CORINFO_EE_INFO* pInfo = compiler->eeGetEEInfo();
+ noway_assert(compiler->lvaInlinedPInvokeFrameVar != BAD_VAR_NUM);
+
+ /* let's find out if compLvFrameListRoot is enregistered */
+
+ LclVarDsc* varDsc = &compiler->lvaTable[compiler->info.compLvFrameListRoot];
+
+ noway_assert(!varDsc->lvIsParam);
+ noway_assert(varDsc->lvType == TYP_I_IMPL);
+
+ DWORD threadTlsIndex, *pThreadTlsIndex;
+
+ threadTlsIndex = compiler->info.compCompHnd->getThreadTLSIndex((void**)&pThreadTlsIndex);
+#if defined(_TARGET_X86_)
+ if (threadTlsIndex == (DWORD)-1 || pInfo->osType != CORINFO_WINNT)
+#else
+ if (true)
+#endif
+ {
+ // Instead of calling GetThread(), and getting GS cookie and
+ // InlinedCallFrame vptr through indirections, we'll call only one helper.
+ // The helper takes frame address in REG_PINVOKE_FRAME, returns TCB in REG_PINVOKE_TCB
+ // and uses REG_PINVOKE_SCRATCH as scratch register.
+ getEmitter()->emitIns_R_S(INS_lea, EA_PTRSIZE, REG_PINVOKE_FRAME, compiler->lvaInlinedPInvokeFrameVar,
+ pInfo->inlinedCallFrameInfo.offsetOfFrameVptr);
+ regTracker.rsTrackRegTrash(REG_PINVOKE_FRAME);
+
+        // We're about to trash REG_PINVOKE_TCB; it had better not be in use!
+ assert((regSet.rsMaskUsed & RBM_PINVOKE_TCB) == 0);
+
+ // Don't use the argument registers (including the special argument in
+ // REG_PINVOKE_FRAME) for computing the target address.
+ regSet.rsLockReg(RBM_ARG_REGS | RBM_PINVOKE_FRAME);
+
+ genEmitHelperCall(CORINFO_HELP_INIT_PINVOKE_FRAME, 0, EA_UNKNOWN);
+
+ regSet.rsUnlockReg(RBM_ARG_REGS | RBM_PINVOKE_FRAME);
+
+ if (varDsc->lvRegister)
+ {
+ regNumber regTgt = varDsc->lvRegNum;
+
+ // we are about to initialize it. So turn the bit off in initRegs to prevent
+ // the prolog reinitializing it.
+ initRegs &= ~genRegMask(regTgt);
+
+ if (regTgt != REG_PINVOKE_TCB)
+ {
+                // move TCB to its register if necessary
+ getEmitter()->emitIns_R_R(INS_mov, EA_PTRSIZE, regTgt, REG_PINVOKE_TCB);
+ regTracker.rsTrackRegTrash(regTgt);
+ }
+ }
+ else
+ {
+ // move TCB to its stack location
+ getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_PINVOKE_TCB,
+ compiler->info.compLvFrameListRoot, 0);
+ }
+
+ // We are done, the rest of this function deals with the inlined case.
+ return initRegs;
+ }
+
+ regNumber regTCB;
+
+ if (varDsc->lvRegister)
+ {
+ regTCB = varDsc->lvRegNum;
+
+ // we are about to initialize it. So turn the bit off in initRegs to prevent
+ // the prolog reinitializing it.
+ initRegs &= ~genRegMask(regTCB);
+ }
+ else // varDsc is allocated on the Stack
+ {
+ regTCB = REG_PINVOKE_TCB;
+ }
+
+#if !defined(_TARGET_ARM_)
+#define WIN_NT_TLS_OFFSET (0xE10)
+#define WIN_NT5_TLS_HIGHOFFSET (0xf94)
+
+ /* get TCB, mov reg, FS:[compiler->info.compEEInfo.threadTlsIndex] */
+
+ // TODO-ARM-CQ: should we inline TlsGetValue here?
+
+ if (threadTlsIndex < 64)
+ {
+ // mov reg, FS:[0xE10+threadTlsIndex*4]
+ getEmitter()->emitIns_R_C(ins_Load(TYP_I_IMPL), EA_PTRSIZE, regTCB, FLD_GLOBAL_FS,
+ WIN_NT_TLS_OFFSET + threadTlsIndex * sizeof(int));
+ regTracker.rsTrackRegTrash(regTCB);
+ }
+ else
+ {
+ noway_assert(pInfo->osMajor >= 5);
+
+ DWORD basePtr = WIN_NT5_TLS_HIGHOFFSET;
+ threadTlsIndex -= 64;
+
+ // mov reg, FS:[0x2c] or mov reg, fs:[0xf94]
+ // mov reg, [reg+threadTlsIndex*4]
+
+ getEmitter()->emitIns_R_C(ins_Load(TYP_I_IMPL), EA_PTRSIZE, regTCB, FLD_GLOBAL_FS, basePtr);
+ getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), EA_PTRSIZE, regTCB, regTCB, threadTlsIndex * sizeof(int));
+ regTracker.rsTrackRegTrash(regTCB);
+ }
+#endif
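+
+    // A minimal sketch of the displacement computation the two cases above encode
+    // (illustrative only; 'tlsSlotDisplacement' is a hypothetical helper, not JIT code):
+    //
+    //     size_t tlsSlotDisplacement(DWORD index)
+    //     {
+    //         if (index < 64)
+    //             return WIN_NT_TLS_OFFSET + index * sizeof(int); // slot is directly in the TEB: FS:[0xE10 + index*4]
+    //         return (index - 64) * sizeof(int); // displacement into the array pointed to by FS:[WIN_NT5_TLS_HIGHOFFSET]
+    //     }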
+
+ /* save TCB in local var if not enregistered */
+
+ if (!varDsc->lvRegister)
+ {
+ getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, regTCB, compiler->info.compLvFrameListRoot, 0);
+ }
+
+ /* set frame's vptr */
+
+ const void *inlinedCallFrameVptr, **pInlinedCallFrameVptr;
+ inlinedCallFrameVptr = compiler->info.compCompHnd->getInlinedCallFrameVptr((void**)&pInlinedCallFrameVptr);
+ noway_assert(inlinedCallFrameVptr != NULL); // if we have the TLS index, vptr must also be known
+
+ instGen_Store_Imm_Into_Lcl(TYP_I_IMPL, EA_HANDLE_CNS_RELOC, (ssize_t)inlinedCallFrameVptr,
+ compiler->lvaInlinedPInvokeFrameVar, pInfo->inlinedCallFrameInfo.offsetOfFrameVptr,
+ REG_PINVOKE_SCRATCH);
+
+ // Set the GSCookie
+ GSCookie gsCookie, *pGSCookie;
+ compiler->info.compCompHnd->getGSCookie(&gsCookie, &pGSCookie);
+ noway_assert(gsCookie != 0); // if we have the TLS index, GS cookie must also be known
+
+ instGen_Store_Imm_Into_Lcl(TYP_I_IMPL, EA_PTRSIZE, (ssize_t)gsCookie, compiler->lvaInlinedPInvokeFrameVar,
+ pInfo->inlinedCallFrameInfo.offsetOfGSCookie, REG_PINVOKE_SCRATCH);
+
+ /* Get current frame root (mov reg2, [reg+offsetOfThreadFrame]) and
+ set next field in frame */
+
+ getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), EA_PTRSIZE, REG_PINVOKE_SCRATCH, regTCB,
+ pInfo->offsetOfThreadFrame);
+ regTracker.rsTrackRegTrash(REG_PINVOKE_SCRATCH);
+
+ getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_PINVOKE_SCRATCH,
+ compiler->lvaInlinedPInvokeFrameVar, pInfo->inlinedCallFrameInfo.offsetOfFrameLink);
+
+ noway_assert(isFramePointerUsed()); // Setup of Pinvoke frame currently requires an EBP style frame
+
+ /* set EBP value in frame */
+ getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, genFramePointerReg(),
+ compiler->lvaInlinedPInvokeFrameVar, pInfo->inlinedCallFrameInfo.offsetOfCalleeSavedFP);
+
+ /* reset track field in frame */
+ instGen_Store_Imm_Into_Lcl(TYP_I_IMPL, EA_PTRSIZE, 0, compiler->lvaInlinedPInvokeFrameVar,
+ pInfo->inlinedCallFrameInfo.offsetOfReturnAddress, REG_PINVOKE_SCRATCH);
+
+ /* get address of our frame */
+
+ getEmitter()->emitIns_R_S(INS_lea, EA_PTRSIZE, REG_PINVOKE_SCRATCH, compiler->lvaInlinedPInvokeFrameVar,
+ pInfo->inlinedCallFrameInfo.offsetOfFrameVptr);
+ regTracker.rsTrackRegTrash(REG_PINVOKE_SCRATCH);
+
+ /* now "push" our N/direct frame */
+
+ getEmitter()->emitIns_AR_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_PINVOKE_SCRATCH, regTCB,
+ pInfo->offsetOfThreadFrame);
+
+ return initRegs;
+}
+
+/*****************************************************************************
+ * Unchain the InlinedCallFrame.
+ * Technically, this is not part of the epilog; it is called when we are generating code for a GT_RETURN node
+ * or tail call.
+ */
+void CodeGen::genPInvokeMethodEpilog()
+{
+ noway_assert(compiler->info.compCallUnmanaged);
+ noway_assert(!compiler->opts.ShouldUsePInvokeHelpers());
+ noway_assert(compiler->compCurBB == compiler->genReturnBB ||
+ (compiler->compTailCallUsed && (compiler->compCurBB->bbJumpKind == BBJ_THROW)) ||
+ (compiler->compJmpOpUsed && (compiler->compCurBB->bbFlags & BBF_HAS_JMP)));
+
+ CORINFO_EE_INFO* pInfo = compiler->eeGetEEInfo();
+ noway_assert(compiler->lvaInlinedPInvokeFrameVar != BAD_VAR_NUM);
+
+ getEmitter()->emitDisableRandomNops();
+ // debug check to make sure that we're not using ESI and/or EDI across this call, except for
+ // compLvFrameListRoot.
+ unsigned regTrashCheck = 0;
+
+ /* XXX Tue 5/29/2007
+ * We explicitly add interference for these in CodeGen::rgPredictRegUse. If you change the code
+ * sequence or registers used, make sure to update the interference for compiler->genReturnLocal.
+ */
+ LclVarDsc* varDsc = &compiler->lvaTable[compiler->info.compLvFrameListRoot];
+ regNumber reg;
+ regNumber reg2 = REG_PINVOKE_FRAME;
+
+ //
+ // Two cases for epilog invocation:
+ //
+ // 1. Return
+ // We can trash the ESI/EDI registers.
+ //
+ // 2. Tail call
+    //    When tail called, we'd like to preserve enregistered args
+    //    in ESI/EDI so we can pass them to the callee.
+ //
+ // For ARM, don't modify SP for storing and restoring the TCB/frame registers.
+ // Instead use the reserved local variable slot.
+ //
+ if (compiler->compCurBB->bbFlags & BBF_HAS_JMP)
+ {
+ if (compiler->rpMaskPInvokeEpilogIntf & RBM_PINVOKE_TCB)
+ {
+#if FEATURE_FIXED_OUT_ARGS
+ // Save the register in the reserved local var slot.
+ getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_PINVOKE_TCB,
+ compiler->lvaPInvokeFrameRegSaveVar, 0);
+#else
+ inst_RV(INS_push, REG_PINVOKE_TCB, TYP_I_IMPL);
+#endif
+ }
+ if (compiler->rpMaskPInvokeEpilogIntf & RBM_PINVOKE_FRAME)
+ {
+#if FEATURE_FIXED_OUT_ARGS
+ // Save the register in the reserved local var slot.
+ getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_PINVOKE_FRAME,
+ compiler->lvaPInvokeFrameRegSaveVar, REGSIZE_BYTES);
+#else
+ inst_RV(INS_push, REG_PINVOKE_FRAME, TYP_I_IMPL);
+#endif
+ }
+ }
+
+ if (varDsc->lvRegister)
+ {
+ reg = varDsc->lvRegNum;
+ if (reg == reg2)
+ reg2 = REG_PINVOKE_TCB;
+
+ regTrashCheck |= genRegMask(reg2);
+ }
+ else
+ {
+ /* mov esi, [tcb address] */
+
+ getEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL), EA_PTRSIZE, REG_PINVOKE_TCB, compiler->info.compLvFrameListRoot,
+ 0);
+ regTracker.rsTrackRegTrash(REG_PINVOKE_TCB);
+ reg = REG_PINVOKE_TCB;
+
+ regTrashCheck = RBM_PINVOKE_TCB | RBM_PINVOKE_FRAME;
+ }
+
+ /* mov edi, [ebp-frame.next] */
+
+ getEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL), EA_PTRSIZE, reg2, compiler->lvaInlinedPInvokeFrameVar,
+ pInfo->inlinedCallFrameInfo.offsetOfFrameLink);
+ regTracker.rsTrackRegTrash(reg2);
+
+ /* mov [esi+offsetOfThreadFrame], edi */
+
+ getEmitter()->emitIns_AR_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, reg2, reg, pInfo->offsetOfThreadFrame);
+
+ noway_assert(!(regSet.rsMaskUsed & regTrashCheck));
+
+ if (compiler->genReturnLocal != BAD_VAR_NUM && compiler->lvaTable[compiler->genReturnLocal].lvTracked &&
+ compiler->lvaTable[compiler->genReturnLocal].lvRegister)
+ {
+ // really make sure we're not clobbering compiler->genReturnLocal.
+ noway_assert(
+ !(genRegMask(compiler->lvaTable[compiler->genReturnLocal].lvRegNum) &
+ ((varDsc->lvRegister ? genRegMask(varDsc->lvRegNum) : 0) | RBM_PINVOKE_TCB | RBM_PINVOKE_FRAME)));
+ }
+
+ (void)regTrashCheck;
+
+ // Restore the registers ESI and EDI.
+ if (compiler->compCurBB->bbFlags & BBF_HAS_JMP)
+ {
+ if (compiler->rpMaskPInvokeEpilogIntf & RBM_PINVOKE_FRAME)
+ {
+#if FEATURE_FIXED_OUT_ARGS
+ // Restore the register from the reserved local var slot.
+ getEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL), EA_PTRSIZE, REG_PINVOKE_FRAME,
+ compiler->lvaPInvokeFrameRegSaveVar, REGSIZE_BYTES);
+#else
+ inst_RV(INS_pop, REG_PINVOKE_FRAME, TYP_I_IMPL);
+#endif
+ regTracker.rsTrackRegTrash(REG_PINVOKE_FRAME);
+ }
+ if (compiler->rpMaskPInvokeEpilogIntf & RBM_PINVOKE_TCB)
+ {
+#if FEATURE_FIXED_OUT_ARGS
+ // Restore the register from the reserved local var slot.
+ getEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL), EA_PTRSIZE, REG_PINVOKE_TCB,
+ compiler->lvaPInvokeFrameRegSaveVar, 0);
+#else
+ inst_RV(INS_pop, REG_PINVOKE_TCB, TYP_I_IMPL);
+#endif
+ regTracker.rsTrackRegTrash(REG_PINVOKE_TCB);
+ }
+ }
+ getEmitter()->emitEnableRandomNops();
+}
+
+/*****************************************************************************
+ This function emits the call-site prolog for direct calls to unmanaged code.
+ It does all the necessary setup of the InlinedCallFrame.
+ frameListRoot specifies the local containing the thread control block.
+ argSize or methodToken is the value to be copied into the m_datum
+ field of the frame (methodToken may be indirected & have a reloc)
+ The function returns the register now containing the thread control block
+ (it could be either enregistered or loaded into one of the scratch registers).
+*/
+
+regNumber CodeGen::genPInvokeCallProlog(LclVarDsc* frameListRoot,
+ int argSize,
+ CORINFO_METHOD_HANDLE methodToken,
+ BasicBlock* returnLabel)
+{
+ // Some stack locals might be 'cached' in registers, we need to trash them
+ // from the regTracker *and* also ensure the gc tracker does not consider
+ // them live (see the next assert). However, they might be live reg vars
+ // that are non-pointers CSE'd from pointers.
+ // That means the register will be live in rsMaskVars, so we can't just
+ // call gcMarkSetNpt().
+ {
+ regMaskTP deadRegs = regTracker.rsTrashRegsForGCInterruptability() & ~RBM_ARG_REGS;
+ gcInfo.gcRegGCrefSetCur &= ~deadRegs;
+ gcInfo.gcRegByrefSetCur &= ~deadRegs;
+
+#ifdef DEBUG
+ deadRegs &= regSet.rsMaskVars;
+ if (deadRegs)
+ {
+ for (LclVarDsc* varDsc = compiler->lvaTable;
+ ((varDsc < (compiler->lvaTable + compiler->lvaCount)) && deadRegs); varDsc++)
+ {
+ if (!varDsc->lvTracked || !varDsc->lvRegister)
+ continue;
+
+ if (!VarSetOps::IsMember(compiler, compiler->compCurLife, varDsc->lvVarIndex))
+ continue;
+
+ regMaskTP varRegMask = genRegMask(varDsc->lvRegNum);
+ if (isRegPairType(varDsc->lvType) && varDsc->lvOtherReg != REG_STK)
+ varRegMask |= genRegMask(varDsc->lvOtherReg);
+
+ if (varRegMask & deadRegs)
+ {
+ // We found the enregistered var that should not be live if it
+ // was a GC pointer.
+ noway_assert(!varTypeIsGC(varDsc));
+ deadRegs &= ~varRegMask;
+ }
+ }
+ }
+#endif // DEBUG
+ }
+
+ /* Since we are using the InlinedCallFrame, we should have spilled all
+ GC pointers to it - even from callee-saved registers */
+
+ noway_assert(((gcInfo.gcRegGCrefSetCur | gcInfo.gcRegByrefSetCur) & ~RBM_ARG_REGS) == 0);
+
+ /* must specify only one of these parameters */
+ noway_assert((argSize == 0) || (methodToken == NULL));
+
+ /* We are about to call unmanaged code directly.
+ Before we can do that we have to emit the following sequence:
+
+ mov dword ptr [frame.callTarget], MethodToken
+ mov dword ptr [frame.callSiteTracker], esp
+ mov reg, dword ptr [tcb_address]
+ mov byte ptr [tcb+offsetOfGcState], 0
+
+ */
+
+ CORINFO_EE_INFO* pInfo = compiler->eeGetEEInfo();
+
+ noway_assert(compiler->lvaInlinedPInvokeFrameVar != BAD_VAR_NUM);
+
+ /* mov dword ptr [frame.callSiteTarget], value */
+
+ if (methodToken == NULL)
+ {
+ /* mov dword ptr [frame.callSiteTarget], argSize */
+ instGen_Store_Imm_Into_Lcl(TYP_INT, EA_4BYTE, argSize, compiler->lvaInlinedPInvokeFrameVar,
+ pInfo->inlinedCallFrameInfo.offsetOfCallTarget);
+ }
+ else
+ {
+ void *embedMethHnd, *pEmbedMethHnd;
+
+ embedMethHnd = (void*)compiler->info.compCompHnd->embedMethodHandle(methodToken, &pEmbedMethHnd);
+
+ noway_assert((!embedMethHnd) != (!pEmbedMethHnd));
+
+ if (embedMethHnd != NULL)
+ {
+ /* mov dword ptr [frame.callSiteTarget], "MethodDesc" */
+
+ instGen_Store_Imm_Into_Lcl(TYP_I_IMPL, EA_HANDLE_CNS_RELOC, (ssize_t)embedMethHnd,
+ compiler->lvaInlinedPInvokeFrameVar,
+ pInfo->inlinedCallFrameInfo.offsetOfCallTarget);
+ }
+ else
+ {
+ /* mov reg, dword ptr [MethodDescIndir]
+ mov dword ptr [frame.callSiteTarget], reg */
+
+ regNumber reg = regSet.rsPickFreeReg();
+
+#if CPU_LOAD_STORE_ARCH
+ instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, reg, (ssize_t)pEmbedMethHnd);
+ getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), EA_PTRSIZE, reg, reg, 0);
+#else // !CPU_LOAD_STORE_ARCH
+ getEmitter()->emitIns_R_AI(ins_Load(TYP_I_IMPL), EA_PTR_DSP_RELOC, reg, (ssize_t)pEmbedMethHnd);
+#endif // !CPU_LOAD_STORE_ARCH
+ regTracker.rsTrackRegTrash(reg);
+ getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, reg, compiler->lvaInlinedPInvokeFrameVar,
+ pInfo->inlinedCallFrameInfo.offsetOfCallTarget);
+ }
+ }
+
+ regNumber tcbReg = REG_NA;
+
+ if (frameListRoot->lvRegister)
+ {
+ tcbReg = frameListRoot->lvRegNum;
+ }
+ else
+ {
+ tcbReg = regSet.rsGrabReg(RBM_ALLINT);
+
+ /* mov reg, dword ptr [tcb address] */
+
+ getEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL), EA_PTRSIZE, tcbReg,
+ (unsigned)(frameListRoot - compiler->lvaTable), 0);
+ regTracker.rsTrackRegTrash(tcbReg);
+ }
+
+#ifdef _TARGET_X86_
+ /* mov dword ptr [frame.callSiteTracker], esp */
+
+ getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_SPBASE, compiler->lvaInlinedPInvokeFrameVar,
+ pInfo->inlinedCallFrameInfo.offsetOfCallSiteSP);
+#endif // _TARGET_X86_
+
+#if CPU_LOAD_STORE_ARCH
+ regNumber tmpReg = regSet.rsGrabReg(RBM_ALLINT & ~genRegMask(tcbReg));
+ getEmitter()->emitIns_J_R(INS_adr, EA_PTRSIZE, returnLabel, tmpReg);
+ regTracker.rsTrackRegTrash(tmpReg);
+ getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, tmpReg, compiler->lvaInlinedPInvokeFrameVar,
+ pInfo->inlinedCallFrameInfo.offsetOfReturnAddress);
+#else // !CPU_LOAD_STORE_ARCH
+ /* mov dword ptr [frame.callSiteReturnAddress], label */
+
+ getEmitter()->emitIns_J_S(ins_Store(TYP_I_IMPL), EA_PTRSIZE, returnLabel, compiler->lvaInlinedPInvokeFrameVar,
+ pInfo->inlinedCallFrameInfo.offsetOfReturnAddress);
+#endif // !CPU_LOAD_STORE_ARCH
+
+#if CPU_LOAD_STORE_ARCH
+ instGen_Set_Reg_To_Zero(EA_1BYTE, tmpReg);
+
+ noway_assert(tmpReg != tcbReg);
+
+ getEmitter()->emitIns_AR_R(ins_Store(TYP_BYTE), EA_1BYTE, tmpReg, tcbReg, pInfo->offsetOfGCState);
+#else // !CPU_LOAD_STORE_ARCH
+ /* mov byte ptr [tcbReg+offsetOfGcState], 0 */
+
+ getEmitter()->emitIns_I_AR(ins_Store(TYP_BYTE), EA_1BYTE, 0, tcbReg, pInfo->offsetOfGCState);
+#endif // !CPU_LOAD_STORE_ARCH
+
+ return tcbReg;
+}
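+
+// How the pieces fit together at a single inlined P/Invoke call site (rough order,
+// illustrative only; the real sequence is driven by the call codegen):
+//
+//     regNumber tcbReg = genPInvokeCallProlog(frameListRoot, argSize, methHnd, returnLabel);
+//     // ... emit the unmanaged call itself ...
+//     genDefineTempLabel(returnLabel);            // the address stored as the frame's return address
+//     genPInvokeCallEpilog(frameListRoot, retVal); // back to cooperative mode, poll for pending GC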
+
+/*****************************************************************************
+ *
+ First we have to mark in the hoisted NDirect stub that we are back
+ in managed code. Then we have to check (a global flag) whether GC is
+ pending or not. If so, we just call into a jit-helper.
+ Right now we have this call always inlined, i.e. we always skip around
+ the jit-helper call.
+ Note:
+ The tcb address is a regular local (initialized in the prolog), so it is either
+ enregistered or in the frame:
+
+ tcb_reg = [tcb_address is enregistered] OR [mov ecx, tcb_address]
+ mov byte ptr[tcb_reg+offsetOfGcState], 1
+ cmp 'global GC pending flag', 0
+ je @f
+ [mov ECX, tcb_reg] OR [ecx was setup above] ; we pass the tcb value to callGC
+ [mov [EBP+spill_area+0], eax] ; spill the int return value if any
+ [mov [EBP+spill_area+4], edx] ; spill the long return value if any
+ call @callGC
+ [mov eax, [EBP+spill_area+0] ] ; reload the int return value if any
+ [mov edx, [EBP+spill_area+4] ] ; reload the long return value if any
+ @f:
+ */
+
+void CodeGen::genPInvokeCallEpilog(LclVarDsc* frameListRoot, regMaskTP retVal)
+{
+ BasicBlock* clab_nostop;
+ CORINFO_EE_INFO* pInfo = compiler->eeGetEEInfo();
+ regNumber reg2;
+ regNumber reg3;
+
+#ifdef _TARGET_ARM_
+ reg3 = REG_R3;
+#else
+ reg3 = REG_EDX;
+#endif
+
+ getEmitter()->emitDisableRandomNops();
+
+ if (frameListRoot->lvRegister)
+ {
+ /* make sure that register is live across the call */
+
+ reg2 = frameListRoot->lvRegNum;
+ noway_assert(genRegMask(reg2) & RBM_INT_CALLEE_SAVED);
+ }
+ else
+ {
+ /* mov reg2, dword ptr [tcb address] */
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef _TARGET_ARM_
+ reg2 = REG_R2;
+#else
+ reg2 = REG_ECX;
+#endif
+
+ getEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL), EA_PTRSIZE, reg2,
+ (unsigned)(frameListRoot - compiler->lvaTable), 0);
+ regTracker.rsTrackRegTrash(reg2);
+ }
+
+#ifdef _TARGET_ARM_
+ /* mov r3, 1 */
+ /* strb [r2+offsetOfGcState], r3 */
+ instGen_Set_Reg_To_Imm(EA_PTRSIZE, reg3, 1);
+ getEmitter()->emitIns_AR_R(ins_Store(TYP_BYTE), EA_1BYTE, reg3, reg2, pInfo->offsetOfGCState);
+#else
+ /* mov byte ptr [tcb+offsetOfGcState], 1 */
+ getEmitter()->emitIns_I_AR(ins_Store(TYP_BYTE), EA_1BYTE, 1, reg2, pInfo->offsetOfGCState);
+#endif
+
+ /* test global flag (we return to managed code) */
+
+ LONG *addrOfCaptureThreadGlobal, **pAddrOfCaptureThreadGlobal;
+
+ addrOfCaptureThreadGlobal =
+ compiler->info.compCompHnd->getAddrOfCaptureThreadGlobal((void**)&pAddrOfCaptureThreadGlobal);
+ noway_assert((!addrOfCaptureThreadGlobal) != (!pAddrOfCaptureThreadGlobal));
+
+ // Can we directly use addrOfCaptureThreadGlobal?
+
+ if (addrOfCaptureThreadGlobal)
+ {
+#ifdef _TARGET_ARM_
+ instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, reg3, (ssize_t)addrOfCaptureThreadGlobal);
+ getEmitter()->emitIns_R_R_I(ins_Load(TYP_INT), EA_4BYTE, reg3, reg3, 0);
+ regTracker.rsTrackRegTrash(reg3);
+ getEmitter()->emitIns_R_I(INS_cmp, EA_4BYTE, reg3, 0);
+#else
+ getEmitter()->emitIns_C_I(INS_cmp, EA_PTR_DSP_RELOC, FLD_GLOBAL_DS, (ssize_t)addrOfCaptureThreadGlobal, 0);
+#endif
+ }
+ else
+ {
+#ifdef _TARGET_ARM_
+ instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, reg3, (ssize_t)pAddrOfCaptureThreadGlobal);
+ getEmitter()->emitIns_R_R_I(ins_Load(TYP_INT), EA_4BYTE, reg3, reg3, 0);
+ regTracker.rsTrackRegTrash(reg3);
+ getEmitter()->emitIns_R_R_I(ins_Load(TYP_INT), EA_4BYTE, reg3, reg3, 0);
+ getEmitter()->emitIns_R_I(INS_cmp, EA_4BYTE, reg3, 0);
+#else // !_TARGET_ARM_
+
+ getEmitter()->emitIns_R_AI(ins_Load(TYP_I_IMPL), EA_PTR_DSP_RELOC, REG_ECX,
+ (ssize_t)pAddrOfCaptureThreadGlobal);
+ regTracker.rsTrackRegTrash(REG_ECX);
+
+ getEmitter()->emitIns_I_AR(INS_cmp, EA_4BYTE, 0, REG_ECX, 0);
+
+#endif // !_TARGET_ARM_
+ }
+
+ /* */
+ clab_nostop = genCreateTempLabel();
+
+ /* Generate the conditional jump */
+ emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED);
+ inst_JMP(jmpEqual, clab_nostop);
+
+#ifdef _TARGET_ARM_
+// The helper preserves the return value on ARM
+#else
+ /* save return value (if necessary) */
+ if (retVal != RBM_NONE)
+ {
+ if (retVal == RBM_INTRET || retVal == RBM_LNGRET)
+ {
+ /* push eax */
+
+ inst_RV(INS_push, REG_INTRET, TYP_INT);
+
+ if (retVal == RBM_LNGRET)
+ {
+ /* push edx */
+
+ inst_RV(INS_push, REG_EDX, TYP_INT);
+ }
+ }
+ }
+#endif
+
+ /* emit the call to the EE-helper that stops for GC (or other reasons) */
+
+ genEmitHelperCall(CORINFO_HELP_STOP_FOR_GC, 0, /* argSize */
+ EA_UNKNOWN); /* retSize */
+
+#ifdef _TARGET_ARM_
+// The helper preserves the return value on ARM
+#else
+ /* restore return value (if necessary) */
+
+ if (retVal != RBM_NONE)
+ {
+ if (retVal == RBM_INTRET || retVal == RBM_LNGRET)
+ {
+ if (retVal == RBM_LNGRET)
+ {
+ /* pop edx */
+
+ inst_RV(INS_pop, REG_EDX, TYP_INT);
+ regTracker.rsTrackRegTrash(REG_EDX);
+ }
+
+ /* pop eax */
+
+ inst_RV(INS_pop, REG_INTRET, TYP_INT);
+ regTracker.rsTrackRegTrash(REG_INTRET);
+ }
+ }
+#endif
+
+ /* genCondJump() closes the current emitter block */
+
+ genDefineTempLabel(clab_nostop);
+
+ // This marks the InlinedCallFrame as "inactive". In fully interruptible code, this is not atomic with
+ // the above code. So the process is:
+ // 1) Return to cooperative mode
+ // 2) Check to see if we need to stop for GC
+ // 3) Return from the p/invoke (as far as the stack walker is concerned).
+
+ /* mov dword ptr [frame.callSiteTracker], 0 */
+
+ instGen_Store_Imm_Into_Lcl(TYP_I_IMPL, EA_PTRSIZE, 0, compiler->lvaInlinedPInvokeFrameVar,
+ pInfo->inlinedCallFrameInfo.offsetOfReturnAddress);
+
+ getEmitter()->emitEnableRandomNops();
+}
+
+/*****************************************************************************/
+
+/*****************************************************************************
+* TRACKING OF FLAGS
+*****************************************************************************/
+
+void CodeGen::genFlagsEqualToNone()
+{
+ genFlagsEqReg = REG_NA;
+ genFlagsEqVar = (unsigned)-1;
+ genFlagsEqLoc.Init();
+}
+
+/*****************************************************************************
+ *
+ * Record the fact that the flags register has a value that reflects the
+ * contents of the given register.
+ */
+
+void CodeGen::genFlagsEqualToReg(GenTreePtr tree, regNumber reg)
+{
+ genFlagsEqLoc.CaptureLocation(getEmitter());
+ genFlagsEqReg = reg;
+
+ /* previous setting of flags by a var becomes invalid */
+
+ genFlagsEqVar = 0xFFFFFFFF;
+
+ /* Set appropriate flags on the tree */
+
+ if (tree)
+ {
+ tree->gtFlags |= GTF_ZSF_SET;
+ assert(tree->gtSetFlags());
+ }
+}
+
+/*****************************************************************************
+ *
+ * Record the fact that the flags register has a value that reflects the
+ * contents of the given local variable.
+ */
+
+void CodeGen::genFlagsEqualToVar(GenTreePtr tree, unsigned var)
+{
+ genFlagsEqLoc.CaptureLocation(getEmitter());
+ genFlagsEqVar = var;
+
+ /* previous setting of flags by a register becomes invalid */
+
+ genFlagsEqReg = REG_NA;
+
+ /* Set appropriate flags on the tree */
+
+ if (tree)
+ {
+ tree->gtFlags |= GTF_ZSF_SET;
+ assert(tree->gtSetFlags());
+ }
+}
+
+/*****************************************************************************
+ *
+ * Return an indication of whether the flags register is set to the current
+ * value of the given register/variable. The return value is as follows:
+ *
+ * false .. nothing
+ *    true  .. the zero flag (ZF) and sign flag (SF) are set
+ */
+
+bool CodeGen::genFlagsAreReg(regNumber reg)
+{
+ if ((genFlagsEqReg == reg) && genFlagsEqLoc.IsCurrentLocation(getEmitter()))
+ {
+ return true;
+ }
+
+ return false;
+}
+
+bool CodeGen::genFlagsAreVar(unsigned var)
+{
+ if ((genFlagsEqVar == var) && genFlagsEqLoc.IsCurrentLocation(getEmitter()))
+ {
+ return true;
+ }
+
+ return false;
+}
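+
+// A typical (illustrative) caller-side pattern, sketched under the assumption that
+// the flags may already reflect 'reg' at the current emit location:
+//
+//     if (!genFlagsAreReg(reg))
+//     {
+//         inst_RV_RV(INS_test, reg, reg, type); // re-establish ZF/SF from reg
+//     }
+//     // ... emit the conditional branch that consumes ZF/SF ...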
+
+/*****************************************************************************
+ * This utility function returns true iff the execution path from "from"
+ * (inclusive) to "to" (exclusive) contains a death of the given var
+ */
+bool CodeGen::genContainsVarDeath(GenTreePtr from, GenTreePtr to, unsigned varNum)
+{
+ GenTreePtr tree;
+ for (tree = from; tree != NULL && tree != to; tree = tree->gtNext)
+ {
+ if (tree->IsLocal() && (tree->gtFlags & GTF_VAR_DEATH))
+ {
+ unsigned dyingVarNum = tree->gtLclVarCommon.gtLclNum;
+ if (dyingVarNum == varNum)
+ return true;
+ LclVarDsc* varDsc = &(compiler->lvaTable[varNum]);
+ if (varDsc->lvPromoted)
+ {
+ assert(varDsc->lvType == TYP_STRUCT);
+ unsigned firstFieldNum = varDsc->lvFieldLclStart;
+ if (varNum >= firstFieldNum && varNum < firstFieldNum + varDsc->lvFieldCnt)
+ {
+ return true;
+ }
+ }
+ }
+ }
+ assert(tree != NULL);
+ return false;
+}
+
+#endif // LEGACY_BACKEND
diff --git a/src/jit/codegenxarch.cpp b/src/jit/codegenxarch.cpp
index 20e666fccc..575624fb44 100644
--- a/src/jit/codegenxarch.cpp
+++ b/src/jit/codegenxarch.cpp
@@ -1,9385 +1,9388 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-// See the LICENSE file in the project root for more information.
-
-/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
-XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
-XX XX
-XX Amd64/x86 Code Generator XX
-XX XX
-XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
-XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
-*/
-#include "jitpch.h"
-#ifdef _MSC_VER
-#pragma hdrstop
-#endif
-
-#ifndef LEGACY_BACKEND // This file is ONLY used for the RyuJIT backend that uses the linear scan register allocator.
-
-#ifdef _TARGET_XARCH_
-#include "emit.h"
-#include "codegen.h"
-#include "lower.h"
-#include "gcinfo.h"
-#include "gcinfoencoder.h"
-
-// Get the register assigned to the given node
-
-regNumber CodeGenInterface::genGetAssignedReg(GenTreePtr tree)
-{
- return tree->gtRegNum;
-}
-
-//------------------------------------------------------------------------
-// genSpillVar: Spill a local variable
-//
-// Arguments:
-// tree - the lclVar node for the variable being spilled
-//
-// Return Value:
-// None.
-//
-// Assumptions:
-// The lclVar must be a register candidate (lvRegCandidate)
-
-void CodeGen::genSpillVar(GenTreePtr tree)
-{
- unsigned varNum = tree->gtLclVarCommon.gtLclNum;
- LclVarDsc* varDsc = &(compiler->lvaTable[varNum]);
-
- assert(varDsc->lvIsRegCandidate());
-
- // We don't actually need to spill if it is already living in memory
- bool needsSpill = ((tree->gtFlags & GTF_VAR_DEF) == 0 && varDsc->lvIsInReg());
- if (needsSpill)
- {
- var_types lclTyp = varDsc->TypeGet();
- if (varDsc->lvNormalizeOnStore())
- {
- lclTyp = genActualType(lclTyp);
- }
- emitAttr size = emitTypeSize(lclTyp);
-
- bool restoreRegVar = false;
- if (tree->gtOper == GT_REG_VAR)
- {
- tree->SetOper(GT_LCL_VAR);
- restoreRegVar = true;
- }
-
- // mask off the flag to generate the right spill code, then bring it back
- tree->gtFlags &= ~GTF_REG_VAL;
-
- instruction storeIns = ins_Store(tree->TypeGet(), compiler->isSIMDTypeLocalAligned(varNum));
-#if CPU_LONG_USES_REGPAIR
- if (varTypeIsMultiReg(tree))
- {
- assert(varDsc->lvRegNum == genRegPairLo(tree->gtRegPair));
- assert(varDsc->lvOtherReg == genRegPairHi(tree->gtRegPair));
- regNumber regLo = genRegPairLo(tree->gtRegPair);
- regNumber regHi = genRegPairHi(tree->gtRegPair);
- inst_TT_RV(storeIns, tree, regLo);
- inst_TT_RV(storeIns, tree, regHi, 4);
- }
- else
-#endif
- {
- assert(varDsc->lvRegNum == tree->gtRegNum);
- inst_TT_RV(storeIns, tree, tree->gtRegNum, 0, size);
- }
- tree->gtFlags |= GTF_REG_VAL;
-
- if (restoreRegVar)
- {
- tree->SetOper(GT_REG_VAR);
- }
-
- genUpdateRegLife(varDsc, /*isBorn*/ false, /*isDying*/ true DEBUGARG(tree));
- gcInfo.gcMarkRegSetNpt(varDsc->lvRegMask());
-
- if (VarSetOps::IsMember(compiler, gcInfo.gcTrkStkPtrLcls, varDsc->lvVarIndex))
- {
-#ifdef DEBUG
- if (!VarSetOps::IsMember(compiler, gcInfo.gcVarPtrSetCur, varDsc->lvVarIndex))
- {
- JITDUMP("\t\t\t\t\t\t\tVar V%02u becoming live\n", varNum);
- }
- else
- {
- JITDUMP("\t\t\t\t\t\t\tVar V%02u continuing live\n", varNum);
- }
-#endif
- VarSetOps::AddElemD(compiler, gcInfo.gcVarPtrSetCur, varDsc->lvVarIndex);
- }
- }
-
- tree->gtFlags &= ~GTF_SPILL;
- varDsc->lvRegNum = REG_STK;
- if (varTypeIsMultiReg(tree))
- {
- varDsc->lvOtherReg = REG_STK;
- }
-}
-
-// inline
-void CodeGenInterface::genUpdateVarReg(LclVarDsc* varDsc, GenTreePtr tree)
-{
- assert(tree->OperIsScalarLocal() || (tree->gtOper == GT_COPY));
- varDsc->lvRegNum = tree->gtRegNum;
-}
-
-/*****************************************************************************/
-/*****************************************************************************/
-
-/*****************************************************************************
- *
- * Generate code that will set the given register to the integer constant.
- */
-
-void CodeGen::genSetRegToIcon(regNumber reg, ssize_t val, var_types type, insFlags flags)
-{
- // Reg cannot be a FP reg
- assert(!genIsValidFloatReg(reg));
-
-    // The only TYP_REF constant that can come along this path is a managed 'null' since it is not
- // relocatable. Other ref type constants (e.g. string objects) go through a different
- // code path.
- noway_assert(type != TYP_REF || val == 0);
-
- if (val == 0)
- {
- instGen_Set_Reg_To_Zero(emitActualTypeSize(type), reg, flags);
- }
- else
- {
- // TODO-XArch-CQ: needs all the optimized cases
- getEmitter()->emitIns_R_I(INS_mov, emitActualTypeSize(type), reg, val);
- }
-}
-
-/*****************************************************************************
- *
- * Generate code to check that the GS cookie wasn't thrashed by a buffer
- * overrun. If pushReg is true, preserve all registers around code sequence.
- * Otherwise ECX could be modified.
- *
- * Implementation Note: pushReg = true, in case of tail calls.
- */
-void CodeGen::genEmitGSCookieCheck(bool pushReg)
-{
- noway_assert(compiler->gsGlobalSecurityCookieAddr || compiler->gsGlobalSecurityCookieVal);
-
- // Make sure that EAX is reported as live GC-ref so that any GC that kicks in while
- // executing GS cookie check will not collect the object pointed to by EAX.
- //
-    // For Amd64 System V, a two-register-returned struct could be returned in RAX and RDX.
-    // In such a case, make sure that the correct GC-ness of RDX is reported as well, so that
-    // a GC object pointed to by RDX will not be collected.
- if (!pushReg)
- {
- // Handle multi-reg return type values
- if (compiler->compMethodReturnsMultiRegRetType())
- {
- ReturnTypeDesc retTypeDesc;
- if (varTypeIsLong(compiler->info.compRetNativeType))
- {
- retTypeDesc.InitializeLongReturnType(compiler);
- }
- else // we must have a struct return type
- {
- retTypeDesc.InitializeStructReturnType(compiler, compiler->info.compMethodInfo->args.retTypeClass);
- }
-
- unsigned regCount = retTypeDesc.GetReturnRegCount();
-
-            // Only the x86 and x64 Unix ABIs allow multi-reg returns, and the
-            // number of result regs should be equal to MAX_RET_REG_COUNT.
- assert(regCount == MAX_RET_REG_COUNT);
-
- for (unsigned i = 0; i < regCount; ++i)
- {
- gcInfo.gcMarkRegPtrVal(retTypeDesc.GetABIReturnReg(i), retTypeDesc.GetReturnRegType(i));
- }
- }
- else if (compiler->compMethodReturnsRetBufAddr())
- {
- // This is for returning in an implicit RetBuf.
- // If the address of the buffer is returned in REG_INTRET, mark the content of INTRET as ByRef.
-
- // In case the return is in an implicit RetBuf, the native return type should be a struct
- assert(varTypeIsStruct(compiler->info.compRetNativeType));
-
- gcInfo.gcMarkRegPtrVal(REG_INTRET, TYP_BYREF);
- }
- // ... all other cases.
- else
- {
-#ifdef _TARGET_AMD64_
- // For x64, structs that are not returned in registers are always
- // returned in implicit RetBuf. If we reached here, we should not have
- // a RetBuf and the return type should not be a struct.
- assert(compiler->info.compRetBuffArg == BAD_VAR_NUM);
- assert(!varTypeIsStruct(compiler->info.compRetNativeType));
-#endif // _TARGET_AMD64_
-
- // For x86 Windows we can't make such assertions since we generate code for returning of
- // the RetBuf in REG_INTRET only when the ProfilerHook is enabled. Otherwise
- // compRetNativeType could be TYP_STRUCT.
- gcInfo.gcMarkRegPtrVal(REG_INTRET, compiler->info.compRetNativeType);
- }
- }
-
- regNumber regGSCheck;
- if (!pushReg)
- {
-        // Non-tail call: we can use any callee-trash register that is neither
-        // a return register nor holding the 'this' pointer (to keep 'this' alive), since
-        // we are generating the GS cookie check after a GT_RETURN block.
- // Note: On Amd64 System V RDX is an arg register - REG_ARG_2 - as well
- // as return register for two-register-returned structs.
- if (compiler->lvaKeepAliveAndReportThis() && compiler->lvaTable[compiler->info.compThisArg].lvRegister &&
- (compiler->lvaTable[compiler->info.compThisArg].lvRegNum == REG_ARG_0))
- {
- regGSCheck = REG_ARG_1;
- }
- else
- {
- regGSCheck = REG_ARG_0;
- }
- }
- else
- {
-#ifdef _TARGET_X86_
- NYI_X86("Tail calls from methods that need GS check");
- regGSCheck = REG_NA;
-#else // !_TARGET_X86_
- // Tail calls from methods that need GS check: We need to preserve registers while
- // emitting GS cookie check for a tail prefixed call or a jmp. To emit GS cookie
- // check, we might need a register. This won't be an issue for jmp calls for the
- // reason mentioned below (see comment starting with "Jmp Calls:").
- //
- // The following are the possible solutions in case of tail prefixed calls:
- // 1) Use R11 - ignore tail prefix on calls that need to pass a param in R11 when
- // present in methods that require GS cookie check. Rest of the tail calls that
- // do not require R11 will be honored.
- // 2) Internal register - GT_CALL node reserves an internal register and emits GS
- // cookie check as part of tail call codegen. GenExitCode() needs to special case
- // fast tail calls implemented as epilog+jmp or such tail calls should always get
- // dispatched via helper.
-    //    3) Materialize GS cookie check as a separate node hanging off GT_CALL node in
- // right execution order during rationalization.
- //
- // There are two calls that use R11: VSD and calli pinvokes with cookie param. Tail
- // prefix on pinvokes is ignored. That is, options 2 and 3 will allow tail prefixed
- // VSD calls from methods that need GS check.
- //
-    // Tail prefixed calls: Right now, for Jit64 compat, a method requiring GS cookie check
-    // ignores the tail prefix. In the future, if we intend to support tail calls from such a method,
- // consider one of the options mentioned above. For now adding an assert that we don't
- // expect to see a tail call in a method that requires GS check.
- noway_assert(!compiler->compTailCallUsed);
-
-    // Jmp calls: these specify a method handle, using which the JIT queries the VM for the entry point
-    // address; hence such a call can be neither a VSD call nor a PInvoke calli with a cookie
-    // parameter. Therefore, in the case of jmp calls it is safe to use R11.
- regGSCheck = REG_R11;
-#endif // !_TARGET_X86_
- }
-
- if (compiler->gsGlobalSecurityCookieAddr == nullptr)
- {
- // If GS cookie value fits within 32-bits we can use 'cmp mem64, imm32'.
- // Otherwise, load the value into a reg and use 'cmp mem64, reg64'.
- if ((int)compiler->gsGlobalSecurityCookieVal != (ssize_t)compiler->gsGlobalSecurityCookieVal)
- {
- genSetRegToIcon(regGSCheck, compiler->gsGlobalSecurityCookieVal, TYP_I_IMPL);
- getEmitter()->emitIns_S_R(INS_cmp, EA_PTRSIZE, regGSCheck, compiler->lvaGSSecurityCookie, 0);
- }
- else
- {
- getEmitter()->emitIns_S_I(INS_cmp, EA_PTRSIZE, compiler->lvaGSSecurityCookie, 0,
- (int)compiler->gsGlobalSecurityCookieVal);
- }
- }
- else
- {
- // Ngen case - GS cookie value needs to be accessed through an indirection.
- instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, regGSCheck, (ssize_t)compiler->gsGlobalSecurityCookieAddr);
- getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), EA_PTRSIZE, regGSCheck, regGSCheck, 0);
- getEmitter()->emitIns_S_R(INS_cmp, EA_PTRSIZE, regGSCheck, compiler->lvaGSSecurityCookie, 0);
- }
-
- BasicBlock* gsCheckBlk = genCreateTempLabel();
- emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED);
- inst_JMP(jmpEqual, gsCheckBlk);
- genEmitHelperCall(CORINFO_HELP_FAIL_FAST, 0, EA_UNKNOWN);
- genDefineTempLabel(gsCheckBlk);
-}
-
-/*****************************************************************************
- *
- * Generate code for all the basic blocks in the function.
- */
-
-void CodeGen::genCodeForBBlist()
-{
- unsigned varNum;
- LclVarDsc* varDsc;
-
- unsigned savedStkLvl;
-
-#ifdef DEBUG
- genInterruptibleUsed = true;
-
- // You have to be careful if you create basic blocks from now on
- compiler->fgSafeBasicBlockCreation = false;
-
-    // This stress mode is not compatible with fully interruptible GC
- if (genInterruptible && compiler->opts.compStackCheckOnCall)
- {
- compiler->opts.compStackCheckOnCall = false;
- }
-
-    // This stress mode is not compatible with fully interruptible GC
- if (genInterruptible && compiler->opts.compStackCheckOnRet)
- {
- compiler->opts.compStackCheckOnRet = false;
- }
-#endif // DEBUG
-
-    // Prepare the blocks for exception handling codegen: mark the blocks that need labels.
- genPrepForEHCodegen();
-
- assert(!compiler->fgFirstBBScratch ||
- compiler->fgFirstBB == compiler->fgFirstBBScratch); // compiler->fgFirstBBScratch has to be first.
-
- /* Initialize the spill tracking logic */
-
- regSet.rsSpillBeg();
-
-#ifdef DEBUGGING_SUPPORT
- /* Initialize the line# tracking logic */
-
- if (compiler->opts.compScopeInfo)
- {
- siInit();
- }
-#endif
-
- // The current implementation of switch tables requires the first block to have a label so it
- // can generate offsets to the switch label targets.
- // TODO-XArch-CQ: remove this when switches have been re-implemented to not use this.
- if (compiler->fgHasSwitch)
- {
- compiler->fgFirstBB->bbFlags |= BBF_JMP_TARGET;
- }
-
- genPendingCallLabel = nullptr;
-
- /* Initialize the pointer tracking code */
-
- gcInfo.gcRegPtrSetInit();
- gcInfo.gcVarPtrSetInit();
-
- /* If any arguments live in registers, mark those regs as such */
-
- for (varNum = 0, varDsc = compiler->lvaTable; varNum < compiler->lvaCount; varNum++, varDsc++)
- {
- /* Is this variable a parameter assigned to a register? */
-
- if (!varDsc->lvIsParam || !varDsc->lvRegister)
- {
- continue;
- }
-
- /* Is the argument live on entry to the method? */
-
- if (!VarSetOps::IsMember(compiler, compiler->fgFirstBB->bbLiveIn, varDsc->lvVarIndex))
- {
- continue;
- }
-
- /* Is this a floating-point argument? */
-
- if (varDsc->IsFloatRegType())
- {
- continue;
- }
-
- noway_assert(!varTypeIsFloating(varDsc->TypeGet()));
-
- /* Mark the register as holding the variable */
-
- regTracker.rsTrackRegLclVar(varDsc->lvRegNum, varNum);
- }
-
- unsigned finallyNesting = 0;
-
- // Make sure a set is allocated for compiler->compCurLife (in the long case), so we can set it to empty without
- // allocation at the start of each basic block.
- VarSetOps::AssignNoCopy(compiler, compiler->compCurLife, VarSetOps::MakeEmpty(compiler));
-
- /*-------------------------------------------------------------------------
- *
- * Walk the basic blocks and generate code for each one
- *
- */
-
- BasicBlock* block;
- BasicBlock* lblk; /* previous block */
-
- for (lblk = nullptr, block = compiler->fgFirstBB; block != nullptr; lblk = block, block = block->bbNext)
- {
-#ifdef DEBUG
- if (compiler->verbose)
- {
- printf("\n=============== Generating ");
- block->dspBlockHeader(compiler, true, true);
- compiler->fgDispBBLiveness(block);
- }
-#endif // DEBUG
-
- // Figure out which registers hold variables on entry to this block
-
- regSet.ClearMaskVars();
- gcInfo.gcRegGCrefSetCur = RBM_NONE;
- gcInfo.gcRegByrefSetCur = RBM_NONE;
-
- compiler->m_pLinearScan->recordVarLocationsAtStartOfBB(block);
-
- genUpdateLife(block->bbLiveIn);
-
- // Even if liveness didn't change, we need to update the registers containing GC references.
- // genUpdateLife will update the registers live due to liveness changes. But what about registers that didn't
- // change? We cleared them out above. Maybe we should just not clear them out, but update the ones that change
- // here. That would require handling the changes in recordVarLocationsAtStartOfBB().
-
- regMaskTP newLiveRegSet = RBM_NONE;
- regMaskTP newRegGCrefSet = RBM_NONE;
- regMaskTP newRegByrefSet = RBM_NONE;
-#ifdef DEBUG
- VARSET_TP VARSET_INIT_NOCOPY(removedGCVars, VarSetOps::MakeEmpty(compiler));
- VARSET_TP VARSET_INIT_NOCOPY(addedGCVars, VarSetOps::MakeEmpty(compiler));
-#endif
- VARSET_ITER_INIT(compiler, iter, block->bbLiveIn, varIndex);
- while (iter.NextElem(compiler, &varIndex))
- {
- unsigned varNum = compiler->lvaTrackedToVarNum[varIndex];
- LclVarDsc* varDsc = &(compiler->lvaTable[varNum]);
-
- if (varDsc->lvIsInReg())
- {
- newLiveRegSet |= varDsc->lvRegMask();
- if (varDsc->lvType == TYP_REF)
- {
- newRegGCrefSet |= varDsc->lvRegMask();
- }
- else if (varDsc->lvType == TYP_BYREF)
- {
- newRegByrefSet |= varDsc->lvRegMask();
- }
-#ifdef DEBUG
- if (verbose && VarSetOps::IsMember(compiler, gcInfo.gcVarPtrSetCur, varIndex))
- {
- VarSetOps::AddElemD(compiler, removedGCVars, varIndex);
- }
-#endif // DEBUG
- VarSetOps::RemoveElemD(compiler, gcInfo.gcVarPtrSetCur, varIndex);
- }
- else if (compiler->lvaIsGCTracked(varDsc))
- {
-#ifdef DEBUG
- if (verbose && !VarSetOps::IsMember(compiler, gcInfo.gcVarPtrSetCur, varIndex))
- {
- VarSetOps::AddElemD(compiler, addedGCVars, varIndex);
- }
-#endif // DEBUG
- VarSetOps::AddElemD(compiler, gcInfo.gcVarPtrSetCur, varIndex);
- }
- }
-
- regSet.rsMaskVars = newLiveRegSet;
-
-#ifdef DEBUG
- if (compiler->verbose)
- {
- if (!VarSetOps::IsEmpty(compiler, addedGCVars))
- {
- printf("\t\t\t\t\t\t\tAdded GCVars: ");
- dumpConvertedVarSet(compiler, addedGCVars);
- printf("\n");
- }
- if (!VarSetOps::IsEmpty(compiler, removedGCVars))
- {
- printf("\t\t\t\t\t\t\tRemoved GCVars: ");
- dumpConvertedVarSet(compiler, removedGCVars);
- printf("\n");
- }
- }
-#endif // DEBUG
-
- gcInfo.gcMarkRegSetGCref(newRegGCrefSet DEBUGARG(true));
- gcInfo.gcMarkRegSetByref(newRegByrefSet DEBUGARG(true));
-
- /* Blocks with handlerGetsXcptnObj()==true use GT_CATCH_ARG to
- represent the exception object (TYP_REF).
- We mark REG_EXCEPTION_OBJECT as holding a GC object on entry
-           to the block; it will be the first thing evaluated
- (thanks to GTF_ORDER_SIDEEFF).
- */
-
- if (handlerGetsXcptnObj(block->bbCatchTyp))
- {
- for (GenTree* node : LIR::AsRange(block))
- {
- if (node->OperGet() == GT_CATCH_ARG)
- {
- gcInfo.gcMarkRegSetGCref(RBM_EXCEPTION_OBJECT);
- break;
- }
- }
- }
-
- /* Start a new code output block */
-
- genUpdateCurrentFunclet(block);
-
- if (genAlignLoops && block->bbFlags & BBF_LOOP_HEAD)
- {
- getEmitter()->emitLoopAlign();
- }
-
-#ifdef DEBUG
- if (compiler->opts.dspCode)
- {
- printf("\n L_M%03u_BB%02u:\n", Compiler::s_compMethodsCount, block->bbNum);
- }
-#endif
-
- block->bbEmitCookie = nullptr;
-
- if (block->bbFlags & (BBF_JMP_TARGET | BBF_HAS_LABEL))
- {
- /* Mark a label and update the current set of live GC refs */
-
- block->bbEmitCookie = getEmitter()->emitAddLabel(gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur,
- gcInfo.gcRegByrefSetCur, FALSE);
- }
-
- if (block == compiler->fgFirstColdBlock)
- {
-#ifdef DEBUG
- if (compiler->verbose)
- {
- printf("\nThis is the start of the cold region of the method\n");
- }
-#endif
- // We should never have a block that falls through into the Cold section
- noway_assert(!lblk->bbFallsThrough());
-
- // We require the block that starts the Cold section to have a label
- noway_assert(block->bbEmitCookie);
- getEmitter()->emitSetFirstColdIGCookie(block->bbEmitCookie);
- }
-
- /* Both stacks are always empty on entry to a basic block */
-
- genStackLevel = 0;
-
- savedStkLvl = genStackLevel;
-
- /* Tell everyone which basic block we're working on */
-
- compiler->compCurBB = block;
-
-#ifdef DEBUGGING_SUPPORT
- siBeginBlock(block);
-
- // BBF_INTERNAL blocks don't correspond to any single IL instruction.
- if (compiler->opts.compDbgInfo && (block->bbFlags & BBF_INTERNAL) &&
- !compiler->fgBBisScratch(block)) // If the block is the distinguished first scratch block, then no need to
- // emit a NO_MAPPING entry, immediately after the prolog.
- {
- genIPmappingAdd((IL_OFFSETX)ICorDebugInfo::NO_MAPPING, true);
- }
-
- bool firstMapping = true;
-#endif // DEBUGGING_SUPPORT
-
- /*---------------------------------------------------------------------
- *
- * Generate code for each statement-tree in the block
- *
- */
- CLANG_FORMAT_COMMENT_ANCHOR;
-
-#if FEATURE_EH_FUNCLETS
- if (block->bbFlags & BBF_FUNCLET_BEG)
- {
- genReserveFuncletProlog(block);
- }
-#endif // FEATURE_EH_FUNCLETS
-
- // Clear compCurStmt and compCurLifeTree.
- compiler->compCurStmt = nullptr;
- compiler->compCurLifeTree = nullptr;
-
-        // Traverse the block in linear order, generating code for each node as we
-        // encounter it.
- CLANG_FORMAT_COMMENT_ANCHOR;
-
-#ifdef DEBUGGING_SUPPORT
- IL_OFFSETX currentILOffset = BAD_IL_OFFSET;
-#endif
- for (GenTree* node : LIR::AsRange(block).NonPhiNodes())
- {
-#ifdef DEBUGGING_SUPPORT
- // Do we have a new IL offset?
- if (node->OperGet() == GT_IL_OFFSET)
- {
- genEnsureCodeEmitted(currentILOffset);
- currentILOffset = node->gtStmt.gtStmtILoffsx;
- genIPmappingAdd(currentILOffset, firstMapping);
- firstMapping = false;
- }
-#endif // DEBUGGING_SUPPORT
-
-#ifdef DEBUG
- if (node->OperGet() == GT_IL_OFFSET)
- {
- noway_assert(node->gtStmt.gtStmtLastILoffs <= compiler->info.compILCodeSize ||
- node->gtStmt.gtStmtLastILoffs == BAD_IL_OFFSET);
-
- if (compiler->opts.dspCode && compiler->opts.dspInstrs &&
- node->gtStmt.gtStmtLastILoffs != BAD_IL_OFFSET)
- {
- while (genCurDispOffset <= node->gtStmt.gtStmtLastILoffs)
- {
- genCurDispOffset += dumpSingleInstr(compiler->info.compCode, genCurDispOffset, "> ");
- }
- }
- }
-#endif // DEBUG
-
- genCodeForTreeNode(node);
- if (node->gtHasReg() && node->gtLsraInfo.isLocalDefUse)
- {
- genConsumeReg(node);
- }
- } // end for each node in block
-
-#ifdef DEBUG
- // The following set of register spill checks and GC pointer tracking checks used to be
- // performed at statement boundaries. Now, with LIR, there are no statements, so they are
- // performed at the end of each block.
- // TODO: could these checks be performed more frequently? E.g., at each location where
- // the register allocator says there are no live non-variable registers. Perhaps this could
- // be done by (a) keeping a running count of live non-variable registers by using
- // gtLsraInfo.srcCount and gtLsraInfo.dstCount to decrement and increment the count, respectively,
- // and running the checks when the count is zero. Or, (b) use the map maintained by LSRA
- // (operandToLocationInfoMap) to mark a node somehow when, after the execution of that node,
- // there will be no live non-variable registers.
-
- regSet.rsSpillChk();
-
- /* Make sure we didn't bungle pointer register tracking */
-
- regMaskTP ptrRegs = gcInfo.gcRegGCrefSetCur | gcInfo.gcRegByrefSetCur;
- regMaskTP nonVarPtrRegs = ptrRegs & ~regSet.rsMaskVars;
-
- // If return is a GC-type, clear it. Note that if a common
- // epilog is generated (genReturnBB) it has a void return
- // even though we might return a ref. We can't use the compRetType
- // as the determiner because something we are tracking as a byref
-    // might be used as a return value of an int function (which is legal)
- GenTree* blockLastNode = block->lastNode();
- if ((blockLastNode != nullptr) && (blockLastNode->gtOper == GT_RETURN) &&
- (varTypeIsGC(compiler->info.compRetType) ||
- (blockLastNode->gtOp.gtOp1 != nullptr && varTypeIsGC(blockLastNode->gtOp.gtOp1->TypeGet()))))
- {
- nonVarPtrRegs &= ~RBM_INTRET;
- }
-
- if (nonVarPtrRegs)
- {
- printf("Regset after BB%02u gcr=", block->bbNum);
- printRegMaskInt(gcInfo.gcRegGCrefSetCur & ~regSet.rsMaskVars);
- compiler->getEmitter()->emitDispRegSet(gcInfo.gcRegGCrefSetCur & ~regSet.rsMaskVars);
- printf(", byr=");
- printRegMaskInt(gcInfo.gcRegByrefSetCur & ~regSet.rsMaskVars);
- compiler->getEmitter()->emitDispRegSet(gcInfo.gcRegByrefSetCur & ~regSet.rsMaskVars);
- printf(", regVars=");
- printRegMaskInt(regSet.rsMaskVars);
- compiler->getEmitter()->emitDispRegSet(regSet.rsMaskVars);
- printf("\n");
- }
-
- noway_assert(nonVarPtrRegs == RBM_NONE);
-#endif // DEBUG
-
-#if defined(DEBUG) && defined(LATE_DISASM) && defined(_TARGET_AMD64_)
- if (block->bbNext == nullptr)
- {
- // Unit testing of the AMD64 emitter: generate a bunch of instructions into the last block
- // (it's as good as any, but better than the prolog, which can only be a single instruction
- // group) then use COMPlus_JitLateDisasm=* to see if the late disassembler
- // thinks the instructions are the same as we do.
- genAmd64EmitterUnitTests();
- }
-#endif // defined(DEBUG) && defined(LATE_DISASM) && defined(_TARGET_AMD64_)
-
-#ifdef DEBUGGING_SUPPORT
- // It is possible to reach the end of the block without generating code for the current IL offset.
- // For example, if the following IR ends the current block, no code will have been generated for
- // offset 21:
- //
- // ( 0, 0) [000040] ------------ il_offset void IL offset: 21
- //
- // N001 ( 0, 0) [000039] ------------ nop void
- //
- // This can lead to problems when debugging the generated code. To prevent these issues, make sure
- // we've generated code for the last IL offset we saw in the block.
- genEnsureCodeEmitted(currentILOffset);
-
- if (compiler->opts.compScopeInfo && (compiler->info.compVarScopesCount > 0))
- {
- siEndBlock(block);
-
- /* Is this the last block, and are there any open scopes left ? */
-
- bool isLastBlockProcessed = (block->bbNext == nullptr);
- if (block->isBBCallAlwaysPair())
- {
- isLastBlockProcessed = (block->bbNext->bbNext == nullptr);
- }
-
- if (isLastBlockProcessed && siOpenScopeList.scNext)
- {
- /* This assert no longer holds, because we may insert a throw
- block to demarcate the end of a try or finally region when they
- are at the end of the method. It would be nice if we could fix
- our code so that this throw block will no longer be necessary. */
-
- // noway_assert(block->bbCodeOffsEnd != compiler->info.compILCodeSize);
-
- siCloseAllOpenScopes();
- }
- }
-
-#endif // DEBUGGING_SUPPORT
-
- genStackLevel -= savedStkLvl;
-
-#ifdef DEBUG
- // compCurLife should be equal to the liveOut set, except that we don't keep
- // it up to date for vars that are not register candidates
- // (it would be nice to have a xor set function)
-
- VARSET_TP VARSET_INIT_NOCOPY(extraLiveVars, VarSetOps::Diff(compiler, block->bbLiveOut, compiler->compCurLife));
- VarSetOps::UnionD(compiler, extraLiveVars, VarSetOps::Diff(compiler, compiler->compCurLife, block->bbLiveOut));
- VARSET_ITER_INIT(compiler, extraLiveVarIter, extraLiveVars, extraLiveVarIndex);
- while (extraLiveVarIter.NextElem(compiler, &extraLiveVarIndex))
- {
- unsigned varNum = compiler->lvaTrackedToVarNum[extraLiveVarIndex];
- LclVarDsc* varDsc = compiler->lvaTable + varNum;
- assert(!varDsc->lvIsRegCandidate());
- }
-#endif
-
- /* Both stacks should always be empty on exit from a basic block */
- noway_assert(genStackLevel == 0);
-
-#ifdef _TARGET_AMD64_
- // On AMD64, we need to generate a NOP after a call that is the last instruction of the block, in several
- // situations, to support proper exception handling semantics. This is mostly to ensure that when the stack
- // walker computes an instruction pointer for a frame, that instruction pointer is in the correct EH region.
- // The document "X64 and ARM ABIs.docx" has more details. The situations:
- // 1. If the call instruction is in a different EH region as the instruction that follows it.
- // 2. If the call immediately precedes an OS epilog. (Note that what the JIT or VM consider an epilog might
- // be slightly different from what the OS considers an epilog, and it is the OS-reported epilog that matters
- // here.)
- // We handle case #1 here, and case #2 in the emitter.
- if (getEmitter()->emitIsLastInsCall())
- {
- // Ok, the last instruction generated is a call instruction. Do any of the other conditions hold?
- // Note: we may be generating a few too many NOPs for the case of call preceding an epilog. Technically,
- // if the next block is a BBJ_RETURN, an epilog will be generated, but there may be some instructions
- // generated before the OS epilog starts, such as a GS cookie check.
- if ((block->bbNext == nullptr) || !BasicBlock::sameEHRegion(block, block->bbNext))
- {
- // We only need the NOP if we're not going to generate any more code as part of the block end.
-
- switch (block->bbJumpKind)
- {
- case BBJ_ALWAYS:
- case BBJ_THROW:
- case BBJ_CALLFINALLY:
- case BBJ_EHCATCHRET:
- // We're going to generate more code below anyway, so no need for the NOP.
-
- case BBJ_RETURN:
- case BBJ_EHFINALLYRET:
- case BBJ_EHFILTERRET:
- // These are the "epilog follows" case, handled in the emitter.
-
- break;
-
- case BBJ_NONE:
- if (block->bbNext == nullptr)
- {
-                        // Call immediately before the end of the code; we should never get here.
- instGen(INS_BREAKPOINT); // This should never get executed
- }
- else
- {
- // We need the NOP
- instGen(INS_nop);
- }
- break;
-
- case BBJ_COND:
- case BBJ_SWITCH:
- // These can't have a call as the last instruction!
-
- default:
- noway_assert(!"Unexpected bbJumpKind");
- break;
- }
- }
- }
-#endif // _TARGET_AMD64_
-
- /* Do we need to generate a jump or return? */
-
- switch (block->bbJumpKind)
- {
- case BBJ_ALWAYS:
- inst_JMP(EJ_jmp, block->bbJumpDest);
- break;
-
- case BBJ_RETURN:
- genExitCode(block);
- break;
-
- case BBJ_THROW:
- // If we have a throw at the end of a function or funclet, we need to emit another instruction
- // afterwards to help the OS unwinder determine the correct context during unwind.
- // We insert an unexecuted breakpoint instruction in several situations
- // following a throw instruction:
- // 1. If the throw is the last instruction of the function or funclet. This helps
- // the OS unwinder determine the correct context during an unwind from the
- // thrown exception.
-            // 2. If this is the last block of the hot section.
- // 3. If the subsequent block is a special throw block.
- // 4. On AMD64, if the next block is in a different EH region.
- if ((block->bbNext == nullptr) || (block->bbNext->bbFlags & BBF_FUNCLET_BEG) ||
- !BasicBlock::sameEHRegion(block, block->bbNext) ||
- (!isFramePointerUsed() && compiler->fgIsThrowHlpBlk(block->bbNext)) ||
- block->bbNext == compiler->fgFirstColdBlock)
- {
- instGen(INS_BREAKPOINT); // This should never get executed
- }
-
- break;
-
- case BBJ_CALLFINALLY:
-
-#if FEATURE_EH_FUNCLETS
-
- // Generate a call to the finally, like this:
- // mov rcx,qword ptr [rbp + 20H] // Load rcx with PSPSym
- // call finally-funclet
- // jmp finally-return // Only for non-retless finally calls
- // The jmp can be a NOP if we're going to the next block.
- // If we're generating code for the main function (not a funclet), and there is no localloc,
- // then RSP at this point is the same value as that stored in the PSPsym. So just copy RSP
- // instead of loading the PSPSym in this case.
-
- if (!compiler->compLocallocUsed && (compiler->funCurrentFunc()->funKind == FUNC_ROOT))
- {
- inst_RV_RV(INS_mov, REG_ARG_0, REG_SPBASE, TYP_I_IMPL);
- }
- else
- {
- getEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL), EA_PTRSIZE, REG_ARG_0, compiler->lvaPSPSym, 0);
- }
- getEmitter()->emitIns_J(INS_call, block->bbJumpDest);
-
- if (block->bbFlags & BBF_RETLESS_CALL)
- {
- // We have a retless call, and the last instruction generated was a call.
- // If the next block is in a different EH region (or is the end of the code
- // block), then we need to generate a breakpoint here (since it will never
- // get executed) to get proper unwind behavior.
-
- if ((block->bbNext == nullptr) || !BasicBlock::sameEHRegion(block, block->bbNext))
- {
- instGen(INS_BREAKPOINT); // This should never get executed
- }
- }
- else
- {
- // Because of the way the flowgraph is connected, the liveness info for this one instruction
- // after the call is not (can not be) correct in cases where a variable has a last use in the
- // handler. So turn off GC reporting for this single instruction.
- getEmitter()->emitDisableGC();
-
- // Now go to where the finally funclet needs to return to.
- if (block->bbNext->bbJumpDest == block->bbNext->bbNext)
- {
- // Fall-through.
- // TODO-XArch-CQ: Can we get rid of this instruction, and just have the call return directly
- // to the next instruction? This would depend on stack walking from within the finally
- // handler working without this instruction being in this special EH region.
- instGen(INS_nop);
- }
- else
- {
- inst_JMP(EJ_jmp, block->bbNext->bbJumpDest);
- }
-
- getEmitter()->emitEnableGC();
- }
-
-#else // !FEATURE_EH_FUNCLETS
-
- // If we are about to invoke a finally locally from a try block, we have to set the ShadowSP slot
- // corresponding to the finally's nesting level. When invoked in response to an exception, the
- // EE does this.
- //
- // We have a BBJ_CALLFINALLY followed by a BBJ_ALWAYS.
- //
- // We will emit :
- // mov [ebp - (n + 1)], 0
- // mov [ebp - n ], 0xFC
- // push &step
- // jmp finallyBlock
- // ...
- // step:
- // mov [ebp - n ], 0
- // jmp leaveTarget
- // ...
- // leaveTarget:
-
- noway_assert(isFramePointerUsed());
-
- // Get the nesting level which contains the finally
- compiler->fgGetNestingLevel(block, &finallyNesting);
-
- // The last slot is reserved for ICodeManager::FixContext(ppEndRegion)
- unsigned filterEndOffsetSlotOffs;
- filterEndOffsetSlotOffs =
- (unsigned)(compiler->lvaLclSize(compiler->lvaShadowSPslotsVar) - TARGET_POINTER_SIZE);
-
- unsigned curNestingSlotOffs;
- curNestingSlotOffs = (unsigned)(filterEndOffsetSlotOffs - ((finallyNesting + 1) * TARGET_POINTER_SIZE));
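-            // (Illustrative sketch, not from the original source: assuming 4-byte pointers and
-            // finallyNesting == 0, filterEndOffsetSlotOffs is the ShadowSP area size minus 4, so the
-            // LCL_FINALLY_MARK below lands 8 bytes below the end of the area, and the slot for the
-            // next nesting level, 12 bytes below the end, is zeroed.)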
-
- // Zero out the slot for the next nesting level
- instGen_Store_Imm_Into_Lcl(TYP_I_IMPL, EA_PTRSIZE, 0, compiler->lvaShadowSPslotsVar,
- curNestingSlotOffs - TARGET_POINTER_SIZE);
- instGen_Store_Imm_Into_Lcl(TYP_I_IMPL, EA_PTRSIZE, LCL_FINALLY_MARK, compiler->lvaShadowSPslotsVar,
- curNestingSlotOffs);
-
- // Now push the address where the finally funclet should return to directly.
- if (!(block->bbFlags & BBF_RETLESS_CALL))
- {
- assert(block->isBBCallAlwaysPair());
- getEmitter()->emitIns_J(INS_push_hide, block->bbNext->bbJumpDest);
- }
- else
- {
-                // EE expects a DWORD, so we give it 0
- inst_IV(INS_push_hide, 0);
- }
-
- // Jump to the finally BB
- inst_JMP(EJ_jmp, block->bbJumpDest);
-
-#endif // !FEATURE_EH_FUNCLETS
-
- // The BBJ_ALWAYS is used because the BBJ_CALLFINALLY can't point to the
- // jump target using bbJumpDest - that is already used to point
- // to the finally block. So just skip past the BBJ_ALWAYS unless the
- // block is RETLESS.
- if (!(block->bbFlags & BBF_RETLESS_CALL))
- {
- assert(block->isBBCallAlwaysPair());
-
- lblk = block;
- block = block->bbNext;
- }
-
- break;
-
-#if FEATURE_EH_FUNCLETS
-
- case BBJ_EHCATCHRET:
- // Set RAX to the address the VM should return to after the catch.
- // Generate a RIP-relative
- // lea reg, [rip + disp32] ; the RIP is implicit
-            // which will be position-independent.
- getEmitter()->emitIns_R_L(INS_lea, EA_PTR_DSP_RELOC, block->bbJumpDest, REG_INTRET);
- __fallthrough;
-
- case BBJ_EHFINALLYRET:
- case BBJ_EHFILTERRET:
- genReserveFuncletEpilog(block);
- break;
-
-#else // !FEATURE_EH_FUNCLETS
-
- case BBJ_EHCATCHRET:
- noway_assert(!"Unexpected BBJ_EHCATCHRET"); // not used on x86
-
- case BBJ_EHFINALLYRET:
- case BBJ_EHFILTERRET:
- {
- // The last statement of the block must be a GT_RETFILT, which has already been generated.
- assert(block->lastNode() != nullptr);
- assert(block->lastNode()->OperGet() == GT_RETFILT);
-
- if (block->bbJumpKind == BBJ_EHFINALLYRET)
- {
- assert(block->lastNode()->gtOp.gtOp1 == nullptr); // op1 == nullptr means endfinally
-
- // Return using a pop-jmp sequence. As the "try" block calls
- // the finally with a jmp, this leaves the x86 call-ret stack
-                // balanced in the normal flow path.
-
- noway_assert(isFramePointerRequired());
- inst_RV(INS_pop_hide, REG_EAX, TYP_I_IMPL);
- inst_RV(INS_i_jmp, REG_EAX, TYP_I_IMPL);
- }
- else
- {
- assert(block->bbJumpKind == BBJ_EHFILTERRET);
-
- // The return value has already been computed.
- instGen_Return(0);
- }
- }
- break;
-
-#endif // !FEATURE_EH_FUNCLETS
-
- case BBJ_NONE:
- case BBJ_COND:
- case BBJ_SWITCH:
- break;
-
- default:
- noway_assert(!"Unexpected bbJumpKind");
- break;
- }
-
-#ifdef DEBUG
- compiler->compCurBB = nullptr;
-#endif
-
- } //------------------ END-FOR each block of the method -------------------
-
- /* Nothing is live at this point */
- genUpdateLife(VarSetOps::MakeEmpty(compiler));
-
- /* Finalize the spill tracking logic */
-
- regSet.rsSpillEnd();
-
- /* Finalize the temp tracking logic */
-
- compiler->tmpEnd();
-
-#ifdef DEBUG
- if (compiler->verbose)
- {
- printf("\n# ");
- printf("compCycleEstimate = %6d, compSizeEstimate = %5d ", compiler->compCycleEstimate,
- compiler->compSizeEstimate);
- printf("%s\n", compiler->info.compFullName);
- }
-#endif
-}
-
-// return the child that has the same reg as the dst (if any)
-// other child returned (out param) in 'other'
-GenTree* sameRegAsDst(GenTree* tree, GenTree*& other /*out*/)
-{
- if (tree->gtRegNum == REG_NA)
- {
- other = nullptr;
- return nullptr;
- }
-
- GenTreePtr op1 = tree->gtOp.gtOp1;
- GenTreePtr op2 = tree->gtOp.gtOp2;
- if (op1->gtRegNum == tree->gtRegNum)
- {
- other = op2;
- return op1;
- }
- if (op2->gtRegNum == tree->gtRegNum)
- {
- other = op1;
- return op2;
- }
- else
- {
- other = nullptr;
- return nullptr;
- }
-}
-
-// Move an immediate value into an integer register
-
-void CodeGen::instGen_Set_Reg_To_Imm(emitAttr size, regNumber reg, ssize_t imm, insFlags flags)
-{
- // reg cannot be a FP register
- assert(!genIsValidFloatReg(reg));
-
- if (!compiler->opts.compReloc)
- {
- size = EA_SIZE(size); // Strip any Reloc flags from size if we aren't doing relocs
- }
-
- if ((imm == 0) && !EA_IS_RELOC(size))
- {
- instGen_Set_Reg_To_Zero(size, reg, flags);
- }
- else
- {
- if (genDataIndirAddrCanBeEncodedAsPCRelOffset(imm))
- {
- getEmitter()->emitIns_R_AI(INS_lea, EA_PTR_DSP_RELOC, reg, imm);
- }
- else
- {
- getEmitter()->emitIns_R_I(INS_mov, size, reg, imm);
- }
- }
- regTracker.rsTrackRegIntCns(reg, imm);
-}
-
-/***********************************************************************************
- *
- * Generate code to set a register 'targetReg' of type 'targetType' to the constant
- * specified by the constant (GT_CNS_INT or GT_CNS_DBL) in 'tree'. This does not call
- * genProduceReg() on the target register.
- */
-void CodeGen::genSetRegToConst(regNumber targetReg, var_types targetType, GenTreePtr tree)
-{
-
- switch (tree->gtOper)
- {
- case GT_CNS_INT:
- {
- // relocatable values tend to come down as a CNS_INT of native int type
- // so the line between these two opcodes is kind of blurry
- GenTreeIntConCommon* con = tree->AsIntConCommon();
- ssize_t cnsVal = con->IconValue();
-
- if (con->ImmedValNeedsReloc(compiler))
- {
- instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, targetReg, cnsVal);
- regTracker.rsTrackRegTrash(targetReg);
- }
- else
- {
- genSetRegToIcon(targetReg, cnsVal, targetType);
- }
- }
- break;
-
- case GT_CNS_DBL:
- {
- double constValue = tree->gtDblCon.gtDconVal;
-
- // Make sure we use "xorpd reg, reg" only for +ve zero constant (0.0) and not for -ve zero (-0.0)
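-            // (+0.0 has an all-zero bit pattern, while -0.0 sets the sign bit, so comparing the raw
-            // 64-bit pattern against zero distinguishes the two.)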
- if (*(__int64*)&constValue == 0)
- {
- // A faster/smaller way to generate 0
- instruction ins = genGetInsForOper(GT_XOR, targetType);
- inst_RV_RV(ins, targetReg, targetReg, targetType);
- }
- else
- {
- GenTreePtr cns;
- if (targetType == TYP_FLOAT)
- {
- float f = forceCastToFloat(constValue);
- cns = genMakeConst(&f, targetType, tree, false);
- }
- else
- {
- cns = genMakeConst(&constValue, targetType, tree, true);
- }
-
- inst_RV_TT(ins_Load(targetType), targetReg, cns);
- }
- }
- break;
-
- default:
- unreached();
- }
-}
-
-// Generate code to get the high N bits of a N*N=2N bit multiplication result
-void CodeGen::genCodeForMulHi(GenTreeOp* treeNode)
-{
- assert(!(treeNode->gtFlags & GTF_UNSIGNED));
- assert(!treeNode->gtOverflowEx());
-
- regNumber targetReg = treeNode->gtRegNum;
- var_types targetType = treeNode->TypeGet();
- emitter* emit = getEmitter();
- emitAttr size = emitTypeSize(treeNode);
- GenTree* op1 = treeNode->gtOp.gtOp1;
- GenTree* op2 = treeNode->gtOp.gtOp2;
-
- // to get the high bits of the multiply, we are constrained to using the
- // 1-op form: RDX:RAX = RAX * rm
- // The 3-op form (Rx=Ry*Rz) does not support it.
-
- genConsumeOperands(treeNode->AsOp());
-
- GenTree* regOp = op1;
- GenTree* rmOp = op2;
-
- // Set rmOp to the contained memory operand (if any)
- //
- if (op1->isContained() || (!op2->isContained() && (op2->gtRegNum == targetReg)))
- {
- regOp = op2;
- rmOp = op1;
- }
- assert(!regOp->isContained());
-
- // Setup targetReg when neither of the source operands was a matching register
- if (regOp->gtRegNum != targetReg)
- {
- inst_RV_RV(ins_Copy(targetType), targetReg, regOp->gtRegNum, targetType);
- }
-
- emit->emitInsBinary(INS_imulEAX, size, treeNode, rmOp);
-
- // Move the result to the desired register, if necessary
- if (targetReg != REG_RDX)
- {
- inst_RV_RV(INS_mov, targetReg, REG_RDX, targetType);
- }
-}
-
-// generate code for a DIV or MOD operation
-//
-void CodeGen::genCodeForDivMod(GenTreeOp* treeNode)
-{
- GenTree* dividend = treeNode->gtOp1;
- GenTree* divisor = treeNode->gtOp2;
- genTreeOps oper = treeNode->OperGet();
- emitAttr size = emitTypeSize(treeNode);
- regNumber targetReg = treeNode->gtRegNum;
- var_types targetType = treeNode->TypeGet();
- emitter* emit = getEmitter();
-
- // dividend is not contained.
- assert(!dividend->isContained());
-
- genConsumeOperands(treeNode->AsOp());
- if (varTypeIsFloating(targetType))
- {
-        // The divisor is either not contained, or if contained it is a memory op.
-        // Note that a reg-optional operand is treated as a memory op
-        // if no register is allocated to it.
- assert(!divisor->isContained() || divisor->isMemoryOp() || divisor->IsCnsFltOrDbl() ||
- divisor->IsRegOptional());
-
- // Floating point div/rem operation
- assert(oper == GT_DIV || oper == GT_MOD);
-
- if (dividend->gtRegNum == targetReg)
- {
- emit->emitInsBinary(genGetInsForOper(treeNode->gtOper, targetType), size, treeNode, divisor);
- }
- else if (!divisor->isContained() && divisor->gtRegNum == targetReg)
- {
- // It is not possible to generate 2-operand divss or divsd where reg2 = reg1 / reg2
- // because divss/divsd reg1, reg2 will over-write reg1. Therefore, in case of AMD64
- // LSRA has to make sure that such a register assignment is not generated for floating
- // point div/rem operations.
- noway_assert(
- !"GT_DIV/GT_MOD (float): case of reg2 = reg1 / reg2, LSRA should never generate such a reg assignment");
- }
- else
- {
- inst_RV_RV(ins_Copy(targetType), targetReg, dividend->gtRegNum, targetType);
- emit->emitInsBinary(genGetInsForOper(treeNode->gtOper, targetType), size, treeNode, divisor);
- }
- }
- else
- {
- // dividend must be in RAX
- if (dividend->gtRegNum != REG_RAX)
- {
- inst_RV_RV(INS_mov, REG_RAX, dividend->gtRegNum, targetType);
- }
-
- // zero or sign extend rax to rdx
- if (oper == GT_UMOD || oper == GT_UDIV)
- {
- instGen_Set_Reg_To_Zero(EA_PTRSIZE, REG_EDX);
- }
- else
- {
- emit->emitIns(INS_cdq, size);
-            // the cdq instruction writes RDX, so clear the gcInfo for RDX
- gcInfo.gcMarkRegSetNpt(RBM_RDX);
- }
-
- // Perform the 'targetType' (64-bit or 32-bit) divide instruction
- instruction ins;
- if (oper == GT_UMOD || oper == GT_UDIV)
- {
- ins = INS_div;
- }
- else
- {
- ins = INS_idiv;
- }
-
- emit->emitInsBinary(ins, size, treeNode, divisor);
-
- // DIV/IDIV instructions always store the quotient in RAX and the remainder in RDX.
- // Move the result to the desired register, if necessary
- if (oper == GT_DIV || oper == GT_UDIV)
- {
- if (targetReg != REG_RAX)
- {
- inst_RV_RV(INS_mov, targetReg, REG_RAX, targetType);
- }
- }
- else
- {
- assert((oper == GT_MOD) || (oper == GT_UMOD));
- if (targetReg != REG_RDX)
- {
- inst_RV_RV(INS_mov, targetReg, REG_RDX, targetType);
- }
- }
- }
- genProduceReg(treeNode);
-}
-
-//------------------------------------------------------------------------
-// genCodeForBinary: Generate code for many binary arithmetic operators
-// This method is expected to have called genConsumeOperands() before calling it.
-//
-// Arguments:
-// treeNode - The binary operation for which we are generating code.
-//
-// Return Value:
-// None.
-//
-// Notes:
-// Mul and div variants have special constraints on x64 so are not handled here.
-//    See the assert below for the operators that are handled.
-
-void CodeGen::genCodeForBinary(GenTree* treeNode)
-{
- const genTreeOps oper = treeNode->OperGet();
- regNumber targetReg = treeNode->gtRegNum;
- var_types targetType = treeNode->TypeGet();
- emitter* emit = getEmitter();
-
-#if defined(_TARGET_64BIT_)
- assert(oper == GT_OR || oper == GT_XOR || oper == GT_AND || oper == GT_ADD || oper == GT_SUB);
-#else // !defined(_TARGET_64BIT_)
- assert(oper == GT_OR || oper == GT_XOR || oper == GT_AND || oper == GT_ADD_LO || oper == GT_ADD_HI ||
- oper == GT_SUB_LO || oper == GT_SUB_HI || oper == GT_MUL_HI || oper == GT_DIV_HI || oper == GT_MOD_HI ||
- oper == GT_ADD || oper == GT_SUB);
-#endif // !defined(_TARGET_64BIT_)
-
- GenTreePtr op1 = treeNode->gtGetOp1();
- GenTreePtr op2 = treeNode->gtGetOp2();
-
- // Commutative operations can mark op1 as contained to generate "op reg, memop/immed"
- if (op1->isContained())
- {
- assert(treeNode->OperIsCommutative());
- assert(op1->isMemoryOp() || op1->IsCnsNonZeroFltOrDbl() || op1->IsIntCnsFitsInI32() || op1->IsRegOptional());
-
- op1 = treeNode->gtGetOp2();
- op2 = treeNode->gtGetOp1();
- }
-
- instruction ins = genGetInsForOper(treeNode->OperGet(), targetType);
-
- // The arithmetic node must be sitting in a register (since it's not contained)
- noway_assert(targetReg != REG_NA);
-
- regNumber op1reg = op1->isContained() ? REG_NA : op1->gtRegNum;
- regNumber op2reg = op2->isContained() ? REG_NA : op2->gtRegNum;
-
- GenTreePtr dst;
- GenTreePtr src;
-
- // This is the case of reg1 = reg1 op reg2
- // We're ready to emit the instruction without any moves
- if (op1reg == targetReg)
- {
- dst = op1;
- src = op2;
- }
- // We have reg1 = reg2 op reg1
-    // In order for this operation to be correct,
-    // the operation must be commutative so that
-    // we can convert it into reg1 = reg1 op reg2 and emit
-    // the same code as above
- else if (op2reg == targetReg)
- {
- noway_assert(GenTree::OperIsCommutative(oper));
- dst = op2;
- src = op1;
- }
- // now we know there are 3 different operands so attempt to use LEA
- else if (oper == GT_ADD && !varTypeIsFloating(treeNode) && !treeNode->gtOverflowEx() // LEA does not set flags
- && (op2->isContainedIntOrIImmed() || !op2->isContained()))
- {
- if (op2->isContainedIntOrIImmed())
- {
- emit->emitIns_R_AR(INS_lea, emitTypeSize(treeNode), targetReg, op1reg,
- (int)op2->AsIntConCommon()->IconValue());
- }
- else
- {
- assert(op2reg != REG_NA);
- emit->emitIns_R_ARX(INS_lea, emitTypeSize(treeNode), targetReg, op1reg, op2reg, 1, 0);
- }
- genProduceReg(treeNode);
- return;
- }
- // dest, op1 and op2 registers are different:
- // reg3 = reg1 op reg2
- // We can implement this by issuing a mov:
- // reg3 = reg1
- // reg3 = reg3 op reg2
- else
- {
- inst_RV_RV(ins_Copy(targetType), targetReg, op1reg, targetType);
- regTracker.rsTrackRegCopy(targetReg, op1reg);
- gcInfo.gcMarkRegPtrVal(targetReg, targetType);
- dst = treeNode;
- src = op2;
- }
-
- // try to use an inc or dec
- if (oper == GT_ADD && !varTypeIsFloating(treeNode) && src->isContainedIntOrIImmed() && !treeNode->gtOverflowEx())
- {
- if (src->IsIntegralConst(1))
- {
- emit->emitIns_R(INS_inc, emitTypeSize(treeNode), targetReg);
- genProduceReg(treeNode);
- return;
- }
- else if (src->IsIntegralConst(-1))
- {
- emit->emitIns_R(INS_dec, emitTypeSize(treeNode), targetReg);
- genProduceReg(treeNode);
- return;
- }
- }
- regNumber r = emit->emitInsBinary(ins, emitTypeSize(treeNode), dst, src);
- noway_assert(r == targetReg);
-
- if (treeNode->gtOverflowEx())
- {
-#if !defined(_TARGET_64BIT_)
- assert(oper == GT_ADD || oper == GT_SUB || oper == GT_ADD_HI || oper == GT_SUB_HI);
-#else
- assert(oper == GT_ADD || oper == GT_SUB);
-#endif
- genCheckOverflow(treeNode);
- }
- genProduceReg(treeNode);
-}
-
-//------------------------------------------------------------------------
-// isStructReturn: Returns whether the 'treeNode' is returning a struct.
-//
-// Arguments:
-// treeNode - The tree node to evaluate whether is a struct return.
-//
-// Return Value:
-//    For AMD64 *nix: returns true if the 'treeNode' is a GT_RETURN node of type struct.
-// Otherwise returns false.
-// For other platforms always returns false.
-//
-bool CodeGen::isStructReturn(GenTreePtr treeNode)
-{
-    // This method could be called for a 'treeNode' of GT_RETFILT or GT_RETURN.
-    // For GT_RETFILT, the return is always
-    // a bool or a void, at the end of a finally block.
- noway_assert(treeNode->OperGet() == GT_RETURN || treeNode->OperGet() == GT_RETFILT);
- if (treeNode->OperGet() != GT_RETURN)
- {
- return false;
- }
-
-#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
- return varTypeIsStruct(treeNode);
-#else // !FEATURE_UNIX_AMD64_STRUCT_PASSING
- assert(!varTypeIsStruct(treeNode));
- return false;
-#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
-}
-
-//------------------------------------------------------------------------
-// genStructReturn: Generates code for returning a struct.
-//
-// Arguments:
-// treeNode - The GT_RETURN tree node.
-//
-// Return Value:
-// None
-//
-// Assumption:
-// op1 of GT_RETURN node is either GT_LCL_VAR or multi-reg GT_CALL
-void CodeGen::genStructReturn(GenTreePtr treeNode)
-{
- assert(treeNode->OperGet() == GT_RETURN);
- GenTreePtr op1 = treeNode->gtGetOp1();
-
-#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
- if (op1->OperGet() == GT_LCL_VAR)
- {
- GenTreeLclVarCommon* lclVar = op1->AsLclVarCommon();
- LclVarDsc* varDsc = &(compiler->lvaTable[lclVar->gtLclNum]);
- assert(varDsc->lvIsMultiRegRet);
-
- ReturnTypeDesc retTypeDesc;
- retTypeDesc.InitializeStructReturnType(compiler, varDsc->lvVerTypeInfo.GetClassHandle());
- unsigned regCount = retTypeDesc.GetReturnRegCount();
- assert(regCount == MAX_RET_REG_COUNT);
-
- if (varTypeIsEnregisterableStruct(op1))
- {
- // Right now the only enregistrable structs supported are SIMD vector types.
- assert(varTypeIsSIMD(op1));
- assert(!op1->isContained());
-
-            // This is the case where the operand is in a single reg and needs to be
-            // returned in multiple ABI return registers.
- regNumber opReg = genConsumeReg(op1);
- regNumber reg0 = retTypeDesc.GetABIReturnReg(0);
- regNumber reg1 = retTypeDesc.GetABIReturnReg(1);
-
- if (opReg != reg0 && opReg != reg1)
- {
- // Operand reg is different from return regs.
-                // Copy opReg to reg0 and let it be handled by one of the
- // two cases below.
- inst_RV_RV(ins_Copy(TYP_DOUBLE), reg0, opReg, TYP_DOUBLE);
- opReg = reg0;
- }
-
- if (opReg == reg0)
- {
- assert(opReg != reg1);
-
- // reg0 - already has required 8-byte in bit position [63:0].
- // reg1 = opReg.
- // swap upper and lower 8-bytes of reg1 so that desired 8-byte is in bit position [63:0].
- inst_RV_RV(ins_Copy(TYP_DOUBLE), reg1, opReg, TYP_DOUBLE);
- }
- else
- {
- assert(opReg == reg1);
-
- // reg0 = opReg.
- // swap upper and lower 8-bytes of reg1 so that desired 8-byte is in bit position [63:0].
- inst_RV_RV(ins_Copy(TYP_DOUBLE), reg0, opReg, TYP_DOUBLE);
- }
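-            // shufpd reg1, reg1, 0x01 places the upper 8 bytes of reg1 in the low lane and the lower
-            // 8 bytes in the high lane, i.e. it swaps the two 8-byte halves in place.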
- inst_RV_RV_IV(INS_shufpd, EA_16BYTE, reg1, reg1, 0x01);
- }
- else
- {
- assert(op1->isContained());
-
- // Copy var on stack into ABI return registers
- int offset = 0;
- for (unsigned i = 0; i < regCount; ++i)
- {
- var_types type = retTypeDesc.GetReturnRegType(i);
- regNumber reg = retTypeDesc.GetABIReturnReg(i);
- getEmitter()->emitIns_R_S(ins_Load(type), emitTypeSize(type), reg, lclVar->gtLclNum, offset);
- offset += genTypeSize(type);
- }
- }
- }
- else
- {
- assert(op1->IsMultiRegCall() || op1->IsCopyOrReloadOfMultiRegCall());
-
- genConsumeRegs(op1);
-
- GenTree* actualOp1 = op1->gtSkipReloadOrCopy();
- GenTreeCall* call = actualOp1->AsCall();
- ReturnTypeDesc* retTypeDesc = call->GetReturnTypeDesc();
- unsigned regCount = retTypeDesc->GetReturnRegCount();
- assert(regCount == MAX_RET_REG_COUNT);
-
- // Handle circular dependency between call allocated regs and ABI return regs.
- //
- // It is possible under LSRA stress that originally allocated regs of call node,
- // say rax and rdx, are spilled and reloaded to rdx and rax respectively. But
-        // GT_RETURN needs to move values as follows: rdx->rax, rax->rdx. A similar
-        // kind of circular dependency could arise between xmm0 and xmm1 return regs.
- // Codegen is expected to handle such circular dependency.
- //
- var_types regType0 = retTypeDesc->GetReturnRegType(0);
- regNumber returnReg0 = retTypeDesc->GetABIReturnReg(0);
- regNumber allocatedReg0 = call->GetRegNumByIdx(0);
-
- var_types regType1 = retTypeDesc->GetReturnRegType(1);
- regNumber returnReg1 = retTypeDesc->GetABIReturnReg(1);
- regNumber allocatedReg1 = call->GetRegNumByIdx(1);
-
- if (op1->IsCopyOrReload())
- {
- // GT_COPY/GT_RELOAD will have valid reg for those positions
- // that need to be copied or reloaded.
- regNumber reloadReg = op1->AsCopyOrReload()->GetRegNumByIdx(0);
- if (reloadReg != REG_NA)
- {
- allocatedReg0 = reloadReg;
- }
-
- reloadReg = op1->AsCopyOrReload()->GetRegNumByIdx(1);
- if (reloadReg != REG_NA)
- {
- allocatedReg1 = reloadReg;
- }
- }
-
- if (allocatedReg0 == returnReg1 && allocatedReg1 == returnReg0)
- {
- // Circular dependency - swap allocatedReg0 and allocatedReg1
- if (varTypeIsFloating(regType0))
- {
- assert(varTypeIsFloating(regType1));
-
- // The fastest way to swap two XMM regs is using PXOR
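-                // (classic xor-swap: a ^= b; b ^= a; a ^= b exchanges the two values without a scratch register)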
- inst_RV_RV(INS_pxor, allocatedReg0, allocatedReg1, TYP_DOUBLE);
- inst_RV_RV(INS_pxor, allocatedReg1, allocatedReg0, TYP_DOUBLE);
- inst_RV_RV(INS_pxor, allocatedReg0, allocatedReg1, TYP_DOUBLE);
- }
- else
- {
- assert(varTypeIsIntegral(regType0));
- assert(varTypeIsIntegral(regType1));
- inst_RV_RV(INS_xchg, allocatedReg1, allocatedReg0, TYP_I_IMPL);
- }
- }
- else if (allocatedReg1 == returnReg0)
- {
- // Change the order of moves to correctly handle dependency.
- if (allocatedReg1 != returnReg1)
- {
- inst_RV_RV(ins_Copy(regType1), returnReg1, allocatedReg1, regType1);
- }
-
- if (allocatedReg0 != returnReg0)
- {
- inst_RV_RV(ins_Copy(regType0), returnReg0, allocatedReg0, regType0);
- }
- }
- else
- {
- // No circular dependency case.
- if (allocatedReg0 != returnReg0)
- {
- inst_RV_RV(ins_Copy(regType0), returnReg0, allocatedReg0, regType0);
- }
-
- if (allocatedReg1 != returnReg1)
- {
- inst_RV_RV(ins_Copy(regType1), returnReg1, allocatedReg1, regType1);
- }
- }
- }
-#else
- unreached();
-#endif
-}
-
-//------------------------------------------------------------------------
-// genReturn: Generates code for return statement.
-// In case of struct return, delegates to the genStructReturn method.
-//
-// Arguments:
-// treeNode - The GT_RETURN or GT_RETFILT tree node.
-//
-// Return Value:
-// None
-//
-void CodeGen::genReturn(GenTreePtr treeNode)
-{
- assert(treeNode->OperGet() == GT_RETURN || treeNode->OperGet() == GT_RETFILT);
- GenTreePtr op1 = treeNode->gtGetOp1();
- var_types targetType = treeNode->TypeGet();
-
-#ifdef DEBUG
- if (targetType == TYP_VOID)
- {
- assert(op1 == nullptr);
- }
-#endif
-
-#ifdef _TARGET_X86_
- if (treeNode->TypeGet() == TYP_LONG)
- {
- assert(op1 != nullptr);
- noway_assert(op1->OperGet() == GT_LONG);
- GenTree* loRetVal = op1->gtGetOp1();
- GenTree* hiRetVal = op1->gtGetOp2();
- noway_assert((loRetVal->gtRegNum != REG_NA) && (hiRetVal->gtRegNum != REG_NA));
-
- genConsumeReg(loRetVal);
- genConsumeReg(hiRetVal);
- if (loRetVal->gtRegNum != REG_LNGRET_LO)
- {
- inst_RV_RV(ins_Copy(targetType), REG_LNGRET_LO, loRetVal->gtRegNum, TYP_INT);
- }
- if (hiRetVal->gtRegNum != REG_LNGRET_HI)
- {
- inst_RV_RV(ins_Copy(targetType), REG_LNGRET_HI, hiRetVal->gtRegNum, TYP_INT);
- }
- }
- else
-#endif // !defined(_TARGET_X86_)
- {
- if (isStructReturn(treeNode))
- {
- genStructReturn(treeNode);
- }
- else if (targetType != TYP_VOID)
- {
- assert(op1 != nullptr);
- noway_assert(op1->gtRegNum != REG_NA);
-
- // !! NOTE !! genConsumeReg will clear op1 as GC ref after it has
- // consumed a reg for the operand. This is because the variable
- // is dead after return. But we are issuing more instructions
- // like "profiler leave callback" after this consumption. So
- // if you are issuing more instructions after this point,
- // remember to keep the variable live up until the new method
- // exit point where it is actually dead.
- genConsumeReg(op1);
-
- regNumber retReg = varTypeIsFloating(treeNode) ? REG_FLOATRET : REG_INTRET;
-#ifdef _TARGET_X86_
- if (varTypeIsFloating(treeNode))
- {
- // Spill the return value register from an XMM register to the stack, then load it on the x87 stack.
- // If it already has a home location, use that. Otherwise, we need a temp.
- if (genIsRegCandidateLocal(op1) && compiler->lvaTable[op1->gtLclVarCommon.gtLclNum].lvOnFrame)
- {
- // Store local variable to its home location, if necessary.
- if ((op1->gtFlags & GTF_REG_VAL) != 0)
- {
- op1->gtFlags &= ~GTF_REG_VAL;
- inst_TT_RV(ins_Store(op1->gtType,
- compiler->isSIMDTypeLocalAligned(op1->gtLclVarCommon.gtLclNum)),
- op1, op1->gtRegNum);
- }
- // Now, load it to the fp stack.
- getEmitter()->emitIns_S(INS_fld, emitTypeSize(op1), op1->AsLclVarCommon()->gtLclNum, 0);
- }
- else
- {
- // Spill the value, which should be in a register, then load it to the fp stack.
- // TODO-X86-CQ: Deal with things that are already in memory (don't call genConsumeReg yet).
- op1->gtFlags |= GTF_SPILL;
- regSet.rsSpillTree(op1->gtRegNum, op1);
- op1->gtFlags |= GTF_SPILLED;
- op1->gtFlags &= ~GTF_SPILL;
-
- TempDsc* t = regSet.rsUnspillInPlace(op1, op1->gtRegNum);
- inst_FS_ST(INS_fld, emitActualTypeSize(op1->gtType), t, 0);
- op1->gtFlags &= ~GTF_SPILLED;
- compiler->tmpRlsTemp(t);
- }
- }
- else
-#endif // _TARGET_X86_
- {
- if (op1->gtRegNum != retReg)
- {
- inst_RV_RV(ins_Copy(targetType), retReg, op1->gtRegNum, targetType);
- }
- }
- }
- }
-
-#ifdef PROFILING_SUPPORTED
- // !! Note !!
- // TODO-AMD64-Unix: If the profiler hook is implemented on *nix, make sure for 2 register returned structs
- // the RAX and RDX needs to be kept alive. Make the necessary changes in lowerxarch.cpp
- // in the handling of the GT_RETURN statement.
- // Such structs containing GC pointers need to be handled by calling gcInfo.gcMarkRegSetNpt
- // for the return registers containing GC refs.
-
- // There will be a single return block while generating profiler ELT callbacks.
- //
- // Reason for not materializing Leave callback as a GT_PROF_HOOK node after GT_RETURN:
- // In flowgraph and other places assert that the last node of a block marked as
- // GT_RETURN is either a GT_RETURN or GT_JMP or a tail call. It would be nice to
- // maintain such an invariant irrespective of whether profiler hook needed or not.
- // Also, there is not much to be gained by materializing it as an explicit node.
- if (compiler->compCurBB == compiler->genReturnBB)
- {
- // !! NOTE !!
- // Since we are invalidating the assumption that we would slip into the epilog
- // right after the "return", we need to preserve the return reg's GC state
- // across the call until actual method return.
- if (varTypeIsGC(compiler->info.compRetType))
- {
- gcInfo.gcMarkRegPtrVal(REG_INTRET, compiler->info.compRetType);
- }
-
- genProfilingLeaveCallback();
-
- if (varTypeIsGC(compiler->info.compRetType))
- {
- gcInfo.gcMarkRegSetNpt(REG_INTRET);
- }
- }
-#endif
-}
-
-/*****************************************************************************
- *
- * Generate code for a single node in the tree.
- * Preconditions: All operands have been evaluated
- *
- */
-void CodeGen::genCodeForTreeNode(GenTreePtr treeNode)
-{
- regNumber targetReg;
-#if !defined(_TARGET_64BIT_)
- if (treeNode->TypeGet() == TYP_LONG)
- {
- // All long enregistered nodes will have been decomposed into their
- // constituent lo and hi nodes.
- targetReg = REG_NA;
- }
- else
-#endif // !defined(_TARGET_64BIT_)
- {
- targetReg = treeNode->gtRegNum;
- }
- var_types targetType = treeNode->TypeGet();
- emitter* emit = getEmitter();
-
-#ifdef DEBUG
- // Validate that all the operands for the current node are consumed in order.
- // This is important because LSRA ensures that any necessary copies will be
- // handled correctly.
- lastConsumedNode = nullptr;
- if (compiler->verbose)
- {
- unsigned seqNum = treeNode->gtSeqNum; // Useful for setting a conditional break in Visual Studio
- printf("Generating: ");
- compiler->gtDispTree(treeNode, nullptr, nullptr, true);
- }
-#endif // DEBUG
-
- // Is this a node whose value is already in a register? LSRA denotes this by
- // setting the GTF_REUSE_REG_VAL flag.
- if (treeNode->IsReuseRegVal())
- {
- // For now, this is only used for constant nodes.
- assert((treeNode->OperIsConst()));
- JITDUMP(" TreeNode is marked ReuseReg\n");
- return;
- }
-
- // contained nodes are part of their parents for codegen purposes
- // ex : immediates, most LEAs
- if (treeNode->isContained())
- {
- return;
- }
-
- switch (treeNode->gtOper)
- {
- case GT_START_NONGC:
- getEmitter()->emitDisableGC();
- break;
-
- case GT_PROF_HOOK:
-#ifdef PROFILING_SUPPORTED
- // We should be seeing this only if profiler hook is needed
- noway_assert(compiler->compIsProfilerHookNeeded());
-
-            // Right now this node is used only for tail calls. In the future, if
- // we intend to use it for Enter or Leave hooks, add a data member
- // to this node indicating the kind of profiler hook. For example,
- // helper number can be used.
- genProfilingLeaveCallback(CORINFO_HELP_PROF_FCN_TAILCALL);
-#endif // PROFILING_SUPPORTED
- break;
-
- case GT_LCLHEAP:
- genLclHeap(treeNode);
- break;
-
- case GT_CNS_INT:
-#ifdef _TARGET_X86_
- NYI_IF(treeNode->IsIconHandle(GTF_ICON_TLS_HDL), "TLS constants");
-#endif // _TARGET_X86_
- __fallthrough;
-
- case GT_CNS_DBL:
- genSetRegToConst(targetReg, targetType, treeNode);
- genProduceReg(treeNode);
- break;
-
- case GT_NEG:
- case GT_NOT:
- if (varTypeIsFloating(targetType))
- {
- assert(treeNode->gtOper == GT_NEG);
- genSSE2BitwiseOp(treeNode);
- }
- else
- {
- GenTreePtr operand = treeNode->gtGetOp1();
- assert(!operand->isContained());
- regNumber operandReg = genConsumeReg(operand);
-
- if (operandReg != targetReg)
- {
- inst_RV_RV(INS_mov, targetReg, operandReg, targetType);
- }
-
- instruction ins = genGetInsForOper(treeNode->OperGet(), targetType);
- inst_RV(ins, targetReg, targetType);
- }
- genProduceReg(treeNode);
- break;
-
- case GT_OR:
- case GT_XOR:
- case GT_AND:
- assert(varTypeIsIntegralOrI(treeNode));
- __fallthrough;
-
-#if !defined(_TARGET_64BIT_)
- case GT_ADD_LO:
- case GT_ADD_HI:
- case GT_SUB_LO:
- case GT_SUB_HI:
-#endif // !defined(_TARGET_64BIT_)
- case GT_ADD:
- case GT_SUB:
- genConsumeOperands(treeNode->AsOp());
- genCodeForBinary(treeNode);
- break;
-
- case GT_LSH:
- case GT_RSH:
- case GT_RSZ:
- case GT_ROL:
- case GT_ROR:
- genCodeForShift(treeNode);
- // genCodeForShift() calls genProduceReg()
- break;
-
- case GT_CAST:
-#if !defined(_TARGET_64BIT_)
-            // We will NYI in DecomposeNode() if we are casting TO a long type, but we do not
- // yet support casting FROM a long type either, and that's simpler to catch
- // here.
- NYI_IF(varTypeIsLong(treeNode->gtOp.gtOp1), "Casts from TYP_LONG");
-#endif // !defined(_TARGET_64BIT_)
-
- if (varTypeIsFloating(targetType) && varTypeIsFloating(treeNode->gtOp.gtOp1))
- {
- // Casts float/double <--> double/float
- genFloatToFloatCast(treeNode);
- }
- else if (varTypeIsFloating(treeNode->gtOp.gtOp1))
- {
- // Casts float/double --> int32/int64
- genFloatToIntCast(treeNode);
- }
- else if (varTypeIsFloating(targetType))
- {
- // Casts int32/uint32/int64/uint64 --> float/double
- genIntToFloatCast(treeNode);
- }
- else
- {
- // Casts int <--> int
- genIntToIntCast(treeNode);
- }
- // The per-case functions call genProduceReg()
- break;
-
- case GT_LCL_VAR:
- {
- // lcl_vars are not defs
- assert((treeNode->gtFlags & GTF_VAR_DEF) == 0);
-
- GenTreeLclVarCommon* lcl = treeNode->AsLclVarCommon();
- bool isRegCandidate = compiler->lvaTable[lcl->gtLclNum].lvIsRegCandidate();
-
- if (isRegCandidate && !(treeNode->gtFlags & GTF_VAR_DEATH))
- {
- assert((treeNode->InReg()) || (treeNode->gtFlags & GTF_SPILLED));
- }
-
- // If this is a register candidate that has been spilled, genConsumeReg() will
- // reload it at the point of use. Otherwise, if it's not in a register, we load it here.
-
- if (!treeNode->InReg() && !(treeNode->gtFlags & GTF_SPILLED))
- {
- assert(!isRegCandidate);
-
- emit->emitIns_R_S(ins_Load(treeNode->TypeGet(), compiler->isSIMDTypeLocalAligned(lcl->gtLclNum)),
- emitTypeSize(treeNode), treeNode->gtRegNum, lcl->gtLclNum, 0);
- genProduceReg(treeNode);
- }
- }
- break;
-
- case GT_LCL_FLD_ADDR:
- case GT_LCL_VAR_ADDR:
- // Address of a local var. This by itself should never be allocated a register.
- // If it is worth storing the address in a register then it should be cse'ed into
- // a temp and that would be allocated a register.
- noway_assert(targetType == TYP_BYREF);
- noway_assert(!treeNode->InReg());
-
- inst_RV_TT(INS_lea, targetReg, treeNode, 0, EA_BYREF);
- genProduceReg(treeNode);
- break;
-
- case GT_LCL_FLD:
- {
- noway_assert(targetType != TYP_STRUCT);
- noway_assert(treeNode->gtRegNum != REG_NA);
-
-#ifdef FEATURE_SIMD
- // Loading of TYP_SIMD12 (i.e. Vector3) field
- if (treeNode->TypeGet() == TYP_SIMD12)
- {
- genLoadLclFldTypeSIMD12(treeNode);
- break;
- }
-#endif
-
- emitAttr size = emitTypeSize(targetType);
- unsigned offs = treeNode->gtLclFld.gtLclOffs;
- unsigned varNum = treeNode->gtLclVarCommon.gtLclNum;
- assert(varNum < compiler->lvaCount);
-
- emit->emitIns_R_S(ins_Move_Extend(targetType, treeNode->InReg()), size, targetReg, varNum, offs);
- }
- genProduceReg(treeNode);
- break;
-
- case GT_STORE_LCL_FLD:
- {
- noway_assert(targetType != TYP_STRUCT);
- noway_assert(!treeNode->InReg());
- assert(!varTypeIsFloating(targetType) || (targetType == treeNode->gtGetOp1()->TypeGet()));
-
-#ifdef FEATURE_SIMD
- // storing of TYP_SIMD12 (i.e. Vector3) field
- if (treeNode->TypeGet() == TYP_SIMD12)
- {
- genStoreLclFldTypeSIMD12(treeNode);
- break;
- }
-#endif
- GenTreePtr op1 = treeNode->gtGetOp1();
- genConsumeRegs(op1);
- emit->emitInsBinary(ins_Store(targetType), emitTypeSize(treeNode), treeNode, op1);
- }
- break;
-
- case GT_STORE_LCL_VAR:
- {
- GenTreePtr op1 = treeNode->gtGetOp1();
-
- // var = call, where call returns a multi-reg return value
- // case is handled separately.
- if (op1->gtSkipReloadOrCopy()->IsMultiRegCall())
- {
- genMultiRegCallStoreToLocal(treeNode);
- }
- else
- {
- noway_assert(targetType != TYP_STRUCT);
- assert(!varTypeIsFloating(targetType) || (targetType == treeNode->gtGetOp1()->TypeGet()));
-
- unsigned lclNum = treeNode->AsLclVarCommon()->gtLclNum;
- LclVarDsc* varDsc = &(compiler->lvaTable[lclNum]);
-
- // Ensure that lclVar nodes are typed correctly.
- assert(!varDsc->lvNormalizeOnStore() || treeNode->TypeGet() == genActualType(varDsc->TypeGet()));
-
-#if !defined(_TARGET_64BIT_)
- if (treeNode->TypeGet() == TYP_LONG)
- {
- genStoreLongLclVar(treeNode);
- break;
- }
-#endif // !defined(_TARGET_64BIT_)
-
-#ifdef FEATURE_SIMD
- if (varTypeIsSIMD(targetType) && (targetReg != REG_NA) && op1->IsCnsIntOrI())
- {
- // This is only possible for a zero-init.
- noway_assert(op1->IsIntegralConst(0));
- genSIMDZero(targetType, varDsc->lvBaseType, targetReg);
- genProduceReg(treeNode);
- break;
- }
-#endif // FEATURE_SIMD
-
- genConsumeRegs(op1);
-
- if (treeNode->gtRegNum == REG_NA)
- {
- // stack store
- emit->emitInsMov(ins_Store(targetType, compiler->isSIMDTypeLocalAligned(lclNum)),
- emitTypeSize(targetType), treeNode);
- varDsc->lvRegNum = REG_STK;
- }
- else
- {
- bool containedOp1 = op1->isContained();
- // Look for the case where we have a constant zero which we've marked for reuse,
- // but which isn't actually in the register we want. In that case, it's better to create
- // zero in the target register, because an xor is smaller than a copy. Note that we could
- // potentially handle this in the register allocator, but we can't always catch it there
- // because the target may not have a register allocated for it yet.
- if (!containedOp1 && (op1->gtRegNum != treeNode->gtRegNum) &&
- (op1->IsIntegralConst(0) || op1->IsFPZero()))
- {
- op1->gtRegNum = REG_NA;
- op1->ResetReuseRegVal();
- containedOp1 = true;
- }
-
- if (containedOp1)
- {
- // Currently, we assume that the contained source of a GT_STORE_LCL_VAR writing to a register
- // must be a constant. However, in the future we might want to support a contained memory op.
- // This is a bit tricky because we have to decide it's contained before register allocation,
- // and this would be a case where, once that's done, we need to mark that node as always
- // requiring a register - which we always assume now anyway, but once we "optimize" that
- // we'll have to take cases like this into account.
- assert((op1->gtRegNum == REG_NA) && op1->OperIsConst());
- genSetRegToConst(treeNode->gtRegNum, targetType, op1);
- }
- else if (op1->gtRegNum != treeNode->gtRegNum)
- {
- assert(op1->gtRegNum != REG_NA);
- emit->emitInsBinary(ins_Move_Extend(targetType, true), emitTypeSize(treeNode), treeNode, op1);
- }
- }
- }
-
- if (treeNode->gtRegNum != REG_NA)
- {
- genProduceReg(treeNode);
- }
- }
- break;
-
- case GT_RETFILT:
- // A void GT_RETFILT is the end of a finally. For non-void filter returns we need to load the result in
- // the return register, if it's not already there. The processing is the same as GT_RETURN.
- if (targetType != TYP_VOID)
- {
- // For filters, the IL spec says the result is type int32. Further, the only specified legal values
- // are 0 or 1, with the use of other values "undefined".
- assert(targetType == TYP_INT);
- }
-
- __fallthrough;
-
- case GT_RETURN:
- genReturn(treeNode);
- break;
-
- case GT_LEA:
- {
- // if we are here, it is the case where there is an LEA that cannot
- // be folded into a parent instruction
- GenTreeAddrMode* lea = treeNode->AsAddrMode();
- genLeaInstruction(lea);
- }
- // genLeaInstruction calls genProduceReg()
- break;
-
- case GT_IND:
-#ifdef FEATURE_SIMD
- // Handling of Vector3 type values loaded through indirection.
- if (treeNode->TypeGet() == TYP_SIMD12)
- {
- genLoadIndTypeSIMD12(treeNode);
- break;
- }
-#endif // FEATURE_SIMD
-
- genConsumeAddress(treeNode->AsIndir()->Addr());
- emit->emitInsMov(ins_Load(treeNode->TypeGet()), emitTypeSize(treeNode), treeNode);
- genProduceReg(treeNode);
- break;
-
- case GT_MULHI:
- genCodeForMulHi(treeNode->AsOp());
- genProduceReg(treeNode);
- break;
-
- case GT_MUL:
- {
- instruction ins;
- emitAttr size = emitTypeSize(treeNode);
- bool isUnsignedMultiply = ((treeNode->gtFlags & GTF_UNSIGNED) != 0);
- bool requiresOverflowCheck = treeNode->gtOverflowEx();
-
- GenTree* op1 = treeNode->gtGetOp1();
- GenTree* op2 = treeNode->gtGetOp2();
-
- // there are 3 forms of x64 multiply:
- // 1-op form with 128 result: RDX:RAX = RAX * rm
- // 2-op form: reg *= rm
- // 3-op form: reg = rm * imm
-
- genConsumeOperands(treeNode->AsOp());
-
- // This matches the 'mul' lowering in Lowering::SetMulOpCounts()
- //
- // immOp :: Only one operand can be an immediate
- // rmOp :: Only one operand can be a memory op.
- // regOp :: A register op (especially the operand that matches 'targetReg')
- // (can be nullptr when we have both a memory op and an immediate op)
-
- GenTree* immOp = nullptr;
- GenTree* rmOp = op1;
- GenTree* regOp;
-
- if (op2->isContainedIntOrIImmed())
- {
- immOp = op2;
- }
- else if (op1->isContainedIntOrIImmed())
- {
- immOp = op1;
- rmOp = op2;
- }
-
- if (immOp != nullptr)
- {
- // This must be a non-floating point operation.
- assert(!varTypeIsFloating(treeNode));
-
- // CQ: When possible use LEA for mul by imm 3, 5 or 9
- ssize_t imm = immOp->AsIntConCommon()->IconValue();
-
- if (!requiresOverflowCheck && !rmOp->isContained() && ((imm == 3) || (imm == 5) || (imm == 9)))
- {
- // We will use the LEA instruction to perform this multiply
- // Note that an LEA with base=x, index=x and scale=(imm-1) computes x*imm when imm=3,5 or 9.
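-                    // For example, imm == 3 yields "lea targetReg, [rm + rm*2]" and imm == 5 yields
-                    // "lea targetReg, [rm + rm*4]".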
- unsigned int scale = (unsigned int)(imm - 1);
- getEmitter()->emitIns_R_ARX(INS_lea, size, targetReg, rmOp->gtRegNum, rmOp->gtRegNum, scale, 0);
- }
- else
- {
- // use the 3-op form with immediate
- ins = getEmitter()->inst3opImulForReg(targetReg);
- emit->emitInsBinary(ins, size, rmOp, immOp);
- }
- }
- else // we have no contained immediate operand
- {
- regOp = op1;
- rmOp = op2;
-
- regNumber mulTargetReg = targetReg;
- if (isUnsignedMultiply && requiresOverflowCheck)
- {
- ins = INS_mulEAX;
- mulTargetReg = REG_RAX;
- }
- else
- {
- ins = genGetInsForOper(GT_MUL, targetType);
- }
-
-                // Set rmOp to the contained memory operand (if any)
- // or set regOp to the op2 when it has the matching target register for our multiply op
- //
- if (op1->isContained() || (!op2->isContained() && (op2->gtRegNum == mulTargetReg)))
- {
- regOp = op2;
- rmOp = op1;
- }
- assert(!regOp->isContained());
-
- // Setup targetReg when neither of the source operands was a matching register
- if (regOp->gtRegNum != mulTargetReg)
- {
- inst_RV_RV(ins_Copy(targetType), mulTargetReg, regOp->gtRegNum, targetType);
- }
-
- emit->emitInsBinary(ins, size, treeNode, rmOp);
-
- // Move the result to the desired register, if necessary
- if ((ins == INS_mulEAX) && (targetReg != REG_RAX))
- {
- inst_RV_RV(INS_mov, targetReg, REG_RAX, targetType);
- }
- }
-
- if (requiresOverflowCheck)
- {
- // Overflow checking is only used for non-floating point types
- noway_assert(!varTypeIsFloating(treeNode));
-
- genCheckOverflow(treeNode);
- }
- }
- genProduceReg(treeNode);
- break;
-
- case GT_MOD:
- case GT_UDIV:
- case GT_UMOD:
- // We shouldn't be seeing GT_MOD on float/double args as it should get morphed into a
- // helper call by front-end. Similarly we shouldn't be seeing GT_UDIV and GT_UMOD
- // on float/double args.
- noway_assert(!varTypeIsFloating(treeNode));
- __fallthrough;
-
- case GT_DIV:
- genCodeForDivMod(treeNode->AsOp());
- break;
-
- case GT_INTRINSIC:
- genIntrinsic(treeNode);
- break;
-
-#ifdef FEATURE_SIMD
- case GT_SIMD:
- genSIMDIntrinsic(treeNode->AsSIMD());
- break;
-#endif // FEATURE_SIMD
-
- case GT_CKFINITE:
- genCkfinite(treeNode);
- break;
-
- case GT_EQ:
- case GT_NE:
- case GT_LT:
- case GT_LE:
- case GT_GE:
- case GT_GT:
- {
- // TODO-XArch-CQ: Check if we can use the currently set flags.
- // TODO-XArch-CQ: Check for the case where we can simply transfer the carry bit to a register
- // (signed < or >= where targetReg != REG_NA)
-
- GenTreePtr op1 = treeNode->gtGetOp1();
- var_types op1Type = op1->TypeGet();
-
- if (varTypeIsFloating(op1Type))
- {
- genCompareFloat(treeNode);
- }
-#if !defined(_TARGET_64BIT_)
- // X86 Long comparison
- else if (varTypeIsLong(op1Type))
- {
- // When not materializing the result in a register, the compare logic is generated
- // when we generate the GT_JTRUE.
- if (treeNode->gtRegNum != REG_NA)
- {
- genCompareLong(treeNode);
- }
- else
- {
- // We generate the compare when we generate the GT_JTRUE, but we need to consume
- // the operands now.
- genConsumeOperands(treeNode->AsOp());
- }
- }
-#endif // !defined(_TARGET_64BIT_)
- else
- {
- genCompareInt(treeNode);
- }
- }
- break;
-
- case GT_JTRUE:
- {
- GenTree* cmp = treeNode->gtOp.gtOp1;
-
- assert(cmp->OperIsCompare());
- assert(compiler->compCurBB->bbJumpKind == BBJ_COND);
-
-#if !defined(_TARGET_64BIT_)
- // For long compares, we emit special logic
- if (varTypeIsLong(cmp->gtGetOp1()))
- {
- genJTrueLong(cmp);
- }
- else
-#endif
- {
- // Get the "kind" and type of the comparison. Note that whether it is an unsigned cmp
- // is governed by a flag NOT by the inherent type of the node
- // TODO-XArch-CQ: Check if we can use the currently set flags.
- emitJumpKind jumpKind[2];
- bool branchToTrueLabel[2];
- genJumpKindsForTree(cmp, jumpKind, branchToTrueLabel);
-
- BasicBlock* skipLabel = nullptr;
- if (jumpKind[0] != EJ_NONE)
- {
- BasicBlock* jmpTarget;
- if (branchToTrueLabel[0])
- {
- jmpTarget = compiler->compCurBB->bbJumpDest;
- }
- else
- {
- // This case arises only for ordered GT_EQ right now
- assert((cmp->gtOper == GT_EQ) && ((cmp->gtFlags & GTF_RELOP_NAN_UN) == 0));
- skipLabel = genCreateTempLabel();
- jmpTarget = skipLabel;
- }
-
- inst_JMP(jumpKind[0], jmpTarget);
- }
-
- if (jumpKind[1] != EJ_NONE)
- {
- // the second conditional branch always has to be to the true label
- assert(branchToTrueLabel[1]);
- inst_JMP(jumpKind[1], compiler->compCurBB->bbJumpDest);
- }
-
- if (skipLabel != nullptr)
- {
- genDefineTempLabel(skipLabel);
- }
- }
- }
- break;
-
- case GT_RETURNTRAP:
- {
- // this is nothing but a conditional call to CORINFO_HELP_STOP_FOR_GC
- // based on the contents of 'data'
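-            // Rough shape of the emitted sequence (sketch):
-            //   cmp  data, 0
-            //   je   skip
-            //   call CORINFO_HELP_STOP_FOR_GC
-            // skip: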
-
- GenTree* data = treeNode->gtOp.gtOp1;
- genConsumeRegs(data);
- GenTreeIntCon cns = intForm(TYP_INT, 0);
- emit->emitInsBinary(INS_cmp, emitTypeSize(TYP_INT), data, &cns);
-
- BasicBlock* skipLabel = genCreateTempLabel();
-
- emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED);
- inst_JMP(jmpEqual, skipLabel);
-
- // emit the call to the EE-helper that stops for GC (or other reasons)
- assert(treeNode->gtRsvdRegs != RBM_NONE);
- assert(genCountBits(treeNode->gtRsvdRegs) == 1);
- regNumber tmpReg = genRegNumFromMask(treeNode->gtRsvdRegs);
- assert(genIsValidIntReg(tmpReg));
-
- genEmitHelperCall(CORINFO_HELP_STOP_FOR_GC, 0, EA_UNKNOWN, tmpReg);
- genDefineTempLabel(skipLabel);
- }
- break;
-
- case GT_STOREIND:
- genStoreInd(treeNode);
- break;
-
- case GT_COPY:
- // This is handled at the time we call genConsumeReg() on the GT_COPY
- break;
-
- case GT_SWAP:
- {
- // Swap is only supported for lclVar operands that are enregistered
- // We do not consume or produce any registers. Both operands remain enregistered.
- // However, the gc-ness may change.
- assert(genIsRegCandidateLocal(treeNode->gtOp.gtOp1) && genIsRegCandidateLocal(treeNode->gtOp.gtOp2));
-
- GenTreeLclVarCommon* lcl1 = treeNode->gtOp.gtOp1->AsLclVarCommon();
- LclVarDsc* varDsc1 = &(compiler->lvaTable[lcl1->gtLclNum]);
- var_types type1 = varDsc1->TypeGet();
- GenTreeLclVarCommon* lcl2 = treeNode->gtOp.gtOp2->AsLclVarCommon();
- LclVarDsc* varDsc2 = &(compiler->lvaTable[lcl2->gtLclNum]);
- var_types type2 = varDsc2->TypeGet();
-
- // We must have both int or both fp regs
- assert(!varTypeIsFloating(type1) || varTypeIsFloating(type2));
-
- // FP swap is not yet implemented (and should have NYI'd in LSRA)
- assert(!varTypeIsFloating(type1));
-
- regNumber oldOp1Reg = lcl1->gtRegNum;
- regMaskTP oldOp1RegMask = genRegMask(oldOp1Reg);
- regNumber oldOp2Reg = lcl2->gtRegNum;
- regMaskTP oldOp2RegMask = genRegMask(oldOp2Reg);
-
- // We don't call genUpdateVarReg because we don't have a tree node with the new register.
- varDsc1->lvRegNum = oldOp2Reg;
- varDsc2->lvRegNum = oldOp1Reg;
-
- // Do the xchg
- emitAttr size = EA_PTRSIZE;
- if (varTypeGCtype(type1) != varTypeGCtype(type2))
- {
- // If the type specified to the emitter is a GC type, it will swap the GC-ness of the registers.
- // Otherwise it will leave them alone, which is correct if they have the same GC-ness.
- size = EA_GCREF;
- }
- inst_RV_RV(INS_xchg, oldOp1Reg, oldOp2Reg, TYP_I_IMPL, size);
-
- // Update the gcInfo.
- // Manually remove these regs for the gc sets (mostly to avoid confusing duplicative dump output)
- gcInfo.gcRegByrefSetCur &= ~(oldOp1RegMask | oldOp2RegMask);
- gcInfo.gcRegGCrefSetCur &= ~(oldOp1RegMask | oldOp2RegMask);
-
- // gcMarkRegPtrVal will do the appropriate thing for non-gc types.
- // It will also dump the updates.
- gcInfo.gcMarkRegPtrVal(oldOp2Reg, type1);
- gcInfo.gcMarkRegPtrVal(oldOp1Reg, type2);
- }
- break;
-
- case GT_LIST:
- case GT_ARGPLACE:
- // Nothing to do
- break;
-
- case GT_PUTARG_STK:
- genPutArgStk(treeNode);
- break;
-
- case GT_PUTARG_REG:
- {
-#ifndef FEATURE_UNIX_AMD64_STRUCT_PASSING
- noway_assert(targetType != TYP_STRUCT);
-#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
- // commas show up here commonly, as part of a nullchk operation
- GenTree* op1 = treeNode->gtOp.gtOp1;
- // If child node is not already in the register we need, move it
- genConsumeReg(op1);
- if (treeNode->gtRegNum != op1->gtRegNum)
- {
- inst_RV_RV(ins_Copy(targetType), treeNode->gtRegNum, op1->gtRegNum, targetType);
- }
- genProduceReg(treeNode);
- }
- break;
-
- case GT_CALL:
- genCallInstruction(treeNode);
- break;
-
- case GT_JMP:
- genJmpMethod(treeNode);
- break;
-
- case GT_LOCKADD:
- case GT_XCHG:
- case GT_XADD:
- genLockedInstructions(treeNode);
- break;
-
- case GT_MEMORYBARRIER:
- instGen_MemoryBarrier();
- break;
-
- case GT_CMPXCHG:
- {
- GenTreePtr location = treeNode->gtCmpXchg.gtOpLocation; // arg1
- GenTreePtr value = treeNode->gtCmpXchg.gtOpValue; // arg2
- GenTreePtr comparand = treeNode->gtCmpXchg.gtOpComparand; // arg3
-
- assert(location->gtRegNum != REG_NA && location->gtRegNum != REG_RAX);
- assert(value->gtRegNum != REG_NA && value->gtRegNum != REG_RAX);
-
- genConsumeReg(location);
- genConsumeReg(value);
- genConsumeReg(comparand);
-            // comparand goes to RAX.
-            // Note that we must issue this move after the genConsumeReg() calls, in case any of the above
-            // have a GT_COPY from RAX.
- if (comparand->gtRegNum != REG_RAX)
- {
- inst_RV_RV(ins_Copy(comparand->TypeGet()), REG_RAX, comparand->gtRegNum, comparand->TypeGet());
- }
-
- // location is Rm
- instGen(INS_lock);
-
- emit->emitIns_AR_R(INS_cmpxchg, emitTypeSize(targetType), value->gtRegNum, location->gtRegNum, 0);
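-            // cmpxchg compares RAX with [location]; if they are equal, 'value' is stored to [location].
-            // In either case RAX ends up holding the original contents of [location], which is the result.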
-
- // Result is in RAX
- if (targetReg != REG_RAX)
- {
- inst_RV_RV(ins_Copy(targetType), targetReg, REG_RAX, targetType);
- }
- }
- genProduceReg(treeNode);
- break;
-
- case GT_RELOAD:
- // do nothing - reload is just a marker.
- // The parent node will call genConsumeReg on this which will trigger the unspill of this node's child
- // into the register specified in this node.
- break;
-
- case GT_NOP:
- break;
-
- case GT_NO_OP:
- if (treeNode->gtFlags & GTF_NO_OP_NO)
- {
- noway_assert(!"GTF_NO_OP_NO should not be set");
- }
- else
- {
- getEmitter()->emitIns_Nop(1);
- }
- break;
-
- case GT_ARR_BOUNDS_CHECK:
-#ifdef FEATURE_SIMD
- case GT_SIMD_CHK:
-#endif // FEATURE_SIMD
- genRangeCheck(treeNode);
- break;
-
- case GT_PHYSREG:
- if (treeNode->gtRegNum != treeNode->AsPhysReg()->gtSrcReg)
- {
- inst_RV_RV(INS_mov, treeNode->gtRegNum, treeNode->AsPhysReg()->gtSrcReg, targetType);
-
- genTransferRegGCState(treeNode->gtRegNum, treeNode->AsPhysReg()->gtSrcReg);
- }
- genProduceReg(treeNode);
- break;
-
- case GT_PHYSREGDST:
- break;
-
- case GT_NULLCHECK:
- {
- assert(!treeNode->gtOp.gtOp1->isContained());
- regNumber reg = genConsumeReg(treeNode->gtOp.gtOp1);
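-            // Comparing [reg] against reg forces a read of the address, which faults if reg is null.
-            // The flags produced by the compare are not used.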
- emit->emitIns_AR_R(INS_cmp, EA_4BYTE, reg, reg, 0);
- }
- break;
-
- case GT_CATCH_ARG:
-
- noway_assert(handlerGetsXcptnObj(compiler->compCurBB->bbCatchTyp));
-
-            /* Catch arguments get passed in a register. genCodeForBBlist()
-               would have marked it as holding a GC object, but not as used. */
-
- noway_assert(gcInfo.gcRegGCrefSetCur & RBM_EXCEPTION_OBJECT);
- genConsumeReg(treeNode);
- break;
-
-#if !FEATURE_EH_FUNCLETS
- case GT_END_LFIN:
-
- // Have to clear the ShadowSP of the nesting level which encloses the finally. Generates:
- // mov dword ptr [ebp-0xC], 0 // for some slot of the ShadowSP local var
-
- unsigned finallyNesting;
- finallyNesting = treeNode->gtVal.gtVal1;
- noway_assert(treeNode->gtVal.gtVal1 < compiler->compHndBBtabCount);
- noway_assert(finallyNesting < compiler->compHndBBtabCount);
-
- // The last slot is reserved for ICodeManager::FixContext(ppEndRegion)
- unsigned filterEndOffsetSlotOffs;
- PREFIX_ASSUME(compiler->lvaLclSize(compiler->lvaShadowSPslotsVar) >
- TARGET_POINTER_SIZE); // below doesn't underflow.
- filterEndOffsetSlotOffs =
- (unsigned)(compiler->lvaLclSize(compiler->lvaShadowSPslotsVar) - TARGET_POINTER_SIZE);
-
- unsigned curNestingSlotOffs;
- curNestingSlotOffs = filterEndOffsetSlotOffs - ((finallyNesting + 1) * TARGET_POINTER_SIZE);
- instGen_Store_Imm_Into_Lcl(TYP_I_IMPL, EA_PTRSIZE, 0, compiler->lvaShadowSPslotsVar, curNestingSlotOffs);
- break;
-#endif // !FEATURE_EH_FUNCLETS
-
- case GT_PINVOKE_PROLOG:
- noway_assert(((gcInfo.gcRegGCrefSetCur | gcInfo.gcRegByrefSetCur) & ~fullIntArgRegMask()) == 0);
-
- // the runtime side requires the codegen here to be consistent
- emit->emitDisableRandomNops();
- break;
-
- case GT_LABEL:
- genPendingCallLabel = genCreateTempLabel();
- treeNode->gtLabel.gtLabBB = genPendingCallLabel;
- emit->emitIns_R_L(INS_lea, EA_PTR_DSP_RELOC, genPendingCallLabel, treeNode->gtRegNum);
- break;
-
- case GT_STORE_OBJ:
- if (treeNode->OperIsCopyBlkOp() && !treeNode->AsBlk()->gtBlkOpGcUnsafe)
- {
- assert(treeNode->AsObj()->gtGcPtrCount != 0);
- genCodeForCpObj(treeNode->AsObj());
- break;
- }
- __fallthrough;
-
- case GT_STORE_DYN_BLK:
- case GT_STORE_BLK:
- genCodeForStoreBlk(treeNode->AsBlk());
- break;
-
- case GT_JMPTABLE:
- genJumpTable(treeNode);
- break;
-
- case GT_SWITCH_TABLE:
- genTableBasedSwitch(treeNode);
- break;
-
- case GT_ARR_INDEX:
- genCodeForArrIndex(treeNode->AsArrIndex());
- break;
-
- case GT_ARR_OFFSET:
- genCodeForArrOffset(treeNode->AsArrOffs());
- break;
-
- case GT_CLS_VAR_ADDR:
- getEmitter()->emitIns_R_C(INS_lea, EA_PTRSIZE, targetReg, treeNode->gtClsVar.gtClsVarHnd, 0);
- genProduceReg(treeNode);
- break;
-
-#if !defined(_TARGET_64BIT_)
- case GT_LONG:
- assert(!treeNode->isContained());
- genConsumeRegs(treeNode);
- break;
-#endif
-
- case GT_IL_OFFSET:
- // Do nothing; these nodes are simply markers for debug info.
- break;
-
- default:
- {
-#ifdef DEBUG
- char message[256];
- sprintf(message, "Unimplemented node type %s\n", GenTree::NodeName(treeNode->OperGet()));
-#endif
- assert(!"Unknown node in codegen");
- }
- break;
- }
-}
-
-//----------------------------------------------------------------------------------
-// genMultiRegCallStoreToLocal: store multi-reg return value of a call node to a local
-//
-// Arguments:
-// treeNode - Gentree of GT_STORE_LCL_VAR
-//
-// Return Value:
-// None
-//
-// Assumption:
-// The child of store is a multi-reg call node.
-// genProduceReg() on treeNode is made by caller of this routine.
-//
-void CodeGen::genMultiRegCallStoreToLocal(GenTreePtr treeNode)
-{
- assert(treeNode->OperGet() == GT_STORE_LCL_VAR);
-
-#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
- // Structs of size >=9 and <=16 are returned in two return registers on x64 Unix.
- assert(varTypeIsStruct(treeNode));
-
- // Assumption: current x64 Unix implementation requires that a multi-reg struct
- // var in 'var = call' is flagged as lvIsMultiRegRet to prevent it from
- // being struct promoted.
- unsigned lclNum = treeNode->AsLclVarCommon()->gtLclNum;
- LclVarDsc* varDsc = &(compiler->lvaTable[lclNum]);
- noway_assert(varDsc->lvIsMultiRegRet);
-
- GenTree* op1 = treeNode->gtGetOp1();
- GenTree* actualOp1 = op1->gtSkipReloadOrCopy();
- GenTreeCall* call = actualOp1->AsCall();
- assert(call->HasMultiRegRetVal());
-
- genConsumeRegs(op1);
-
- ReturnTypeDesc* retTypeDesc = call->GetReturnTypeDesc();
- assert(retTypeDesc->GetReturnRegCount() == MAX_RET_REG_COUNT);
- unsigned regCount = retTypeDesc->GetReturnRegCount();
-
- if (treeNode->gtRegNum != REG_NA)
- {
- // Right now the only enregistrable structs supported are SIMD types.
- assert(varTypeIsSIMD(treeNode));
- assert(varTypeIsFloating(retTypeDesc->GetReturnRegType(0)));
- assert(varTypeIsFloating(retTypeDesc->GetReturnRegType(1)));
-
-        // This is the case where the two 8-byte halves that comprise the operand are in
-        // two different xmm registers and need to be assembled into a single
-        // xmm register.
- regNumber targetReg = treeNode->gtRegNum;
- regNumber reg0 = call->GetRegNumByIdx(0);
- regNumber reg1 = call->GetRegNumByIdx(1);
-
- if (op1->IsCopyOrReload())
- {
- // GT_COPY/GT_RELOAD will have valid reg for those positions
- // that need to be copied or reloaded.
- regNumber reloadReg = op1->AsCopyOrReload()->GetRegNumByIdx(0);
- if (reloadReg != REG_NA)
- {
- reg0 = reloadReg;
- }
-
- reloadReg = op1->AsCopyOrReload()->GetRegNumByIdx(1);
- if (reloadReg != REG_NA)
- {
- reg1 = reloadReg;
- }
- }
-
- if (targetReg != reg0 && targetReg != reg1)
- {
-            // Copy reg0 into targetReg and let it be handled by one
-            // of the cases below.
- inst_RV_RV(ins_Copy(TYP_DOUBLE), targetReg, reg0, TYP_DOUBLE);
- targetReg = reg0;
- }
-
- if (targetReg == reg0)
- {
-            // targetReg[63:0] = targetReg[63:0]
-            // targetReg[127:64] = reg1[63:0]
- inst_RV_RV_IV(INS_shufpd, EA_16BYTE, targetReg, reg1, 0x00);
- }
- else
- {
- assert(targetReg == reg1);
-
- // We need two shuffles to achieve this
- // First:
-            // targetReg[63:0] = targetReg[63:0]
- // targetReg[127:64] = reg0[63:0]
- //
- // Second:
-            // targetReg[63:0] = targetReg[127:64]
- // targetReg[127:64] = targetReg[63:0]
- //
- // Essentially copy low 8-bytes from reg0 to high 8-bytes of targetReg
- // and next swap low and high 8-bytes of targetReg to have them
- // rearranged in the right order.
- inst_RV_RV_IV(INS_shufpd, EA_16BYTE, targetReg, reg0, 0x00);
- inst_RV_RV_IV(INS_shufpd, EA_16BYTE, targetReg, targetReg, 0x01);
- }
- }
- else
- {
- // Stack store
- int offset = 0;
- for (unsigned i = 0; i < regCount; ++i)
- {
- var_types type = retTypeDesc->GetReturnRegType(i);
- regNumber reg = call->GetRegNumByIdx(i);
- if (op1->IsCopyOrReload())
- {
- // GT_COPY/GT_RELOAD will have valid reg for those positions
- // that need to be copied or reloaded.
- regNumber reloadReg = op1->AsCopyOrReload()->GetRegNumByIdx(i);
- if (reloadReg != REG_NA)
- {
- reg = reloadReg;
- }
- }
-
- assert(reg != REG_NA);
- getEmitter()->emitIns_S_R(ins_Store(type), emitTypeSize(type), reg, lclNum, offset);
- offset += genTypeSize(type);
- }
-
- varDsc->lvRegNum = REG_STK;
- }
-#elif defined(_TARGET_X86_)
- // Longs are returned in two return registers on x86.
- assert(varTypeIsLong(treeNode));
-
- // Assumption: current x86 implementation requires that a multi-reg long
- // var in 'var = call' is flagged as lvIsMultiRegRet to prevent it from
- // being promoted.
- unsigned lclNum = treeNode->AsLclVarCommon()->gtLclNum;
- LclVarDsc* varDsc = &(compiler->lvaTable[lclNum]);
- noway_assert(varDsc->lvIsMultiRegRet);
-
- GenTree* op1 = treeNode->gtGetOp1();
- GenTree* actualOp1 = op1->gtSkipReloadOrCopy();
- GenTreeCall* call = actualOp1->AsCall();
- assert(call->HasMultiRegRetVal());
-
- genConsumeRegs(op1);
-
- ReturnTypeDesc* retTypeDesc = call->GetReturnTypeDesc();
- unsigned regCount = retTypeDesc->GetReturnRegCount();
- assert(regCount == MAX_RET_REG_COUNT);
-
- // Stack store
- int offset = 0;
- for (unsigned i = 0; i < regCount; ++i)
- {
- var_types type = retTypeDesc->GetReturnRegType(i);
- regNumber reg = call->GetRegNumByIdx(i);
- if (op1->IsCopyOrReload())
- {
- // GT_COPY/GT_RELOAD will have valid reg for those positions
- // that need to be copied or reloaded.
- regNumber reloadReg = op1->AsCopyOrReload()->GetRegNumByIdx(i);
- if (reloadReg != REG_NA)
- {
- reg = reloadReg;
- }
- }
-
- assert(reg != REG_NA);
- getEmitter()->emitIns_S_R(ins_Store(type), emitTypeSize(type), reg, lclNum, offset);
- offset += genTypeSize(type);
- }
-
- varDsc->lvRegNum = REG_STK;
-#else // !FEATURE_UNIX_AMD64_STRUCT_PASSING && !_TARGET_X86_
- assert(!"Unreached");
-#endif // !FEATURE_UNIX_AMD64_STRUCT_PASSING && !_TARGET_X86_
-}
-
-//------------------------------------------------------------------------
-// genLclHeap: Generate code for localloc.
-//
-// Arguments:
-// tree - the localloc tree to generate.
-//
-// Notes:
-// Note that for x86, we don't track ESP movements while generating the localloc code.
-// The ESP tracking is used to report stack pointer-relative GC info, which is not
-// interesting while doing the localloc construction. Also, for functions with localloc,
-// we have EBP frames, and EBP-relative locals, and ESP-relative accesses only for function
-// call arguments. We store the ESP after the localloc is complete in the LocAllocSP
-// variable. This variable is implicitly reported to the VM in the GC info (its position
-// is defined by convention relative to other items), and is used by the GC to find the
-// "base" stack pointer in functions with localloc.
-//
-void CodeGen::genLclHeap(GenTreePtr tree)
-{
- assert(tree->OperGet() == GT_LCLHEAP);
- assert(compiler->compLocallocUsed);
-
- GenTreePtr size = tree->gtOp.gtOp1;
- noway_assert((genActualType(size->gtType) == TYP_INT) || (genActualType(size->gtType) == TYP_I_IMPL));
-
- regNumber targetReg = tree->gtRegNum;
- regMaskTP tmpRegsMask = tree->gtRsvdRegs;
- regNumber regCnt = REG_NA;
- var_types type = genActualType(size->gtType);
- emitAttr easz = emitTypeSize(type);
- BasicBlock* endLabel = nullptr;
-
-#ifdef DEBUG
- // Verify ESP
- if (compiler->opts.compStackCheckOnRet)
- {
- noway_assert(compiler->lvaReturnEspCheck != 0xCCCCCCCC &&
- compiler->lvaTable[compiler->lvaReturnEspCheck].lvDoNotEnregister &&
- compiler->lvaTable[compiler->lvaReturnEspCheck].lvOnFrame);
- getEmitter()->emitIns_S_R(INS_cmp, EA_PTRSIZE, REG_SPBASE, compiler->lvaReturnEspCheck, 0);
-
- BasicBlock* esp_check = genCreateTempLabel();
- emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED);
- inst_JMP(jmpEqual, esp_check);
- getEmitter()->emitIns(INS_BREAKPOINT);
- genDefineTempLabel(esp_check);
- }
-#endif
-
- noway_assert(isFramePointerUsed()); // localloc requires Frame Pointer to be established since SP changes
- noway_assert(genStackLevel == 0); // Can't have anything on the stack
-
- unsigned stackAdjustment = 0;
- BasicBlock* loop = nullptr;
-
-    // Compute the amount of memory to allocate, properly aligned to STACK_ALIGN.
- size_t amount = 0;
- if (size->IsCnsIntOrI())
- {
- // If size is a constant, then it must be contained.
- assert(size->isContained());
-
- // If amount is zero then return null in targetReg
- amount = size->gtIntCon.gtIconVal;
- if (amount == 0)
- {
- instGen_Set_Reg_To_Zero(EA_PTRSIZE, targetReg);
- goto BAILOUT;
- }
-
-        // 'amount' is the total number of bytes to localloc; round it up to STACK_ALIGN
- amount = AlignUp(amount, STACK_ALIGN);
- }
- else
- {
- // The localloc requested memory size is non-constant.
-
- // Put the size value in targetReg. If it is zero, bail out by returning null in targetReg.
- genConsumeRegAndCopy(size, targetReg);
- endLabel = genCreateTempLabel();
- getEmitter()->emitIns_R_R(INS_test, easz, targetReg, targetReg);
- inst_JMP(EJ_je, endLabel);
-
- // Compute the size of the block to allocate and perform alignment.
- // If compInitMem=true, we can reuse targetReg as regcnt,
- // since we don't need any internal registers.
- if (compiler->info.compInitMem)
- {
- assert(genCountBits(tmpRegsMask) == 0);
- regCnt = targetReg;
- }
- else
- {
- assert(genCountBits(tmpRegsMask) >= 1);
- regMaskTP regCntMask = genFindLowestBit(tmpRegsMask);
- tmpRegsMask &= ~regCntMask;
- regCnt = genRegNumFromMask(regCntMask);
- if (regCnt != targetReg)
- {
- // Above, we put the size in targetReg. Now, copy it to our new temp register if necessary.
- inst_RV_RV(INS_mov, regCnt, targetReg, size->TypeGet());
- }
- }
-
- // Round up the number of bytes to allocate to a STACK_ALIGN boundary. This is done
- // by code like:
- // add reg, 15
- // and reg, -16
- // However, in the initialized memory case, we need the count of STACK_ALIGN-sized
- // elements, not a byte count, after the alignment. So instead of the "and", which
- // becomes unnecessary, generate a shift, e.g.:
- // add reg, 15
- // shr reg, 4
-
- inst_RV_IV(INS_add, regCnt, STACK_ALIGN - 1, emitActualTypeSize(type));
-
- if (compiler->info.compInitMem)
- {
- // Convert the count from a count of bytes to a loop count. We will loop once per
- // stack alignment size, so each loop will zero 4 bytes on x86 and 16 bytes on x64.
- // Note that we zero a single reg-size word per iteration on x86, and 2 reg-size
- // words per iteration on x64. We will shift off all the stack alignment bits
- // added above, so there is no need for an 'and' instruction.
-
- // --- shr regCnt, 2 (or 4) ---
- inst_RV_SH(INS_SHIFT_RIGHT_LOGICAL, EA_PTRSIZE, regCnt, STACK_ALIGN_SHIFT_ALL);
- }
- else
- {
- // Otherwise, mask off the low bits to align the byte count.
- inst_RV_IV(INS_AND, regCnt, ~(STACK_ALIGN - 1), emitActualTypeSize(type));
- }
- }
-
-#if FEATURE_FIXED_OUT_ARGS
- // If we have an outgoing arg area then we must adjust the SP by popping off the
- // outgoing arg area. We will restore it right before we return from this method.
- //
-    // Localloc returns stack space that is aligned to STACK_ALIGN bytes. The following
- // are the cases that need to be handled:
- // i) Method has out-going arg area.
- // It is guaranteed that size of out-going arg area is STACK_ALIGN'ed (see fgMorphArgs).
- // Therefore, we will pop off the out-going arg area from RSP before allocating the localloc space.
- // ii) Method has no out-going arg area.
- // Nothing to pop off from the stack.
- if (compiler->lvaOutgoingArgSpaceSize > 0)
- {
- assert((compiler->lvaOutgoingArgSpaceSize % STACK_ALIGN) == 0); // This must be true for the stack to remain
- // aligned
- inst_RV_IV(INS_add, REG_SPBASE, compiler->lvaOutgoingArgSpaceSize, EA_PTRSIZE);
- stackAdjustment += compiler->lvaOutgoingArgSpaceSize;
- }
-#endif
-
- if (size->IsCnsIntOrI())
- {
- // We should reach here only for non-zero, constant size allocations.
- assert(amount > 0);
- assert((amount % STACK_ALIGN) == 0);
- assert((amount % REGSIZE_BYTES) == 0);
-
-        // For small allocations we will generate up to six 'push 0' instructions inline
- size_t cntRegSizedWords = amount / REGSIZE_BYTES;
- if (cntRegSizedWords <= 6)
- {
- for (; cntRegSizedWords != 0; cntRegSizedWords--)
- {
- inst_IV(INS_push_hide, 0); // push_hide means don't track the stack
- }
- goto ALLOC_DONE;
- }
-
- bool doNoInitLessThanOnePageAlloc =
- !compiler->info.compInitMem && (amount < compiler->eeGetPageSize()); // must be < not <=
-
-#ifdef _TARGET_X86_
- bool needRegCntRegister = true;
-#else // !_TARGET_X86_
- bool needRegCntRegister = !doNoInitLessThanOnePageAlloc;
-#endif // !_TARGET_X86_
-
- if (needRegCntRegister)
- {
- // If compInitMem=true, we can reuse targetReg as regcnt.
- // Since size is a constant, regCnt is not yet initialized.
- assert(regCnt == REG_NA);
- if (compiler->info.compInitMem)
- {
- assert(genCountBits(tmpRegsMask) == 0);
- regCnt = targetReg;
- }
- else
- {
- assert(genCountBits(tmpRegsMask) >= 1);
- regMaskTP regCntMask = genFindLowestBit(tmpRegsMask);
- tmpRegsMask &= ~regCntMask;
- regCnt = genRegNumFromMask(regCntMask);
- }
- }
-
- if (doNoInitLessThanOnePageAlloc)
- {
- // Since the size is less than a page, simply adjust ESP.
- // ESP might already be in the guard page, so we must touch it BEFORE
- // the alloc, not after.
- CLANG_FORMAT_COMMENT_ANCHOR;
-
-#ifdef _TARGET_X86_
- // For x86, we don't want to use "sub ESP" because we don't want the emitter to track the adjustment
- // to ESP. So do the work in the count register.
- // TODO-CQ: manipulate ESP directly, to share code, reduce #ifdefs, and improve CQ. This would require
- // creating a way to temporarily turn off the emitter's tracking of ESP, maybe marking instrDescs as "don't
- // track".
- inst_RV_RV(INS_mov, regCnt, REG_SPBASE, TYP_I_IMPL);
- getEmitter()->emitIns_AR_R(INS_TEST, EA_4BYTE, REG_SPBASE, REG_SPBASE, 0);
- inst_RV_IV(INS_sub, regCnt, amount, EA_PTRSIZE);
- inst_RV_RV(INS_mov, REG_SPBASE, regCnt, TYP_I_IMPL);
-#else // !_TARGET_X86_
- getEmitter()->emitIns_AR_R(INS_TEST, EA_4BYTE, REG_SPBASE, REG_SPBASE, 0);
- inst_RV_IV(INS_sub, REG_SPBASE, amount, EA_PTRSIZE);
-#endif // !_TARGET_X86_
-
- goto ALLOC_DONE;
- }
-
- // else, "mov regCnt, amount"
-
- if (compiler->info.compInitMem)
- {
- // When initializing memory, we want 'amount' to be the loop count.
- assert((amount % STACK_ALIGN) == 0);
- amount /= STACK_ALIGN;
- }
-
- genSetRegToIcon(regCnt, amount, ((int)amount == amount) ? TYP_INT : TYP_LONG);
- }
-
- loop = genCreateTempLabel();
- if (compiler->info.compInitMem)
- {
- // At this point 'regCnt' is set to the number of loop iterations for this loop, if each
- // iteration zeros (and subtracts from the stack pointer) STACK_ALIGN bytes.
- // Since we have to zero out the allocated memory AND ensure that RSP is always valid
- // by tickling the pages, we will just push 0's on the stack.
-
- assert(genIsValidIntReg(regCnt));
-
- // Loop:
- genDefineTempLabel(loop);
-
-#if defined(_TARGET_AMD64_)
- // Push two 8-byte zeros. This matches the 16-byte STACK_ALIGN value.
- static_assert_no_msg(STACK_ALIGN == (REGSIZE_BYTES * 2));
- inst_IV(INS_push_hide, 0); // --- push 8-byte 0
- inst_IV(INS_push_hide, 0); // --- push 8-byte 0
-#elif defined(_TARGET_X86_)
- // Push a single 4-byte zero. This matches the 4-byte STACK_ALIGN value.
- static_assert_no_msg(STACK_ALIGN == REGSIZE_BYTES);
- inst_IV(INS_push_hide, 0); // --- push 4-byte 0
-#endif // _TARGET_X86_
-
- // Decrement the loop counter and loop if not done.
- inst_RV(INS_dec, regCnt, TYP_I_IMPL);
- inst_JMP(EJ_jne, loop);
- }
- else
- {
- // At this point 'regCnt' is set to the total number of bytes to localloc.
- //
- // We don't need to zero out the allocated memory. However, we do have
- // to tickle the pages to ensure that ESP is always valid and is
- // in sync with the "stack guard page". Note that in the worst
- // case ESP is on the last byte of the guard page. Thus you must
-        // touch ESP+0 first, not ESP+0x1000.
- //
- // Another subtlety is that you don't want ESP to be exactly on the
- // boundary of the guard page because PUSH is predecrement, thus
- // call setup would not touch the guard page but just beyond it
- //
- // Note that we go through a few hoops so that ESP never points to
- // illegal pages at any time during the tickling process
- //
- // neg REGCNT
- // add REGCNT, ESP // reg now holds ultimate ESP
-        // jb loop                   // result is smaller than original ESP (no wrap around)
-        // xor REGCNT, REGCNT        // Overflow, pick lowest possible number
- // loop:
- // test ESP, [ESP+0] // tickle the page
- // mov REGTMP, ESP
- // sub REGTMP, PAGE_SIZE
- // mov ESP, REGTMP
- // cmp ESP, REGCNT
- // jae loop
- //
-        // mov ESP, REGCNT
- // end:
- inst_RV(INS_NEG, regCnt, TYP_I_IMPL);
- inst_RV_RV(INS_add, regCnt, REG_SPBASE, TYP_I_IMPL);
- inst_JMP(EJ_jb, loop);
-
- instGen_Set_Reg_To_Zero(EA_PTRSIZE, regCnt);
-
- genDefineTempLabel(loop);
-
-        // Tickle the page at the current ESP, then decrement ESP by a page;
-        // note that the probe has to be done BEFORE the update of ESP since
-        // ESP might already be on the guard page. It is OK to leave
-        // the final value of ESP on the guard page.
- getEmitter()->emitIns_AR_R(INS_TEST, EA_4BYTE, REG_SPBASE, REG_SPBASE, 0);
-
- // This is a harmless trick to avoid the emitter trying to track the
- // decrement of the ESP - we do the subtraction in another reg instead
- // of adjusting ESP directly.
- assert(tmpRegsMask != RBM_NONE);
- assert(genCountBits(tmpRegsMask) == 1);
- regNumber regTmp = genRegNumFromMask(tmpRegsMask);
-
- inst_RV_RV(INS_mov, regTmp, REG_SPBASE, TYP_I_IMPL);
- inst_RV_IV(INS_sub, regTmp, compiler->eeGetPageSize(), EA_PTRSIZE);
- inst_RV_RV(INS_mov, REG_SPBASE, regTmp, TYP_I_IMPL);
-
- inst_RV_RV(INS_cmp, REG_SPBASE, regCnt, TYP_I_IMPL);
- inst_JMP(EJ_jae, loop);
-
- // Move the final value to ESP
- inst_RV_RV(INS_mov, REG_SPBASE, regCnt);
- }
-
-ALLOC_DONE:
- // Re-adjust SP to allocate out-going arg area
- if (stackAdjustment > 0)
- {
- assert((stackAdjustment % STACK_ALIGN) == 0); // This must be true for the stack to remain aligned
- inst_RV_IV(INS_sub, REG_SPBASE, stackAdjustment, EA_PTRSIZE);
- }
-
- // Return the stackalloc'ed address in result register.
- // TargetReg = RSP + stackAdjustment.
- getEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, targetReg, REG_SPBASE, stackAdjustment);
-
- if (endLabel != nullptr)
- {
- genDefineTempLabel(endLabel);
- }
-
-BAILOUT:
-
- // Write the lvaLocAllocSPvar stack frame slot
- noway_assert(compiler->lvaLocAllocSPvar != BAD_VAR_NUM);
- getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_SPBASE, compiler->lvaLocAllocSPvar, 0);
-
-#if STACK_PROBES
- if (compiler->opts.compNeedStackProbes)
- {
- genGenerateStackProbe();
- }
-#endif
-
-#ifdef DEBUG
- // Update new ESP
- if (compiler->opts.compStackCheckOnRet)
- {
- noway_assert(compiler->lvaReturnEspCheck != 0xCCCCCCCC &&
- compiler->lvaTable[compiler->lvaReturnEspCheck].lvDoNotEnregister &&
- compiler->lvaTable[compiler->lvaReturnEspCheck].lvOnFrame);
- getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_SPBASE, compiler->lvaReturnEspCheck, 0);
- }
-#endif
-
- genProduceReg(tree);
-}
-
-void CodeGen::genCodeForStoreBlk(GenTreeBlk* storeBlkNode)
-{
- if (storeBlkNode->gtBlkOpGcUnsafe)
- {
- getEmitter()->emitDisableGC();
- }
- bool isCopyBlk = storeBlkNode->OperIsCopyBlkOp();
-
- switch (storeBlkNode->gtBlkOpKind)
- {
-#ifdef _TARGET_AMD64_
- case GenTreeBlk::BlkOpKindHelper:
- if (isCopyBlk)
- {
- genCodeForCpBlk(storeBlkNode);
- }
- else
- {
- genCodeForInitBlk(storeBlkNode);
- }
- break;
-#endif // _TARGET_AMD64_
- case GenTreeBlk::BlkOpKindRepInstr:
- if (isCopyBlk)
- {
- genCodeForCpBlkRepMovs(storeBlkNode);
- }
- else
- {
- genCodeForInitBlkRepStos(storeBlkNode);
- }
- break;
- case GenTreeBlk::BlkOpKindUnroll:
- if (isCopyBlk)
- {
- genCodeForCpBlkUnroll(storeBlkNode);
- }
- else
- {
- genCodeForInitBlkUnroll(storeBlkNode);
- }
- break;
- default:
- unreached();
- }
- if (storeBlkNode->gtBlkOpGcUnsafe)
- {
- getEmitter()->emitEnableGC();
- }
-}
-
-// Generate code for InitBlk using rep stos.
-// Preconditions:
-// The size of the buffers must be a constant and also less than INITBLK_STOS_LIMIT bytes.
-//  For any size larger than that, we'll use the helper even if both the
-//  fill byte and the size are integer constants.
-void CodeGen::genCodeForInitBlkRepStos(GenTreeBlk* initBlkNode)
-{
- // Make sure we got the arguments of the initblk/initobj operation in the right registers
- unsigned size = initBlkNode->Size();
- GenTreePtr dstAddr = initBlkNode->Addr();
- GenTreePtr initVal = initBlkNode->Data();
-
-#ifdef DEBUG
- assert(!dstAddr->isContained());
- assert(!initVal->isContained());
-#ifdef _TARGET_AMD64_
- assert(size != 0);
-#endif
- if (initVal->IsCnsIntOrI())
- {
-#ifdef _TARGET_AMD64_
- assert(size > CPBLK_UNROLL_LIMIT && size < CPBLK_MOVS_LIMIT);
-#else
- assert(size > CPBLK_UNROLL_LIMIT);
-#endif
- }
-
-#endif // DEBUG
-
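-    // genConsumeBlockOp puts the destination address, fill value and byte count into RDI, RAX and RCX;
-    // rep stosb then stores AL into RCX consecutive bytes starting at [RDI].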
- genConsumeBlockOp(initBlkNode, REG_RDI, REG_RAX, REG_RCX);
- instGen(INS_r_stosb);
-}
-
-// Generate code for InitBlk by performing a loop unroll
-// Preconditions:
-// a) Both the size and fill byte value are integer constants.
-// b) The size of the struct to initialize is smaller than INITBLK_UNROLL_LIMIT bytes.
-//
-void CodeGen::genCodeForInitBlkUnroll(GenTreeBlk* initBlkNode)
-{
- // Make sure we got the arguments of the initblk/initobj operation in the right registers
- unsigned size = initBlkNode->Size();
- GenTreePtr dstAddr = initBlkNode->Addr();
- GenTreePtr initVal = initBlkNode->Data();
-
- assert(!dstAddr->isContained());
- assert(!initVal->isContained());
- assert(size != 0);
- assert(size <= INITBLK_UNROLL_LIMIT);
- assert(initVal->gtSkipReloadOrCopy()->IsCnsIntOrI());
-
- emitter* emit = getEmitter();
-
- genConsumeOperands(initBlkNode);
-
- // If the initVal was moved, or spilled and reloaded to a different register,
- // get the original initVal from below the GT_RELOAD, but only after capturing the valReg,
- // which needs to be the new register.
- regNumber valReg = initVal->gtRegNum;
- initVal = initVal->gtSkipReloadOrCopy();
-
- unsigned offset = 0;
-
- // Perform an unroll using SSE2 loads and stores.
- if (size >= XMM_REGSIZE_BYTES)
- {
- regNumber tmpReg = genRegNumFromMask(initBlkNode->gtRsvdRegs);
-
-#ifdef DEBUG
- assert(initBlkNode->gtRsvdRegs != RBM_NONE);
- assert(genCountBits(initBlkNode->gtRsvdRegs) == 1);
- assert(genIsValidFloatReg(tmpReg));
-#endif // DEBUG
-
- if (initVal->gtIntCon.gtIconVal != 0)
- {
- emit->emitIns_R_R(INS_mov_i2xmm, EA_PTRSIZE, tmpReg, valReg);
- emit->emitIns_R_R(INS_punpckldq, EA_8BYTE, tmpReg, tmpReg);
-#ifdef _TARGET_X86_
- // For x86, we need one more to convert it from 8 bytes to 16 bytes.
- emit->emitIns_R_R(INS_punpckldq, EA_8BYTE, tmpReg, tmpReg);
-#endif // _TARGET_X86_
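-            // tmpReg now holds the fill pattern replicated across the full 16-byte xmm register.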
- }
- else
- {
- emit->emitIns_R_R(INS_xorpd, EA_8BYTE, tmpReg, tmpReg);
- }
-
- // Determine how many 16 byte slots we're going to fill using SSE movs.
- size_t slots = size / XMM_REGSIZE_BYTES;
-
- while (slots-- > 0)
- {
- emit->emitIns_AR_R(INS_movdqu, EA_8BYTE, tmpReg, dstAddr->gtRegNum, offset);
- offset += XMM_REGSIZE_BYTES;
- }
- }
-
- // Fill the remainder (or a < 16 byte sized struct)
- if ((size & 8) != 0)
- {
-#ifdef _TARGET_X86_
- // TODO-X86-CQ: [1091735] Revisit block ops codegen. One example: use movq for 8 byte movs.
- emit->emitIns_AR_R(INS_mov, EA_4BYTE, valReg, dstAddr->gtRegNum, offset);
- offset += 4;
- emit->emitIns_AR_R(INS_mov, EA_4BYTE, valReg, dstAddr->gtRegNum, offset);
- offset += 4;
-#else // !_TARGET_X86_
- emit->emitIns_AR_R(INS_mov, EA_8BYTE, valReg, dstAddr->gtRegNum, offset);
- offset += 8;
-#endif // !_TARGET_X86_
- }
- if ((size & 4) != 0)
- {
- emit->emitIns_AR_R(INS_mov, EA_4BYTE, valReg, dstAddr->gtRegNum, offset);
- offset += 4;
- }
- if ((size & 2) != 0)
- {
- emit->emitIns_AR_R(INS_mov, EA_2BYTE, valReg, dstAddr->gtRegNum, offset);
- offset += 2;
- }
- if ((size & 1) != 0)
- {
- emit->emitIns_AR_R(INS_mov, EA_1BYTE, valReg, dstAddr->gtRegNum, offset);
- }
-}
-
-// Generates code for InitBlk by calling the VM memset helper function.
-// Preconditions:
-// a) The size argument of the InitBlk is not an integer constant.
-// b) The size argument of the InitBlk is >= INITBLK_STOS_LIMIT bytes.
-void CodeGen::genCodeForInitBlk(GenTreeBlk* initBlkNode)
-{
-#ifdef _TARGET_AMD64_
- // Make sure we got the arguments of the initblk operation in the right registers
- unsigned blockSize = initBlkNode->Size();
- GenTreePtr dstAddr = initBlkNode->Addr();
- GenTreePtr initVal = initBlkNode->Data();
-
- assert(!dstAddr->isContained());
- assert(!initVal->isContained());
-
- if (blockSize != 0)
- {
- assert(blockSize >= CPBLK_MOVS_LIMIT);
- }
-
- genConsumeBlockOp(initBlkNode, REG_ARG_0, REG_ARG_1, REG_ARG_2);
-
- genEmitHelperCall(CORINFO_HELP_MEMSET, 0, EA_UNKNOWN);
-#else // !_TARGET_AMD64_
- NYI_X86("Helper call for InitBlk");
-#endif // !_TARGET_AMD64_
-}
-
-// Generate code for a load from some address + offset
-// baseNode: tree node which can be either a local address or arbitrary node
-// offset: distance from the baseNode from which to load
-void CodeGen::genCodeForLoadOffset(instruction ins, emitAttr size, regNumber dst, GenTree* baseNode, unsigned offset)
-{
- emitter* emit = getEmitter();
-
- if (baseNode->OperIsLocalAddr())
- {
- if (baseNode->gtOper == GT_LCL_FLD_ADDR)
- {
- offset += baseNode->gtLclFld.gtLclOffs;
- }
- emit->emitIns_R_S(ins, size, dst, baseNode->gtLclVarCommon.gtLclNum, offset);
- }
- else
- {
- emit->emitIns_R_AR(ins, size, dst, baseNode->gtRegNum, offset);
- }
-}
-
-//------------------------------------------------------------------------
-// genCodeForStoreOffset: Generate code to store a reg to [base + offset].
-//
-// Arguments:
-// ins - the instruction to generate.
-// size - the size that needs to be stored.
-// src - the register which needs to be stored.
-// baseNode - the base, relative to which to store the src register.
-// offset - the offset that is added to the baseNode to calculate the address to store into.
-//
-void CodeGen::genCodeForStoreOffset(instruction ins, emitAttr size, regNumber src, GenTree* baseNode, unsigned offset)
-{
- emitter* emit = getEmitter();
-
- if (baseNode->OperIsLocalAddr())
- {
- if (baseNode->gtOper == GT_LCL_FLD_ADDR)
- {
- offset += baseNode->gtLclFld.gtLclOffs;
- }
-
- emit->emitIns_S_R(ins, size, src, baseNode->AsLclVarCommon()->GetLclNum(), offset);
- }
- else
- {
- emit->emitIns_AR_R(ins, size, src, baseNode->gtRegNum, offset);
- }
-}
-
-// Generates CpBlk code by performing a loop unroll
-// Preconditions:
-// The size argument of the CpBlk node is a constant and <= 64 bytes.
-// This may seem small but covers >95% of the cases in several framework assemblies.
-//
-void CodeGen::genCodeForCpBlkUnroll(GenTreeBlk* cpBlkNode)
-{
- // Make sure we got the arguments of the cpblk operation in the right registers
- unsigned size = cpBlkNode->Size();
- GenTreePtr dstAddr = cpBlkNode->Addr();
- GenTreePtr source = cpBlkNode->Data();
- GenTreePtr srcAddr = nullptr;
- assert(size <= CPBLK_UNROLL_LIMIT);
-
- emitter* emit = getEmitter();
-
- if (source->gtOper == GT_IND)
- {
- srcAddr = source->gtGetOp1();
- if (!srcAddr->isContained())
- {
- genConsumeReg(srcAddr);
- }
- }
- else
- {
- noway_assert(source->IsLocal());
- // TODO-Cleanup: Consider making the addrForm() method in Rationalize public, e.g. in GenTree.
- // OR: transform source to GT_IND(GT_LCL_VAR_ADDR)
- if (source->OperGet() == GT_LCL_VAR)
- {
- source->SetOper(GT_LCL_VAR_ADDR);
- }
- else
- {
- assert(source->OperGet() == GT_LCL_FLD);
- source->SetOper(GT_LCL_FLD_ADDR);
- }
- srcAddr = source;
- }
-
- if (!dstAddr->isContained())
- {
- genConsumeReg(dstAddr);
- }
-
- unsigned offset = 0;
-
-    // If the size of this struct is at least 16 bytes,
-    // let's use SSE2 so we can do the loads and stores
-    // 16 bytes at a time.
-
- if (size >= XMM_REGSIZE_BYTES)
- {
- assert(cpBlkNode->gtRsvdRegs != RBM_NONE);
- regNumber xmmReg = genRegNumFromMask(cpBlkNode->gtRsvdRegs & RBM_ALLFLOAT);
- assert(genIsValidFloatReg(xmmReg));
- size_t slots = size / XMM_REGSIZE_BYTES;
-
- // TODO: In the below code the load and store instructions are for 16 bytes, but the
- // type is EA_8BYTE. The movdqa/u are 16 byte instructions, so it works, but
- // this probably needs to be changed.
- while (slots-- > 0)
- {
- // Load
- genCodeForLoadOffset(INS_movdqu, EA_8BYTE, xmmReg, srcAddr, offset);
- // Store
- genCodeForStoreOffset(INS_movdqu, EA_8BYTE, xmmReg, dstAddr, offset);
- offset += XMM_REGSIZE_BYTES;
- }
- }
-
- // Fill the remainder (15 bytes or less) if there's one.
- if ((size & 0xf) != 0)
- {
- // Grab the integer temp register to emit the remaining loads and stores.
- regNumber tmpReg = genRegNumFromMask(cpBlkNode->gtRsvdRegs & RBM_ALLINT);
-
- if ((size & 8) != 0)
- {
-#ifdef _TARGET_X86_
- // TODO-X86-CQ: [1091735] Revisit block ops codegen. One example: use movq for 8 byte movs.
- for (unsigned savedOffs = offset; offset < savedOffs + 8; offset += 4)
- {
- genCodeForLoadOffset(INS_mov, EA_4BYTE, tmpReg, srcAddr, offset);
- genCodeForStoreOffset(INS_mov, EA_4BYTE, tmpReg, dstAddr, offset);
- }
-#else // !_TARGET_X86_
- genCodeForLoadOffset(INS_mov, EA_8BYTE, tmpReg, srcAddr, offset);
- genCodeForStoreOffset(INS_mov, EA_8BYTE, tmpReg, dstAddr, offset);
- offset += 8;
-#endif // !_TARGET_X86_
- }
- if ((size & 4) != 0)
- {
- genCodeForLoadOffset(INS_mov, EA_4BYTE, tmpReg, srcAddr, offset);
- genCodeForStoreOffset(INS_mov, EA_4BYTE, tmpReg, dstAddr, offset);
- offset += 4;
- }
- if ((size & 2) != 0)
- {
- genCodeForLoadOffset(INS_mov, EA_2BYTE, tmpReg, srcAddr, offset);
- genCodeForStoreOffset(INS_mov, EA_2BYTE, tmpReg, dstAddr, offset);
- offset += 2;
- }
- if ((size & 1) != 0)
- {
- genCodeForLoadOffset(INS_mov, EA_1BYTE, tmpReg, srcAddr, offset);
- genCodeForStoreOffset(INS_mov, EA_1BYTE, tmpReg, dstAddr, offset);
- }
- }
-}
-
-// Generate code for CpBlk by using rep movs
-// Preconditions:
-// The size argument of the CpBlk is a constant and is between
-// CPBLK_UNROLL_LIMIT and CPBLK_MOVS_LIMIT bytes.
-void CodeGen::genCodeForCpBlkRepMovs(GenTreeBlk* cpBlkNode)
-{
- // Make sure we got the arguments of the cpblk operation in the right registers
- unsigned size = cpBlkNode->Size();
- GenTreePtr dstAddr = cpBlkNode->Addr();
- GenTreePtr source = cpBlkNode->Data();
- GenTreePtr srcAddr = nullptr;
-
-#ifdef DEBUG
- assert(!dstAddr->isContained());
- assert(source->isContained());
-
-#ifdef _TARGET_X86_
- if (size == 0)
- {
- noway_assert(cpBlkNode->OperGet() == GT_STORE_DYN_BLK);
- }
- else
-#endif
- {
-#ifdef _TARGET_AMD64_
- assert(size > CPBLK_UNROLL_LIMIT && size < CPBLK_MOVS_LIMIT);
-#else
- assert(size > CPBLK_UNROLL_LIMIT);
-#endif
- }
-#endif // DEBUG
-
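-    // genConsumeBlockOp puts the destination address, source address and byte count into RDI, RSI and RCX;
-    // rep movsb then copies RCX bytes from [RSI] to [RDI].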
- genConsumeBlockOp(cpBlkNode, REG_RDI, REG_RSI, REG_RCX);
- instGen(INS_r_movsb);
-}
-
-#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
-
-//---------------------------------------------------------------------------------------------------------------//
-// genStructPutArgUnroll: Generates code for passing a struct arg on stack by value using loop unrolling.
-//
-// Arguments:
-// putArgNode - the PutArgStk tree.
-// baseVarNum - the base var number, relative to which the by-val struct will be copied on the stack.
-//
-// TODO-Amd64-Unix: Try to share code with copyblk.
-// Need refactoring of copyblk before it could be used for putarg_stk.
-// The difference for now is that a putarg_stk contains its children, while cpyblk does not.
-// This creates differences in code. After some significant refactoring it could be reused.
-//
-void CodeGen::genStructPutArgUnroll(GenTreePutArgStk* putArgNode, unsigned baseVarNum)
-{
- // We will never call this method for SIMD types, which are stored directly
- // in genPutStructArgStk().
- noway_assert(putArgNode->TypeGet() == TYP_STRUCT);
-
- // Make sure we got the arguments of the cpblk operation in the right registers
- GenTreePtr dstAddr = putArgNode;
- GenTreePtr src = putArgNode->gtOp.gtOp1;
-
- size_t size = putArgNode->getArgSize();
- assert(size <= CPBLK_UNROLL_LIMIT);
-
- emitter* emit = getEmitter();
- unsigned putArgOffset = putArgNode->getArgOffset();
-
- assert(src->isContained());
-
- assert(src->gtOper == GT_OBJ);
-
- if (!src->gtOp.gtOp1->isContained())
- {
- genConsumeReg(src->gtOp.gtOp1);
- }
-
- unsigned offset = 0;
-
-    // If the size of this struct is at least 16 bytes,
-    // let's use SSE2 so we can do the loads and stores
-    // 16 bytes at a time.
- if (size >= XMM_REGSIZE_BYTES)
- {
- assert(putArgNode->gtRsvdRegs != RBM_NONE);
- regNumber xmmReg = genRegNumFromMask(putArgNode->gtRsvdRegs & RBM_ALLFLOAT);
- assert(genIsValidFloatReg(xmmReg));
- size_t slots = size / XMM_REGSIZE_BYTES;
-
- assert(putArgNode->gtGetOp1()->isContained());
- assert(putArgNode->gtGetOp1()->gtOp.gtOper == GT_OBJ);
-
- // TODO: In the below code the load and store instructions are for 16 bytes, but the
- // type is EA_8BYTE. The movdqa/u are 16 byte instructions, so it works, but
- // this probably needs to be changed.
- while (slots-- > 0)
- {
- // Load
- genCodeForLoadOffset(INS_movdqu, EA_8BYTE, xmmReg, src->gtGetOp1(),
- offset); // Load the address of the child of the Obj node.
-
- // Store
- emit->emitIns_S_R(INS_movdqu, EA_8BYTE, xmmReg, baseVarNum, putArgOffset + offset);
-
- offset += XMM_REGSIZE_BYTES;
- }
- }
-
- // Fill the remainder (15 bytes or less) if there's one.
- if ((size & 0xf) != 0)
- {
- // Grab the integer temp register to emit the remaining loads and stores.
- regNumber tmpReg = genRegNumFromMask(putArgNode->gtRsvdRegs & RBM_ALLINT);
- assert(genIsValidIntReg(tmpReg));
-
- if ((size & 8) != 0)
- {
- genCodeForLoadOffset(INS_mov, EA_8BYTE, tmpReg, src->gtOp.gtOp1, offset);
-
- emit->emitIns_S_R(INS_mov, EA_8BYTE, tmpReg, baseVarNum, putArgOffset + offset);
-
- offset += 8;
- }
-
- if ((size & 4) != 0)
- {
- genCodeForLoadOffset(INS_mov, EA_4BYTE, tmpReg, src->gtOp.gtOp1, offset);
-
- emit->emitIns_S_R(INS_mov, EA_4BYTE, tmpReg, baseVarNum, putArgOffset + offset);
-
- offset += 4;
- }
-
- if ((size & 2) != 0)
- {
- genCodeForLoadOffset(INS_mov, EA_2BYTE, tmpReg, src->gtOp.gtOp1, offset);
-
- emit->emitIns_S_R(INS_mov, EA_2BYTE, tmpReg, baseVarNum, putArgOffset + offset);
-
- offset += 2;
- }
-
- if ((size & 1) != 0)
- {
- genCodeForLoadOffset(INS_mov, EA_1BYTE, tmpReg, src->gtOp.gtOp1, offset);
- emit->emitIns_S_R(INS_mov, EA_1BYTE, tmpReg, baseVarNum, putArgOffset + offset);
- }
- }
-}
-
-//------------------------------------------------------------------------
-// genStructPutArgRepMovs: Generates code for passing a struct arg by value on stack using Rep Movs.
-//
-// Arguments:
-// putArgNode - the PutArgStk tree.
-// baseVarNum - the base var number, relative to which the by-val struct bits will go.
-//
-// Preconditions:
-// The size argument of the PutArgStk (for structs) is a constant and is between
-// CPBLK_UNROLL_LIMIT and CPBLK_MOVS_LIMIT bytes.
-//
-void CodeGen::genStructPutArgRepMovs(GenTreePutArgStk* putArgNode, unsigned baseVarNum)
-{
- assert(putArgNode->TypeGet() == TYP_STRUCT);
- assert(putArgNode->getArgSize() > CPBLK_UNROLL_LIMIT);
- assert(baseVarNum != BAD_VAR_NUM);
-
- // Make sure we got the arguments of the cpblk operation in the right registers
- GenTreePtr dstAddr = putArgNode;
- GenTreePtr srcAddr = putArgNode->gtGetOp1();
-
- // Validate state.
- assert(putArgNode->gtRsvdRegs == (RBM_RDI | RBM_RCX | RBM_RSI));
- assert(srcAddr->isContained());
-
- genConsumePutStructArgStk(putArgNode, REG_RDI, REG_RSI, REG_RCX, baseVarNum);
- instGen(INS_r_movsb);
-}
-
-//------------------------------------------------------------------------
-// If any Vector3 args are on the stack and they are not pass-by-ref, the upper 32 bits
-// must be cleared to zero. The native compiler doesn't clear the upper bits,
-// and there is no way to know whether the caller is native or not. So, the upper
-// 32 bits of a Vector3 argument on the stack are always cleared to zero.
-#ifdef FEATURE_SIMD
-void CodeGen::genClearStackVec3ArgUpperBits()
-{
-#ifdef DEBUG
- if (verbose)
- printf("*************** In genClearStackVec3ArgUpperBits()\n");
-#endif
-
- assert(compiler->compGeneratingProlog);
-
- unsigned varNum = 0;
-
- for (unsigned varNum = 0; varNum < compiler->info.compArgsCount; varNum++)
- {
- LclVarDsc* varDsc = &(compiler->lvaTable[varNum]);
- assert(varDsc->lvIsParam);
-
-        // Does the var have a SIMD12 type?
- if (varDsc->lvType != TYP_SIMD12)
- {
- continue;
- }
-
- if (!varDsc->lvIsRegArg)
- {
- // Clear the upper 32 bits by mov dword ptr [V_ARG_BASE+0xC], 0
- getEmitter()->emitIns_S_I(ins_Store(TYP_INT), EA_4BYTE, varNum, genTypeSize(TYP_FLOAT) * 3, 0);
- }
- else
- {
- // Assume that for x64 linux, an argument is fully in registers
- // or fully on stack.
- regNumber argReg = varDsc->GetOtherArgReg();
-
- // Clear the upper 32 bits by two shift instructions.
- // argReg = argReg << 96
- getEmitter()->emitIns_R_I(INS_pslldq, emitActualTypeSize(TYP_SIMD12), argReg, 12);
- // argReg = argReg >> 96
- getEmitter()->emitIns_R_I(INS_psrldq, emitActualTypeSize(TYP_SIMD12), argReg, 12);
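-            // The net effect of the two 12-byte shifts is to keep only the low 32 bits of argReg
-            // (the third float of the Vector3) and zero everything above them.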
- }
- }
-}
-#endif // FEATURE_SIMD
-#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
-
-// Generate code for CpObj nodes which copy structs that have interleaved
-// GC pointers.
-// This will generate a sequence of movsq instructions for runs of non-gc members,
-// and calls to the CORINFO_HELP_ASSIGN_BYREF helper otherwise.
-void CodeGen::genCodeForCpObj(GenTreeObj* cpObjNode)
-{
- // Make sure we got the arguments of the cpobj operation in the right registers
- GenTreePtr dstAddr = cpObjNode->Addr();
- GenTreePtr source = cpObjNode->Data();
- GenTreePtr srcAddr = nullptr;
- bool sourceIsLocal = false;
-
- assert(source->isContained());
- if (source->gtOper == GT_IND)
- {
- srcAddr = source->gtGetOp1();
- assert(!srcAddr->isContained());
- }
- else
- {
- noway_assert(source->IsLocal());
- sourceIsLocal = true;
- // TODO: Consider making the addrForm() method in Rationalize public, e.g. in GenTree.
- // OR: transform source to GT_IND(GT_LCL_VAR_ADDR)
- if (source->OperGet() == GT_LCL_VAR)
- {
- source->SetOper(GT_LCL_VAR_ADDR);
- }
- else
- {
- assert(source->OperGet() == GT_LCL_FLD);
- source->SetOper(GT_LCL_FLD_ADDR);
- }
- srcAddr = source;
- }
-
- bool dstOnStack = dstAddr->OperIsLocalAddr();
-
-#ifdef DEBUG
- bool isRepMovsqUsed = false;
-
- assert(!dstAddr->isContained());
-
- // If the GenTree node has data about GC pointers, this means we're dealing
- // with CpObj, so this requires special logic.
- assert(cpObjNode->gtGcPtrCount > 0);
-
- // MovSq instruction is used for copying non-gcref fields and it needs
- // src = RSI and dst = RDI.
- // Either these registers must not contain lclVars, or they must be dying or marked for spill.
- // This is because these registers are incremented as we go through the struct.
- GenTree* actualSrcAddr = srcAddr->gtSkipReloadOrCopy();
- GenTree* actualDstAddr = dstAddr->gtSkipReloadOrCopy();
- unsigned srcLclVarNum = BAD_VAR_NUM;
- unsigned dstLclVarNum = BAD_VAR_NUM;
- bool isSrcAddrLiveOut = false;
- bool isDstAddrLiveOut = false;
- if (genIsRegCandidateLocal(actualSrcAddr))
- {
- srcLclVarNum = actualSrcAddr->AsLclVarCommon()->gtLclNum;
- isSrcAddrLiveOut = ((actualSrcAddr->gtFlags & (GTF_VAR_DEATH | GTF_SPILL)) == 0);
- }
- if (genIsRegCandidateLocal(actualDstAddr))
- {
- dstLclVarNum = actualDstAddr->AsLclVarCommon()->gtLclNum;
- isDstAddrLiveOut = ((actualDstAddr->gtFlags & (GTF_VAR_DEATH | GTF_SPILL)) == 0);
- }
- assert((actualSrcAddr->gtRegNum != REG_RSI) || !isSrcAddrLiveOut ||
- ((srcLclVarNum == dstLclVarNum) && !isDstAddrLiveOut));
- assert((actualDstAddr->gtRegNum != REG_RDI) || !isDstAddrLiveOut ||
- ((srcLclVarNum == dstLclVarNum) && !isSrcAddrLiveOut));
-#endif // DEBUG
-
- // Consume these registers.
- // They may now contain gc pointers (depending on their type; gcMarkRegPtrVal will "do the right thing").
- if (sourceIsLocal)
- {
- inst_RV_TT(INS_lea, REG_RSI, source, 0, EA_BYREF);
- genConsumeBlockOp(cpObjNode, REG_RDI, REG_NA, REG_NA);
- }
- else
- {
- genConsumeBlockOp(cpObjNode, REG_RDI, REG_RSI, REG_NA);
- }
- gcInfo.gcMarkRegPtrVal(REG_RSI, srcAddr->TypeGet());
- gcInfo.gcMarkRegPtrVal(REG_RDI, dstAddr->TypeGet());
-
- unsigned slots = cpObjNode->gtSlots;
-
- // If we can prove it's on the stack we don't need to use the write barrier.
- if (dstOnStack)
- {
- if (slots >= CPOBJ_NONGC_SLOTS_LIMIT)
- {
-#ifdef DEBUG
- // If the destination of the CpObj is on the stack
- // make sure we allocated RCX to emit rep movsq.
- regNumber tmpReg = genRegNumFromMask(cpObjNode->gtRsvdRegs & RBM_ALLINT);
- assert(tmpReg == REG_RCX);
- isRepMovsqUsed = true;
-#endif // DEBUG
-
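-            // rep movsq copies RCX pointer-sized words from [RSI] to [RDI], advancing both pointers.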
- getEmitter()->emitIns_R_I(INS_mov, EA_4BYTE, REG_RCX, slots);
- instGen(INS_r_movsq);
- }
- else
- {
- // For small structs, it's better to emit a sequence of movsq than to
- // emit a rep movsq instruction.
- while (slots > 0)
- {
- instGen(INS_movsq);
- slots--;
- }
- }
- }
- else
- {
- BYTE* gcPtrs = cpObjNode->gtGcPtrs;
- unsigned gcPtrCount = cpObjNode->gtGcPtrCount;
-
- unsigned i = 0;
- while (i < slots)
- {
- switch (gcPtrs[i])
- {
- case TYPE_GC_NONE:
- // Let's see if we can use rep movsq instead of a sequence of movsq instructions
- // to save cycles and code size.
- {
- unsigned nonGcSlotCount = 0;
-
- do
- {
- nonGcSlotCount++;
- i++;
- } while (i < slots && gcPtrs[i] == TYPE_GC_NONE);
-
- // If we have a very small contiguous non-gc region, it's better just to
- // emit a sequence of movsq instructions
- if (nonGcSlotCount < CPOBJ_NONGC_SLOTS_LIMIT)
- {
- while (nonGcSlotCount > 0)
- {
- instGen(INS_movsq);
- nonGcSlotCount--;
- }
- }
- else
- {
-#ifdef DEBUG
- // Otherwise, we can save code-size and improve CQ by emitting
- // rep movsq
- regNumber tmpReg = genRegNumFromMask(cpObjNode->gtRsvdRegs & RBM_ALLINT);
- assert(tmpReg == REG_RCX);
- isRepMovsqUsed = true;
-#endif // DEBUG
- getEmitter()->emitIns_R_I(INS_mov, EA_4BYTE, REG_RCX, nonGcSlotCount);
- instGen(INS_r_movsq);
- }
- }
- break;
- default:
-                    // We have a GC pointer; call the byref assignment (write barrier) helper.
- genEmitHelperCall(CORINFO_HELP_ASSIGN_BYREF, 0, EA_PTRSIZE);
- gcPtrCount--;
- i++;
- }
- }
-
- assert(gcPtrCount == 0);
- }
-
- // Clear the gcInfo for RSI and RDI.
- // While we normally update GC info prior to the last instruction that uses them,
- // these actually live into the helper call.
- gcInfo.gcMarkRegSetNpt(RBM_RSI);
- gcInfo.gcMarkRegSetNpt(RBM_RDI);
-}
-
-// Generate code for a CpBlk node by the means of the VM memcpy helper call
-// Preconditions:
-// a) The size argument of the CpBlk is not an integer constant
-// b) The size argument is a constant but is larger than CPBLK_MOVS_LIMIT bytes.
-void CodeGen::genCodeForCpBlk(GenTreeBlk* cpBlkNode)
-{
-#ifdef _TARGET_AMD64_
- // Make sure we got the arguments of the cpblk operation in the right registers
- unsigned blockSize = cpBlkNode->Size();
- GenTreePtr dstAddr = cpBlkNode->Addr();
- GenTreePtr source = cpBlkNode->Data();
- GenTreePtr srcAddr = nullptr;
-
- // Size goes in arg2
- if (blockSize != 0)
- {
- assert(blockSize >= CPBLK_MOVS_LIMIT);
- assert((cpBlkNode->gtRsvdRegs & RBM_ARG_2) != 0);
- }
- else
- {
- noway_assert(cpBlkNode->gtOper == GT_STORE_DYN_BLK);
- }
-
- // Source address goes in arg1
- if (source->gtOper == GT_IND)
- {
- srcAddr = source->gtGetOp1();
- assert(!srcAddr->isContained());
- }
- else
- {
- noway_assert(source->IsLocal());
- assert((cpBlkNode->gtRsvdRegs & RBM_ARG_1) != 0);
- inst_RV_TT(INS_lea, REG_ARG_1, source, 0, EA_BYREF);
- }
-
- genConsumeBlockOp(cpBlkNode, REG_ARG_0, REG_ARG_1, REG_ARG_2);
-
- genEmitHelperCall(CORINFO_HELP_MEMCPY, 0, EA_UNKNOWN);
-#else // !_TARGET_AMD64_
- noway_assert(false && "Helper call for CpBlk is not needed.");
-#endif // !_TARGET_AMD64_
-}
-
-// generate code to do a switch statement based on a table of ip-relative offsets
-void CodeGen::genTableBasedSwitch(GenTree* treeNode)
-{
- genConsumeOperands(treeNode->AsOp());
- regNumber idxReg = treeNode->gtOp.gtOp1->gtRegNum;
- regNumber baseReg = treeNode->gtOp.gtOp2->gtRegNum;
-
- regNumber tmpReg = genRegNumFromMask(treeNode->gtRsvdRegs);
-
- // load the ip-relative offset (which is relative to start of fgFirstBB)
- getEmitter()->emitIns_R_ARX(INS_mov, EA_4BYTE, baseReg, baseReg, idxReg, 4, 0);
-
- // add it to the absolute address of fgFirstBB
- compiler->fgFirstBB->bbFlags |= BBF_JMP_TARGET;
- getEmitter()->emitIns_R_L(INS_lea, EA_PTR_DSP_RELOC, compiler->fgFirstBB, tmpReg);
- getEmitter()->emitIns_R_R(INS_add, EA_PTRSIZE, baseReg, tmpReg);
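-    // baseReg now holds the absolute address of the target block.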
- // jmp baseReg
- getEmitter()->emitIns_R(INS_i_jmp, emitTypeSize(TYP_I_IMPL), baseReg);
-}
-
-// emits the table and an instruction to get the address of the first element
-void CodeGen::genJumpTable(GenTree* treeNode)
-{
- noway_assert(compiler->compCurBB->bbJumpKind == BBJ_SWITCH);
- assert(treeNode->OperGet() == GT_JMPTABLE);
-
- unsigned jumpCount = compiler->compCurBB->bbJumpSwt->bbsCount;
- BasicBlock** jumpTable = compiler->compCurBB->bbJumpSwt->bbsDstTab;
- unsigned jmpTabOffs;
- unsigned jmpTabBase;
-
- jmpTabBase = getEmitter()->emitBBTableDataGenBeg(jumpCount, true);
-
- jmpTabOffs = 0;
-
- JITDUMP("\n J_M%03u_DS%02u LABEL DWORD\n", Compiler::s_compMethodsCount, jmpTabBase);
-
- for (unsigned i = 0; i < jumpCount; i++)
- {
- BasicBlock* target = *jumpTable++;
- noway_assert(target->bbFlags & BBF_JMP_TARGET);
-
- JITDUMP(" DD L_M%03u_BB%02u\n", Compiler::s_compMethodsCount, target->bbNum);
-
- getEmitter()->emitDataGenData(i, target);
- };
-
- getEmitter()->emitDataGenEnd();
-
- // Access to inline data is 'abstracted' by a special type of static member
- // (produced by eeFindJitDataOffs) which the emitter recognizes as being a reference
- // to constant data, not a real static field.
- getEmitter()->emitIns_R_C(INS_lea, emitTypeSize(TYP_I_IMPL), treeNode->gtRegNum,
- compiler->eeFindJitDataOffs(jmpTabBase), 0);
- genProduceReg(treeNode);
-}
-
-// generate code for the locked operations:
-// GT_LOCKADD, GT_XCHG, GT_XADD
-void CodeGen::genLockedInstructions(GenTree* treeNode)
-{
- GenTree* data = treeNode->gtOp.gtOp2;
- GenTree* addr = treeNode->gtOp.gtOp1;
- regNumber targetReg = treeNode->gtRegNum;
- regNumber dataReg = data->gtRegNum;
- regNumber addrReg = addr->gtRegNum;
- instruction ins;
-
- // all of these nodes implicitly do an indirection on op1
- // so create a temporary node to feed into the pattern matching
- GenTreeIndir i = indirForm(data->TypeGet(), addr);
- genConsumeReg(addr);
-
- // The register allocator should have extended the lifetime of the address
- // so that it is not used as the target.
- noway_assert(addrReg != targetReg);
-
- // If data is a lclVar that's not a last use, we'd better have allocated a register
- // for the result (except in the case of GT_LOCKADD which does not produce a register result).
- assert(targetReg != REG_NA || treeNode->OperGet() == GT_LOCKADD || !genIsRegCandidateLocal(data) ||
- (data->gtFlags & GTF_VAR_DEATH) != 0);
-
- genConsumeIfReg(data);
- if (targetReg != REG_NA && dataReg != REG_NA && dataReg != targetReg)
- {
- inst_RV_RV(ins_Copy(data->TypeGet()), targetReg, dataReg);
- data->gtRegNum = targetReg;
-
- // TODO-XArch-Cleanup: Consider whether it is worth it, for debugging purposes, to restore the
- // original gtRegNum on data, after calling emitInsBinary below.
- }
- switch (treeNode->OperGet())
- {
- case GT_LOCKADD:
- instGen(INS_lock);
- ins = INS_add;
- break;
- case GT_XCHG:
- // lock is implied by xchg
- ins = INS_xchg;
- break;
- case GT_XADD:
- instGen(INS_lock);
- ins = INS_xadd;
- break;
- default:
- unreached();
- }
- getEmitter()->emitInsBinary(ins, emitTypeSize(data), &i, data);
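-    // For GT_XCHG and GT_XADD the original value of the memory location ends up in the data register
-    // (which was copied into targetReg above when needed); GT_LOCKADD produces no register result.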
-
- if (treeNode->gtRegNum != REG_NA)
- {
- genProduceReg(treeNode);
- }
-}
-
-// generate code for BoundsCheck nodes
-void CodeGen::genRangeCheck(GenTreePtr oper)
-{
-#ifdef FEATURE_SIMD
- noway_assert(oper->OperGet() == GT_ARR_BOUNDS_CHECK || oper->OperGet() == GT_SIMD_CHK);
-#else // !FEATURE_SIMD
- noway_assert(oper->OperGet() == GT_ARR_BOUNDS_CHECK);
-#endif // !FEATURE_SIMD
-
- GenTreeBoundsChk* bndsChk = oper->AsBoundsChk();
-
- GenTreePtr arrLen = bndsChk->gtArrLen;
- GenTreePtr arrIndex = bndsChk->gtIndex;
- GenTreePtr arrRef = nullptr;
- int lenOffset = 0;
-
- GenTree * src1, *src2;
- emitJumpKind jmpKind;
-
- genConsumeRegs(arrLen);
- genConsumeRegs(arrIndex);
-
- if (arrIndex->isContainedIntOrIImmed())
- {
- // arrIndex is a contained constant. In this case
- // we will generate one of the following
- // cmp [mem], immed (if arrLen is a memory op)
- // cmp reg, immed (if arrLen is in a reg)
- //
- // That is arrLen cannot be a contained immed.
- assert(!arrLen->isContainedIntOrIImmed());
-
- src1 = arrLen;
- src2 = arrIndex;
- jmpKind = EJ_jbe;
- }
- else
- {
- // arrIndex could either be a contained memory op or a reg
- // In this case we will generate one of the following
- // cmp [mem], immed (if arrLen is a constant)
- // cmp [mem], reg (if arrLen is in a reg)
- // cmp reg, immed (if arrIndex is in a reg)
-        // cmp reg1, reg2 (if arrIndex is in reg1)
- // cmp reg, [mem] (if arrLen is a memory op)
- //
- // That is only one of arrIndex or arrLen can be a memory op.
- assert(!arrIndex->isContainedMemoryOp() || !arrLen->isContainedMemoryOp());
-
- src1 = arrIndex;
- src2 = arrLen;
- jmpKind = EJ_jae;
- }
-
- var_types bndsChkType = src2->TypeGet();
-#if DEBUG
- // Bounds checks can only be 32 or 64 bit sized comparisons.
- assert(bndsChkType == TYP_INT || bndsChkType == TYP_LONG);
-
-    // The type of the bounds check should always be wide enough to compare against the index.
- assert(emitTypeSize(bndsChkType) >= emitTypeSize(src1->TypeGet()));
-#endif // DEBUG
-
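-    // The branch below uses an unsigned condition (jae/jbe); a negative index appears as a very
-    // large unsigned value, so a single compare covers both the lower and the upper bound.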
- getEmitter()->emitInsBinary(INS_cmp, emitTypeSize(bndsChkType), src1, src2);
- genJumpToThrowHlpBlk(jmpKind, bndsChk->gtThrowKind, bndsChk->gtIndRngFailBB);
-}
-
-//------------------------------------------------------------------------
-// genOffsetOfMDArrayLowerBound: Returns the offset from the Array object to the
-// lower bound for the given dimension.
-//
-// Arguments:
-// elemType - the element type of the array
-// rank - the rank of the array
-// dimension - the dimension for which the lower bound offset will be returned.
-//
-// Return Value:
-// The offset.
-
-unsigned CodeGen::genOffsetOfMDArrayLowerBound(var_types elemType, unsigned rank, unsigned dimension)
-{
- // Note that the lower bound and length fields of the Array object are always TYP_INT, even on 64-bit targets.
- return compiler->eeGetArrayDataOffset(elemType) + genTypeSize(TYP_INT) * (dimension + rank);
-}
-
-//------------------------------------------------------------------------
-// genOffsetOfMDArrayDimensionSize: Returns the offset from the Array object to the
-// size for the given dimension.
-//
-// Arguments:
-// elemType - the element type of the array
-// rank - the rank of the array
-// dimension - the dimension for which the size offset will be returned.
-//
-// Return Value:
-// The offset.
-
-unsigned CodeGen::genOffsetOfMDArrayDimensionSize(var_types elemType, unsigned rank, unsigned dimension)
-{
- // Note that the lower bound and length fields of the Array object are always TYP_INT, even on 64-bit targets.
- return compiler->eeGetArrayDataOffset(elemType) + genTypeSize(TYP_INT) * dimension;
-}
-
-//------------------------------------------------------------------------
-// genCodeForArrIndex: Generates code to bounds check the index for one dimension of an array reference,
-// producing the effective index by subtracting the lower bound.
-//
-// Arguments:
-// arrIndex - the node for which we're generating code
-//
-// Return Value:
-// None.
-//
-
-void CodeGen::genCodeForArrIndex(GenTreeArrIndex* arrIndex)
-{
- GenTreePtr arrObj = arrIndex->ArrObj();
- GenTreePtr indexNode = arrIndex->IndexExpr();
-
- regNumber arrReg = genConsumeReg(arrObj);
- regNumber indexReg = genConsumeReg(indexNode);
- regNumber tgtReg = arrIndex->gtRegNum;
-
- unsigned dim = arrIndex->gtCurrDim;
- unsigned rank = arrIndex->gtArrRank;
- var_types elemType = arrIndex->gtArrElemType;
-
- noway_assert(tgtReg != REG_NA);
-
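- // The emitted sequence is roughly (illustrative):
- //     mov tgtReg, indexReg                          ; only if tgtReg != indexReg
- //     sub tgtReg, [arrReg + lowerBoundOffset(dim)]
- //     cmp tgtReg, [arrReg + dimensionSizeOffset(dim)]
- //     jae <range-check-fail block>
-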
- // Subtract the lower bound for this dimension.
- // TODO-XArch-CQ: make this contained if it's an immediate that fits.
- if (tgtReg != indexReg)
- {
- inst_RV_RV(INS_mov, tgtReg, indexReg, indexNode->TypeGet());
- }
- getEmitter()->emitIns_R_AR(INS_sub, emitActualTypeSize(TYP_INT), tgtReg, arrReg,
- genOffsetOfMDArrayLowerBound(elemType, rank, dim));
- getEmitter()->emitIns_R_AR(INS_cmp, emitActualTypeSize(TYP_INT), tgtReg, arrReg,
- genOffsetOfMDArrayDimensionSize(elemType, rank, dim));
- genJumpToThrowHlpBlk(EJ_jae, SCK_RNGCHK_FAIL);
-
- genProduceReg(arrIndex);
-}
-
-//------------------------------------------------------------------------
-// genCodeForArrOffset: Generates code to compute the flattened array offset for
-// one dimension of an array reference:
-// result = (prevDimOffset * dimSize) + effectiveIndex
-// where dimSize is obtained from the arrObj operand
-//
-// Arguments:
-// arrOffset - the node for which we're generating code
-//
-// Return Value:
-// None.
-//
-// Notes:
-// dimSize and effectiveIndex are always non-negative, the former by design,
-// and the latter because it has been normalized to be zero-based.
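-//
-// Example (illustrative): for a rank-2 array indexed as a[i,j] where dimension 1 has
-// size 4, the node for dimension 0 simply produces i (there is no previous offset to
-// scale), and the node for dimension 1 produces (i * 4) + j.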
-
-void CodeGen::genCodeForArrOffset(GenTreeArrOffs* arrOffset)
-{
- GenTreePtr offsetNode = arrOffset->gtOffset;
- GenTreePtr indexNode = arrOffset->gtIndex;
- GenTreePtr arrObj = arrOffset->gtArrObj;
-
- regNumber tgtReg = arrOffset->gtRegNum;
-
- noway_assert(tgtReg != REG_NA);
-
- unsigned dim = arrOffset->gtCurrDim;
- unsigned rank = arrOffset->gtArrRank;
- var_types elemType = arrOffset->gtArrElemType;
-
- // We will use a temp register for the offset*scale+effectiveIndex computation.
- regMaskTP tmpRegMask = arrOffset->gtRsvdRegs;
- regNumber tmpReg = genRegNumFromMask(tmpRegMask);
-
- // First, consume the operands in the correct order.
- regNumber offsetReg = REG_NA;
- if (!offsetNode->IsIntegralConst(0))
- {
- offsetReg = genConsumeReg(offsetNode);
- }
- else
- {
- assert(offsetNode->isContained());
- }
- regNumber indexReg = genConsumeReg(indexNode);
- // Although arrReg may not be used in the constant-index case, if we have generated
- // the value into a register, we must consume it, otherwise we will fail to end the
- // live range of the gc ptr.
- // TODO-CQ: Currently arrObj will always have a register allocated to it.
- // We could avoid allocating a register for it, which would be of value if the arrObj
- // is an on-stack lclVar.
- regNumber arrReg = REG_NA;
- if (arrObj->gtHasReg())
- {
- arrReg = genConsumeReg(arrObj);
- }
-
- if (!offsetNode->IsIntegralConst(0))
- {
- // Evaluate tgtReg = offsetReg*dim_size + indexReg.
- // tmpReg is used to load dim_size and the result of the multiplication.
- // Note that dim_size will never be negative.
-
- getEmitter()->emitIns_R_AR(INS_mov, emitActualTypeSize(TYP_INT), tmpReg, arrReg,
- genOffsetOfMDArrayDimensionSize(elemType, rank, dim));
- inst_RV_RV(INS_imul, tmpReg, offsetReg);
-
- if (tmpReg == tgtReg)
- {
- inst_RV_RV(INS_add, tmpReg, indexReg);
- }
- else
- {
- if (indexReg != tgtReg)
- {
- inst_RV_RV(INS_mov, tgtReg, indexReg, TYP_I_IMPL);
- }
- inst_RV_RV(INS_add, tgtReg, tmpReg);
- }
- }
- else
- {
- if (indexReg != tgtReg)
- {
- inst_RV_RV(INS_mov, tgtReg, indexReg, TYP_INT);
- }
- }
- genProduceReg(arrOffset);
-}
-
-// make a temporary indir we can feed to pattern matching routines
-// in cases where we don't want to instantiate all the indirs that happen
-//
-GenTreeIndir CodeGen::indirForm(var_types type, GenTree* base)
-{
- GenTreeIndir i(GT_IND, type, base, nullptr);
- i.gtRegNum = REG_NA;
- // has to be nonnull (because contained nodes can't be the last in block)
- // but don't want it to be a valid pointer
- i.gtNext = (GenTree*)(-1);
- return i;
-}
-
-// make a temporary int we can feed to pattern matching routines
-// in cases where we don't want to instantiate an actual constant node
-//
-GenTreeIntCon CodeGen::intForm(var_types type, ssize_t value)
-{
- GenTreeIntCon i(type, value);
- i.gtRegNum = REG_NA;
- // has to be nonnull (because contained nodes can't be the last in block)
- // but don't want it to be a valid pointer
- i.gtNext = (GenTree*)(-1);
- return i;
-}
-
-instruction CodeGen::genGetInsForOper(genTreeOps oper, var_types type)
-{
- instruction ins;
-
- // Operations on SIMD vectors shouldn't come this path
- assert(!varTypeIsSIMD(type));
- if (varTypeIsFloating(type))
- {
- return ins_MathOp(oper, type);
- }
-
- switch (oper)
- {
- case GT_ADD:
- ins = INS_add;
- break;
- case GT_AND:
- ins = INS_and;
- break;
- case GT_LSH:
- ins = INS_shl;
- break;
- case GT_MUL:
- ins = INS_imul;
- break;
- case GT_NEG:
- ins = INS_neg;
- break;
- case GT_NOT:
- ins = INS_not;
- break;
- case GT_OR:
- ins = INS_or;
- break;
- case GT_ROL:
- ins = INS_rol;
- break;
- case GT_ROR:
- ins = INS_ror;
- break;
- case GT_RSH:
- ins = INS_sar;
- break;
- case GT_RSZ:
- ins = INS_shr;
- break;
- case GT_SUB:
- ins = INS_sub;
- break;
- case GT_XOR:
- ins = INS_xor;
- break;
-#if !defined(_TARGET_64BIT_)
- case GT_ADD_LO:
- ins = INS_add;
- break;
- case GT_ADD_HI:
- ins = INS_adc;
- break;
- case GT_SUB_LO:
- ins = INS_sub;
- break;
- case GT_SUB_HI:
- ins = INS_sbb;
- break;
-#endif // !defined(_TARGET_64BIT_)
- default:
- unreached();
- break;
- }
- return ins;
-}
-
-//------------------------------------------------------------------------
-// genCodeForShift: Generates the code sequence for a GenTree node that
-// represents a bit shift or rotate operation (<<, >>, >>>, rol, ror).
-//
-// Arguments:
-// tree - the bit shift node (that specifies the type of bit shift to perform).
-//
-// Assumptions:
-// a) All GenTrees are register allocated.
-// b) The shift-by-amount in tree->gtOp.gtOp2 is either a contained constant or
-// it's a register-allocated expression. If it is in a register that is
-// not RCX, it will be moved to RCX (so RCX better not be in use!).
-//
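-// For example (illustrative), a left shift by a contained constant is emitted as
-//     mov dstReg, srcReg   ; only if they differ
-//     shl dstReg, 5
-// while a variable shift amount is first moved into RCX (if not already there) and
-// the shift is emitted as "shl dstReg, cl".
-//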
-void CodeGen::genCodeForShift(GenTreePtr tree)
-{
- // Only the non-RMW case here.
- assert(tree->OperIsShiftOrRotate());
- assert(!tree->gtOp.gtOp1->isContained());
- assert(tree->gtRegNum != REG_NA);
-
- genConsumeOperands(tree->AsOp());
-
- var_types targetType = tree->TypeGet();
- instruction ins = genGetInsForOper(tree->OperGet(), targetType);
-
- GenTreePtr operand = tree->gtGetOp1();
- regNumber operandReg = operand->gtRegNum;
-
- GenTreePtr shiftBy = tree->gtGetOp2();
- if (shiftBy->isContainedIntOrIImmed())
- {
- // First, move the operand to the destination register and
- // later on perform the shift in-place.
- // (LSRA will try to avoid this situation through preferencing.)
- if (tree->gtRegNum != operandReg)
- {
- inst_RV_RV(INS_mov, tree->gtRegNum, operandReg, targetType);
- }
-
- int shiftByValue = (int)shiftBy->AsIntConCommon()->IconValue();
- inst_RV_SH(ins, emitTypeSize(tree), tree->gtRegNum, shiftByValue);
- }
- else
- {
- // We must have the number of bits to shift stored in ECX, since we constrained this node to
- // sit in ECX. In case this didn't happen, LSRA expects the code generator to move it since it's a single
- // register destination requirement.
- regNumber shiftReg = shiftBy->gtRegNum;
- if (shiftReg != REG_RCX)
- {
- // Issue the mov to RCX:
- inst_RV_RV(INS_mov, REG_RCX, shiftReg, shiftBy->TypeGet());
- }
-
- // The operand to be shifted must not be in ECX
- noway_assert(operandReg != REG_RCX);
-
- if (tree->gtRegNum != operandReg)
- {
- inst_RV_RV(INS_mov, tree->gtRegNum, operandReg, targetType);
- }
- inst_RV_CL(ins, tree->gtRegNum, targetType);
- }
-
- genProduceReg(tree);
-}
-
-//------------------------------------------------------------------------
-// genCodeForShiftRMW: Generates the code sequence for a GT_STOREIND GenTree node that
-// represents a RMW bit shift or rotate operation (<<, >>, >>>, rol, ror), for example:
-// GT_STOREIND( AddressTree, GT_SHL( Ind ( AddressTree ), Operand ) )
-//
-// Arguments:
-// storeIndNode: the GT_STOREIND node.
-//
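-// For example (illustrative), "[addr] = [addr] << 3" with a contained constant amount
-// is emitted as a single read-modify-write instruction
-//     shl dword ptr [addr], 3
-// and with a variable amount (moved into RCX first if needed) as
-//     shl dword ptr [addr], cl
-//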
-void CodeGen::genCodeForShiftRMW(GenTreeStoreInd* storeInd)
-{
- GenTree* data = storeInd->Data();
- GenTree* addr = storeInd->Addr();
-
- assert(data->OperIsShiftOrRotate());
-
- // This function only handles the RMW case.
- assert(data->gtOp.gtOp1->isContained());
- assert(data->gtOp.gtOp1->isIndir());
- assert(Lowering::IndirsAreEquivalent(data->gtOp.gtOp1, storeInd));
- assert(data->gtRegNum == REG_NA);
-
- var_types targetType = data->TypeGet();
- genTreeOps oper = data->OperGet();
- instruction ins = genGetInsForOper(oper, targetType);
- emitAttr attr = EA_ATTR(genTypeSize(targetType));
-
- GenTree* shiftBy = data->gtOp.gtOp2;
- if (shiftBy->isContainedIntOrIImmed())
- {
- int shiftByValue = (int)shiftBy->AsIntConCommon()->IconValue();
- ins = genMapShiftInsToShiftByConstantIns(ins, shiftByValue);
- if (shiftByValue == 1)
- {
- // There is no source in this case, as the shift by count is embedded in the instruction opcode itself.
- getEmitter()->emitInsRMW(ins, attr, storeInd);
- }
- else
- {
- getEmitter()->emitInsRMW(ins, attr, storeInd, shiftBy);
- }
- }
- else
- {
- // We must have the number of bits to shift stored in ECX, since we constrained this node to
- // sit in ECX. In case this didn't happen, LSRA expects the code generator to move it since it's a single
- // register destination requirement.
- regNumber shiftReg = shiftBy->gtRegNum;
- if (shiftReg != REG_RCX)
- {
- // Issue the mov to RCX:
- inst_RV_RV(INS_mov, REG_RCX, shiftReg, shiftBy->TypeGet());
- }
-
- // The shiftBy operand is implicit, so call the unary version of emitInsRMW.
- getEmitter()->emitInsRMW(ins, attr, storeInd);
- }
-}
-
-void CodeGen::genUnspillRegIfNeeded(GenTree* tree)
-{
- regNumber dstReg = tree->gtRegNum;
- GenTree* unspillTree = tree;
-
- if (tree->gtOper == GT_RELOAD)
- {
- unspillTree = tree->gtOp.gtOp1;
- }
-
- if ((unspillTree->gtFlags & GTF_SPILLED) != 0)
- {
- if (genIsRegCandidateLocal(unspillTree))
- {
- // Reset spilled flag, since we are going to load a local variable from its home location.
- unspillTree->gtFlags &= ~GTF_SPILLED;
-
- GenTreeLclVarCommon* lcl = unspillTree->AsLclVarCommon();
- LclVarDsc* varDsc = &compiler->lvaTable[lcl->gtLclNum];
-
- // Load local variable from its home location.
- // In most cases the tree type will indicate the correct type to use for the load.
- // However, if it is NOT a normalizeOnLoad lclVar (i.e. NOT a small int that always gets
- // widened when loaded into a register), and its size is not the same as genActualType of
- // the type of the lclVar, then we need to change the type of the tree node when loading.
- // This situation happens due to "optimizations" that avoid a cast and
- // simply retype the node when using long type lclVar as an int.
- // While loading the int in that case would work for this use of the lclVar, if it is
- // later used as a long, we will have incorrectly truncated the long.
- // In the normalizeOnLoad case ins_Load will return an appropriate sign- or zero-
- // extending load.
-
- var_types treeType = unspillTree->TypeGet();
- if (treeType != genActualType(varDsc->lvType) && !varTypeIsGC(treeType) && !varDsc->lvNormalizeOnLoad())
- {
- assert(!varTypeIsGC(varDsc));
- var_types spillType = genActualType(varDsc->lvType);
- unspillTree->gtType = spillType;
- inst_RV_TT(ins_Load(spillType, compiler->isSIMDTypeLocalAligned(lcl->gtLclNum)), dstReg, unspillTree);
- unspillTree->gtType = treeType;
- }
- else
- {
- inst_RV_TT(ins_Load(treeType, compiler->isSIMDTypeLocalAligned(lcl->gtLclNum)), dstReg, unspillTree);
- }
-
- unspillTree->SetInReg();
-
- // TODO-Review: We would like to call:
- // genUpdateRegLife(varDsc, /*isBorn*/ true, /*isDying*/ false DEBUGARG(tree));
- // instead of the following code, but this ends up hitting this assert:
- // assert((regSet.rsMaskVars & regMask) == 0);
- // due to issues with LSRA resolution moves.
- // So, just force it for now. This probably indicates a condition that creates a GC hole!
- //
- // Extra note: I think we really want to call something like gcInfo.gcUpdateForRegVarMove,
- // because the variable is not really going live or dead, but that method is somewhat poorly
- // factored because it, in turn, updates rsMaskVars which is part of RegSet not GCInfo.
- // TODO-Cleanup: This code exists in other CodeGen*.cpp files, and should be moved to CodeGenCommon.cpp.
-
- // Don't update the variable's location if we are just re-spilling it again.
-
- if ((unspillTree->gtFlags & GTF_SPILL) == 0)
- {
- genUpdateVarReg(varDsc, tree);
-#ifdef DEBUG
- if (VarSetOps::IsMember(compiler, gcInfo.gcVarPtrSetCur, varDsc->lvVarIndex))
- {
- JITDUMP("\t\t\t\t\t\t\tRemoving V%02u from gcVarPtrSetCur\n", lcl->gtLclNum);
- }
-#endif // DEBUG
- VarSetOps::RemoveElemD(compiler, gcInfo.gcVarPtrSetCur, varDsc->lvVarIndex);
-
-#ifdef DEBUG
- if (compiler->verbose)
- {
- printf("\t\t\t\t\t\t\tV%02u in reg ", lcl->gtLclNum);
- varDsc->PrintVarReg();
- printf(" is becoming live ");
- compiler->printTreeID(unspillTree);
- printf("\n");
- }
-#endif // DEBUG
-
- regSet.AddMaskVars(genGetRegMask(varDsc));
- }
-
- gcInfo.gcMarkRegPtrVal(dstReg, unspillTree->TypeGet());
- }
- else if (unspillTree->IsMultiRegCall())
- {
- GenTreeCall* call = unspillTree->AsCall();
- ReturnTypeDesc* retTypeDesc = call->GetReturnTypeDesc();
- unsigned regCount = retTypeDesc->GetReturnRegCount();
- GenTreeCopyOrReload* reloadTree = nullptr;
- if (tree->OperGet() == GT_RELOAD)
- {
- reloadTree = tree->AsCopyOrReload();
- }
-
- // In case of a multi-reg call node, the GTF_SPILLED flag on it indicates that
- // one or more of its result regs are spilled. The call node needs to be
- // queried to know which specific result regs need to be unspilled.
- for (unsigned i = 0; i < regCount; ++i)
- {
- unsigned flags = call->GetRegSpillFlagByIdx(i);
- if ((flags & GTF_SPILLED) != 0)
- {
- var_types dstType = retTypeDesc->GetReturnRegType(i);
- regNumber unspillTreeReg = call->GetRegNumByIdx(i);
-
- if (reloadTree != nullptr)
- {
- dstReg = reloadTree->GetRegNumByIdx(i);
- if (dstReg == REG_NA)
- {
- dstReg = unspillTreeReg;
- }
- }
- else
- {
- dstReg = unspillTreeReg;
- }
-
- TempDsc* t = regSet.rsUnspillInPlace(call, unspillTreeReg, i);
- getEmitter()->emitIns_R_S(ins_Load(dstType), emitActualTypeSize(dstType), dstReg, t->tdTempNum(),
- 0);
- compiler->tmpRlsTemp(t);
- gcInfo.gcMarkRegPtrVal(dstReg, dstType);
- }
- }
-
- unspillTree->gtFlags &= ~GTF_SPILLED;
- unspillTree->SetInReg();
- }
- else
- {
- TempDsc* t = regSet.rsUnspillInPlace(unspillTree, unspillTree->gtRegNum);
- getEmitter()->emitIns_R_S(ins_Load(unspillTree->gtType), emitActualTypeSize(unspillTree->TypeGet()), dstReg,
- t->tdTempNum(), 0);
- compiler->tmpRlsTemp(t);
-
- unspillTree->gtFlags &= ~GTF_SPILLED;
- unspillTree->SetInReg();
- gcInfo.gcMarkRegPtrVal(dstReg, unspillTree->TypeGet());
- }
- }
-}
-
-// Do liveness update for a subnode that is being consumed by codegen,
-// including the logic to reload it if needed, and also take care
-// of moving the value into the desired register.
-void CodeGen::genConsumeRegAndCopy(GenTree* tree, regNumber needReg)
-{
- if (needReg == REG_NA)
- {
- return;
- }
- regNumber treeReg = genConsumeReg(tree);
- if (treeReg != needReg)
- {
- inst_RV_RV(INS_mov, needReg, treeReg, tree->TypeGet());
- }
-}
-
-void CodeGen::genRegCopy(GenTree* treeNode)
-{
- assert(treeNode->OperGet() == GT_COPY);
- GenTree* op1 = treeNode->gtOp.gtOp1;
-
- if (op1->IsMultiRegCall())
- {
- genConsumeReg(op1);
-
- GenTreeCopyOrReload* copyTree = treeNode->AsCopyOrReload();
- GenTreeCall* call = op1->AsCall();
- ReturnTypeDesc* retTypeDesc = call->GetReturnTypeDesc();
- unsigned regCount = retTypeDesc->GetReturnRegCount();
-
- for (unsigned i = 0; i < regCount; ++i)
- {
- var_types type = retTypeDesc->GetReturnRegType(i);
- regNumber fromReg = call->GetRegNumByIdx(i);
- regNumber toReg = copyTree->GetRegNumByIdx(i);
-
- // A multi-reg GT_COPY node will have a valid reg only for those
- // positions whose corresponding result reg of the call node needs
- // to be copied.
- if (toReg != REG_NA)
- {
- assert(toReg != fromReg);
- inst_RV_RV(ins_Copy(type), toReg, fromReg, type);
- }
- }
- }
- else
- {
- var_types targetType = treeNode->TypeGet();
- regNumber targetReg = treeNode->gtRegNum;
- assert(targetReg != REG_NA);
-
- // Check whether this node and the node from which we're copying the value have
- // different register types. This can happen if (currently iff) we have a SIMD
- // vector type that fits in an integer register, in which case it is passed as
- // an argument, or returned from a call, in an integer register and must be
- // copied if it's in an xmm register.
-
- bool srcFltReg = (varTypeIsFloating(op1) || varTypeIsSIMD(op1));
- bool tgtFltReg = (varTypeIsFloating(treeNode) || varTypeIsSIMD(treeNode));
- if (srcFltReg != tgtFltReg)
- {
- instruction ins;
- regNumber fpReg;
- regNumber intReg;
- if (tgtFltReg)
- {
- ins = ins_CopyIntToFloat(op1->TypeGet(), treeNode->TypeGet());
- fpReg = targetReg;
- intReg = op1->gtRegNum;
- }
- else
- {
- ins = ins_CopyFloatToInt(op1->TypeGet(), treeNode->TypeGet());
- intReg = targetReg;
- fpReg = op1->gtRegNum;
- }
- inst_RV_RV(ins, fpReg, intReg, targetType);
- }
- else
- {
- inst_RV_RV(ins_Copy(targetType), targetReg, genConsumeReg(op1), targetType);
- }
-
- if (op1->IsLocal())
- {
- // The lclVar will never be a def.
- // If it is a last use, the lclVar will be killed by genConsumeReg(), as usual, and genProduceReg will
- // appropriately set the gcInfo for the copied value.
- // If not, there are two cases we need to handle:
- // - If this is a TEMPORARY copy (indicated by the GTF_VAR_DEATH flag) the variable
- // will remain live in its original register.
- // genProduceReg() will appropriately set the gcInfo for the copied value,
- // and genConsumeReg will reset it.
- // - Otherwise, we need to update register info for the lclVar.
-
- GenTreeLclVarCommon* lcl = op1->AsLclVarCommon();
- assert((lcl->gtFlags & GTF_VAR_DEF) == 0);
-
- if ((lcl->gtFlags & GTF_VAR_DEATH) == 0 && (treeNode->gtFlags & GTF_VAR_DEATH) == 0)
- {
- LclVarDsc* varDsc = &compiler->lvaTable[lcl->gtLclNum];
-
- // If we didn't just spill it (in genConsumeReg, above), then update the register info
- if (varDsc->lvRegNum != REG_STK)
- {
- // The old location is dying
- genUpdateRegLife(varDsc, /*isBorn*/ false, /*isDying*/ true DEBUGARG(op1));
-
- gcInfo.gcMarkRegSetNpt(genRegMask(op1->gtRegNum));
-
- genUpdateVarReg(varDsc, treeNode);
-
- // The new location is going live
- genUpdateRegLife(varDsc, /*isBorn*/ true, /*isDying*/ false DEBUGARG(treeNode));
- }
- }
- }
- }
-
- genProduceReg(treeNode);
-}
-
-// Check that registers are consumed in the right order for the current node being generated.
-#ifdef DEBUG
-void CodeGen::genCheckConsumeNode(GenTree* treeNode)
-{
- // GT_PUTARG_REG is consumed out of order.
- if (treeNode->gtSeqNum != 0 && treeNode->OperGet() != GT_PUTARG_REG)
- {
- if (lastConsumedNode != nullptr)
- {
- if (treeNode == lastConsumedNode)
- {
- if (verbose)
- {
- printf("Node was consumed twice:\n ");
- compiler->gtDispTree(treeNode, nullptr, nullptr, true);
- }
- }
- else
- {
- if (verbose && (lastConsumedNode->gtSeqNum > treeNode->gtSeqNum))
- {
- printf("Nodes were consumed out-of-order:\n");
- compiler->gtDispTree(lastConsumedNode, nullptr, nullptr, true);
- compiler->gtDispTree(treeNode, nullptr, nullptr, true);
- }
- // assert(lastConsumedNode->gtSeqNum < treeNode->gtSeqNum);
- }
- }
- lastConsumedNode = treeNode;
- }
-}
-#endif // DEBUG
-
-//--------------------------------------------------------------------
-// genConsumeReg: Do liveness update for a subnode that is being
-// consumed by codegen.
-//
-// Arguments:
-// tree - GenTree node
-//
-// Return Value:
-// Returns the reg number of tree.
-// In case of multi-reg call node returns the first reg number
-// of the multi-reg return.
-regNumber CodeGen::genConsumeReg(GenTree* tree)
-{
- if (tree->OperGet() == GT_COPY)
- {
- genRegCopy(tree);
- }
-
- // Handle the case where we have a lclVar that needs to be copied before use (i.e. because it
- // interferes with one of the other sources (or the target, if it's a "delayed use" register)).
- // TODO-Cleanup: This is a special copyReg case in LSRA - consider eliminating these and
- // always using GT_COPY to make the lclVar location explicit.
- // Note that we have to do this before calling genUpdateLife because otherwise if we spill it
- // the lvRegNum will be set to REG_STK and we will lose track of what register currently holds
- // the lclVar (normally when a lclVar is spilled it is then used from its former register
- // location, which matches the gtRegNum on the node).
- // (Note that it doesn't matter if we call this before or after genUnspillRegIfNeeded
- // because if it's on the stack it will always get reloaded into tree->gtRegNum).
- if (genIsRegCandidateLocal(tree))
- {
- GenTreeLclVarCommon* lcl = tree->AsLclVarCommon();
- LclVarDsc* varDsc = &compiler->lvaTable[lcl->GetLclNum()];
- if (varDsc->lvRegNum != REG_STK && varDsc->lvRegNum != tree->gtRegNum)
- {
- inst_RV_RV(INS_mov, tree->gtRegNum, varDsc->lvRegNum);
- }
- }
-
- genUnspillRegIfNeeded(tree);
-
- // genUpdateLife() will also spill local var if marked as GTF_SPILL by calling CodeGen::genSpillVar
- genUpdateLife(tree);
-
- assert(tree->gtHasReg());
-
- // there are three cases where consuming a reg means clearing the bit in the live mask
- // 1. it was not produced by a local
- // 2. it was produced by a local that is going dead
- // 3. it was produced by a local that does not live in that reg (like one allocated on the stack)
-
- if (genIsRegCandidateLocal(tree))
- {
- GenTreeLclVarCommon* lcl = tree->AsLclVarCommon();
- LclVarDsc* varDsc = &compiler->lvaTable[lcl->GetLclNum()];
- assert(varDsc->lvLRACandidate);
-
- if ((tree->gtFlags & GTF_VAR_DEATH) != 0)
- {
- gcInfo.gcMarkRegSetNpt(genRegMask(varDsc->lvRegNum));
- }
- else if (varDsc->lvRegNum == REG_STK)
- {
- // We have loaded this into a register only temporarily
- gcInfo.gcMarkRegSetNpt(genRegMask(tree->gtRegNum));
- }
- }
- else
- {
- gcInfo.gcMarkRegSetNpt(tree->gtGetRegMask());
- }
-
- genCheckConsumeNode(tree);
- return tree->gtRegNum;
-}
-
-// Do liveness update for an address tree: one of GT_LEA, GT_LCL_VAR, or GT_CNS_INT (for call indirect).
-void CodeGen::genConsumeAddress(GenTree* addr)
-{
- if (addr->OperGet() == GT_LEA)
- {
- genConsumeAddrMode(addr->AsAddrMode());
- }
- else if (!addr->isContained())
- {
- genConsumeReg(addr);
- }
-}
-
-// do liveness update for a subnode that is being consumed by codegen
-void CodeGen::genConsumeAddrMode(GenTreeAddrMode* addr)
-{
- genConsumeOperands(addr);
-}
-
-void CodeGen::genConsumeRegs(GenTree* tree)
-{
-#if !defined(_TARGET_64BIT_)
- if (tree->OperGet() == GT_LONG)
- {
- genConsumeRegs(tree->gtGetOp1());
- genConsumeRegs(tree->gtGetOp2());
- return;
- }
-#endif // !defined(_TARGET_64BIT_)
-
- if (tree->isContained())
- {
- if (tree->isContainedSpillTemp())
- {
- // spill temps are un-tracked and hence no need to update life
- }
- else if (tree->isIndir())
- {
- genConsumeAddress(tree->AsIndir()->Addr());
- }
- else if (tree->OperGet() == GT_AND)
- {
- // This is the special contained GT_AND that we created in Lowering::LowerCmp()
- // Now we need to consume the operands of the GT_AND node.
- genConsumeOperands(tree->AsOp());
- }
- else if (tree->OperGet() == GT_LCL_VAR)
- {
- // A contained lcl var must be living on stack and marked as reg optional.
- unsigned varNum = tree->AsLclVarCommon()->GetLclNum();
- LclVarDsc* varDsc = compiler->lvaTable + varNum;
-
- noway_assert(varDsc->lvRegNum == REG_STK);
- noway_assert(tree->IsRegOptional());
-
- // Update the life of reg optional lcl var.
- genUpdateLife(tree);
- }
- else
- {
- assert(tree->OperIsLeaf());
- }
- }
- else
- {
- genConsumeReg(tree);
- }
-}
-
-//------------------------------------------------------------------------
-// genConsumeOperands: Do liveness update for the operands of a unary or binary tree
-//
-// Arguments:
-// tree - the GenTreeOp whose operands will have their liveness updated.
-//
-// Return Value:
-// None.
-//
-// Notes:
-// Note that this logic is localized here because we must do the liveness update in
-// the correct execution order. This is important because we may have two operands
-// that involve the same lclVar, and if one is marked "lastUse" we must handle it
-// after the first.
-
-void CodeGen::genConsumeOperands(GenTreeOp* tree)
-{
- GenTree* firstOp = tree->gtOp1;
- GenTree* secondOp = tree->gtOp2;
- if ((tree->gtFlags & GTF_REVERSE_OPS) != 0)
- {
- assert(secondOp != nullptr);
- firstOp = secondOp;
- secondOp = tree->gtOp1;
- }
- if (firstOp != nullptr)
- {
- genConsumeRegs(firstOp);
- }
- if (secondOp != nullptr)
- {
- genConsumeRegs(secondOp);
- }
-}
-
-#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
-//------------------------------------------------------------------------
-// genConsumePutStructArgStk: Do liveness update for the operands of a PutArgStk node.
-// Also loads in the right register the addresses of the
-// src/dst for rep mov operation.
-//
-// Arguments:
-// putArgNode - the PUTARG_STK tree.
-// dstReg - the dstReg for the rep move operation.
-// srcReg - the srcReg for the rep move operation.
-// sizeReg - the sizeReg for the rep move operation.
-// baseVarNum - the varnum for the local used for placing the "by-value" args on the stack.
-//
-// Return Value:
-// None.
-//
-// Note: sizeReg can be REG_NA when this function is used to consume the dstReg and srcReg
-// for copying on the stack a struct with references.
-// The source address/offset is determined from the address on the GT_OBJ node, while
-// the destination address is the address contained in 'baseVarNum' plus the offset
-// provided in the 'putArgNode'.
-
-void CodeGen::genConsumePutStructArgStk(
- GenTreePutArgStk* putArgNode, regNumber dstReg, regNumber srcReg, regNumber sizeReg, unsigned baseVarNum)
-{
- assert(varTypeIsStruct(putArgNode));
- assert(baseVarNum != BAD_VAR_NUM);
-
- // The putArgNode children are always contained. We should not consume any registers.
- assert(putArgNode->gtGetOp1()->isContained());
-
- GenTree* dstAddr = putArgNode;
-
- // Get the source address.
- GenTree* src = putArgNode->gtGetOp1();
- assert((src->gtOper == GT_OBJ) || ((src->gtOper == GT_IND && varTypeIsSIMD(src))));
- GenTree* srcAddr = src->gtGetOp1();
-
- size_t size = putArgNode->getArgSize();
-
- assert(dstReg != REG_NA);
- assert(srcReg != REG_NA);
-
- // Consume the registers only if they are not contained or set to REG_NA.
- if (srcAddr->gtRegNum != REG_NA)
- {
- genConsumeReg(srcAddr);
- }
-
- // If the op1 is already in the dstReg - nothing to do.
- // Otherwise load the op1 (GT_ADDR) into the dstReg to copy the struct on the stack by value.
- if (dstAddr->gtRegNum != dstReg)
- {
- // Generate LEA instruction to load the stack of the outgoing var + SlotNum offset (or the incoming arg area
- // for tail calls) in RDI.
- // Destination is always local (on the stack) - use EA_PTRSIZE.
- getEmitter()->emitIns_R_S(INS_lea, EA_PTRSIZE, dstReg, baseVarNum, putArgNode->getArgOffset());
- }
-
- if (srcAddr->gtRegNum != srcReg)
- {
- if (srcAddr->OperIsLocalAddr())
- {
- // The OperLocalAddr is always contained.
- assert(srcAddr->isContained());
- GenTreeLclVarCommon* lclNode = srcAddr->AsLclVarCommon();
-
- // Generate LEA instruction to load the LclVar address in RSI.
- // Source is known to be on the stack. Use EA_PTRSIZE.
- unsigned int offset = 0;
- if (srcAddr->OperGet() == GT_LCL_FLD_ADDR)
- {
- offset = srcAddr->AsLclFld()->gtLclOffs;
- }
- getEmitter()->emitIns_R_S(INS_lea, EA_PTRSIZE, srcReg, lclNode->gtLclNum, offset);
- }
- else
- {
- assert(srcAddr->gtRegNum != REG_NA);
- // Source is not known to be on the stack. Use EA_BYREF.
- getEmitter()->emitIns_R_R(INS_mov, EA_BYREF, srcReg, srcAddr->gtRegNum);
- }
- }
-
- if (sizeReg != REG_NA)
- {
- inst_RV_IV(INS_mov, sizeReg, size, EA_8BYTE);
- }
-}
-#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
-
-//------------------------------------------------------------------------
-// genConsumeBlockSize: Ensure that the block size is in the given register
-//
-// Arguments:
-// blkNode - The block node
-// sizeReg - The register into which the block's size should go
-//
-
-void CodeGen::genConsumeBlockSize(GenTreeBlk* blkNode, regNumber sizeReg)
-{
- unsigned blockSize = blkNode->Size();
- if (sizeReg != REG_NA)
- {
- if (blockSize != 0)
- {
- assert(blkNode->gtRsvdRegs == genRegMask(sizeReg));
- genSetRegToIcon(sizeReg, blockSize);
- }
- else
- {
- noway_assert(blkNode->gtOper == GT_STORE_DYN_BLK);
- genConsumeRegAndCopy(blkNode->AsDynBlk()->gtDynamicSize, sizeReg);
- }
- }
-}
-
-//------------------------------------------------------------------------
-// genConsumeBlockDst: Ensure that the block destination address is in its
-// allocated register.
-// Arguments:
-// blkNode - The block node
-//
-
-void CodeGen::genConsumeBlockDst(GenTreeBlk* blkNode)
-{
- GenTree* dstAddr = blkNode->Addr();
- genConsumeReg(dstAddr);
-}
-
-//------------------------------------------------------------------------
-// genConsumeBlockSrc: Ensure that the block source address is in its
-// allocated register if it is non-local.
-// Arguments:
-// blkNode - The block node
-//
-// Return Value:
-// Returns the source address node, if it is non-local,
-// and nullptr otherwise.
-GenTree* CodeGen::genConsumeBlockSrc(GenTreeBlk* blkNode)
-{
- GenTree* src = blkNode->Data();
- if (blkNode->OperIsCopyBlkOp())
- {
- // For a CopyBlk we need the address of the source.
- if (src->OperGet() == GT_IND)
- {
- src = src->gtOp.gtOp1;
- }
- else
- {
- // This must be a local.
- // For this case, there is no source address register, as it is a
- // stack-based address.
- assert(src->OperIsLocal());
- return nullptr;
- }
- }
- genConsumeReg(src);
- return src;
-}
-
-//------------------------------------------------------------------------
-// genConsumeBlockOp: Ensure that the block's operands are enregistered
-// as needed.
-// Arguments:
-// blkNode - The block node
-//
-// Notes:
-// This ensures that the operands are consumed in the proper order to
-// obey liveness modeling.
-
-void CodeGen::genConsumeBlockOp(GenTreeBlk* blkNode, regNumber dstReg, regNumber srcReg, regNumber sizeReg)
-{
- // We have to consume the registers, and perform any copies, in the actual execution order.
- // The nominal order is: dst, src, size. However this may have been changed
- // with reverse flags on the blkNode and the setting of gtEvalSizeFirst in the case of a dynamic
- // block size.
- // Note that the register allocator ensures that the registers ON THE NODES will not interfere
- // with one another if consumed (i.e. reloaded or moved to their ASSIGNED reg) in execution order.
- // Further, it ensures that they will not interfere with one another if they are then copied
- // to the REQUIRED register (if a fixed register requirement) in execution order. This requires,
- // then, that we first consume all the operands, then do any necessary moves.
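- // For example (illustrative), when the block operation is expanded as a rep movs
- // sequence the required registers are fixed (RDI for the destination address, RSI for
- // the source address, RCX for the size), so each operand is consumed first and then
- // moved into its required register only if it is not already there.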
-
- GenTree* dstAddr = blkNode->Addr();
- GenTree* src = nullptr;
- unsigned blockSize = blkNode->Size();
- GenTree* size = nullptr;
- bool evalSizeFirst = true;
-
- if (blkNode->OperGet() == GT_STORE_DYN_BLK)
- {
- evalSizeFirst = blkNode->AsDynBlk()->gtEvalSizeFirst;
- size = blkNode->AsDynBlk()->gtDynamicSize;
- }
-
- // First, consume all the sources in order
- if (evalSizeFirst)
- {
- genConsumeBlockSize(blkNode, sizeReg);
- }
- if (blkNode->IsReverseOp())
- {
- src = genConsumeBlockSrc(blkNode);
- genConsumeBlockDst(blkNode);
- }
- else
- {
- genConsumeBlockDst(blkNode);
- src = genConsumeBlockSrc(blkNode);
- }
- if (!evalSizeFirst)
- {
- genConsumeBlockSize(blkNode, sizeReg);
- }
- // Next, perform any necessary moves.
- if (evalSizeFirst && (size != nullptr) && (size->gtRegNum != sizeReg))
- {
- inst_RV_RV(INS_mov, sizeReg, size->gtRegNum, size->TypeGet());
- }
- if (blkNode->IsReverseOp())
- {
- if ((src != nullptr) && (src->gtRegNum != srcReg))
- {
- inst_RV_RV(INS_mov, srcReg, src->gtRegNum, src->TypeGet());
- }
- if (dstAddr->gtRegNum != dstReg)
- {
- inst_RV_RV(INS_mov, dstReg, dstAddr->gtRegNum, dstAddr->TypeGet());
- }
- }
- else
- {
- if (dstAddr->gtRegNum != dstReg)
- {
- inst_RV_RV(INS_mov, dstReg, dstAddr->gtRegNum, dstAddr->TypeGet());
- }
- if ((src != nullptr) && (src->gtRegNum != srcReg))
- {
- inst_RV_RV(INS_mov, srcReg, src->gtRegNum, src->TypeGet());
- }
- }
- if (!evalSizeFirst && size != nullptr && (size->gtRegNum != sizeReg))
- {
- inst_RV_RV(INS_mov, sizeReg, size->gtRegNum, size->TypeGet());
- }
-}
-
-//-------------------------------------------------------------------------
-// genProduceReg: do liveness update for register produced by the current
-// node in codegen.
-//
-// Arguments:
-// tree - Gentree node
-//
-// Return Value:
-// None.
-void CodeGen::genProduceReg(GenTree* tree)
-{
- if (tree->gtFlags & GTF_SPILL)
- {
- // Code for GT_COPY node gets generated as part of consuming regs by its parent.
- // A GT_COPY node in turn produces reg result and it should never be marked to
- // spill.
- //
- // Similarly GT_RELOAD node gets generated as part of consuming regs by its
- // parent and should never be marked for spilling.
- noway_assert(!tree->IsCopyOrReload());
-
- if (genIsRegCandidateLocal(tree))
- {
- // Store local variable to its home location.
- tree->gtFlags &= ~GTF_REG_VAL;
- // Ensure that lclVar stores are typed correctly.
- unsigned varNum = tree->gtLclVarCommon.gtLclNum;
- assert(!compiler->lvaTable[varNum].lvNormalizeOnStore() ||
- (tree->TypeGet() == genActualType(compiler->lvaTable[varNum].TypeGet())));
- inst_TT_RV(ins_Store(tree->gtType, compiler->isSIMDTypeLocalAligned(varNum)), tree, tree->gtRegNum);
- }
- else
- {
- // In case of multi-reg call node, spill flag on call node
- // indicates that one or more of its allocated regs need to
- // be spilled. Call node needs to be further queried to
- // know which of its result regs needs to be spilled.
- if (tree->IsMultiRegCall())
- {
- GenTreeCall* call = tree->AsCall();
- ReturnTypeDesc* retTypeDesc = call->GetReturnTypeDesc();
- unsigned regCount = retTypeDesc->GetReturnRegCount();
-
- for (unsigned i = 0; i < regCount; ++i)
- {
- unsigned flags = call->GetRegSpillFlagByIdx(i);
- if ((flags & GTF_SPILL) != 0)
- {
- regNumber reg = call->GetRegNumByIdx(i);
- call->SetInReg();
- regSet.rsSpillTree(reg, call, i);
- gcInfo.gcMarkRegSetNpt(genRegMask(reg));
- }
- }
- }
- else
- {
- tree->SetInReg();
- regSet.rsSpillTree(tree->gtRegNum, tree);
- gcInfo.gcMarkRegSetNpt(genRegMask(tree->gtRegNum));
- }
-
- tree->gtFlags |= GTF_SPILLED;
- tree->gtFlags &= ~GTF_SPILL;
-
- return;
- }
- }
-
- genUpdateLife(tree);
-
- // If we've produced a register, mark it as a pointer, as needed.
- if (tree->gtHasReg())
- {
- // We only mark the register in the following cases:
- // 1. It is not a register candidate local. In this case, we're producing a
- // register from a local, but the local is not a register candidate. Thus,
- // we must be loading it as a temp register, and any "last use" flag on
- // the register wouldn't be relevant.
- // 2. The register candidate local is going dead. There's no point to mark
- // the register as live, with a GC pointer, if the variable is dead.
- if (!genIsRegCandidateLocal(tree) || ((tree->gtFlags & GTF_VAR_DEATH) == 0))
- {
- // Multi-reg call node will produce more than one register result.
- // Mark all the regs produced by call node.
- if (tree->IsMultiRegCall())
- {
- GenTreeCall* call = tree->AsCall();
- ReturnTypeDesc* retTypeDesc = call->GetReturnTypeDesc();
- unsigned regCount = retTypeDesc->GetReturnRegCount();
-
- for (unsigned i = 0; i < regCount; ++i)
- {
- regNumber reg = call->GetRegNumByIdx(i);
- var_types type = retTypeDesc->GetReturnRegType(i);
- gcInfo.gcMarkRegPtrVal(reg, type);
- }
- }
- else if (tree->IsCopyOrReloadOfMultiRegCall())
- {
- // we should never see reload of multi-reg call here
- // because GT_RELOAD gets generated in reg consuming path.
- noway_assert(tree->OperGet() == GT_COPY);
-
- // A multi-reg GT_COPY node produces those regs to which
- // copy has taken place.
- GenTreeCopyOrReload* copy = tree->AsCopyOrReload();
- GenTreeCall* call = copy->gtGetOp1()->AsCall();
- ReturnTypeDesc* retTypeDesc = call->GetReturnTypeDesc();
- unsigned regCount = retTypeDesc->GetReturnRegCount();
-
- for (unsigned i = 0; i < regCount; ++i)
- {
- var_types type = retTypeDesc->GetReturnRegType(i);
- regNumber fromReg = call->GetRegNumByIdx(i);
- regNumber toReg = copy->GetRegNumByIdx(i);
-
- if (toReg != REG_NA)
- {
- gcInfo.gcMarkRegPtrVal(toReg, type);
- }
- }
- }
- else
- {
- gcInfo.gcMarkRegPtrVal(tree->gtRegNum, tree->TypeGet());
- }
- }
- }
- tree->SetInReg();
-}
-
-// transfer gc/byref status of src reg to dst reg
-void CodeGen::genTransferRegGCState(regNumber dst, regNumber src)
-{
- regMaskTP srcMask = genRegMask(src);
- regMaskTP dstMask = genRegMask(dst);
-
- if (gcInfo.gcRegGCrefSetCur & srcMask)
- {
- gcInfo.gcMarkRegSetGCref(dstMask);
- }
- else if (gcInfo.gcRegByrefSetCur & srcMask)
- {
- gcInfo.gcMarkRegSetByref(dstMask);
- }
- else
- {
- gcInfo.gcMarkRegSetNpt(dstMask);
- }
-}
-
-// generates an ip-relative call or indirect call via reg ('call reg')
-// pass in 'addr' for a relative call or 'base' for an indirect register call
-// methHnd - optional, only used for pretty printing
-// retSize - emitter type of return for GC purposes, should be EA_BYREF, EA_GCREF, or EA_PTRSIZE(not GC)
-void CodeGen::genEmitCall(int callType,
- CORINFO_METHOD_HANDLE methHnd,
- INDEBUG_LDISASM_COMMA(CORINFO_SIG_INFO* sigInfo) void* addr X86_ARG(ssize_t argSize),
- emitAttr retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(emitAttr secondRetSize),
- IL_OFFSETX ilOffset,
- regNumber base,
- bool isJump,
- bool isNoGC)
-{
-#if !defined(_TARGET_X86_)
- ssize_t argSize = 0;
-#endif // !defined(_TARGET_X86_)
- getEmitter()->emitIns_Call(emitter::EmitCallType(callType), methHnd, INDEBUG_LDISASM_COMMA(sigInfo) addr, argSize,
- retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize), gcInfo.gcVarPtrSetCur,
- gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur, ilOffset, base, REG_NA, 0, 0, isJump,
- emitter::emitNoGChelper(compiler->eeGetHelperNum(methHnd)));
-}
-
-// generates an indirect call via addressing mode (call []) given an indir node
-// methHnd - optional, only used for pretty printing
-// retSize - emitter type of return for GC purposes, should be EA_BYREF, EA_GCREF, or EA_PTRSIZE(not GC)
-void CodeGen::genEmitCall(int callType,
- CORINFO_METHOD_HANDLE methHnd,
- INDEBUG_LDISASM_COMMA(CORINFO_SIG_INFO* sigInfo) GenTreeIndir* indir X86_ARG(ssize_t argSize),
- emitAttr retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(emitAttr secondRetSize),
- IL_OFFSETX ilOffset)
-{
-#if !defined(_TARGET_X86_)
- ssize_t argSize = 0;
-#endif // !defined(_TARGET_X86_)
- genConsumeAddress(indir->Addr());
-
- getEmitter()->emitIns_Call(emitter::EmitCallType(callType), methHnd, INDEBUG_LDISASM_COMMA(sigInfo) nullptr,
- argSize, retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize),
- gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur, ilOffset,
- indir->Base() ? indir->Base()->gtRegNum : REG_NA,
- indir->Index() ? indir->Index()->gtRegNum : REG_NA, indir->Scale(), indir->Offset());
-}
-
-//------------------------------------------------------------------------
-// genStoreInd: Generate code for a GT_STOREIND node.
-//
-// Arguments:
-// treeNode - The GT_STOREIND node for which to generate code.
-//
-// Return Value:
-// none
-
-void CodeGen::genStoreInd(GenTreePtr node)
-{
- assert(node->OperGet() == GT_STOREIND);
-
-#ifdef FEATURE_SIMD
- // Storing Vector3 of size 12 bytes through indirection
- if (node->TypeGet() == TYP_SIMD12)
- {
- genStoreIndTypeSIMD12(node);
- return;
- }
-#endif // FEATURE_SIMD
-
- GenTreeStoreInd* storeInd = node->AsStoreInd();
- GenTree* data = storeInd->Data();
- GenTree* addr = storeInd->Addr();
- var_types targetType = storeInd->TypeGet();
-
- assert(!varTypeIsFloating(targetType) || (targetType == data->TypeGet()));
-
- GCInfo::WriteBarrierForm writeBarrierForm = gcInfo.gcIsWriteBarrierCandidate(storeInd, data);
- if (writeBarrierForm != GCInfo::WBF_NoBarrier)
- {
- // data and addr must be in registers.
- // Consume both registers so that any copies of interfering registers are taken care of.
- genConsumeOperands(storeInd->AsOp());
-
- if (genEmitOptimizedGCWriteBarrier(writeBarrierForm, addr, data))
- {
- return;
- }
-
- // At this point, we should not have any interference.
- // That is, 'data' must not be in REG_ARG_0, as that is where 'addr' must go.
- noway_assert(data->gtRegNum != REG_ARG_0);
-
- // addr goes in REG_ARG_0
- if (addr->gtRegNum != REG_ARG_0)
- {
- inst_RV_RV(INS_mov, REG_ARG_0, addr->gtRegNum, addr->TypeGet());
- }
-
- // data goes in REG_ARG_1
- if (data->gtRegNum != REG_ARG_1)
- {
- inst_RV_RV(INS_mov, REG_ARG_1, data->gtRegNum, data->TypeGet());
- }
-
- genGCWriteBarrier(storeInd, writeBarrierForm);
- }
- else
- {
- bool reverseOps = ((storeInd->gtFlags & GTF_REVERSE_OPS) != 0);
- bool dataIsUnary = false;
- bool isRMWMemoryOp = storeInd->IsRMWMemoryOp();
- GenTree* rmwSrc = nullptr;
-
- // We must consume the operands in the proper execution order, so that liveness is
- // updated appropriately.
- if (!reverseOps)
- {
- genConsumeAddress(addr);
- }
-
- // If storeInd represents a RMW memory op then its data is a non-leaf node marked as contained,
- // and the non-indir operand of data is the source of the RMW memory op.
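- // For example (illustrative), a tree for "*p = *p + 5" is emitted as a single
- //     add dword ptr [p], 5
- // rather than a separate load, add and store.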
- if (isRMWMemoryOp)
- {
- assert(data->isContained() && !data->OperIsLeaf());
-
- GenTreePtr rmwDst = nullptr;
-
- dataIsUnary = (GenTree::OperIsUnary(data->OperGet()) != 0);
- if (!dataIsUnary)
- {
- if (storeInd->IsRMWDstOp1())
- {
- rmwDst = data->gtGetOp1();
- rmwSrc = data->gtGetOp2();
- }
- else
- {
- assert(storeInd->IsRMWDstOp2());
- rmwDst = data->gtGetOp2();
- rmwSrc = data->gtGetOp1();
- }
-
- genConsumeRegs(rmwSrc);
- }
- else
- {
- // *(p) = oper *(p): Here addr = p, rmwSrc = rmwDst = *(p), i.e. GT_IND(p)
- // For unary RMW ops, src and dst of RMW memory op is the same. Lower
- // clears operand counts on rmwSrc and we don't need to perform a
- // genConsumeReg() on it.
- assert(storeInd->IsRMWDstOp1());
- rmwSrc = data->gtGetOp1();
- rmwDst = data->gtGetOp1();
- assert(rmwSrc->isContained());
- }
-
- assert(rmwSrc != nullptr);
- assert(rmwDst != nullptr);
- assert(Lowering::IndirsAreEquivalent(rmwDst, storeInd));
- }
- else
- {
- genConsumeRegs(data);
- }
-
- if (reverseOps)
- {
- genConsumeAddress(addr);
- }
-
- if (isRMWMemoryOp)
- {
- if (dataIsUnary)
- {
- // generate code for unary RMW memory ops like neg/not
- getEmitter()->emitInsRMW(genGetInsForOper(data->OperGet(), data->TypeGet()), emitTypeSize(storeInd),
- storeInd);
- }
- else
- {
- if (data->OperIsShiftOrRotate())
- {
- // Generate code for shift RMW memory ops.
- // The data address needs to be op1 (it must be [addr] = [addr] <shift> <amount>, not [addr] =
- // <amount> <shift> [addr]).
- assert(storeInd->IsRMWDstOp1());
- assert(rmwSrc == data->gtGetOp2());
- genCodeForShiftRMW(storeInd);
- }
- else
- {
- // generate code for remaining binary RMW memory ops like add/sub/and/or/xor
- getEmitter()->emitInsRMW(genGetInsForOper(data->OperGet(), data->TypeGet()), emitTypeSize(storeInd),
- storeInd, rmwSrc);
- }
- }
- }
- else
- {
- getEmitter()->emitInsMov(ins_Store(data->TypeGet()), emitTypeSize(storeInd), storeInd);
- }
- }
-}
-
-//------------------------------------------------------------------------
-// genEmitOptimizedGCWriteBarrier: Generate write barrier store using the optimized
-// helper functions.
-//
-// Arguments:
-// writeBarrierForm - the write barrier form to use
-// addr - the address at which to do the store
-// data - the data to store
-//
-// Return Value:
-// true if an optimized write barrier form was used, false if not. If this
-// function returns false, the caller must emit a "standard" write barrier.
-
-bool CodeGen::genEmitOptimizedGCWriteBarrier(GCInfo::WriteBarrierForm writeBarrierForm, GenTree* addr, GenTree* data)
-{
- assert(writeBarrierForm != GCInfo::WBF_NoBarrier);
-
-#if defined(_TARGET_X86_) && NOGC_WRITE_BARRIERS
- bool useOptimizedWriteBarriers = true;
-
-#ifdef DEBUG
- useOptimizedWriteBarriers =
- (writeBarrierForm != GCInfo::WBF_NoBarrier_CheckNotHeapInDebug); // This one is always a call to a C++ method.
-#endif
-
- if (!useOptimizedWriteBarriers)
- {
- return false;
- }
-
- const static int regToHelper[2][8] = {
- // If the target is known to be in managed memory
- {
- CORINFO_HELP_ASSIGN_REF_EAX, CORINFO_HELP_ASSIGN_REF_ECX, -1, CORINFO_HELP_ASSIGN_REF_EBX, -1,
- CORINFO_HELP_ASSIGN_REF_EBP, CORINFO_HELP_ASSIGN_REF_ESI, CORINFO_HELP_ASSIGN_REF_EDI,
- },
-
- // Don't know if the target is in managed memory
- {
- CORINFO_HELP_CHECKED_ASSIGN_REF_EAX, CORINFO_HELP_CHECKED_ASSIGN_REF_ECX, -1,
- CORINFO_HELP_CHECKED_ASSIGN_REF_EBX, -1, CORINFO_HELP_CHECKED_ASSIGN_REF_EBP,
- CORINFO_HELP_CHECKED_ASSIGN_REF_ESI, CORINFO_HELP_CHECKED_ASSIGN_REF_EDI,
- },
- };
-
- noway_assert(regToHelper[0][REG_EAX] == CORINFO_HELP_ASSIGN_REF_EAX);
- noway_assert(regToHelper[0][REG_ECX] == CORINFO_HELP_ASSIGN_REF_ECX);
- noway_assert(regToHelper[0][REG_EBX] == CORINFO_HELP_ASSIGN_REF_EBX);
- noway_assert(regToHelper[0][REG_ESP] == -1);
- noway_assert(regToHelper[0][REG_EBP] == CORINFO_HELP_ASSIGN_REF_EBP);
- noway_assert(regToHelper[0][REG_ESI] == CORINFO_HELP_ASSIGN_REF_ESI);
- noway_assert(regToHelper[0][REG_EDI] == CORINFO_HELP_ASSIGN_REF_EDI);
-
- noway_assert(regToHelper[1][REG_EAX] == CORINFO_HELP_CHECKED_ASSIGN_REF_EAX);
- noway_assert(regToHelper[1][REG_ECX] == CORINFO_HELP_CHECKED_ASSIGN_REF_ECX);
- noway_assert(regToHelper[1][REG_EBX] == CORINFO_HELP_CHECKED_ASSIGN_REF_EBX);
- noway_assert(regToHelper[1][REG_ESP] == -1);
- noway_assert(regToHelper[1][REG_EBP] == CORINFO_HELP_CHECKED_ASSIGN_REF_EBP);
- noway_assert(regToHelper[1][REG_ESI] == CORINFO_HELP_CHECKED_ASSIGN_REF_ESI);
- noway_assert(regToHelper[1][REG_EDI] == CORINFO_HELP_CHECKED_ASSIGN_REF_EDI);
-
- regNumber reg = data->gtRegNum;
- noway_assert((reg != REG_ESP) && (reg != REG_WRITE_BARRIER));
-
- // Generate the following code:
- // lea edx, addr
- // call write_barrier_helper_reg
-
- // addr goes in REG_WRITE_BARRIER
- if (addr->gtRegNum != REG_WRITE_BARRIER) // REVIEW: can it ever not already be in this register?
- {
- inst_RV_RV(INS_mov, REG_WRITE_BARRIER, addr->gtRegNum, addr->TypeGet());
- }
-
- unsigned tgtAnywhere = 0;
- if (writeBarrierForm != GCInfo::WBF_BarrierUnchecked)
- {
- tgtAnywhere = 1;
- }
-
- // We might want to call a modified version of genGCWriteBarrier() to get the benefit of
- // the FEATURE_COUNT_GC_WRITE_BARRIERS code there, but that code doesn't look like it works
- // with rationalized RyuJIT IR. So, for now, just emit the helper call directly here.
-
- genEmitHelperCall(regToHelper[tgtAnywhere][reg],
- 0, // argSize
- EA_PTRSIZE); // retSize
-
- return true;
-#else // !defined(_TARGET_X86_) || !NOGC_WRITE_BARRIERS
- return false;
-#endif // !defined(_TARGET_X86_) || !NOGC_WRITE_BARRIERS
-}
-
-// Produce code for a GT_CALL node
-void CodeGen::genCallInstruction(GenTreePtr node)
-{
- GenTreeCall* call = node->AsCall();
- assert(call->gtOper == GT_CALL);
-
- gtCallTypes callType = (gtCallTypes)call->gtCallType;
-
- IL_OFFSETX ilOffset = BAD_IL_OFFSET;
-
- // all virtuals should have been expanded into a control expression
- assert(!call->IsVirtual() || call->gtControlExpr || call->gtCallAddr);
-
- // Consume all the arg regs
- for (GenTreePtr list = call->gtCallLateArgs; list; list = list->MoveNext())
- {
- assert(list->IsList());
-
- GenTreePtr argNode = list->Current();
-
- fgArgTabEntryPtr curArgTabEntry = compiler->gtArgEntryByNode(call, argNode->gtSkipReloadOrCopy());
- assert(curArgTabEntry);
-
- if (curArgTabEntry->regNum == REG_STK)
- {
- continue;
- }
-
-#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
- // Deal with multi register passed struct args.
- if (argNode->OperGet() == GT_LIST)
- {
- GenTreeArgList* argListPtr = argNode->AsArgList();
- unsigned iterationNum = 0;
- for (; argListPtr != nullptr; argListPtr = argListPtr->Rest(), iterationNum++)
- {
- GenTreePtr putArgRegNode = argListPtr->gtOp.gtOp1;
- assert(putArgRegNode->gtOper == GT_PUTARG_REG);
- regNumber argReg = REG_NA;
-
- if (iterationNum == 0)
- {
- argReg = curArgTabEntry->regNum;
- }
- else
- {
- assert(iterationNum == 1);
- argReg = curArgTabEntry->otherRegNum;
- }
-
- genConsumeReg(putArgRegNode);
-
- // Validate the putArgRegNode has the right type.
- assert(putArgRegNode->TypeGet() ==
- compiler->GetTypeFromClassificationAndSizes(curArgTabEntry->structDesc
- .eightByteClassifications[iterationNum],
- curArgTabEntry->structDesc
- .eightByteSizes[iterationNum]));
- if (putArgRegNode->gtRegNum != argReg)
- {
- inst_RV_RV(ins_Move_Extend(putArgRegNode->TypeGet(), putArgRegNode->InReg()), argReg,
- putArgRegNode->gtRegNum);
- }
- }
- }
- else
-#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
- {
- regNumber argReg = curArgTabEntry->regNum;
- genConsumeReg(argNode);
- if (argNode->gtRegNum != argReg)
- {
- inst_RV_RV(ins_Move_Extend(argNode->TypeGet(), argNode->InReg()), argReg, argNode->gtRegNum);
- }
- }
-
-#if FEATURE_VARARG
- // In the case of a varargs call,
- // the ABI dictates that if we have floating point args,
- // we must pass the enregistered arguments in both the
- // integer and floating point registers, so let's do that.
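- // (For example, on Windows x64 a double that was placed in XMM1 is also copied to
- // RDX, the corresponding integer argument register.)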
- if (call->IsVarargs() && varTypeIsFloating(argNode))
- {
- regNumber targetReg = compiler->getCallArgIntRegister(argNode->gtRegNum);
- instruction ins = ins_CopyFloatToInt(argNode->TypeGet(), TYP_LONG);
- inst_RV_RV(ins, argNode->gtRegNum, targetReg);
- }
-#endif // FEATURE_VARARG
- }
-
-#if defined(_TARGET_X86_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
- // The call will pop its arguments.
- // for each putarg_stk:
- ssize_t stackArgBytes = 0;
- GenTreePtr args = call->gtCallArgs;
- while (args)
- {
- GenTreePtr arg = args->gtOp.gtOp1;
- if (arg->OperGet() != GT_ARGPLACE && !(arg->gtFlags & GTF_LATE_ARG))
- {
-#if defined(_TARGET_X86_)
- assert((arg->OperGet() == GT_PUTARG_STK) || (arg->OperGet() == GT_LONG));
- if (arg->OperGet() == GT_LONG)
- {
- assert((arg->gtGetOp1()->OperGet() == GT_PUTARG_STK) && (arg->gtGetOp2()->OperGet() == GT_PUTARG_STK));
- }
-#endif // defined(_TARGET_X86_)
-
-#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
- if (genActualType(arg->TypeGet()) == TYP_STRUCT)
- {
- assert(arg->OperGet() == GT_PUTARG_STK);
-
- GenTreeObj* obj = arg->gtGetOp1()->AsObj();
- stackArgBytes = compiler->info.compCompHnd->getClassSize(obj->gtClass);
- }
- else
-#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
-
- stackArgBytes += genTypeSize(genActualType(arg->TypeGet()));
- }
- args = args->gtOp.gtOp2;
- }
-#endif // defined(_TARGET_X86_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
-
- // Insert a null check on "this" pointer if asked.
- if (call->NeedsNullCheck())
- {
- const regNumber regThis = genGetThisArgReg(call);
- getEmitter()->emitIns_AR_R(INS_cmp, EA_4BYTE, regThis, regThis, 0);
- }
-
- // Either gtControlExpr != null or gtCallAddr != null or it is a direct non-virtual call to a user or helper method.
- CORINFO_METHOD_HANDLE methHnd;
- GenTree* target = call->gtControlExpr;
- if (callType == CT_INDIRECT)
- {
- assert(target == nullptr);
- target = call->gtCall.gtCallAddr;
- methHnd = nullptr;
- }
- else
- {
- methHnd = call->gtCallMethHnd;
- }
-
- CORINFO_SIG_INFO* sigInfo = nullptr;
-#ifdef DEBUG
- // Pass the call signature information down into the emitter so the emitter can associate
- // native call sites with the signatures they were generated from.
- if (callType != CT_HELPER)
- {
- sigInfo = call->callSig;
- }
-#endif // DEBUG
-
- // If fast tail call, then we are done. In this case we setup the args (both reg args
- // and stack args in incoming arg area) and call target in rax. Epilog sequence would
- // generate "jmp rax".
- if (call->IsFastTailCall())
- {
- // Don't support fast tail calling JIT helpers
- assert(callType != CT_HELPER);
-
- // Fast tail calls materialize call target either in gtControlExpr or in gtCallAddr.
- assert(target != nullptr);
-
- genConsumeReg(target);
- if (target->gtRegNum != REG_RAX)
- {
- inst_RV_RV(INS_mov, REG_RAX, target->gtRegNum);
- }
- return;
- }
-
- // For a pinvoke to unmanaged code we emit a label to clear
- // the GC pointer state before the callsite.
- // We can't utilize the typical lazy killing of GC pointers
- // at (or inside) the callsite.
- if (call->IsUnmanaged())
- {
- genDefineTempLabel(genCreateTempLabel());
- }
-
- // Determine return value size(s).
- ReturnTypeDesc* retTypeDesc = call->GetReturnTypeDesc();
- emitAttr retSize = EA_PTRSIZE;
- emitAttr secondRetSize = EA_UNKNOWN;
-
- if (call->HasMultiRegRetVal())
- {
- retSize = emitTypeSize(retTypeDesc->GetReturnRegType(0));
- secondRetSize = emitTypeSize(retTypeDesc->GetReturnRegType(1));
- }
- else
- {
- assert(!varTypeIsStruct(call));
-
- if (call->gtType == TYP_REF || call->gtType == TYP_ARRAY)
- {
- retSize = EA_GCREF;
- }
- else if (call->gtType == TYP_BYREF)
- {
- retSize = EA_BYREF;
- }
- }
-
- bool fPossibleSyncHelperCall = false;
- CorInfoHelpFunc helperNum = CORINFO_HELP_UNDEF;
-
-#ifdef DEBUGGING_SUPPORT
- // We need to propagate the IL offset information to the call instruction, so we can emit
- // an IL to native mapping record for the call, to support managed return value debugging.
- // We don't want tail call helper calls that were converted from normal calls to get a record,
- // so we skip this hash table lookup logic in that case.
- if (compiler->opts.compDbgInfo && compiler->genCallSite2ILOffsetMap != nullptr && !call->IsTailCall())
- {
- (void)compiler->genCallSite2ILOffsetMap->Lookup(call, &ilOffset);
- }
-#endif // DEBUGGING_SUPPORT
-
-#if defined(_TARGET_X86_)
- // If the callee pops the arguments, we pass a positive value as the argSize, and the emitter will
- // adjust its stack level accordingly.
- // If the caller needs to explicitly pop its arguments, we must pass a negative value, and then do the
- // pop when we're done.
- ssize_t argSizeForEmitter = stackArgBytes;
- if ((call->gtFlags & GTF_CALL_POP_ARGS) != 0)
- {
- argSizeForEmitter = -stackArgBytes;
- }
-
-#endif // defined(_TARGET_X86_)
-
- if (target != nullptr)
- {
- if (target->isContainedIndir())
- {
- if (target->AsIndir()->HasBase() && target->AsIndir()->Base()->isContainedIntOrIImmed())
- {
- // Note that if gtControlExpr is an indir of an absolute address, we mark it as
- // contained only if it can be encoded as PC-relative offset.
- assert(target->AsIndir()->Base()->AsIntConCommon()->FitsInAddrBase(compiler));
-
- genEmitCall(emitter::EC_FUNC_TOKEN_INDIR, methHnd,
- INDEBUG_LDISASM_COMMA(sigInfo)(void*) target->AsIndir()
- ->Base()
- ->AsIntConCommon()
- ->IconValue() X86_ARG(argSizeForEmitter),
- retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize), ilOffset);
- }
- else
- {
- genEmitCall(emitter::EC_INDIR_ARD, methHnd,
- INDEBUG_LDISASM_COMMA(sigInfo) target->AsIndir() X86_ARG(argSizeForEmitter),
- retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize), ilOffset);
- }
- }
- else
- {
- // We have already generated code for gtControlExpr evaluating it into a register.
- // We just need to emit "call reg" in this case.
- assert(genIsValidIntReg(target->gtRegNum));
- genEmitCall(emitter::EC_INDIR_R, methHnd,
- INDEBUG_LDISASM_COMMA(sigInfo) nullptr // addr
- X86_ARG(argSizeForEmitter),
- retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize), ilOffset, genConsumeReg(target));
- }
- }
-#ifdef FEATURE_READYTORUN_COMPILER
- else if (call->gtEntryPoint.addr != nullptr)
- {
- genEmitCall((call->gtEntryPoint.accessType == IAT_VALUE) ? emitter::EC_FUNC_TOKEN
- : emitter::EC_FUNC_TOKEN_INDIR,
- methHnd, INDEBUG_LDISASM_COMMA(sigInfo)(void*) call->gtEntryPoint.addr X86_ARG(argSizeForEmitter),
- retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize), ilOffset);
- }
-#endif
- else
- {
- // Generate a direct call to a non-virtual user defined or helper method
- assert(callType == CT_HELPER || callType == CT_USER_FUNC);
-
- void* addr = nullptr;
- if (callType == CT_HELPER)
- {
- // Direct call to a helper method.
- helperNum = compiler->eeGetHelperNum(methHnd);
- noway_assert(helperNum != CORINFO_HELP_UNDEF);
-
- void* pAddr = nullptr;
- addr = compiler->compGetHelperFtn(helperNum, (void**)&pAddr);
-
- if (addr == nullptr)
- {
- addr = pAddr;
- }
-
- // tracking of region protected by the monitor in synchronized methods
- if (compiler->info.compFlags & CORINFO_FLG_SYNCH)
- {
- fPossibleSyncHelperCall = true;
- }
- }
- else
- {
- // Direct call to a non-virtual user function.
- addr = call->gtDirectCallAddress;
- }
-
- // Non-virtual direct calls to known addresses
- genEmitCall(emitter::EC_FUNC_TOKEN, methHnd, INDEBUG_LDISASM_COMMA(sigInfo) addr X86_ARG(argSizeForEmitter),
- retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize), ilOffset);
- }
-
- // if it was a pinvoke we may have needed to get the address of a label
- if (genPendingCallLabel)
- {
- assert(call->IsUnmanaged());
- genDefineTempLabel(genPendingCallLabel);
- genPendingCallLabel = nullptr;
- }
-
-#if defined(_TARGET_X86_)
- // The call will pop its arguments.
- genStackLevel -= stackArgBytes;
-#endif // defined(_TARGET_X86_)
-
- // Update GC info:
- // All Callee arg registers are trashed and no longer contain any GC pointers.
- // TODO-XArch-Bug?: As a matter of fact shouldn't we be killing all of callee trashed regs here?
- // For now we will assert that, other than the arg regs, the gc ref/byref sets don't contain any other
- // registers from RBM_CALLEE_TRASH.
- assert((gcInfo.gcRegGCrefSetCur & (RBM_CALLEE_TRASH & ~RBM_ARG_REGS)) == 0);
- assert((gcInfo.gcRegByrefSetCur & (RBM_CALLEE_TRASH & ~RBM_ARG_REGS)) == 0);
- gcInfo.gcRegGCrefSetCur &= ~RBM_ARG_REGS;
- gcInfo.gcRegByrefSetCur &= ~RBM_ARG_REGS;
-
- var_types returnType = call->TypeGet();
- if (returnType != TYP_VOID)
- {
-#ifdef _TARGET_X86_
- if (varTypeIsFloating(returnType))
- {
- // Spill the value from the fp stack.
- // Then, load it into the target register.
- call->gtFlags |= GTF_SPILL;
- regSet.rsSpillFPStack(call);
- call->gtFlags |= GTF_SPILLED;
- call->gtFlags &= ~GTF_SPILL;
- }
- else
-#endif // _TARGET_X86_
- {
- regNumber returnReg;
-
- if (call->HasMultiRegRetVal())
- {
- assert(retTypeDesc != nullptr);
- unsigned regCount = retTypeDesc->GetReturnRegCount();
-
- // If regs allocated to call node are different from ABI return
- // regs in which the call has returned its result, move the result
- // to regs allocated to call node.
- for (unsigned i = 0; i < regCount; ++i)
- {
- var_types regType = retTypeDesc->GetReturnRegType(i);
- returnReg = retTypeDesc->GetABIReturnReg(i);
- regNumber allocatedReg = call->GetRegNumByIdx(i);
- if (returnReg != allocatedReg)
- {
- inst_RV_RV(ins_Copy(regType), allocatedReg, returnReg, regType);
- }
- }
-
-#ifdef FEATURE_SIMD
- // A Vector3 return value is stored in xmm0 and xmm1.
- // RyuJIT assumes that the upper unused bits of xmm1 are cleared but
- // the native compiler doesn't guarantee it.
- if (returnType == TYP_SIMD12)
- {
- returnReg = retTypeDesc->GetABIReturnReg(1);
- // Clear the upper 32 bits by two shift instructions.
- // retReg = retReg << 96
- // retReg = retReg >> 96
- getEmitter()->emitIns_R_I(INS_pslldq, emitActualTypeSize(TYP_SIMD12), returnReg, 12);
- getEmitter()->emitIns_R_I(INS_psrldq, emitActualTypeSize(TYP_SIMD12), returnReg, 12);
- }
-#endif // FEATURE_SIMD
- }
- else
- {
-#ifdef _TARGET_X86_
- if (call->IsHelperCall(compiler, CORINFO_HELP_INIT_PINVOKE_FRAME))
- {
- // The x86 CORINFO_HELP_INIT_PINVOKE_FRAME helper uses a custom calling convention that returns with
- // TCB in REG_PINVOKE_TCB. AMD64/ARM64 use the standard calling convention. fgMorphCall() sets the
- // correct argument registers.
- returnReg = REG_PINVOKE_TCB;
- }
- else
-#endif // _TARGET_X86_
- if (varTypeIsFloating(returnType))
- {
- returnReg = REG_FLOATRET;
- }
- else
- {
- returnReg = REG_INTRET;
- }
-
- if (call->gtRegNum != returnReg)
- {
- inst_RV_RV(ins_Copy(returnType), call->gtRegNum, returnReg, returnType);
- }
- }
-
- genProduceReg(call);
- }
- }
-
- // If there is nothing next, that means the result is thrown away, so this value is not live.
- // However, for minopts or debuggable code, we keep it live to support managed return value debugging.
- if ((call->gtNext == nullptr) && !compiler->opts.MinOpts() && !compiler->opts.compDbgCode)
- {
- gcInfo.gcMarkRegSetNpt(RBM_INTRET);
- }
-
-#if defined(_TARGET_X86_)
- //-------------------------------------------------------------------------
- // Create a label for tracking of region protected by the monitor in synchronized methods.
- // This needs to be here, rather than above where fPossibleSyncHelperCall is set,
- // so the GC state vars have been updated before creating the label.
-
- if (fPossibleSyncHelperCall)
- {
- switch (helperNum)
- {
- case CORINFO_HELP_MON_ENTER:
- case CORINFO_HELP_MON_ENTER_STATIC:
- noway_assert(compiler->syncStartEmitCookie == NULL);
- compiler->syncStartEmitCookie =
- getEmitter()->emitAddLabel(gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur);
- noway_assert(compiler->syncStartEmitCookie != NULL);
- break;
- case CORINFO_HELP_MON_EXIT:
- case CORINFO_HELP_MON_EXIT_STATIC:
- noway_assert(compiler->syncEndEmitCookie == NULL);
- compiler->syncEndEmitCookie =
- getEmitter()->emitAddLabel(gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur);
- noway_assert(compiler->syncEndEmitCookie != NULL);
- break;
- default:
- break;
- }
- }
-
- // Is the caller supposed to pop the arguments?
- if (((call->gtFlags & GTF_CALL_POP_ARGS) != 0) && (stackArgBytes != 0))
- {
- genAdjustSP(stackArgBytes);
- }
-#endif // _TARGET_X86_
-}
-
-// Produce code for a GT_JMP node.
- // The arguments of the caller need to be transferred to the callee before exiting the caller.
- // The actual jump to the callee is generated as part of the caller's epilog sequence.
- // Therefore the codegen for GT_JMP only needs to ensure that the callee arguments are correctly set up.
-void CodeGen::genJmpMethod(GenTreePtr jmp)
-{
- assert(jmp->OperGet() == GT_JMP);
- assert(compiler->compJmpOpUsed);
-
- // If no arguments, nothing to do
- if (compiler->info.compArgsCount == 0)
- {
- return;
- }
-
- // Make sure register arguments are in their initial registers
- // and stack arguments are put back as well.
- unsigned varNum;
- LclVarDsc* varDsc;
-
- // First move any enregistered stack arguments back to the stack.
- // At the same time, any reg arg not in its correct reg is moved back to its stack location.
- //
- // We are not strictly required to spill reg args that are not in the desired reg for a jmp call,
- // but that would require us to deal with circularity while moving values around. Spilling
- // to stack makes the implementation simple, which is not a bad trade off given Jmp calls
- // are not frequent.
- for (varNum = 0; (varNum < compiler->info.compArgsCount); varNum++)
- {
- varDsc = compiler->lvaTable + varNum;
-
- if (varDsc->lvPromoted)
- {
- noway_assert(varDsc->lvFieldCnt == 1); // We only handle one field here
-
- unsigned fieldVarNum = varDsc->lvFieldLclStart;
- varDsc = compiler->lvaTable + fieldVarNum;
- }
- noway_assert(varDsc->lvIsParam);
-
- if (varDsc->lvIsRegArg && (varDsc->lvRegNum != REG_STK))
- {
- // Skip reg args which are already in their right register for the jmp call.
- // If not, we will spill such args to their stack locations.
- //
- // If we need to generate a tail call profiler hook, then spill all
- // arg regs to free them up for the callback.
- if (!compiler->compIsProfilerHookNeeded() && (varDsc->lvRegNum == varDsc->lvArgReg))
- {
- continue;
- }
- }
- else if (varDsc->lvRegNum == REG_STK)
- {
- // Skip args which are currently living on the stack.
- continue;
- }
-
- // If we came here it means either a reg argument not in the right register or
- // a stack argument currently living in a register. In either case the following
- // assert should hold.
- assert(varDsc->lvRegNum != REG_STK);
-
- var_types loadType = varDsc->lvaArgType();
- getEmitter()->emitIns_S_R(ins_Store(loadType), emitTypeSize(loadType), varDsc->lvRegNum, varNum, 0);
-
- // Update lvRegNum life and GC info to indicate lvRegNum is dead and varDsc stack slot is going live.
- // Note that we cannot modify varDsc->lvRegNum here because another basic block may not be expecting it.
- // Therefore manually update life of varDsc->lvRegNum.
- regMaskTP tempMask = varDsc->lvRegMask();
- regSet.RemoveMaskVars(tempMask);
- gcInfo.gcMarkRegSetNpt(tempMask);
- if (compiler->lvaIsGCTracked(varDsc))
- {
-#ifdef DEBUG
- if (!VarSetOps::IsMember(compiler, gcInfo.gcVarPtrSetCur, varDsc->lvVarIndex))
- {
- JITDUMP("\t\t\t\t\t\t\tVar V%02u becoming live\n", varNum);
- }
- else
- {
- JITDUMP("\t\t\t\t\t\t\tVar V%02u continuing live\n", varNum);
- }
-#endif // DEBUG
-
- VarSetOps::AddElemD(compiler, gcInfo.gcVarPtrSetCur, varDsc->lvVarIndex);
- }
- }
-
-#ifdef PROFILING_SUPPORTED
- // At this point all arg regs are free.
- // Emit tail call profiler callback.
- genProfilingLeaveCallback(CORINFO_HELP_PROF_FCN_TAILCALL);
-#endif
-
- // Next, move any register arguments that are not currently in their argument register back into it.
- regMaskTP fixedIntArgMask = RBM_NONE; // tracks the int arg regs occupying fixed args in case of a vararg method.
- unsigned firstArgVarNum = BAD_VAR_NUM; // varNum of the first argument in case of a vararg method.
- for (varNum = 0; (varNum < compiler->info.compArgsCount); varNum++)
- {
- varDsc = compiler->lvaTable + varNum;
- if (varDsc->lvPromoted)
- {
- noway_assert(varDsc->lvFieldCnt == 1); // We only handle one field here
-
- unsigned fieldVarNum = varDsc->lvFieldLclStart;
- varDsc = compiler->lvaTable + fieldVarNum;
- }
- noway_assert(varDsc->lvIsParam);
-
- // Skip if arg not passed in a register.
- if (!varDsc->lvIsRegArg)
- {
- continue;
- }
-
-#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
- if (varTypeIsStruct(varDsc))
- {
- CORINFO_CLASS_HANDLE typeHnd = varDsc->lvVerTypeInfo.GetClassHandle();
- assert(typeHnd != nullptr);
-
- SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc;
- compiler->eeGetSystemVAmd64PassStructInRegisterDescriptor(typeHnd, &structDesc);
- assert(structDesc.passedInRegisters);
-
- unsigned __int8 offset0 = 0;
- unsigned __int8 offset1 = 0;
- var_types type0 = TYP_UNKNOWN;
- var_types type1 = TYP_UNKNOWN;
-
- // Get the eightbyte data
- compiler->GetStructTypeOffset(structDesc, &type0, &type1, &offset0, &offset1);
-
- // Move the values into the right registers.
- //
-
- // Update varDsc->lvArgReg and lvOtherArgReg life and GC Info to indicate varDsc stack slot is dead and
- // argReg is going live. Note that we cannot modify varDsc->lvRegNum and lvOtherArgReg here because another
- // basic block may not be expecting it. Therefore manually update life of argReg. Note that GT_JMP marks
- // the end of the basic block and after which reg life and gc info will be recomputed for the new block in
- // genCodeForBBList().
- if (type0 != TYP_UNKNOWN)
- {
- getEmitter()->emitIns_R_S(ins_Load(type0), emitTypeSize(type0), varDsc->lvArgReg, varNum, offset0);
- regSet.rsMaskVars |= genRegMask(varDsc->lvArgReg);
- gcInfo.gcMarkRegPtrVal(varDsc->lvArgReg, type0);
- }
-
- if (type1 != TYP_UNKNOWN)
- {
- getEmitter()->emitIns_R_S(ins_Load(type1), emitTypeSize(type1), varDsc->lvOtherArgReg, varNum, offset1);
- regSet.rsMaskVars |= genRegMask(varDsc->lvOtherArgReg);
- gcInfo.gcMarkRegPtrVal(varDsc->lvOtherArgReg, type1);
- }
-
- if (varDsc->lvTracked)
- {
- VarSetOps::RemoveElemD(compiler, gcInfo.gcVarPtrSetCur, varDsc->lvVarIndex);
- }
- }
- else
-#endif // !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
- {
- // Register argument
- noway_assert(isRegParamType(genActualType(varDsc->TypeGet())));
-
- // Is register argument already in the right register?
- // If not load it from its stack location.
- var_types loadType = varDsc->lvaArgType();
- regNumber argReg = varDsc->lvArgReg; // incoming arg register
-
- if (varDsc->lvRegNum != argReg)
- {
- assert(genIsValidReg(argReg));
- getEmitter()->emitIns_R_S(ins_Load(loadType), emitTypeSize(loadType), argReg, varNum, 0);
-
- // Update argReg life and GC Info to indicate varDsc stack slot is dead and argReg is going live.
- // Note that we cannot modify varDsc->lvRegNum here because another basic block may not be expecting it.
- // Therefore manually update life of argReg. Note that GT_JMP marks the end of the basic block
- // and after which reg life and gc info will be recomputed for the new block in genCodeForBBList().
- regSet.AddMaskVars(genRegMask(argReg));
- gcInfo.gcMarkRegPtrVal(argReg, loadType);
- if (compiler->lvaIsGCTracked(varDsc))
- {
-#ifdef DEBUG
- if (VarSetOps::IsMember(compiler, gcInfo.gcVarPtrSetCur, varDsc->lvVarIndex))
- {
- JITDUMP("\t\t\t\t\t\t\tVar V%02u becoming dead\n", varNum);
- }
- else
- {
- JITDUMP("\t\t\t\t\t\t\tVar V%02u continuing dead\n", varNum);
- }
-#endif // DEBUG
-
- VarSetOps::RemoveElemD(compiler, gcInfo.gcVarPtrSetCur, varDsc->lvVarIndex);
- }
- }
- }
-
-#if FEATURE_VARARG && defined(_TARGET_AMD64_)
- // In case of a jmp call to a vararg method also pass the float/double arg in the corresponding int arg
- // register. This is due to the AMD64 ABI which requires floating point values passed to varargs functions to
- // be passed in both integer and floating point registers. It doesn't apply to x86, which passes floating point
- // values on the stack.
- if (compiler->info.compIsVarArgs)
- {
- regNumber intArgReg;
- var_types loadType = varDsc->lvaArgType();
- regNumber argReg = varDsc->lvArgReg; // incoming arg register
-
- if (varTypeIsFloating(loadType))
- {
- intArgReg = compiler->getCallArgIntRegister(argReg);
- instruction ins = ins_CopyFloatToInt(loadType, TYP_LONG);
- inst_RV_RV(ins, argReg, intArgReg, loadType);
- }
- else
- {
- intArgReg = argReg;
- }
-
- fixedIntArgMask |= genRegMask(intArgReg);
-
- if (intArgReg == REG_ARG_0)
- {
- assert(firstArgVarNum == BAD_VAR_NUM);
- firstArgVarNum = varNum;
- }
- }
-#endif // FEATURE_VARARG
- }
-
-#if FEATURE_VARARG && defined(_TARGET_AMD64_)
- // Jmp call to a vararg method - if the method has fewer than 4 fixed arguments,
- // load the remaining arg registers (both int and float) from the corresponding
- // shadow stack slots. This is because we don't know the number and type of the
- // non-fixed params passed by the caller, so we have to assume the worst case of the
- // caller passing float/double args in both the int and float arg regs.
- //
- // This doesn't apply to x86, which doesn't pass floating point values in floating
- // point registers.
- //
- // The caller could have passed gc-ref/byref type var args. Since these are var args
- // the callee has no way of knowing their gc-ness. Therefore, mark the region that loads
- // remaining arg registers from shadow stack slots as non-gc interruptible.
- if (fixedIntArgMask != RBM_NONE)
- {
- assert(compiler->info.compIsVarArgs);
- assert(firstArgVarNum != BAD_VAR_NUM);
-
- regMaskTP remainingIntArgMask = RBM_ARG_REGS & ~fixedIntArgMask;
- if (remainingIntArgMask != RBM_NONE)
- {
- instruction insCopyIntToFloat = ins_CopyIntToFloat(TYP_LONG, TYP_DOUBLE);
- getEmitter()->emitDisableGC();
- for (int argNum = 0, argOffset = 0; argNum < MAX_REG_ARG; ++argNum)
- {
- regNumber argReg = intArgRegs[argNum];
- regMaskTP argRegMask = genRegMask(argReg);
-
- if ((remainingIntArgMask & argRegMask) != 0)
- {
- remainingIntArgMask &= ~argRegMask;
- getEmitter()->emitIns_R_S(INS_mov, EA_8BYTE, argReg, firstArgVarNum, argOffset);
-
- // also load it in corresponding float arg reg
- regNumber floatReg = compiler->getCallArgFloatRegister(argReg);
- inst_RV_RV(insCopyIntToFloat, floatReg, argReg);
- }
-
- argOffset += REGSIZE_BYTES;
- }
- getEmitter()->emitEnableGC();
- }
- }
-#endif // FEATURE_VARARG
-}
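// [Editorial illustration, not part of the original file] On AMD64 varargs, the
// float/double arg value is duplicated into the matching integer arg register by a
// raw bit copy (the ins_CopyFloatToInt move above), not by a numeric conversion.
// A minimal self-contained C++ sketch of that bit copy; the helper name below is
// purely hypothetical:
#include <cstdint>
#include <cstring>
static uint64_t floatArgBitsForIntReg(double d)
{
    uint64_t bits;
    std::memcpy(&bits, &d, sizeof(bits)); // same effect as the movd/movq register copy
    return bits;
}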
-
- // Produce code for a GT_LEA subnode
-void CodeGen::genLeaInstruction(GenTreeAddrMode* lea)
-{
- emitAttr size = emitTypeSize(lea);
- genConsumeOperands(lea);
-
- if (lea->Base() && lea->Index())
- {
- regNumber baseReg = lea->Base()->gtRegNum;
- regNumber indexReg = lea->Index()->gtRegNum;
- getEmitter()->emitIns_R_ARX(INS_lea, size, lea->gtRegNum, baseReg, indexReg, lea->gtScale, lea->gtOffset);
- }
- else if (lea->Base())
- {
- getEmitter()->emitIns_R_AR(INS_lea, size, lea->gtRegNum, lea->Base()->gtRegNum, lea->gtOffset);
- }
- else if (lea->Index())
- {
- getEmitter()->emitIns_R_ARX(INS_lea, size, lea->gtRegNum, REG_NA, lea->Index()->gtRegNum, lea->gtScale,
- lea->gtOffset);
- }
-
- genProduceReg(lea);
-}
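// [Editorial illustration, not part of the original file] The three emitIns calls
// above all materialize the same effective-address arithmetic; only which components
// are present differs. A tiny self-contained C++ analogue of the value that
// "lea dst, [base + index*scale + offset]" leaves in the destination register
// (the helper name is hypothetical):
#include <cstdint>
static inline uintptr_t leaValue(uintptr_t base, uintptr_t index, unsigned scale, intptr_t offset)
{
    // scale on xarch is 1, 2, 4 or 8; LEA performs only the address arithmetic, no memory access.
    return base + index * scale + (uintptr_t)offset;
}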
-
-//-------------------------------------------------------------------------------------------
-// genJumpKindsForTree: Determine the number and kinds of conditional branches
-// necessary to implement the given GT_CMP node
-//
-// Arguments:
-// cmpTree - (input) The GenTree node that is used to set the Condition codes
-// - The GenTree Relop node that was used to set the Condition codes
-// jmpKind[2] - (output) One or two conditional branch instructions
-// jmpToTrueLabel[2] - (output) When true we branch to the true case
-// When false we create a second label and branch to the false case
- // Only GT_EQ for floating point compares can have a false value.
-//
-// Return Value:
-// Sets the proper values into the array elements of jmpKind[] and jmpToTrueLabel[]
-//
-// Assumptions:
-// At least one conditional branch instruction will be returned.
-// Typically only one conditional branch is needed
-// and the second jmpKind[] value is set to EJ_NONE
-//
-// Notes:
-// jmpToTrueLabel[i]= true implies branch when the compare operation is true.
-// jmpToTrueLabel[i]= false implies branch when the compare operation is false.
-//-------------------------------------------------------------------------------------------
-
-// static
-void CodeGen::genJumpKindsForTree(GenTreePtr cmpTree, emitJumpKind jmpKind[2], bool jmpToTrueLabel[2])
-{
- // Except for BEQ (= ordered GT_EQ) both jumps are to the true label.
- jmpToTrueLabel[0] = true;
- jmpToTrueLabel[1] = true;
-
- // For integer comparisons just use genJumpKindForOper
- if (!varTypeIsFloating(cmpTree->gtOp.gtOp1->gtEffectiveVal()))
- {
- CompareKind compareKind = ((cmpTree->gtFlags & GTF_UNSIGNED) != 0) ? CK_UNSIGNED : CK_SIGNED;
- jmpKind[0] = genJumpKindForOper(cmpTree->gtOper, compareKind);
- jmpKind[1] = EJ_NONE;
- }
- else
- {
- assert(cmpTree->OperIsCompare());
-
- // For details on how we arrived at this mapping, see the comment block in genCodeForTreeNode()
- // while generating code for compare operators (e.g. GT_EQ etc).
- if ((cmpTree->gtFlags & GTF_RELOP_NAN_UN) != 0)
- {
- // Must branch if we have a NaN, unordered
- switch (cmpTree->gtOper)
- {
- case GT_LT:
- case GT_GT:
- jmpKind[0] = EJ_jb;
- jmpKind[1] = EJ_NONE;
- break;
-
- case GT_LE:
- case GT_GE:
- jmpKind[0] = EJ_jbe;
- jmpKind[1] = EJ_NONE;
- break;
-
- case GT_NE:
- jmpKind[0] = EJ_jpe;
- jmpKind[1] = EJ_jne;
- break;
-
- case GT_EQ:
- jmpKind[0] = EJ_je;
- jmpKind[1] = EJ_NONE;
- break;
-
- default:
- unreached();
- }
- }
- else // ((cmpTree->gtFlags & GTF_RELOP_NAN_UN) == 0)
- {
- // Do not branch if we have a NaN, unordered
- switch (cmpTree->gtOper)
- {
- case GT_LT:
- case GT_GT:
- jmpKind[0] = EJ_ja;
- jmpKind[1] = EJ_NONE;
- break;
-
- case GT_LE:
- case GT_GE:
- jmpKind[0] = EJ_jae;
- jmpKind[1] = EJ_NONE;
- break;
-
- case GT_NE:
- jmpKind[0] = EJ_jne;
- jmpKind[1] = EJ_NONE;
- break;
-
- case GT_EQ:
- jmpKind[0] = EJ_jpe;
- jmpKind[1] = EJ_je;
- jmpToTrueLabel[0] = false;
- break;
-
- default:
- unreached();
- }
- }
- }
-}
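// [Editorial illustration, not part of the original file] For the two cases above that
// need a second jump kind, the decision after "ucomis[s|d] a, b" follows from the flags:
// PF=1 means unordered (a NaN operand), ZF=1 means equal or unordered. A self-contained
// C++ model of the branch outcome (hypothetical helper names):
static bool orderedEqBranchTaken(bool pf, bool zf)
{
    // ordered GT_EQ: "jpe falseLabel; je trueLabel" - take the true branch only if not NaN and equal
    return !pf && zf;
}
static bool unorderedNeBranchTaken(bool pf, bool zf)
{
    // GT_NE with GTF_RELOP_NAN_UN: "jpe trueLabel; jne trueLabel" - NaN or not equal
    return pf || !zf;
}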
-
-#if !defined(_TARGET_64BIT_)
-//------------------------------------------------------------------------
-// genJumpKindsForTreeLongHi: Generate the jump types for compare
-// operators of the high parts of a compare with long type operands
-// on x86 for the case where rel-op result needs to be materialized into a
-// register.
-//
-// Arguments:
-// cmpTree - The GT_CMP node
- // jmpKind - Return array of jump kinds
-//
-// Return Value:
-// None.
-//
-void CodeGen::genJumpKindsForTreeLongHi(GenTreePtr cmpTree, emitJumpKind jmpKind[2])
-{
- assert(cmpTree->OperIsCompare());
- CompareKind compareKind = ((cmpTree->gtFlags & GTF_UNSIGNED) != 0) ? CK_UNSIGNED : CK_SIGNED;
-
- switch (cmpTree->gtOper)
- {
- case GT_LT:
- case GT_LE:
- if (compareKind == CK_SIGNED)
- {
- jmpKind[0] = EJ_jl;
- jmpKind[1] = EJ_jg;
- }
- else
- {
- jmpKind[0] = EJ_jb;
- jmpKind[1] = EJ_ja;
- }
- break;
-
- case GT_GT:
- case GT_GE:
- if (compareKind == CK_SIGNED)
- {
- jmpKind[0] = EJ_jg;
- jmpKind[1] = EJ_jl;
- }
- else
- {
- jmpKind[0] = EJ_ja;
- jmpKind[1] = EJ_jb;
- }
- break;
-
- case GT_EQ:
- // GT_EQ will not jump to the true label if the hi parts are equal
- jmpKind[0] = EJ_NONE;
- jmpKind[1] = EJ_jne;
- break;
-
- case GT_NE:
- // GT_NE will always jump to the true label if the high parts are not equal
- jmpKind[0] = EJ_jne;
- jmpKind[1] = EJ_NONE;
- break;
-
- default:
- unreached();
- }
-}
-
-//------------------------------------------------------------------------
-// genCompareLong: Generate code for comparing two longs on x86 when the result of the compare
-// is manifested in a register.
-//
-// Arguments:
-// treeNode - the compare tree
-//
-// Return Value:
-// None.
-// Comments:
-// For long compares, we need to compare the high parts of operands first, then the low parts.
-// If the high compare is false, we do not need to compare the low parts. For less than and
-// greater than, if the high compare is true, we can assume the entire compare is true. For
-// compares that are realized in a register, we will generate:
-//
-// Opcode x86 equivalent Comment
-// ------ -------------- -------
-// GT_EQ cmp hiOp1,hiOp2 If any part is not equal, the entire compare
-// jne label is false.
-// cmp loOp1,loOp2
-// label: sete
-//
-// GT_NE cmp hiOp1,hiOp2 If any part is not equal, the entire compare
-// jne label is true.
-// cmp loOp1,loOp2
-// label: setne
-//
-// GT_LT; unsigned cmp hiOp1,hiOp2 If hiOp1 is not equal to hiOp2, the flags are set
-// jne label correctly and we do not need to check lo. Otherwise,
-// cmp loOp1,loOp2 we need to compare the lo halves
-// label: setb
-//
-// GT_LE; unsigned cmp hiOp1,hiOp2 If hiOp1 is not equal to hiOp2, the flags are set
-// jne label correctly and we do not need to check lo. Otherwise,
-// cmp loOp1,loOp2 we need to compare the lo halves
-// label: setbe
-//
-// GT_GT; unsigned cmp hiOp1,hiOp2 If hiOp1 is not equal to hiOp2, the flags are set
-// jne label correctly and we do not need to check lo. Otherwise,
-// cmp loOp1,loOp2 we need to compare the lo halves
-// label: seta
-//
-// GT_GE; unsigned cmp hiOp1,hiOp2 If hiOp1 is not equal to hiOp2, the flags are set
-// jne label correctly and we do not need to check lo. Otherwise,
-// cmp loOp1,loOp2 we need to compare the lo halves
-// label: setae
-//
-// For signed long comparisons, we need additional labels, as we need to use signed conditions on the
-// "set" instruction:
-//
-// GT_LT; signed cmp hiOp1,hiOp2 If hiOp1 is not equal to hiOp2, the flags are set
-// jne labelHi correctly and we do not need to check lo. Otherwise,
-// cmp loOp1,loOp2 we need to compare the lo halves
-// setb Unsigned set for lo compare
-// jmp labelFinal
-// labelHi: setl Signed set for high compare
-// labelFinal:
-//
-// GT_LE; signed cmp hiOp1,hiOp2 If hiOp1 is not equal to hiOp2, the flags are set
-// jne labelHi correctly and we do not need to check lo. Otherwise,
-// cmp loOp1,loOp2 we need to compare the lo halves
- // setbe Unsigned set for lo compare
-// jmp labelFinal
-// labelHi: setle Signed set for hi compare
-// labelFinal:
-//
-// GT_GT; signed cmp hiOp1,hiOp2 If hiOp1 is not equal to hiOp2, the flags are set
-// jne labelHi correctly and we do not need to check lo. Otherwise,
-// cmp loOp1,loOp2 we need to compare the lo halves
-// seta Unsigned set for lo compare
-// jmp labelFinal
-// labelHi: setg Signed set for high compare
-// labelFinal
-//
-// GT_GE; signed cmp hiOp1,hiOp2 If hiOp1 is not equal to hiOp2, the flags are set
-// jne labelHi correctly and we do not need to check lo. Otherwise,
-// cmp loOp1,loOp2 we need to compare the lo halves
-// setae Unsigned set for lo compare
-// jmp labelFinal
-// labelHi: setge Signed set for hi compare
-// labelFinal:
-//
-// TODO-X86-CQ: Check if hi or lo parts of op2 are 0 and change the compare to a test.
-void CodeGen::genCompareLong(GenTreePtr treeNode)
-{
- assert(treeNode->OperIsCompare());
-
- GenTreeOp* tree = treeNode->AsOp();
- GenTreePtr op1 = tree->gtOp1;
- GenTreePtr op2 = tree->gtOp2;
-
- assert(varTypeIsLong(op1->TypeGet()));
- assert(varTypeIsLong(op2->TypeGet()));
-
- regNumber targetReg = treeNode->gtRegNum;
-
- genConsumeOperands(tree);
-
- assert(targetReg != REG_NA);
-
- GenTreePtr loOp1 = op1->gtGetOp1();
- GenTreePtr hiOp1 = op1->gtGetOp2();
- GenTreePtr loOp2 = op2->gtGetOp1();
- GenTreePtr hiOp2 = op2->gtGetOp2();
-
- // Create compare for the high parts
- instruction ins = INS_cmp;
- var_types cmpType = TYP_INT;
- emitAttr cmpAttr = emitTypeSize(cmpType);
-
- // Emit the compare instruction
- getEmitter()->emitInsBinary(ins, cmpAttr, hiOp1, hiOp2);
-
- // Generate the first jump for the high compare
- CompareKind compareKind = ((tree->gtFlags & GTF_UNSIGNED) != 0) ? CK_UNSIGNED : CK_SIGNED;
-
- BasicBlock* labelHi = genCreateTempLabel();
- BasicBlock* labelFinal = genCreateTempLabel();
-
- if (compareKind == CK_SIGNED && (tree->gtOper != GT_NE && tree->gtOper != GT_EQ))
- {
- // If we are doing a signed comparison, we need to do a signed set if the high compare is true,
- // but an unsigned set if we fall through to the low compare. If we have a GT_NE or GT_EQ, we do not
- // need to worry about the sign of the comparison, so we can use the simplified case.
-
- // We only have to check for equality for the hi comparison. If they are not equal, then the set will
- // do the right thing. If they are equal, we have to check the lo halves.
- inst_JMP(EJ_jne, labelHi);
-
- // Emit the comparison. Perform the set for the lo. Jump to labelFinal
- getEmitter()->emitInsBinary(ins, cmpAttr, loOp1, loOp2);
-
- // The low set must be unsigned
- emitJumpKind jumpKindLo = genJumpKindForOper(tree->gtOper, CK_UNSIGNED);
-
- inst_SET(jumpKindLo, targetReg);
- // Set the higher bytes to 0
- inst_RV_RV(ins_Move_Extend(TYP_UBYTE, true), targetReg, targetReg, TYP_UBYTE, emitTypeSize(TYP_UBYTE));
- genProduceReg(tree);
-
- inst_JMP(EJ_jmp, labelFinal);
-
- // Define the label for hi jump target here. If we have jumped here, we want to set
- // the target register based on the jump kind of the actual compare type.
-
- genDefineTempLabel(labelHi);
- inst_SET(genJumpKindForOper(tree->gtOper, compareKind), targetReg);
-
- // Set the higher bytes to 0
- inst_RV_RV(ins_Move_Extend(TYP_UBYTE, true), targetReg, targetReg, TYP_UBYTE, emitTypeSize(TYP_UBYTE));
- genProduceReg(tree);
-
- genDefineTempLabel(labelFinal);
- }
- else
- {
- // If the compare is unsigned, or if the sign doesn't change the set instruction, we can use
- // the same set logic for both the hi and lo compare, so we don't need to jump to a high label,
- // we can just jump to the set that the lo compare will use.
-
- // We only have to check for equality for the hi comparison. If they are not equal, then the set will
- // do the right thing. If they are equal, we have to check the lo halves.
- inst_JMP(EJ_jne, labelFinal);
-
- // Emit the comparison
- getEmitter()->emitInsBinary(ins, cmpAttr, loOp1, loOp2);
-
- // Define the label for hi jump target here. If we have jumped here, we want to set
- // the target register based on the jump kind of the lower half (the actual compare
- // type). If we have fallen through, then we are doing a normal int compare for the
- // lower parts
-
- genDefineTempLabel(labelFinal);
-
- // The low set must be unsigned
- emitJumpKind jumpKindLo = genJumpKindForOper(tree->gtOper, CK_UNSIGNED);
-
- inst_SET(jumpKindLo, targetReg);
- // Set the higher bytes to 0
- inst_RV_RV(ins_Move_Extend(TYP_UBYTE, true), targetReg, targetReg, TYP_UBYTE, emitTypeSize(TYP_UBYTE));
- genProduceReg(tree);
- }
-}
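// [Editorial illustration, not part of the original file] The hi/lo sequence generated
// above follows the standard two-word comparison identity: compare the high halves with
// the signedness of the operator, and fall back to an unsigned compare of the low halves
// only when the high halves are equal. A self-contained C++ version for signed less-than
// (hypothetical helper name):
#include <cstdint>
static bool signedLongLessThan(int64_t a, int64_t b)
{
    int32_t  hiA = (int32_t)((uint64_t)a >> 32), hiB = (int32_t)((uint64_t)b >> 32);
    uint32_t loA = (uint32_t)a,                  loB = (uint32_t)b;
    if (hiA != hiB)
    {
        return hiA < hiB; // signed "setl" path (labelHi in the comment above)
    }
    return loA < loB;     // unsigned "setb" path for the low halves
}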
-
-//------------------------------------------------------------------------
-// genJTrueLong: Generate code for comparing two longs on x86 for the case where the result
-// is not manifested in a register.
-//
-// Arguments:
-// treeNode - the compare tree
-//
-// Return Value:
-// None.
-// Comments:
-// For long compares, we need to compare the high parts of operands first, then the low parts.
-// We only have to do the low compare if the high parts of the operands are equal.
-//
-// In the case where the result of a rel-op is not realized in a register, we generate:
-//
-// Opcode x86 equivalent Comment
-// ------ -------------- -------
-//
-// GT_LT; unsigned cmp hiOp1,hiOp2
-// jb trueLabel
-// ja falseLabel
-// cmp loOp1,loOp2
-// jb trueLabel
-// falseLabel:
-//
-// GT_LE; unsigned cmp hiOp1,hiOp2
-// jb trueLabel
-// ja falseLabel
-// cmp loOp1,loOp2
-// jbe trueLabel
-// falseLabel:
-//
-// GT_GT; unsigned cmp hiOp1,hiOp2
-// ja trueLabel
-// jb falseLabel
-// cmp loOp1,loOp2
-// ja trueLabel
-// falseLabel:
-//
-// GT_GE; unsigned cmp hiOp1,hiOp2
-// ja trueLabel
-// jb falseLabel
-// cmp loOp1,loOp2
-// jae trueLabel
-// falseLabel:
-//
-// GT_LT; signed cmp hiOp1,hiOp2
-// jl trueLabel
-// jg falseLabel
-// cmp loOp1,loOp2
-// jb trueLabel
-// falseLabel:
-//
-// GT_LE; signed cmp hiOp1,hiOp2
-// jl trueLabel
-// jg falseLabel
-// cmp loOp1,loOp2
-// jbe trueLabel
-// falseLabel:
-//
-// GT_GT; signed cmp hiOp1,hiOp2
-// jg trueLabel
-// jl falseLabel
-// cmp loOp1,loOp2
-// ja trueLabel
-// falseLabel:
-//
-// GT_GE; signed cmp hiOp1,hiOp2
-// jg trueLabel
-// jl falseLabel
-// cmp loOp1,loOp2
-// jae trueLabel
-// falseLabel:
-//
-// GT_EQ; cmp hiOp1,hiOp2
-// jne falseLabel
-// cmp loOp1,loOp2
-// je trueLabel
-// falseLabel:
-//
-// GT_NE; cmp hiOp1,hiOp2
- // jne trueLabel
-// cmp loOp1,loOp2
-// jne trueLabel
-// falseLabel:
-//
-// TODO-X86-CQ: Check if hi or lo parts of op2 are 0 and change the compare to a test.
-void CodeGen::genJTrueLong(GenTreePtr treeNode)
-{
- assert(treeNode->OperIsCompare());
-
- GenTreeOp* tree = treeNode->AsOp();
- GenTreePtr op1 = tree->gtOp1;
- GenTreePtr op2 = tree->gtOp2;
-
- assert(varTypeIsLong(op1->TypeGet()));
- assert(varTypeIsLong(op2->TypeGet()));
-
- regNumber targetReg = treeNode->gtRegNum;
-
- assert(targetReg == REG_NA);
-
- GenTreePtr loOp1 = op1->gtGetOp1();
- GenTreePtr hiOp1 = op1->gtGetOp2();
- GenTreePtr loOp2 = op2->gtGetOp1();
- GenTreePtr hiOp2 = op2->gtGetOp2();
-
- // Emit the compare instruction
- getEmitter()->emitInsBinary(INS_cmp, EA_4BYTE, hiOp1, hiOp2);
-
- // Generate the first jump for the high compare
- CompareKind compareKind = ((tree->gtFlags & GTF_UNSIGNED) != 0) ? CK_UNSIGNED : CK_SIGNED;
-
- // TODO-X86-CQ: If the next block is a BBJ_ALWAYS, we can set falseLabel = compiler->compCurBB->bbNext->bbJumpDest.
- BasicBlock* falseLabel = genCreateTempLabel();
-
- emitJumpKind jumpKindHi[2];
-
- // Generate the jumps for the high compare
- genJumpKindsForTreeLongHi(tree, jumpKindHi);
-
- BasicBlock* trueLabel = compiler->compCurBB->bbJumpDest;
-
- if (jumpKindHi[0] != EJ_NONE)
- {
- inst_JMP(jumpKindHi[0], trueLabel);
- }
-
- if (jumpKindHi[1] != EJ_NONE)
- {
- inst_JMP(jumpKindHi[1], falseLabel);
- }
-
- // The low jump must be unsigned
- emitJumpKind jumpKindLo = genJumpKindForOper(tree->gtOper, CK_UNSIGNED);
-
- // Emit the comparison and the jump to the trueLabel
- getEmitter()->emitInsBinary(INS_cmp, EA_4BYTE, loOp1, loOp2);
-
- inst_JMP(jumpKindLo, trueLabel);
-
- // Generate falseLabel, which is the false path. We will jump here if the high compare is false
- // or fall through if the low compare is false.
- genDefineTempLabel(falseLabel);
-}
-#endif //! defined(_TARGET_64BIT_)
-
-//------------------------------------------------------------------------
-// genCompareFloat: Generate code for comparing two floating point values
-//
-// Arguments:
-// treeNode - the compare tree
-//
-// Return Value:
-// None.
-// Comments:
- // SSE2 instruction ucomis[s|d] performs an unordered comparison and
- // updates the rFLAGS register as follows.
-// Result of compare ZF PF CF
-// ----------------- ------------
-// Unordered 1 1 1 <-- this result implies one of operands of compare is a NAN.
-// Greater 0 0 0
-// Less Than 0 0 1
-// Equal 1 0 0
-//
-// From the above table the following equalities follow. As per ECMA spec *.UN opcodes perform
- // unordered comparison of floating point values. That is, *.UN comparisons result in true when
- // one of the operands is a NaN, whereas ordered comparisons result in false.
-//
-// Opcode Amd64 equivalent Comment
-// ------ ----------------- --------
-// BLT.UN(a,b) ucomis[s|d] a, b Jb branches if CF=1, which means either a<b or unordered from the above
-// jb table
-//
-// BLT(a,b) ucomis[s|d] b, a Ja branches if CF=0 and ZF=0, which means b>a that in turn implies a<b
-// ja
-//
-// BGT.UN(a,b) ucomis[s|d] b, a branch if b<a or unordered ==> branch if a>b or unordered
-// jb
-//
-// BGT(a, b) ucomis[s|d] a, b branch if a>b
-// ja
-//
-// BLE.UN(a,b) ucomis[s|d] a, b jbe branches if CF=1 or ZF=1, which implies a<=b or unordered
-// jbe
-//
- // BLE(a,b) ucomis[s|d] b, a jae branches if CF=0, which means b>=a, i.e. a<=b
-// jae
-//
-// BGE.UN(a,b) ucomis[s|d] b, a branch if b<=a or unordered ==> branch if a>=b or unordered
-// jbe
-//
-// BGE(a,b) ucomis[s|d] a, b branch if a>=b
-// jae
-//
-// BEQ.UN(a,b) ucomis[s|d] a, b branch if a==b or unordered. There is no BEQ.UN opcode in ECMA spec.
- // je This case is given for completeness, in case the JIT generates such
-// a gentree internally.
-//
-// BEQ(a,b) ucomis[s|d] a, b From the above table, PF=0 and ZF=1 corresponds to a==b.
-// jpe L1
-// je <true label>
-// L1:
-//
-// BNE(a,b) ucomis[s|d] a, b branch if a!=b. There is no BNE opcode in ECMA spec. This case is
- // jne given for completeness, in case the JIT generates such a gentree
-// internally.
-//
-// BNE.UN(a,b) ucomis[s|d] a, b From the above table, PF=1 or ZF=0 implies unordered or a!=b
-// jpe <true label>
-// jne <true label>
-//
- // As we can see from the above equalities, the operands of a compare operator need to be
- // reversed in the case of BLT/CLT, BGT.UN/CGT.UN, BLE/CLE, BGE.UN/CGE.UN.
-void CodeGen::genCompareFloat(GenTreePtr treeNode)
-{
- assert(treeNode->OperIsCompare());
-
- GenTreeOp* tree = treeNode->AsOp();
- GenTreePtr op1 = tree->gtOp1;
- GenTreePtr op2 = tree->gtOp2;
- var_types op1Type = op1->TypeGet();
- var_types op2Type = op2->TypeGet();
-
- genConsumeOperands(tree);
-
- assert(varTypeIsFloating(op1Type));
- assert(op1Type == op2Type);
-
- regNumber targetReg = treeNode->gtRegNum;
- instruction ins;
- emitAttr cmpAttr;
-
- bool reverseOps;
- if ((tree->gtFlags & GTF_RELOP_NAN_UN) != 0)
- {
- // Unordered comparison case
- reverseOps = (tree->gtOper == GT_GT || tree->gtOper == GT_GE);
- }
- else
- {
- reverseOps = (tree->gtOper == GT_LT || tree->gtOper == GT_LE);
- }
-
- if (reverseOps)
- {
- GenTreePtr tmp = op1;
- op1 = op2;
- op2 = tmp;
- }
-
- ins = ins_FloatCompare(op1Type);
- cmpAttr = emitTypeSize(op1Type);
-
- getEmitter()->emitInsBinary(ins, cmpAttr, op1, op2);
-
- // Are we evaluating this into a register?
- if (targetReg != REG_NA)
- {
- genSetRegToCond(targetReg, tree);
- genProduceReg(tree);
- }
-}
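// [Editorial illustration, not part of the original file] The operand reversal above just
// rewrites each ordered "<"/"<=" (and unordered ">"/">=") so the required condition maps
// onto ja/jae/jb/jbe after ucomis[s|d]. The equivalences themselves hold in plain C++;
// NaN operands make the ordered forms false and the .UN forms true (hypothetical names):
static bool bltOrdered(double a, double b)
{
    return b > a;     // ucomisd b, a ; ja  -- false if either operand is NaN
}
static bool bltUnordered(double a, double b)
{
    return !(a >= b); // ucomisd a, b ; jb  -- true if a < b or either operand is NaN
}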
-
-//------------------------------------------------------------------------
-// genCompareInt: Generate code for comparing ints or, on amd64, longs.
-//
-// Arguments:
-// treeNode - the compare tree
-//
-// Return Value:
-// None.
-void CodeGen::genCompareInt(GenTreePtr treeNode)
-{
- assert(treeNode->OperIsCompare());
-
- GenTreeOp* tree = treeNode->AsOp();
- GenTreePtr op1 = tree->gtOp1;
- GenTreePtr op2 = tree->gtOp2;
- var_types op1Type = op1->TypeGet();
- var_types op2Type = op2->TypeGet();
-
- genConsumeOperands(tree);
-
- instruction ins;
- emitAttr cmpAttr;
-
- regNumber targetReg = treeNode->gtRegNum;
- assert(!op1->isContainedIntOrIImmed()); // We no longer support swapping op1 and op2 to generate cmp reg, imm
- assert(!varTypeIsFloating(op2Type));
-
-#ifdef _TARGET_X86_
- assert(!varTypeIsLong(op1Type) && !varTypeIsLong(op2Type));
-#endif // _TARGET_X86_
-
- // By default we use an int32 sized cmp instruction
- //
- ins = INS_cmp;
- var_types cmpType = TYP_INT;
-
- // In the if/then/else statement below we may change the
- // 'cmpType' and/or 'ins' to generate a smaller instruction
-
- // Are we comparing two values that are the same size?
- //
- if (genTypeSize(op1Type) == genTypeSize(op2Type))
- {
- if (op1Type == op2Type)
- {
- // If both types are exactly the same we can use that type
- cmpType = op1Type;
- }
- else if (genTypeSize(op1Type) == 8)
- {
- // If we have two different int64 types we need to use a long compare
- cmpType = TYP_LONG;
- }
-
- cmpAttr = emitTypeSize(cmpType);
- }
- else // Here we know that (op1Type != op2Type)
- {
- // Do we have a short compare against a constant in op2?
- //
- // We checked for this case in LowerCmp() and if we can perform a small
- // compare immediate we labeled this compare with a GTF_RELOP_SMALL
- // and for unsigned small non-equality compares the GTF_UNSIGNED flag.
- //
- if (op2->isContainedIntOrIImmed() && ((tree->gtFlags & GTF_RELOP_SMALL) != 0))
- {
- assert(varTypeIsSmall(op1Type));
- cmpType = op1Type;
- }
-#ifdef _TARGET_AMD64_
- else // compare two different sized operands
- {
- // For this case we don't want any memory operands, only registers or immediates
- //
- assert(!op1->isContainedMemoryOp());
- assert(!op2->isContainedMemoryOp());
-
- // Check for the case where one operand is an int64 type
- // Lower should have placed 32-bit operand in a register
- // for signed comparisons we will sign extend the 32-bit value in place.
- //
- bool op1Is64Bit = (genTypeSize(op1Type) == 8);
- bool op2Is64Bit = (genTypeSize(op2Type) == 8);
- if (op1Is64Bit)
- {
- cmpType = TYP_LONG;
- if (!(tree->gtFlags & GTF_UNSIGNED) && !op2Is64Bit)
- {
- assert(op2->gtRegNum != REG_NA);
- inst_RV_RV(INS_movsxd, op2->gtRegNum, op2->gtRegNum, op2Type);
- }
- }
- else if (op2Is64Bit)
- {
- cmpType = TYP_LONG;
- if (!(tree->gtFlags & GTF_UNSIGNED) && !op1Is64Bit)
- {
- assert(op1->gtRegNum != REG_NA);
- }
- }
- }
-#endif // _TARGET_AMD64_
-
- cmpAttr = emitTypeSize(cmpType);
- }
-
- // See if we can generate a "test" instruction instead of a "cmp".
- // For this to generate the correct conditional branch we must have
- // a compare against zero.
- //
- if (op2->IsIntegralConst(0))
- {
- if (op1->isContained())
- {
- // op1 can be a contained memory op
- // or the special contained GT_AND that we created in Lowering::LowerCmp()
- //
- if ((op1->OperGet() == GT_AND))
- {
- noway_assert(op1->gtOp.gtOp2->isContainedIntOrIImmed());
-
- ins = INS_test; // we will generate "test andOp1, andOp2CnsVal"
- op2 = op1->gtOp.gtOp2; // must assign op2 before we overwrite op1
- op1 = op1->gtOp.gtOp1; // overwrite op1
-
- if (op1->isContainedMemoryOp())
- {
- // use the size of andOp1 if it is a contained memory op.
- cmpAttr = emitTypeSize(op1->TypeGet());
- }
- // fallthrough to emit->emitInsBinary(ins, cmpAttr, op1, op2);
- }
- }
- else // op1 is not contained thus it must be in a register
- {
- ins = INS_test;
- op2 = op1; // we will generate "test reg1,reg1"
- // fallthrough to emit->emitInsBinary(ins, cmpAttr, op1, op2);
- }
- }
-
- getEmitter()->emitInsBinary(ins, cmpAttr, op1, op2);
-
- // Are we evaluating this into a register?
- if (targetReg != REG_NA)
- {
- genSetRegToCond(targetReg, tree);
- genProduceReg(tree);
- }
-}
-
-//-------------------------------------------------------------------------------------------
-// genSetRegToCond: Set a register 'dstReg' to the appropriate one or zero value
-// corresponding to a binary Relational operator result.
-//
-// Arguments:
-// dstReg - The target register to set to 1 or 0
-// tree - The GenTree Relop node that was used to set the Condition codes
-//
-// Return Value: none
-//
-// Notes:
-// A full 64-bit value of either 1 or 0 is setup in the 'dstReg'
-//-------------------------------------------------------------------------------------------
-
-void CodeGen::genSetRegToCond(regNumber dstReg, GenTreePtr tree)
-{
- noway_assert((genRegMask(dstReg) & RBM_BYTE_REGS) != 0);
-
- emitJumpKind jumpKind[2];
- bool branchToTrueLabel[2];
- genJumpKindsForTree(tree, jumpKind, branchToTrueLabel);
-
- if (jumpKind[1] == EJ_NONE)
- {
- // Set (lower byte of) reg according to the flags
- inst_SET(jumpKind[0], dstReg);
- }
- else
- {
-#ifdef DEBUG
- // jmpKind[1] != EJ_NONE implies BEQ and BNE.UN of floating point values.
- // These are represented by two conditions.
- if (tree->gtOper == GT_EQ)
- {
- // This must be an ordered comparison.
- assert((tree->gtFlags & GTF_RELOP_NAN_UN) == 0);
- }
- else
- {
- // This must be BNE.UN
- assert((tree->gtOper == GT_NE) && ((tree->gtFlags & GTF_RELOP_NAN_UN) != 0));
- }
-#endif
-
- // Here is the sample code generated in each case:
- // BEQ == cmp, jpe <false label>, je <true label>
- // That is, to materialize comparison reg needs to be set if PF=0 and ZF=1
- // setnp reg // if (PF==0) reg = 1 else reg = 0
- // jpe L1 // Jmp if PF==1
- // sete reg
- // L1:
- //
- // BNE.UN == cmp, jpe <true label>, jne <true label>
- // That is, to materialize the comparison reg needs to be set if either PF=1 or ZF=0;
- // setp reg
- // jpe L1
- // setne reg
- // L1:
-
- // reverse the jmpkind condition before setting dstReg if it is to false label.
- inst_SET(branchToTrueLabel[0] ? jumpKind[0] : emitter::emitReverseJumpKind(jumpKind[0]), dstReg);
-
- BasicBlock* label = genCreateTempLabel();
- inst_JMP(jumpKind[0], label);
-
- // second branch is always to true label
- assert(branchToTrueLabel[1]);
- inst_SET(jumpKind[1], dstReg);
- genDefineTempLabel(label);
- }
-
- var_types treeType = tree->TypeGet();
- if (treeType == TYP_INT || treeType == TYP_LONG)
- {
- // Set the higher bytes to 0
- inst_RV_RV(ins_Move_Extend(TYP_UBYTE, true), dstReg, dstReg, TYP_UBYTE, emitTypeSize(TYP_UBYTE));
- }
- else
- {
- noway_assert(treeType == TYP_BYTE);
- }
-}
-
-//------------------------------------------------------------------------
-// genIntToIntCast: Generate code for an integer cast
-// This method handles integer overflow checking casts
-// as well as ordinary integer casts.
-//
-// Arguments:
-// treeNode - The GT_CAST node
-//
-// Return Value:
-// None.
-//
-// Assumptions:
-// The treeNode is not a contained node and must have an assigned register.
-// For a signed convert from byte, the source must be in a byte-addressable register.
-// Neither the source nor target type can be a floating point type.
-//
-// TODO-XArch-CQ: Allow castOp to be a contained node without an assigned register.
-// TODO: refactor to use getCastDescription
-//
-void CodeGen::genIntToIntCast(GenTreePtr treeNode)
-{
- assert(treeNode->OperGet() == GT_CAST);
-
- GenTreePtr castOp = treeNode->gtCast.CastOp();
- regNumber targetReg = treeNode->gtRegNum;
- regNumber sourceReg = castOp->gtRegNum;
- var_types dstType = treeNode->CastToType();
- bool isUnsignedDst = varTypeIsUnsigned(dstType);
- var_types srcType = genActualType(castOp->TypeGet());
- bool isUnsignedSrc = varTypeIsUnsigned(srcType);
-
- // if necessary, force the srcType to unsigned when the GT_UNSIGNED flag is set
- if (!isUnsignedSrc && (treeNode->gtFlags & GTF_UNSIGNED) != 0)
- {
- srcType = genUnsignedType(srcType);
- isUnsignedSrc = true;
- }
-
- bool requiresOverflowCheck = false;
- bool needAndAfter = false;
-
- assert(genIsValidIntReg(targetReg));
- assert(genIsValidIntReg(sourceReg));
-
- instruction ins = INS_invalid;
- emitAttr size = EA_UNKNOWN;
-
- if (genTypeSize(srcType) < genTypeSize(dstType))
- {
- // Widening cast
-
- // Is this an Overflow checking cast?
- // We only need to handle one case, as the other casts can never overflow.
- // cast from TYP_INT to TYP_ULONG
- //
- if (treeNode->gtOverflow() && (srcType == TYP_INT) && (dstType == TYP_ULONG))
- {
- requiresOverflowCheck = true;
- size = EA_ATTR(genTypeSize(srcType));
- ins = INS_mov;
- }
- else
- {
- // we need the source size
- size = EA_ATTR(genTypeSize(srcType));
- noway_assert(size < EA_PTRSIZE);
-
- ins = ins_Move_Extend(srcType, castOp->InReg());
-
- /*
- Special case: ins_Move_Extend assumes the destination type is no bigger
- than TYP_INT. movsx and movzx can already extend all the way to
- 64-bit, and a regular 32-bit mov clears the high 32 bits (like the non-existent movzxd),
- but for a sign extension from TYP_INT to TYP_LONG, we need to use the movsxd opcode.
- */
- if (!isUnsignedSrc && !isUnsignedDst && (size == EA_4BYTE) && (genTypeSize(dstType) > EA_4BYTE))
- {
-#ifdef _TARGET_X86_
- NYI_X86("Cast to 64 bit for x86/RyuJIT");
-#else // !_TARGET_X86_
- ins = INS_movsxd;
-#endif // !_TARGET_X86_
- }
-
- /*
- Special case: for a cast of byte to char we first
- have to expand the byte (w/ sign extension), then
- mask off the high bits.
- Use 'movsx' followed by 'and'
- */
- if (!isUnsignedSrc && isUnsignedDst && (genTypeSize(dstType) < EA_4BYTE))
- {
- noway_assert(genTypeSize(dstType) == EA_2BYTE && size == EA_1BYTE);
- needAndAfter = true;
- }
- }
- }
- else
- {
- // Narrowing cast, or sign-changing cast
- noway_assert(genTypeSize(srcType) >= genTypeSize(dstType));
-
- // Is this an Overflow checking cast?
- if (treeNode->gtOverflow())
- {
- requiresOverflowCheck = true;
- size = EA_ATTR(genTypeSize(srcType));
- ins = INS_mov;
- }
- else
- {
- size = EA_ATTR(genTypeSize(dstType));
- ins = ins_Move_Extend(dstType, castOp->InReg());
- }
- }
-
- noway_assert(ins != INS_invalid);
-
- genConsumeReg(castOp);
-
- if (requiresOverflowCheck)
- {
- ssize_t typeMin = 0;
- ssize_t typeMax = 0;
- ssize_t typeMask = 0;
- bool needScratchReg = false;
- bool signCheckOnly = false;
-
- /* Do we need to compare the value, or just check masks */
-
- switch (dstType)
- {
- case TYP_BYTE:
- typeMask = ssize_t((int)0xFFFFFF80);
- typeMin = SCHAR_MIN;
- typeMax = SCHAR_MAX;
- break;
-
- case TYP_UBYTE:
- typeMask = ssize_t((int)0xFFFFFF00L);
- break;
-
- case TYP_SHORT:
- typeMask = ssize_t((int)0xFFFF8000);
- typeMin = SHRT_MIN;
- typeMax = SHRT_MAX;
- break;
-
- case TYP_CHAR:
- typeMask = ssize_t((int)0xFFFF0000L);
- break;
-
- case TYP_INT:
- if (srcType == TYP_UINT)
- {
- signCheckOnly = true;
- }
- else
- {
- typeMask = 0xFFFFFFFF80000000LL;
- typeMin = INT_MIN;
- typeMax = INT_MAX;
- }
- break;
-
- case TYP_UINT:
- if (srcType == TYP_INT)
- {
- signCheckOnly = true;
- }
- else
- {
- needScratchReg = true;
- }
- break;
-
- case TYP_LONG:
- noway_assert(srcType == TYP_ULONG);
- signCheckOnly = true;
- break;
-
- case TYP_ULONG:
- noway_assert((srcType == TYP_LONG) || (srcType == TYP_INT));
- signCheckOnly = true;
- break;
-
- default:
- NO_WAY("Unknown type");
- return;
- }
-
- if (signCheckOnly)
- {
- // We only need to check for a negative value in sourceReg
- inst_RV_IV(INS_cmp, sourceReg, 0, size);
- genJumpToThrowHlpBlk(EJ_jl, SCK_OVERFLOW);
- }
- else
- {
- regNumber tmpReg = REG_NA;
-
- if (needScratchReg)
- {
- // We need an additional temp register
- // Make sure we have exactly one allocated.
- assert(treeNode->gtRsvdRegs != RBM_NONE);
- assert(genCountBits(treeNode->gtRsvdRegs) == 1);
- tmpReg = genRegNumFromMask(treeNode->gtRsvdRegs);
- }
-
- // When we are converting from unsigned or to unsigned, we
- // will only have to check for any bits set using 'typeMask'
- if (isUnsignedSrc || isUnsignedDst)
- {
- if (needScratchReg)
- {
- inst_RV_RV(INS_mov, tmpReg, sourceReg, TYP_LONG); // Move the 64-bit value to a writeable temp reg
- inst_RV_SH(INS_SHIFT_RIGHT_LOGICAL, size, tmpReg, 32); // Shift right by 32 bits
- genJumpToThrowHlpBlk(EJ_jne, SCK_OVERFLOW); // Throw if the shifted result is non-zero
- }
- else
- {
- noway_assert(typeMask != 0);
- inst_RV_IV(INS_TEST, sourceReg, typeMask, size);
- genJumpToThrowHlpBlk(EJ_jne, SCK_OVERFLOW);
- }
- }
- else
- {
- // For a narrowing signed cast
- //
- // We must check the value is in a signed range.
-
- // Compare with the MAX
-
- noway_assert((typeMin != 0) && (typeMax != 0));
-
- inst_RV_IV(INS_cmp, sourceReg, typeMax, size);
- genJumpToThrowHlpBlk(EJ_jg, SCK_OVERFLOW);
-
- // Compare with the MIN
-
- inst_RV_IV(INS_cmp, sourceReg, typeMin, size);
- genJumpToThrowHlpBlk(EJ_jl, SCK_OVERFLOW);
- }
- }
-
- if (targetReg != sourceReg
-#ifdef _TARGET_AMD64_
- // On amd64, we can hit this path for a same-register
- // 4-byte to 8-byte widening conversion, and need to
- // emit the instruction to set the high bits correctly.
- || (EA_ATTR(genTypeSize(dstType)) == EA_8BYTE && EA_ATTR(genTypeSize(srcType)) == EA_4BYTE)
-#endif // _TARGET_AMD64_
- )
- inst_RV_RV(ins, targetReg, sourceReg, srcType, size);
- }
- else // non-overflow checking cast
- {
- noway_assert(size < EA_PTRSIZE || srcType == dstType);
-
- // We may have code transformations that result in casts where srcType is the same as dstType.
- // e.g. Bug 824281, in which a comma is split by the rationalizer, leaving an assignment of a
- // long constant to a long lclVar.
- if (srcType == dstType)
- {
- ins = INS_mov;
- }
- /* Is the value sitting in a non-byte-addressable register? */
- else if (castOp->InReg() && (size == EA_1BYTE) && !isByteReg(sourceReg))
- {
- if (isUnsignedDst)
- {
- // for unsigned values we can AND, so it need not be a byte register
- ins = INS_AND;
- }
- else
- {
- // Move the value into a byte register
- noway_assert(!"Signed byte convert from non-byte-addressable register");
- }
-
- /* Generate "mov targetReg, castOp->gtReg */
- if (targetReg != sourceReg)
- {
- inst_RV_RV(INS_mov, targetReg, sourceReg, srcType);
- }
- }
-
- if (ins == INS_AND)
- {
- noway_assert((needAndAfter == false) && isUnsignedDst);
-
- /* Generate "and reg, MASK */
- unsigned fillPattern;
- if (size == EA_1BYTE)
- {
- fillPattern = 0xff;
- }
- else if (size == EA_2BYTE)
- {
- fillPattern = 0xffff;
- }
- else
- {
- fillPattern = 0xffffffff;
- }
-
- inst_RV_IV(INS_AND, targetReg, fillPattern, EA_4BYTE);
- }
-#ifdef _TARGET_AMD64_
- else if (ins == INS_movsxd)
- {
- noway_assert(!needAndAfter);
- inst_RV_RV(ins, targetReg, sourceReg, srcType, size);
- }
-#endif // _TARGET_AMD64_
- else if (ins == INS_mov)
- {
- noway_assert(!needAndAfter);
- if (targetReg != sourceReg
-#ifdef _TARGET_AMD64_
- // On amd64, 'mov' is the opcode used to zero-extend from
- // 4 bytes to 8 bytes.
- || (EA_ATTR(genTypeSize(dstType)) == EA_8BYTE && EA_ATTR(genTypeSize(srcType)) == EA_4BYTE)
-#endif // _TARGET_AMD64_
- )
- {
- inst_RV_RV(ins, targetReg, sourceReg, srcType, size);
- }
- }
- else
- {
- noway_assert(ins == INS_movsx || ins == INS_movzx);
-
- /* Generate "mov targetReg, castOp->gtReg */
- inst_RV_RV(ins, targetReg, sourceReg, srcType, size);
-
- /* Mask off high bits for cast from byte to char */
- if (needAndAfter)
- {
- noway_assert(genTypeSize(dstType) == 2 && ins == INS_movsx);
- inst_RV_IV(INS_AND, targetReg, 0xFFFF, EA_4BYTE);
- }
- }
- }
-
- genProduceReg(treeNode);
-}
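// [Editorial illustration, not part of the original file] The overflow checks above reduce
// to either a mask test (when the source or destination is unsigned) or a min/max range
// compare (signed narrowing). A self-contained C++ rendering of the two shapes for a
// 32-bit source (hypothetical helper names):
#include <cstdint>
#include <climits>
static bool fitsInUByte(int32_t v)
{
    return (v & 0xFFFFFF00) == 0;                // "test src, typeMask" shape
}
static bool fitsInSByte(int32_t v)
{
    return (v >= SCHAR_MIN) && (v <= SCHAR_MAX); // "cmp src, max; cmp src, min" shape
}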
-
-//------------------------------------------------------------------------
-// genFloatToFloatCast: Generate code for a cast between float and double
-//
-// Arguments:
-// treeNode - The GT_CAST node
-//
-// Return Value:
-// None.
-//
-// Assumptions:
-// Cast is a non-overflow conversion.
-// The treeNode must have an assigned register.
-// The cast is between float and double or vice versa.
-//
-void CodeGen::genFloatToFloatCast(GenTreePtr treeNode)
-{
- // float <--> double conversions are always non-overflow ones
- assert(treeNode->OperGet() == GT_CAST);
- assert(!treeNode->gtOverflow());
-
- regNumber targetReg = treeNode->gtRegNum;
- assert(genIsValidFloatReg(targetReg));
-
- GenTreePtr op1 = treeNode->gtOp.gtOp1;
-#ifdef DEBUG
- // If not contained, must be a valid float reg.
- if (!op1->isContained())
- {
- assert(genIsValidFloatReg(op1->gtRegNum));
- }
-#endif
-
- var_types dstType = treeNode->CastToType();
- var_types srcType = op1->TypeGet();
- assert(varTypeIsFloating(srcType) && varTypeIsFloating(dstType));
-
- genConsumeOperands(treeNode->AsOp());
- if (srcType == dstType && targetReg == op1->gtRegNum)
- {
- // The source and destination types are the same and also reside in the same register;
- // we just need to consume and produce the reg in this case.
- ;
- }
- else
- {
- instruction ins = ins_FloatConv(dstType, srcType);
- getEmitter()->emitInsBinary(ins, emitTypeSize(dstType), treeNode, op1);
- }
-
- genProduceReg(treeNode);
-}
-
-//------------------------------------------------------------------------
-// genIntToFloatCast: Generate code to cast an int/long to float/double
-//
-// Arguments:
-// treeNode - The GT_CAST node
-//
-// Return Value:
-// None.
-//
-// Assumptions:
-// Cast is a non-overflow conversion.
-// The treeNode must have an assigned register.
-// SrcType= int32/uint32/int64/uint64 and DstType=float/double.
-//
-void CodeGen::genIntToFloatCast(GenTreePtr treeNode)
-{
- // int type --> float/double conversions are always non-overflow ones
- assert(treeNode->OperGet() == GT_CAST);
- assert(!treeNode->gtOverflow());
-
- regNumber targetReg = treeNode->gtRegNum;
- assert(genIsValidFloatReg(targetReg));
-
- GenTreePtr op1 = treeNode->gtOp.gtOp1;
-#ifdef DEBUG
- if (!op1->isContained())
- {
- assert(genIsValidIntReg(op1->gtRegNum));
- }
-#endif
-
- var_types dstType = treeNode->CastToType();
- var_types srcType = op1->TypeGet();
- assert(!varTypeIsFloating(srcType) && varTypeIsFloating(dstType));
-
-#if !defined(_TARGET_64BIT_)
- NYI_IF(varTypeIsLong(srcType), "Conversion from long to float");
-#endif // !defined(_TARGET_64BIT_)
-
- // Since the xarch emitter doesn't handle reporting gc-info correctly while casting away gc-ness, we
- // ensure the srcType of a cast is a non gc-type. Codegen should never see BYREF as a source type except
- // for GT_LCL_VAR_ADDR and GT_LCL_FLD_ADDR that represent stack addresses and can be considered
- // as TYP_I_IMPL. In all other cases where src operand is a gc-type and not known to be on stack,
- // Front-end (see fgMorphCast()) ensures this by assigning gc-type local to a non gc-type
- // temp and using temp as operand of cast operation.
- if (srcType == TYP_BYREF)
- {
- noway_assert(op1->OperGet() == GT_LCL_VAR_ADDR || op1->OperGet() == GT_LCL_FLD_ADDR);
- srcType = TYP_I_IMPL;
- }
-
- // force the srcType to unsigned if GT_UNSIGNED flag is set
- if (treeNode->gtFlags & GTF_UNSIGNED)
- {
- srcType = genUnsignedType(srcType);
- }
-
- noway_assert(!varTypeIsGC(srcType));
-
- // We should never be seeing srcType whose size is not sizeof(int) nor sizeof(long).
- // For conversions from byte/sbyte/int16/uint16 to float/double, we would expect
- // either the front-end or lowering phase to have generated two levels of cast.
- // The first one is for widening smaller int type to int32 and the second one is
- // to the float/double.
- emitAttr srcSize = EA_ATTR(genTypeSize(srcType));
- noway_assert((srcSize == EA_ATTR(genTypeSize(TYP_INT))) || (srcSize == EA_ATTR(genTypeSize(TYP_LONG))));
-
- // Also we don't expect to see uint32 -> float/double and uint64 -> float conversions
- // here since they should have been lowered appropriately.
- noway_assert(srcType != TYP_UINT);
- noway_assert((srcType != TYP_ULONG) || (dstType != TYP_FLOAT));
-
- // To convert int to a float/double, cvtsi2ss/sd SSE2 instruction is used
- // which does a partial write to lower 4/8 bytes of xmm register keeping the other
- // upper bytes unmodified. If "cvtsi2ss/sd xmmReg, r32/r64" occurs inside a loop,
- // the partial write could introduce a false dependency and could cause a stall
- // if there are further uses of xmmReg. We have such a case occurring with a
- // customer reported version of SpectralNorm benchmark, resulting in 2x perf
- // regression. To avoid false dependency, we emit "xorps xmmReg, xmmReg" before
- // cvtsi2ss/sd instruction.
-
- genConsumeOperands(treeNode->AsOp());
- getEmitter()->emitIns_R_R(INS_xorps, EA_4BYTE, treeNode->gtRegNum, treeNode->gtRegNum);
-
- // Note that here we need to specify srcType that will determine
- // the size of source reg/mem operand and rex.w prefix.
- instruction ins = ins_FloatConv(dstType, TYP_INT);
- getEmitter()->emitInsBinary(ins, emitTypeSize(srcType), treeNode, op1);
-
- // Handle the case of srcType = TYP_ULONG. SSE2 conversion instruction
- // will interpret ULONG value as LONG. Hence we need to adjust the
- // result if sign-bit of srcType is set.
- if (srcType == TYP_ULONG)
- {
- // The instruction sequence below is less accurate than what clang
- // and gcc generate. However, we keep the current sequence for backward compatibility.
- // If we change the instructions below, FloatingPointUtils::convertUInt64ToDouble
- // should also be updated so that the conversion results stay consistent.
- assert(dstType == TYP_DOUBLE);
- assert(!op1->isContained());
-
- // Set the flags without modifying op1.
- // test op1Reg, op1Reg
- inst_RV_RV(INS_test, op1->gtRegNum, op1->gtRegNum, srcType);
-
- // No need to adjust result if op1 >= 0 i.e. positive
- // Jge label
- BasicBlock* label = genCreateTempLabel();
- inst_JMP(EJ_jge, label);
-
- // Adjust the result
- // result = result + 0x43f00000 00000000
- // addsd resultReg, 0x43f00000 00000000
- GenTreePtr* cns = &u8ToDblBitmask;
- if (*cns == nullptr)
- {
- double d;
- static_assert_no_msg(sizeof(double) == sizeof(__int64));
- *((__int64*)&d) = 0x43f0000000000000LL;
-
- *cns = genMakeConst(&d, dstType, treeNode, true);
- }
- inst_RV_TT(INS_addsd, treeNode->gtRegNum, *cns);
-
- genDefineTempLabel(label);
- }
-
- genProduceReg(treeNode);
-}
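As a rough illustration, the ULONG adjustment above behaves like the following plain C++ (a sketch assuming IEEE-754 doubles; the helper name is illustrative and not part of the JIT):

#include <cstdint>

// Sketch only: mirrors the cvtsi2sd + conditional addsd sequence emitted above.
double ConvertUInt64ToDoubleSketch(uint64_t value)
{
    // cvtsi2sd treats its 64-bit source as signed, so start with that interpretation.
    double result = static_cast<double>(static_cast<int64_t>(value));

    // If the sign bit was set, the signed interpretation is off by exactly 2^64;
    // 0x43f0000000000000 is the IEEE-754 encoding of 2^64 added by the addsd above.
    if (static_cast<int64_t>(value) < 0)
    {
        result += 18446744073709551616.0; // 2^64
    }
    return result;
}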
-
-//------------------------------------------------------------------------
-// genFloatToIntCast: Generate code to cast float/double to int/long
-//
-// Arguments:
-// treeNode - The GT_CAST node
-//
-// Return Value:
-// None.
-//
-// Assumptions:
-// Cast is a non-overflow conversion.
-// The treeNode must have an assigned register.
-// SrcType=float/double and DstType= int32/uint32/int64/uint64
-//
-// TODO-XArch-CQ: (Low-pri) - generate in-line code when DstType = uint64
-//
-void CodeGen::genFloatToIntCast(GenTreePtr treeNode)
-{
- // we don't expect to see overflow detecting float/double --> int type conversions here
- // as they should have been converted into helper calls by front-end.
- assert(treeNode->OperGet() == GT_CAST);
- assert(!treeNode->gtOverflow());
-
- regNumber targetReg = treeNode->gtRegNum;
- assert(genIsValidIntReg(targetReg));
-
- GenTreePtr op1 = treeNode->gtOp.gtOp1;
-#ifdef DEBUG
- if (!op1->isContained())
- {
- assert(genIsValidFloatReg(op1->gtRegNum));
- }
-#endif
-
- var_types dstType = treeNode->CastToType();
- var_types srcType = op1->TypeGet();
- assert(varTypeIsFloating(srcType) && !varTypeIsFloating(dstType));
-
- // We should never be seeing dstType whose size is neither sizeof(TYP_INT) nor sizeof(TYP_LONG).
- // For conversions to byte/sbyte/int16/uint16 from float/double, we would expect the
- // front-end or lowering phase to have generated two levels of cast. The first one is
- // for float or double to int32/uint32 and the second one for narrowing int32/uint32 to
- // the required smaller int type.
- emitAttr dstSize = EA_ATTR(genTypeSize(dstType));
- noway_assert((dstSize == EA_ATTR(genTypeSize(TYP_INT))) || (dstSize == EA_ATTR(genTypeSize(TYP_LONG))));
-
- // We shouldn't be seeing uint64 here as it should have been converted
- // into a helper call by either front-end or lowering phase.
- noway_assert(!varTypeIsUnsigned(dstType) || (dstSize != EA_ATTR(genTypeSize(TYP_LONG))));
-
- // If the dstType is TYP_UINT, we need all 32 bits to encode the
- // result, so the sign bit must be the 33rd bit or above. To achieve
- // this we pretend that we are converting to a long.
- if (varTypeIsUnsigned(dstType) && (dstSize == EA_ATTR(genTypeSize(TYP_INT))))
- {
- dstType = TYP_LONG;
- }
-
- // Note that we need to specify dstType here so that it will determine
- // the size of destination integer register and also the rex.w prefix.
- genConsumeOperands(treeNode->AsOp());
- instruction ins = ins_FloatConv(TYP_INT, srcType);
- getEmitter()->emitInsBinary(ins, emitTypeSize(dstType), treeNode, op1);
- genProduceReg(treeNode);
-}
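A minimal sketch of the TYP_UINT trick described above, in plain C++ (the helper name is hypothetical; behavior for out-of-range inputs is not modeled):

#include <cstdint>

// Sketch only: truncating to a signed 64-bit integer first lets values in
// [2^31, 2^32) survive, after which the low 32 bits are the desired uint32.
uint32_t ConvertDoubleToUInt32Sketch(double value)
{
    int64_t wide = static_cast<int64_t>(value); // what cvttsd2si with a 64-bit destination produces
    return static_cast<uint32_t>(wide);         // keep the low 32 bits
}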
-
-//------------------------------------------------------------------------
-// genCkfinite: Generate code for ckfinite opcode.
-//
-// Arguments:
-// treeNode - The GT_CKFINITE node
-//
-// Return Value:
-// None.
-//
-// Assumptions:
-// GT_CKFINITE node has reserved an internal register.
-//
-// TODO-XArch-CQ - mark the operand as contained if known to be in
-// memory (e.g. field or an array element).
-//
-void CodeGen::genCkfinite(GenTreePtr treeNode)
-{
- assert(treeNode->OperGet() == GT_CKFINITE);
-
- GenTreePtr op1 = treeNode->gtOp.gtOp1;
- var_types targetType = treeNode->TypeGet();
- int expMask = (targetType == TYP_FLOAT) ? 0x7F800000 : 0x7FF00000; // Bit mask to extract exponent.
- regNumber targetReg = treeNode->gtRegNum;
-
- // Extract exponent into a register.
- assert(treeNode->gtRsvdRegs != RBM_NONE);
- assert(genCountBits(treeNode->gtRsvdRegs) == 1);
- regNumber tmpReg = genRegNumFromMask(treeNode->gtRsvdRegs);
-
- genConsumeReg(op1);
-
-#ifdef _TARGET_64BIT_
-
- // Copy the floating-point value to an integer register. If we copied a double into a
- // 64-bit integer register, right-shift the value so the high 32 bits of the
- // floating-point value sit in the low 32 bits of the integer register.
- instruction ins = ins_CopyFloatToInt(targetType, (targetType == TYP_FLOAT) ? TYP_INT : TYP_LONG);
- inst_RV_RV(ins, op1->gtRegNum, tmpReg, targetType);
- if (targetType == TYP_DOUBLE)
- {
- // right shift by 32 bits to get to exponent.
- inst_RV_SH(INS_shr, EA_8BYTE, tmpReg, 32);
- }
-
- // Mask exponent with all 1's and check if the exponent is all 1's
- inst_RV_IV(INS_and, tmpReg, expMask, EA_4BYTE);
- inst_RV_IV(INS_cmp, tmpReg, expMask, EA_4BYTE);
-
- // If exponent is all 1's, throw ArithmeticException
- genJumpToThrowHlpBlk(EJ_je, SCK_ARITH_EXCPN);
-
- // if it is a finite value copy it to targetReg
- if (targetReg != op1->gtRegNum)
- {
- inst_RV_RV(ins_Copy(targetType), targetReg, op1->gtRegNum, targetType);
- }
-
-#else // !_TARGET_64BIT_
-
- // If the target type is TYP_DOUBLE, we want to extract the high 32 bits into the register.
- // There is no easy way to do this. To not require an extra register, we'll use shuffles
- // to move the high 32 bits into the low 32 bits, then shuffle it back, since we
- // need to produce the value into the target register.
- //
- // For TYP_DOUBLE, we'll generate (for targetReg != op1->gtRegNum):
- // movaps targetReg, op1->gtRegNum
- // shufps targetReg, targetReg, 0xB1 // WZYX => ZWXY
- // mov_xmm2i tmpReg, targetReg // tmpReg <= Y
- // and tmpReg, <mask>
- // cmp tmpReg, <mask>
- // je <throw block>
- // movaps targetReg, op1->gtRegNum // copy the value again, instead of un-shuffling it
- //
- // For TYP_DOUBLE with (targetReg == op1->gtRegNum):
- // shufps targetReg, targetReg, 0xB1 // WZYX => ZWXY
- // mov_xmm2i tmpReg, targetReg // tmpReg <= Y
- // and tmpReg, <mask>
- // cmp tmpReg, <mask>
- // je <throw block>
- // shufps targetReg, targetReg, 0xB1 // ZWXY => WZYX
- //
- // For TYP_FLOAT, it's the same as _TARGET_64BIT_:
- // mov_xmm2i tmpReg, targetReg // tmpReg <= low 32 bits
- // and tmpReg, <mask>
- // cmp tmpReg, <mask>
- // je <throw block>
- // movaps targetReg, op1->gtRegNum // only if targetReg != op1->gtRegNum
-
- regNumber copyToTmpSrcReg; // The register we'll copy to the integer temp.
-
- if (targetType == TYP_DOUBLE)
- {
- if (targetReg != op1->gtRegNum)
- {
- inst_RV_RV(ins_Copy(targetType), targetReg, op1->gtRegNum, targetType);
- }
- inst_RV_RV_IV(INS_shufps, EA_16BYTE, targetReg, targetReg, 0xb1);
- copyToTmpSrcReg = targetReg;
- }
- else
- {
- copyToTmpSrcReg = op1->gtRegNum;
- }
-
- // Copy only the low 32 bits. This will be the high order 32 bits of the floating-point
- // value, no matter the floating-point type.
- inst_RV_RV(ins_CopyFloatToInt(TYP_FLOAT, TYP_INT), copyToTmpSrcReg, tmpReg, TYP_FLOAT);
-
- // Mask exponent with all 1's and check if the exponent is all 1's
- inst_RV_IV(INS_and, tmpReg, expMask, EA_4BYTE);
- inst_RV_IV(INS_cmp, tmpReg, expMask, EA_4BYTE);
-
- // If exponent is all 1's, throw ArithmeticException
- genJumpToThrowHlpBlk(EJ_je, SCK_ARITH_EXCPN);
-
- if (targetReg != op1->gtRegNum)
- {
- // In both the TYP_FLOAT and TYP_DOUBLE case, the op1 register is untouched,
- // so copy it to the targetReg. This is faster and smaller for TYP_DOUBLE
- // than re-shuffling the targetReg.
- inst_RV_RV(ins_Copy(targetType), targetReg, op1->gtRegNum, targetType);
- }
- else if (targetType == TYP_DOUBLE)
- {
- // We need to re-shuffle the targetReg to get the correct result.
- inst_RV_RV_IV(INS_shufps, EA_16BYTE, targetReg, targetReg, 0xb1);
- }
-
-#endif // !_TARGET_64BIT_
-
- genProduceReg(treeNode);
-}
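For the double case, the exponent test emitted here is equivalent to the following sketch (IEEE-754 assumed; the helper name is illustrative):

#include <cstdint>
#include <cstring>

// Sketch only: a double is non-finite (NaN or infinity) exactly when its
// 11 exponent bits are all ones, i.e. when the high 32 bits, masked with
// 0x7FF00000, equal the mask.
bool IsFiniteDoubleSketch(double d)
{
    uint64_t bits;
    std::memcpy(&bits, &d, sizeof(bits));
    uint32_t hi = static_cast<uint32_t>(bits >> 32); // the high 32 bits the code above extracts
    return (hi & 0x7FF00000) != 0x7FF00000;
}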
-
-#ifdef _TARGET_AMD64_
-int CodeGenInterface::genSPtoFPdelta()
-{
- int delta;
-
-#ifdef PLATFORM_UNIX
-
- // We require frame chaining on Unix to support native tool unwinding (such as
- // unwinding by the native debugger). We have a CLR-only extension to the
- // unwind codes (UWOP_SET_FPREG_LARGE) to support SP->FP offsets larger than 240.
- // If Unix ever supports EnC, the RSP == RBP assumption will have to be reevaluated.
- delta = genTotalFrameSize();
-
-#else // !PLATFORM_UNIX
-
- // As per Amd64 ABI, RBP offset from initial RSP can be between 0 and 240 if
- // RBP needs to be reported in unwind codes. This case would arise for methods
- // with localloc.
- if (compiler->compLocallocUsed)
- {
- // We cannot base delta computation on compLclFrameSize since it changes from
- // tentative to final frame layout and hence there is a possibility of
- // under-estimating offset of vars from FP, which in turn results in under-
- // estimating instruction size.
- //
- // To be predictive, and so as never to under-estimate the offset of vars from FP,
- // we will always position FP at min(240, outgoing arg area size).
- delta = Min(240, (int)compiler->lvaOutgoingArgSpaceSize);
- }
- else if (compiler->opts.compDbgEnC)
- {
- // vm assumption on EnC methods is that rsp and rbp are equal
- delta = 0;
- }
- else
- {
- delta = genTotalFrameSize();
- }
-
-#endif // !PLATFORM_UNIX
-
- return delta;
-}
-
-//---------------------------------------------------------------------
-// genTotalFrameSize - return the total size of the stack frame, including local size,
-// callee-saved register size, etc. For AMD64, this does not include the caller-pushed
-// return address.
-//
-// Return value:
-// Total frame size
-//
-
-int CodeGenInterface::genTotalFrameSize()
-{
- assert(!IsUninitialized(compiler->compCalleeRegsPushed));
-
- int totalFrameSize = compiler->compCalleeRegsPushed * REGSIZE_BYTES + compiler->compLclFrameSize;
-
- assert(totalFrameSize >= 0);
- return totalFrameSize;
-}
-
-//---------------------------------------------------------------------
-// genCallerSPtoFPdelta - return the offset from Caller-SP to the frame pointer.
-// This number is going to be negative, since the Caller-SP is at a higher
-// address than the frame pointer.
-//
-// There must be a frame pointer to call this function!
-//
-// We can't compute this directly from the Caller-SP, since the frame pointer
-// is based on a maximum delta from Initial-SP, so first we find SP, then
-// compute the FP offset.
-
-int CodeGenInterface::genCallerSPtoFPdelta()
-{
- assert(isFramePointerUsed());
- int callerSPtoFPdelta;
-
- callerSPtoFPdelta = genCallerSPtoInitialSPdelta() + genSPtoFPdelta();
-
- assert(callerSPtoFPdelta <= 0);
- return callerSPtoFPdelta;
-}
-
-//---------------------------------------------------------------------
-// genCallerSPtoInitialSPdelta - return the offset from Caller-SP to Initial SP.
-//
-// This number will be negative.
-
-int CodeGenInterface::genCallerSPtoInitialSPdelta()
-{
- int callerSPtoSPdelta = 0;
-
- callerSPtoSPdelta -= genTotalFrameSize();
- callerSPtoSPdelta -= REGSIZE_BYTES; // caller-pushed return address
-
- // compCalleeRegsPushed does not account for the frame pointer
- // TODO-Cleanup: shouldn't this be part of genTotalFrameSize?
- if (isFramePointerUsed())
- {
- callerSPtoSPdelta -= REGSIZE_BYTES;
- }
-
- assert(callerSPtoSPdelta <= 0);
- return callerSPtoSPdelta;
-}
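The three deltas above combine as in this sketch (a hypothetical helper; REGSIZE_BYTES is taken as 8 for AMD64):

// Sketch only: how the Caller-SP -> FP delta is derived from the other two deltas.
int CallerSPtoFPdeltaSketch(int totalFrameSize, bool framePointerUsed, int spToFpDelta)
{
    const int kRegSizeBytes = 8; // REGSIZE_BYTES on AMD64

    int callerSPtoInitialSP = -totalFrameSize;
    callerSPtoInitialSP -= kRegSizeBytes; // caller-pushed return address
    if (framePointerUsed)
    {
        callerSPtoInitialSP -= kRegSizeBytes; // pushed frame pointer, not counted in totalFrameSize
    }

    return callerSPtoInitialSP + spToFpDelta; // expected to be <= 0, matching the asserts above
}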
-#endif // _TARGET_AMD64_
-
-//-----------------------------------------------------------------------------------------
-// genSSE2BitwiseOp - generate SSE2 code for the given oper as "Operand BitWiseOp BitMask"
-//
-// Arguments:
-// treeNode - tree node
-//
-// Return value:
-// None
-//
-// Assumptions:
-// i) tree oper is one of GT_NEG or GT_INTRINSIC Abs()
-// ii) tree type is floating point type.
-// iii) caller of this routine needs to call genProduceReg()
-void CodeGen::genSSE2BitwiseOp(GenTreePtr treeNode)
-{
- regNumber targetReg = treeNode->gtRegNum;
- var_types targetType = treeNode->TypeGet();
- assert(varTypeIsFloating(targetType));
-
- float f;
- double d;
- GenTreePtr* bitMask = nullptr;
- instruction ins = INS_invalid;
- void* cnsAddr = nullptr;
- bool dblAlign = false;
-
- switch (treeNode->OperGet())
- {
- case GT_NEG:
- // Neg(x) = flip the sign bit.
- // Neg(f) = f ^ 0x80000000
- // Neg(d) = d ^ 0x8000000000000000
- ins = genGetInsForOper(GT_XOR, targetType);
- if (targetType == TYP_FLOAT)
- {
- bitMask = &negBitmaskFlt;
-
- static_assert_no_msg(sizeof(float) == sizeof(int));
- *((int*)&f) = 0x80000000;
- cnsAddr = &f;
- }
- else
- {
- bitMask = &negBitmaskDbl;
-
- static_assert_no_msg(sizeof(double) == sizeof(__int64));
- *((__int64*)&d) = 0x8000000000000000LL;
- cnsAddr = &d;
- dblAlign = true;
- }
- break;
-
- case GT_INTRINSIC:
- assert(treeNode->gtIntrinsic.gtIntrinsicId == CORINFO_INTRINSIC_Abs);
-
- // Abs(x) = set sign-bit to zero
- // Abs(f) = f & 0x7fffffff
- // Abs(d) = d & 0x7fffffffffffffff
- ins = genGetInsForOper(GT_AND, targetType);
- if (targetType == TYP_FLOAT)
- {
- bitMask = &absBitmaskFlt;
-
- static_assert_no_msg(sizeof(float) == sizeof(int));
- *((int*)&f) = 0x7fffffff;
- cnsAddr = &f;
- }
- else
- {
- bitMask = &absBitmaskDbl;
-
- static_assert_no_msg(sizeof(double) == sizeof(__int64));
- *((__int64*)&d) = 0x7fffffffffffffffLL;
- cnsAddr = &d;
- dblAlign = true;
- }
- break;
-
- default:
- assert(!"genSSE2: unsupported oper");
- unreached();
- break;
- }
-
- if (*bitMask == nullptr)
- {
- assert(cnsAddr != nullptr);
- *bitMask = genMakeConst(cnsAddr, targetType, treeNode, dblAlign);
- }
-
- // We need an additional register for bitmask.
- // Make sure we have one allocated.
- assert(treeNode->gtRsvdRegs != RBM_NONE);
- assert(genCountBits(treeNode->gtRsvdRegs) == 1);
- regNumber tmpReg = genRegNumFromMask(treeNode->gtRsvdRegs);
-
- // Move operand into targetReg only if the reg reserved for
- // internal purpose is not the same as targetReg.
- GenTreePtr op1 = treeNode->gtOp.gtOp1;
- assert(!op1->isContained());
- regNumber operandReg = genConsumeReg(op1);
- if (tmpReg != targetReg)
- {
- if (operandReg != targetReg)
- {
- inst_RV_RV(ins_Copy(targetType), targetReg, operandReg, targetType);
- }
-
- operandReg = tmpReg;
- }
-
- inst_RV_TT(ins_Load(targetType, false), tmpReg, *bitMask);
- assert(ins != INS_invalid);
- inst_RV_RV(ins, targetReg, operandReg, targetType);
-}
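A scalar sketch of the sign-bit tricks these masks implement, shown for the double case (IEEE-754 assumed; names are illustrative):

#include <cstdint>
#include <cstring>

// Sketch only: Neg flips the sign bit, Abs clears it, exactly as the xorpd/andpd
// with the memory-resident bit masks does above.
double NegateViaXorSketch(double d)
{
    uint64_t bits;
    std::memcpy(&bits, &d, sizeof(bits));
    bits ^= 0x8000000000000000ULL; // flip the sign bit
    std::memcpy(&d, &bits, sizeof(d));
    return d;
}

double AbsViaAndSketch(double d)
{
    uint64_t bits;
    std::memcpy(&bits, &d, sizeof(bits));
    bits &= 0x7fffffffffffffffULL; // clear the sign bit
    std::memcpy(&d, &bits, sizeof(d));
    return d;
}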
-
-//---------------------------------------------------------------------
-// genIntrinsic - generate code for a given intrinsic
-//
-// Arguments
-// treeNode - the GT_INTRINSIC node
-//
-// Return value:
-// None
-//
-void CodeGen::genIntrinsic(GenTreePtr treeNode)
-{
- // Right now only Sqrt/Abs are treated as math intrinsics.
- switch (treeNode->gtIntrinsic.gtIntrinsicId)
- {
- case CORINFO_INTRINSIC_Sqrt:
- noway_assert(treeNode->TypeGet() == TYP_DOUBLE);
- genConsumeOperands(treeNode->AsOp());
- getEmitter()->emitInsBinary(ins_FloatSqrt(treeNode->TypeGet()), emitTypeSize(treeNode), treeNode,
- treeNode->gtOp.gtOp1);
- break;
-
- case CORINFO_INTRINSIC_Abs:
- genSSE2BitwiseOp(treeNode);
- break;
-
- default:
- assert(!"genIntrinsic: Unsupported intrinsic");
- unreached();
- }
-
- genProduceReg(treeNode);
-}
-
-//-------------------------------------------------------------------------- //
-// getBaseVarForPutArgStk - returns the baseVarNum for passing a stack arg.
-//
-// Arguments
-// treeNode - the GT_PUTARG_STK node
-//
-// Return value:
-// The number of the base variable.
-//
-// Note:
-// If tail call the outgoing args are placed in the caller's incoming arg stack space.
-// Otherwise, they go in the outgoing arg area on the current frame.
-//
-// On Windows the caller always creates slots (homing space) in its frame for the
-// first 4 arguments of a callee (register passed args). So, the baseVarNum is always 0.
-// For System V systems there is no such calling convention requirement, and the code needs to find
-// the first stack passed argument from the caller. This is done by iterating over
-// all the lvParam variables and finding the first with lvArgReg equals to REG_STK.
-//
-unsigned CodeGen::getBaseVarForPutArgStk(GenTreePtr treeNode)
-{
- assert(treeNode->OperGet() == GT_PUTARG_STK);
-
- unsigned baseVarNum;
-
-#if FEATURE_FASTTAILCALL
- bool putInIncomingArgArea = treeNode->AsPutArgStk()->putInIncomingArgArea;
-#else
- const bool putInIncomingArgArea = false;
-#endif
-
- // Do we set up the stk arg in the incoming or the out-going arg area?
- // Fast tail calls implemented as epilog+jmp: the stk arg is set up in the incoming arg area.
- // All other calls: the stk arg is set up in the out-going arg area.
- if (putInIncomingArgArea)
- {
- // See the note in the function header re: finding the first stack passed argument.
- baseVarNum = getFirstArgWithStackSlot();
- assert(baseVarNum != BAD_VAR_NUM);
-
-#ifdef DEBUG
- // This must be a fast tail call.
- assert(treeNode->AsPutArgStk()->gtCall->AsCall()->IsFastTailCall());
-
- // Since it is a fast tail call, the existence of first incoming arg is guaranteed
- // because fast tail call requires that in-coming arg area of caller is >= out-going
- // arg area required for tail call.
- LclVarDsc* varDsc = &(compiler->lvaTable[baseVarNum]);
- assert(varDsc != nullptr);
-
-#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
- assert(!varDsc->lvIsRegArg && varDsc->lvArgReg == REG_STK);
-#else // !FEATURE_UNIX_AMD64_STRUCT_PASSING
- // On Windows this assert is always true. The first argument will always be in REG_ARG_0 or REG_FLTARG_0.
- assert(varDsc->lvIsRegArg && (varDsc->lvArgReg == REG_ARG_0 || varDsc->lvArgReg == REG_FLTARG_0));
-#endif // !FEATURE_UNIX_AMD64_STRUCT_PASSING
-#endif // DEBUG
- }
- else
- {
-#if FEATURE_FIXED_OUT_ARGS
- baseVarNum = compiler->lvaOutgoingArgSpaceVar;
-#else // !FEATURE_FIXED_OUT_ARGS
- NYI_X86("Stack args for x86/RyuJIT");
- baseVarNum = BAD_VAR_NUM;
-#endif // !FEATURE_FIXED_OUT_ARGS
- }
-
- return baseVarNum;
-}
-
-//--------------------------------------------------------------------- //
-// genPutArgStk - generate code for passing an arg on the stack.
-//
-// Arguments
-// treeNode - the GT_PUTARG_STK node
-//
-// Return value:
-// None
-//
-void CodeGen::genPutArgStk(GenTreePtr treeNode)
-{
- var_types targetType = treeNode->TypeGet();
-#ifdef _TARGET_X86_
- noway_assert(targetType != TYP_STRUCT);
-
- // The following logic is applicable for x86 arch.
- assert(!varTypeIsFloating(targetType) || (targetType == treeNode->gtGetOp1()->TypeGet()));
-
- GenTreePtr data = treeNode->gtOp.gtOp1;
-
- // On a 32-bit target, all of the long arguments have been decomposed into
- // a separate putarg_stk for each of the upper and lower halves.
- noway_assert(targetType != TYP_LONG);
-
- int argSize = genTypeSize(genActualType(targetType));
- genStackLevel += argSize;
-
- // TODO-Cleanup: Handle this in emitInsMov() in emitXArch.cpp?
- if (data->isContainedIntOrIImmed())
- {
- if (data->IsIconHandle())
- {
- inst_IV_handle(INS_push, data->gtIntCon.gtIconVal);
- }
- else
- {
- inst_IV(INS_push, data->gtIntCon.gtIconVal);
- }
- }
- else if (data->isContained())
- {
- NYI_X86("Contained putarg_stk of non-constant");
- }
- else
- {
- genConsumeReg(data);
- if (varTypeIsIntegralOrI(targetType))
- {
- inst_RV(INS_push, data->gtRegNum, targetType);
- }
- else
- {
- // Decrement SP.
- inst_RV_IV(INS_sub, REG_SPBASE, argSize, emitActualTypeSize(TYP_I_IMPL));
- getEmitter()->emitIns_AR_R(ins_Store(targetType), emitTypeSize(targetType), data->gtRegNum, REG_SPBASE, 0);
- }
- }
-#else // !_TARGET_X86_
- {
- unsigned baseVarNum = getBaseVarForPutArgStk(treeNode);
-
-#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
-
- if (varTypeIsStruct(targetType))
- {
- genPutStructArgStk(treeNode, baseVarNum);
- return;
- }
-#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
-
- noway_assert(targetType != TYP_STRUCT);
- assert(!varTypeIsFloating(targetType) || (targetType == treeNode->gtGetOp1()->TypeGet()));
-
- // Get argument offset on stack.
- // Here we cross check that argument offset hasn't changed from lowering to codegen since
- // we are storing arg slot number in GT_PUTARG_STK node in lowering phase.
- int argOffset = treeNode->AsPutArgStk()->getArgOffset();
-
-#ifdef DEBUG
- fgArgTabEntryPtr curArgTabEntry = compiler->gtArgEntryByNode(treeNode->AsPutArgStk()->gtCall, treeNode);
- assert(curArgTabEntry);
- assert(argOffset == (int)curArgTabEntry->slotNum * TARGET_POINTER_SIZE);
-#endif
-
- GenTreePtr data = treeNode->gtGetOp1();
-
- if (data->isContained())
- {
- getEmitter()->emitIns_S_I(ins_Store(targetType), emitTypeSize(targetType), baseVarNum, argOffset,
- (int)data->AsIntConCommon()->IconValue());
- }
- else
- {
- genConsumeReg(data);
- getEmitter()->emitIns_S_R(ins_Store(targetType), emitTypeSize(targetType), data->gtRegNum, baseVarNum,
- argOffset);
- }
- }
-#endif // !_TARGET_X86_
-}
-
-#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
-
-//---------------------------------------------------------------------
-// genPutStructArgStk - generate code for copying a struct arg on the stack by value.
-// In case there are references to heap object in the struct,
-// it generates the gcinfo as well.
-//
-// Arguments
-// treeNode - the GT_PUTARG_STK node
-// baseVarNum - the variable number relative to which to put the argument on the stack.
-// For tail calls this is the baseVarNum = 0.
-// For non tail calls this is the outgoingArgSpace.
-//
-// Return value:
-// None
-//
-void CodeGen::genPutStructArgStk(GenTreePtr treeNode, unsigned baseVarNum)
-{
- assert(treeNode->OperGet() == GT_PUTARG_STK);
- assert(baseVarNum != BAD_VAR_NUM);
-
- var_types targetType = treeNode->TypeGet();
-
- if (varTypeIsSIMD(targetType))
- {
- regNumber srcReg = genConsumeReg(treeNode->gtGetOp1());
- assert((srcReg != REG_NA) && (genIsValidFloatReg(srcReg)));
- getEmitter()->emitIns_S_R(ins_Store(targetType), emitTypeSize(targetType), srcReg, baseVarNum,
- treeNode->AsPutArgStk()->getArgOffset());
- return;
- }
-
- assert(targetType == TYP_STRUCT);
-
- GenTreePutArgStk* putArgStk = treeNode->AsPutArgStk();
- if (putArgStk->gtNumberReferenceSlots == 0)
- {
- switch (putArgStk->gtPutArgStkKind)
- {
- case GenTreePutArgStk::PutArgStkKindRepInstr:
- genStructPutArgRepMovs(putArgStk, baseVarNum);
- break;
- case GenTreePutArgStk::PutArgStkKindUnroll:
- genStructPutArgUnroll(putArgStk, baseVarNum);
- break;
- default:
- unreached();
- }
- }
- else
- {
- // No need to disable GC the way COPYOBJ does. Here the refs are always copied with atomic operations.
-
- // Consume these registers.
- // They may now contain gc pointers (depending on their type; gcMarkRegPtrVal will "do the right thing").
- genConsumePutStructArgStk(putArgStk, REG_RDI, REG_RSI, REG_NA, baseVarNum);
- GenTreePtr dstAddr = putArgStk;
- GenTreePtr src = putArgStk->gtOp.gtOp1;
- assert(src->OperGet() == GT_OBJ);
- GenTreePtr srcAddr = src->gtGetOp1();
-
- unsigned slots = putArgStk->gtNumSlots;
-
- // We are always copying to the stack, so we don't need to use the write barrier.
- BYTE* gcPtrs = putArgStk->gtGcPtrs;
- unsigned gcPtrCount = putArgStk->gtNumberReferenceSlots;
-
- unsigned i = 0;
- unsigned copiedSlots = 0;
- while (i < slots)
- {
- switch (gcPtrs[i])
- {
- case TYPE_GC_NONE:
- // Let's see if we can use rep movsq instead of a sequence of movsq instructions
- // to save cycles and code size.
- {
- unsigned nonGcSlotCount = 0;
-
- do
- {
- nonGcSlotCount++;
- i++;
- } while (i < slots && gcPtrs[i] == TYPE_GC_NONE);
-
- // If we have a very small contiguous non-gc region, it's better just to
- // emit a sequence of movsq instructions
- if (nonGcSlotCount < CPOBJ_NONGC_SLOTS_LIMIT)
- {
- copiedSlots += nonGcSlotCount;
- while (nonGcSlotCount > 0)
- {
- instGen(INS_movsq);
- nonGcSlotCount--;
- }
- }
- else
- {
- getEmitter()->emitIns_R_I(INS_mov, EA_4BYTE, REG_RCX, nonGcSlotCount);
- copiedSlots += nonGcSlotCount;
- instGen(INS_r_movsq);
- }
- }
- break;
-
- case TYPE_GC_REF: // Is an object ref
- case TYPE_GC_BYREF: // Is an interior pointer - promote it but don't scan it
- {
- // We have a GC (byref or ref) pointer
- // TODO-Amd64-Unix: Here a better solution (for code size and CQ) would be to use movsq instruction,
- // but the logic for emitting a GC info record is not available (it is internal for the emitter
- // only.) See emitGCVarLiveUpd function. If we could call it separately, we could do
- // instGen(INS_movsq); and emission of gc info.
-
- var_types memType;
- if (gcPtrs[i] == TYPE_GC_REF)
- {
- memType = TYP_REF;
- }
- else
- {
- assert(gcPtrs[i] == TYPE_GC_BYREF);
- memType = TYP_BYREF;
- }
-
- getEmitter()->emitIns_R_AR(ins_Load(memType), emitTypeSize(memType), REG_RCX, REG_RSI, 0);
- getEmitter()->emitIns_S_R(ins_Store(memType), emitTypeSize(memType), REG_RCX, baseVarNum,
- ((copiedSlots + putArgStk->gtSlotNum) * TARGET_POINTER_SIZE));
-
- // Source for the copy operation.
- // If a LocalAddr, use EA_PTRSIZE - copy from stack.
- // If not a LocalAddr, use EA_BYREF - the source location is not on the stack.
- getEmitter()->emitIns_R_I(INS_add, ((src->OperIsLocalAddr()) ? EA_PTRSIZE : EA_BYREF), REG_RSI,
- TARGET_POINTER_SIZE);
-
- // Always copying to the stack - outgoing arg area
- // (or the outgoing arg area of the caller for a tail call) - use EA_PTRSIZE.
- getEmitter()->emitIns_R_I(INS_add, EA_PTRSIZE, REG_RDI, TARGET_POINTER_SIZE);
- copiedSlots++;
- gcPtrCount--;
- i++;
- }
- break;
-
- default:
- unreached();
- break;
- }
- }
-
- assert(gcPtrCount == 0);
- }
-}
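A sketch of the slot-copy strategy used above, ignoring GC reporting and addressing modes (hypothetical helper; 0 stands in for TYPE_GC_NONE):

#include <cstdint>
#include <cstring>

// Sketch only: the loop partitions the struct's pointer-sized slots into runs of
// non-GC slots (bulk-copied, as rep movsq / movsq would) and individual GC slots
// (copied one at a time so GC liveness can be tracked per slot).
void CopyStructSlotsSketch(void** dst, void** src, const uint8_t* gcLayout, unsigned slots)
{
    unsigned i = 0;
    while (i < slots)
    {
        if (gcLayout[i] == 0) // non-GC slot
        {
            unsigned run = 0;
            while ((i + run < slots) && (gcLayout[i + run] == 0))
            {
                run++;
            }
            std::memcpy(dst + i, src + i, run * sizeof(void*)); // bulk copy the run
            i += run;
        }
        else
        {
            dst[i] = src[i]; // GC ref/byref slot: copied individually
            i++;
        }
    }
}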
-#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
-
-/*****************************************************************************
- *
- * Create and record GC Info for the function.
- */
-#ifdef _TARGET_AMD64_
-void
-#else // !_TARGET_AMD64_
-void*
-#endif // !_TARGET_AMD64_
-CodeGen::genCreateAndStoreGCInfo(unsigned codeSize, unsigned prologSize, unsigned epilogSize DEBUGARG(void* codePtr))
-{
-#ifdef JIT32_GCENCODER
- return genCreateAndStoreGCInfoJIT32(codeSize, prologSize, epilogSize DEBUGARG(codePtr));
-#else // !JIT32_GCENCODER
- genCreateAndStoreGCInfoX64(codeSize, prologSize DEBUGARG(codePtr));
-#endif // !JIT32_GCENCODER
-}
-
-#ifdef JIT32_GCENCODER
-void* CodeGen::genCreateAndStoreGCInfoJIT32(unsigned codeSize,
- unsigned prologSize,
- unsigned epilogSize DEBUGARG(void* codePtr))
-{
- BYTE headerBuf[64];
- InfoHdr header;
-
- int s_cached;
-#ifdef DEBUG
- size_t headerSize =
-#endif
- compiler->compInfoBlkSize =
- gcInfo.gcInfoBlockHdrSave(headerBuf, 0, codeSize, prologSize, epilogSize, &header, &s_cached);
-
- size_t argTabOffset = 0;
- size_t ptrMapSize = gcInfo.gcPtrTableSize(header, codeSize, &argTabOffset);
-
-#if DISPLAY_SIZES
-
- if (genInterruptible)
- {
- gcHeaderISize += compiler->compInfoBlkSize;
- gcPtrMapISize += ptrMapSize;
- }
- else
- {
- gcHeaderNSize += compiler->compInfoBlkSize;
- gcPtrMapNSize += ptrMapSize;
- }
-
-#endif // DISPLAY_SIZES
-
- compiler->compInfoBlkSize += ptrMapSize;
-
- /* Allocate the info block for the method */
-
- compiler->compInfoBlkAddr = (BYTE*)compiler->info.compCompHnd->allocGCInfo(compiler->compInfoBlkSize);
-
-#if 0 // VERBOSE_SIZES
- // TODO-X86-Cleanup: 'dataSize', below, is not defined
-
-// if (compiler->compInfoBlkSize > codeSize && compiler->compInfoBlkSize > 100)
- {
- printf("[%7u VM, %7u+%7u/%7u x86 %03u/%03u%%] %s.%s\n",
- compiler->info.compILCodeSize,
- compiler->compInfoBlkSize,
- codeSize + dataSize,
- codeSize + dataSize - prologSize - epilogSize,
- 100 * (codeSize + dataSize) / compiler->info.compILCodeSize,
- 100 * (codeSize + dataSize + compiler->compInfoBlkSize) / compiler->info.compILCodeSize,
- compiler->info.compClassName,
- compiler->info.compMethodName);
-}
-
-#endif
-
- /* Fill in the info block and return it to the caller */
-
- void* infoPtr = compiler->compInfoBlkAddr;
-
- /* Create the method info block: header followed by GC tracking tables */
-
- compiler->compInfoBlkAddr +=
- gcInfo.gcInfoBlockHdrSave(compiler->compInfoBlkAddr, -1, codeSize, prologSize, epilogSize, &header, &s_cached);
-
- assert(compiler->compInfoBlkAddr == (BYTE*)infoPtr + headerSize);
- compiler->compInfoBlkAddr = gcInfo.gcPtrTableSave(compiler->compInfoBlkAddr, header, codeSize, &argTabOffset);
- assert(compiler->compInfoBlkAddr == (BYTE*)infoPtr + headerSize + ptrMapSize);
-
-#ifdef DEBUG
-
- if (0)
- {
- BYTE* temp = (BYTE*)infoPtr;
- unsigned size = compiler->compInfoBlkAddr - temp;
- BYTE* ptab = temp + headerSize;
-
- noway_assert(size == headerSize + ptrMapSize);
-
- printf("Method info block - header [%u bytes]:", headerSize);
-
- for (unsigned i = 0; i < size; i++)
- {
- if (temp == ptab)
- {
- printf("\nMethod info block - ptrtab [%u bytes]:", ptrMapSize);
- printf("\n %04X: %*c", i & ~0xF, 3 * (i & 0xF), ' ');
- }
- else
- {
- if (!(i % 16))
- printf("\n %04X: ", i);
- }
-
- printf("%02X ", *temp++);
- }
-
- printf("\n");
- }
-
-#endif // DEBUG
-
-#if DUMP_GC_TABLES
-
- if (compiler->opts.dspGCtbls)
- {
- const BYTE* base = (BYTE*)infoPtr;
- unsigned size;
- unsigned methodSize;
- InfoHdr dumpHeader;
-
- printf("GC Info for method %s\n", compiler->info.compFullName);
- printf("GC info size = %3u\n", compiler->compInfoBlkSize);
-
- size = gcInfo.gcInfoBlockHdrDump(base, &dumpHeader, &methodSize);
- // printf("size of header encoding is %3u\n", size);
- printf("\n");
-
- if (compiler->opts.dspGCtbls)
- {
- base += size;
- size = gcInfo.gcDumpPtrTable(base, dumpHeader, methodSize);
- // printf("size of pointer table is %3u\n", size);
- printf("\n");
- noway_assert(compiler->compInfoBlkAddr == (base + size));
- }
- }
-
-#ifdef DEBUG
- if (jitOpts.testMask & 128)
- {
- for (unsigned offs = 0; offs < codeSize; offs++)
- {
- gcInfo.gcFindPtrsInFrame(infoPtr, codePtr, offs);
- }
- }
-#endif // DEBUG
-#endif // DUMP_GC_TABLES
-
- /* Make sure we ended up generating the expected number of bytes */
-
- noway_assert(compiler->compInfoBlkAddr == (BYTE*)infoPtr + compiler->compInfoBlkSize);
-
- return infoPtr;
-}
-
-#else // !JIT32_GCENCODER
-void CodeGen::genCreateAndStoreGCInfoX64(unsigned codeSize, unsigned prologSize DEBUGARG(void* codePtr))
-{
- IAllocator* allowZeroAlloc = new (compiler, CMK_GC) AllowZeroAllocator(compiler->getAllocatorGC());
- GcInfoEncoder* gcInfoEncoder = new (compiler, CMK_GC)
- GcInfoEncoder(compiler->info.compCompHnd, compiler->info.compMethodInfo, allowZeroAlloc, NOMEM);
- assert(gcInfoEncoder);
-
- // Follow the code pattern of the x86 gc info encoder (genCreateAndStoreGCInfoJIT32).
- gcInfo.gcInfoBlockHdrSave(gcInfoEncoder, codeSize, prologSize);
-
- // First we figure out the encoder ID's for the stack slots and registers.
- gcInfo.gcMakeRegPtrTable(gcInfoEncoder, codeSize, prologSize, GCInfo::MAKE_REG_PTR_MODE_ASSIGN_SLOTS);
- // Now we've requested all the slots we'll need; "finalize" these (make more compact data structures for them).
- gcInfoEncoder->FinalizeSlotIds();
- // Now we can actually use those slot ID's to declare live ranges.
- gcInfo.gcMakeRegPtrTable(gcInfoEncoder, codeSize, prologSize, GCInfo::MAKE_REG_PTR_MODE_DO_WORK);
-
-#if defined(DEBUGGING_SUPPORT)
- if (compiler->opts.compDbgEnC)
- {
- // what we have to preserve is called the "frame header" (see comments in VM\eetwain.cpp)
- // which is:
- // -return address
- // -saved off RBP
- // -saved 'this' pointer and bool for synchronized methods
-
- // 4 slots for RBP + return address + RSI + RDI
- int preservedAreaSize = 4 * REGSIZE_BYTES;
-
- if (compiler->info.compFlags & CORINFO_FLG_SYNCH)
- {
- if (!(compiler->info.compFlags & CORINFO_FLG_STATIC))
- {
- preservedAreaSize += REGSIZE_BYTES;
- }
-
- // bool in synchronized methods that tracks whether the lock has been taken (takes 4 bytes on stack)
- preservedAreaSize += 4;
- }
-
- // Used to signal both that the method is compiled for EnC, and also the size of the block at the top of the
- // frame
- gcInfoEncoder->SetSizeOfEditAndContinuePreservedArea(preservedAreaSize);
- }
-#endif
-
- gcInfoEncoder->Build();
-
- // GC Encoder automatically puts the GC info in the right spot using ICorJitInfo::allocGCInfo(size_t)
- // let's save the values anyway for debugging purposes
- compiler->compInfoBlkAddr = gcInfoEncoder->Emit();
- compiler->compInfoBlkSize = 0; // not exposed by the GCEncoder interface
-}
-#endif // !JIT32_GCENCODER
-
-/*****************************************************************************
- * Emit a call to a helper function.
- *
- */
-
-void CodeGen::genEmitHelperCall(unsigned helper, int argSize, emitAttr retSize, regNumber callTargetReg)
-{
- void* addr = nullptr;
- void* pAddr = nullptr;
-
- emitter::EmitCallType callType = emitter::EC_FUNC_TOKEN;
- addr = compiler->compGetHelperFtn((CorInfoHelpFunc)helper, &pAddr);
- regNumber callTarget = REG_NA;
- regMaskTP killMask = compiler->compHelperCallKillSet((CorInfoHelpFunc)helper);
-
- if (!addr)
- {
- assert(pAddr != nullptr);
-
- // Absolute indirect call addr
- // Note: Order of checks is important. First always check for pc-relative and next
- // zero-relative. Because the former encoding is 1-byte smaller than the latter.
- if (genCodeIndirAddrCanBeEncodedAsPCRelOffset((size_t)pAddr) ||
- genCodeIndirAddrCanBeEncodedAsZeroRelOffset((size_t)pAddr))
- {
- // generate call whose target is specified by 32-bit offset relative to PC or zero.
- callType = emitter::EC_FUNC_TOKEN_INDIR;
- addr = pAddr;
- }
- else
- {
-#ifdef _TARGET_AMD64_
- // If this indirect address cannot be encoded as 32-bit offset relative to PC or Zero,
- // load it into REG_HELPER_CALL_TARGET and use register indirect addressing mode to
- // make the call.
- // mov reg, addr
- // call [reg]
-
- if (callTargetReg == REG_NA)
- {
- // If a callTargetReg has not been explicitly provided, we will use REG_DEFAULT_HELPER_CALL_TARGET, but
- // this is only a valid assumption if the helper call is known to kill REG_DEFAULT_HELPER_CALL_TARGET.
- callTargetReg = REG_DEFAULT_HELPER_CALL_TARGET;
- regMaskTP callTargetMask = genRegMask(callTargetReg);
- noway_assert((callTargetMask & killMask) == callTargetMask);
- }
- else
- {
- // The call target must not overwrite any live variable, though it may not be in the
- // kill set for the call.
- regMaskTP callTargetMask = genRegMask(callTargetReg);
- noway_assert((callTargetMask & regSet.rsMaskVars) == RBM_NONE);
- }
-#endif
-
- callTarget = callTargetReg;
- CodeGen::genSetRegToIcon(callTarget, (ssize_t)pAddr, TYP_I_IMPL);
- callType = emitter::EC_INDIR_ARD;
- }
- }
-
- getEmitter()->emitIns_Call(callType, compiler->eeFindHelper(helper), INDEBUG_LDISASM_COMMA(nullptr) addr, argSize,
- retSize FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(EA_UNKNOWN), gcInfo.gcVarPtrSetCur,
- gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur,
- BAD_IL_OFFSET, // IL offset
- callTarget, // ireg
- REG_NA, 0, 0, // xreg, xmul, disp
- false, // isJump
- emitter::emitNoGChelper(helper));
-
- regTracker.rsTrashRegSet(killMask);
- regTracker.rsTrashRegsForGCInterruptability();
-}
-
-#if !defined(_TARGET_64BIT_)
-//-----------------------------------------------------------------------------
-//
-// Code Generation for Long integers
-//
-//-----------------------------------------------------------------------------
-
-//------------------------------------------------------------------------
-// genStoreLongLclVar: Generate code to store a non-enregistered long lclVar
-//
-// Arguments:
-// treeNode - A TYP_LONG lclVar node.
-//
-// Return Value:
-// None.
-//
-// Assumptions:
-// 'treeNode' must be a TYP_LONG lclVar node for a lclVar that has NOT been promoted.
-// Its operand must be a GT_LONG node.
-//
-void CodeGen::genStoreLongLclVar(GenTree* treeNode)
-{
- emitter* emit = getEmitter();
-
- GenTreeLclVarCommon* lclNode = treeNode->AsLclVarCommon();
- unsigned lclNum = lclNode->gtLclNum;
- LclVarDsc* varDsc = &(compiler->lvaTable[lclNum]);
- assert(varDsc->TypeGet() == TYP_LONG);
- assert(!varDsc->lvPromoted);
- GenTreePtr op1 = treeNode->gtOp.gtOp1;
- noway_assert(op1->OperGet() == GT_LONG);
- genConsumeRegs(op1);
-
- // Definitions of register candidates will have been lowered to 2 int lclVars.
- assert(!treeNode->InReg());
-
- GenTreePtr loVal = op1->gtGetOp1();
- GenTreePtr hiVal = op1->gtGetOp2();
- // NYI: Contained immediates.
- NYI_IF((loVal->gtRegNum == REG_NA) || (hiVal->gtRegNum == REG_NA), "Store of long lclVar with contained immediate");
- emit->emitIns_R_S(ins_Store(TYP_INT), EA_4BYTE, loVal->gtRegNum, lclNum, 0);
- emit->emitIns_R_S(ins_Store(TYP_INT), EA_4BYTE, hiVal->gtRegNum, lclNum, genTypeSize(TYP_INT));
-}
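The memory effect of the two stores above, sketched for a little-endian 32-bit target (hypothetical helper):

#include <cstdint>
#include <cstring>

// Sketch only: the lclVar receives the low half at offset 0 and the high half at
// offset genTypeSize(TYP_INT) == 4, matching the little-endian layout of a 64-bit value.
void StoreLongAsTwoIntsSketch(uint8_t* lclSlot, uint32_t loVal, uint32_t hiVal)
{
    std::memcpy(lclSlot + 0, &loVal, sizeof(loVal));
    std::memcpy(lclSlot + 4, &hiVal, sizeof(hiVal));
}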
-#endif // !defined(_TARGET_64BIT_)
-
-/*****************************************************************************
-* Unit testing of the XArch emitter: generate a bunch of instructions into the prolog
-* (it's as good a place as any), then use COMPlus_JitLateDisasm=* to see if the late
-* disassembler interprets the instructions the same way we do.
-*/
-
-// Uncomment "#define ALL_XARCH_EMITTER_UNIT_TESTS" to run all the unit tests here.
-// After adding a unit test, and verifying it works, put it under this #ifdef, so we don't see it run every time.
-//#define ALL_XARCH_EMITTER_UNIT_TESTS
-
-#if defined(DEBUG) && defined(LATE_DISASM) && defined(_TARGET_AMD64_)
-void CodeGen::genAmd64EmitterUnitTests()
-{
- if (!verbose)
- {
- return;
- }
-
- if (!compiler->opts.altJit)
- {
- // No point doing this in a "real" JIT.
- return;
- }
-
- // Mark the "fake" instructions in the output.
- printf("*************** In genAmd64EmitterUnitTests()\n");
-
- // We use this:
- // genDefineTempLabel(genCreateTempLabel());
- // to create artificial labels to help separate groups of tests.
-
- //
- // Loads
- //
- CLANG_FORMAT_COMMENT_ANCHOR;
-
-#ifdef ALL_XARCH_EMITTER_UNIT_TESTS
-#ifdef FEATURE_AVX_SUPPORT
- genDefineTempLabel(genCreateTempLabel());
-
- // vhaddpd ymm0,ymm1,ymm2
- getEmitter()->emitIns_R_R_R(INS_haddpd, EA_32BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
- // vaddss xmm0,xmm1,xmm2
- getEmitter()->emitIns_R_R_R(INS_addss, EA_4BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
- // vaddsd xmm0,xmm1,xmm2
- getEmitter()->emitIns_R_R_R(INS_addsd, EA_8BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
- // vaddps xmm0,xmm1,xmm2
- getEmitter()->emitIns_R_R_R(INS_addps, EA_16BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
- // vaddps ymm0,ymm1,ymm2
- getEmitter()->emitIns_R_R_R(INS_addps, EA_32BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
- // vaddpd xmm0,xmm1,xmm2
- getEmitter()->emitIns_R_R_R(INS_addpd, EA_16BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
- // vaddpd ymm0,ymm1,ymm2
- getEmitter()->emitIns_R_R_R(INS_addpd, EA_32BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
- // vsubss xmm0,xmm1,xmm2
- getEmitter()->emitIns_R_R_R(INS_subss, EA_4BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
- // vsubsd xmm0,xmm1,xmm2
- getEmitter()->emitIns_R_R_R(INS_subsd, EA_8BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
- // vsubps xmm0,xmm1,xmm2
- getEmitter()->emitIns_R_R_R(INS_subps, EA_16BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
- // vsubps ymm0,ymm1,ymm2
- getEmitter()->emitIns_R_R_R(INS_subps, EA_32BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
- // vsubpd xmm0,xmm1,xmm2
- getEmitter()->emitIns_R_R_R(INS_subpd, EA_16BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
- // vsubpd ymm0,ymm1,ymm2
- getEmitter()->emitIns_R_R_R(INS_subpd, EA_32BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
- // vmulss xmm0,xmm1,xmm2
- getEmitter()->emitIns_R_R_R(INS_mulss, EA_4BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
- // vmulsd xmm0,xmm1,xmm2
- getEmitter()->emitIns_R_R_R(INS_mulsd, EA_8BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
- // vmulps xmm0,xmm1,xmm2
- getEmitter()->emitIns_R_R_R(INS_mulps, EA_16BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
- // vmulpd xmm0,xmm1,xmm2
- getEmitter()->emitIns_R_R_R(INS_mulpd, EA_16BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
- // vmulps ymm0,ymm1,ymm2
- getEmitter()->emitIns_R_R_R(INS_mulps, EA_32BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
- // vmulpd ymm0,ymm1,ymm2
- getEmitter()->emitIns_R_R_R(INS_mulpd, EA_32BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
- // vandps xmm0,xmm1,xmm2
- getEmitter()->emitIns_R_R_R(INS_andps, EA_16BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
- // vandpd xmm0,xmm1,xmm2
- getEmitter()->emitIns_R_R_R(INS_andpd, EA_16BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
- // vandps ymm0,ymm1,ymm2
- getEmitter()->emitIns_R_R_R(INS_andps, EA_32BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
- // vandpd ymm0,ymm1,ymm2
- getEmitter()->emitIns_R_R_R(INS_andpd, EA_32BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
- // vorps xmm0,xmm1,xmm2
- getEmitter()->emitIns_R_R_R(INS_orps, EA_16BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
- // vorpd xmm0,xmm1,xmm2
- getEmitter()->emitIns_R_R_R(INS_orpd, EA_16BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
- // vorps ymm0,ymm1,ymm2
- getEmitter()->emitIns_R_R_R(INS_orps, EA_32BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
- // vorpd ymm0,ymm1,ymm2
- getEmitter()->emitIns_R_R_R(INS_orpd, EA_32BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
- // vdivss xmm0,xmm1,xmm2
- getEmitter()->emitIns_R_R_R(INS_divss, EA_4BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
- // vdivsd xmm0,xmm1,xmm2
- getEmitter()->emitIns_R_R_R(INS_divsd, EA_8BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
- // vdivss xmm0,xmm1,xmm2
- getEmitter()->emitIns_R_R_R(INS_divss, EA_4BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
- // vdivsd xmm0,xmm1,xmm2
- getEmitter()->emitIns_R_R_R(INS_divsd, EA_8BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
-
- // vcvtss2sd xmm0,xmm1,xmm2
- getEmitter()->emitIns_R_R_R(INS_cvtss2sd, EA_4BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
- // vcvtsd2ss xmm0,xmm1,xmm2
- getEmitter()->emitIns_R_R_R(INS_cvtsd2ss, EA_8BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
-#endif // FEATURE_AVX_SUPPORT
-#endif // ALL_XARCH_EMITTER_UNIT_TESTS
- printf("*************** End of genAmd64EmitterUnitTests()\n");
-}
-
-#endif // defined(DEBUG) && defined(LATE_DISASM) && defined(_TARGET_AMD64_)
-
-/*****************************************************************************/
-#ifdef DEBUGGING_SUPPORT
-/*****************************************************************************
- * genSetScopeInfo
- *
- * Called for every scope info piece to record by the main genSetScopeInfo()
- */
-
-void CodeGen::genSetScopeInfo(unsigned which,
- UNATIVE_OFFSET startOffs,
- UNATIVE_OFFSET length,
- unsigned varNum,
- unsigned LVnum,
- bool avail,
- Compiler::siVarLoc& varLoc)
-{
- /* We need to do some mapping while reporting back these variables */
-
- unsigned ilVarNum = compiler->compMap2ILvarNum(varNum);
- noway_assert((int)ilVarNum != ICorDebugInfo::UNKNOWN_ILNUM);
-
- VarName name = nullptr;
-
-#ifdef DEBUG
-
- for (unsigned scopeNum = 0; scopeNum < compiler->info.compVarScopesCount; scopeNum++)
- {
- if (LVnum == compiler->info.compVarScopes[scopeNum].vsdLVnum)
- {
- name = compiler->info.compVarScopes[scopeNum].vsdName;
- }
- }
-
- // Hang on to this compiler->info.
-
- TrnslLocalVarInfo& tlvi = genTrnslLocalVarInfo[which];
-
- tlvi.tlviVarNum = ilVarNum;
- tlvi.tlviLVnum = LVnum;
- tlvi.tlviName = name;
- tlvi.tlviStartPC = startOffs;
- tlvi.tlviLength = length;
- tlvi.tlviAvailable = avail;
- tlvi.tlviVarLoc = varLoc;
-
-#endif // DEBUG
-
- compiler->eeSetLVinfo(which, startOffs, length, ilVarNum, LVnum, name, avail, varLoc);
-}
-#endif // DEBUGGING_SUPPORT
-
-#endif // _TARGET_AMD64_
-
-#endif // !LEGACY_BACKEND
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX Amd64/x86 Code Generator XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+#include "jitpch.h"
+#ifdef _MSC_VER
+#pragma hdrstop
+#endif
+
+#ifndef LEGACY_BACKEND // This file is ONLY used for the RyuJIT backend that uses the linear scan register allocator.
+
+#ifdef _TARGET_XARCH_
+#include "emit.h"
+#include "codegen.h"
+#include "lower.h"
+#include "gcinfo.h"
+#include "gcinfoencoder.h"
+
+// Get the register assigned to the given node
+
+regNumber CodeGenInterface::genGetAssignedReg(GenTreePtr tree)
+{
+ return tree->gtRegNum;
+}
+
+//------------------------------------------------------------------------
+// genSpillVar: Spill a local variable
+//
+// Arguments:
+// tree - the lclVar node for the variable being spilled
+//
+// Return Value:
+// None.
+//
+// Assumptions:
+// The lclVar must be a register candidate (lvRegCandidate)
+
+void CodeGen::genSpillVar(GenTreePtr tree)
+{
+ unsigned varNum = tree->gtLclVarCommon.gtLclNum;
+ LclVarDsc* varDsc = &(compiler->lvaTable[varNum]);
+
+ assert(varDsc->lvIsRegCandidate());
+
+ // We don't actually need to spill if it is already living in memory
+ bool needsSpill = ((tree->gtFlags & GTF_VAR_DEF) == 0 && varDsc->lvIsInReg());
+ if (needsSpill)
+ {
+ var_types lclTyp = varDsc->TypeGet();
+ if (varDsc->lvNormalizeOnStore())
+ {
+ lclTyp = genActualType(lclTyp);
+ }
+ emitAttr size = emitTypeSize(lclTyp);
+
+ bool restoreRegVar = false;
+ if (tree->gtOper == GT_REG_VAR)
+ {
+ tree->SetOper(GT_LCL_VAR);
+ restoreRegVar = true;
+ }
+
+ // mask off the flag to generate the right spill code, then bring it back
+ tree->gtFlags &= ~GTF_REG_VAL;
+
+ instruction storeIns = ins_Store(tree->TypeGet(), compiler->isSIMDTypeLocalAligned(varNum));
+#if CPU_LONG_USES_REGPAIR
+ if (varTypeIsMultiReg(tree))
+ {
+ assert(varDsc->lvRegNum == genRegPairLo(tree->gtRegPair));
+ assert(varDsc->lvOtherReg == genRegPairHi(tree->gtRegPair));
+ regNumber regLo = genRegPairLo(tree->gtRegPair);
+ regNumber regHi = genRegPairHi(tree->gtRegPair);
+ inst_TT_RV(storeIns, tree, regLo);
+ inst_TT_RV(storeIns, tree, regHi, 4);
+ }
+ else
+#endif
+ {
+ assert(varDsc->lvRegNum == tree->gtRegNum);
+ inst_TT_RV(storeIns, tree, tree->gtRegNum, 0, size);
+ }
+ tree->gtFlags |= GTF_REG_VAL;
+
+ if (restoreRegVar)
+ {
+ tree->SetOper(GT_REG_VAR);
+ }
+
+ genUpdateRegLife(varDsc, /*isBorn*/ false, /*isDying*/ true DEBUGARG(tree));
+ gcInfo.gcMarkRegSetNpt(varDsc->lvRegMask());
+
+ if (VarSetOps::IsMember(compiler, gcInfo.gcTrkStkPtrLcls, varDsc->lvVarIndex))
+ {
+#ifdef DEBUG
+ if (!VarSetOps::IsMember(compiler, gcInfo.gcVarPtrSetCur, varDsc->lvVarIndex))
+ {
+ JITDUMP("\t\t\t\t\t\t\tVar V%02u becoming live\n", varNum);
+ }
+ else
+ {
+ JITDUMP("\t\t\t\t\t\t\tVar V%02u continuing live\n", varNum);
+ }
+#endif
+ VarSetOps::AddElemD(compiler, gcInfo.gcVarPtrSetCur, varDsc->lvVarIndex);
+ }
+ }
+
+ tree->gtFlags &= ~GTF_SPILL;
+ varDsc->lvRegNum = REG_STK;
+ if (varTypeIsMultiReg(tree))
+ {
+ varDsc->lvOtherReg = REG_STK;
+ }
+}
+
+// inline
+void CodeGenInterface::genUpdateVarReg(LclVarDsc* varDsc, GenTreePtr tree)
+{
+ assert(tree->OperIsScalarLocal() || (tree->gtOper == GT_COPY));
+ varDsc->lvRegNum = tree->gtRegNum;
+}
+
+/*****************************************************************************/
+/*****************************************************************************/
+
+/*****************************************************************************
+ *
+ * Generate code that will set the given register to the integer constant.
+ */
+
+void CodeGen::genSetRegToIcon(regNumber reg, ssize_t val, var_types type, insFlags flags)
+{
+ // Reg cannot be a FP reg
+ assert(!genIsValidFloatReg(reg));
+
+ // The only TYP_REF constant that can come down this path is a managed 'null', since it is not
+ // relocatable. Other ref type constants (e.g. string objects) go through a different
+ // code path.
+ noway_assert(type != TYP_REF || val == 0);
+
+ if (val == 0)
+ {
+ instGen_Set_Reg_To_Zero(emitActualTypeSize(type), reg, flags);
+ }
+ else
+ {
+ // TODO-XArch-CQ: needs all the optimized cases
+ getEmitter()->emitIns_R_I(INS_mov, emitActualTypeSize(type), reg, val);
+ }
+}
+
+/*****************************************************************************
+ *
+ * Generate code to check that the GS cookie wasn't trashed by a buffer
+ * overrun. If pushReg is true, preserve all registers around the code sequence.
+ * Otherwise, ECX may be modified.
+ *
+ * Implementation Note: pushReg = true, in case of tail calls.
+ */
+void CodeGen::genEmitGSCookieCheck(bool pushReg)
+{
+ noway_assert(compiler->gsGlobalSecurityCookieAddr || compiler->gsGlobalSecurityCookieVal);
+
+ // Make sure that EAX is reported as live GC-ref so that any GC that kicks in while
+ // executing GS cookie check will not collect the object pointed to by EAX.
+ //
+ // For Amd64 System V, a two-register-returned struct could be returned in RAX and RDX
+ // In such case make sure that the correct GC-ness of RDX is reported as well, so
+ // a GC object pointed by RDX will not be collected.
+ if (!pushReg)
+ {
+ // Handle multi-reg return type values
+ if (compiler->compMethodReturnsMultiRegRetType())
+ {
+ ReturnTypeDesc retTypeDesc;
+ if (varTypeIsLong(compiler->info.compRetNativeType))
+ {
+ retTypeDesc.InitializeLongReturnType(compiler);
+ }
+ else // we must have a struct return type
+ {
+ retTypeDesc.InitializeStructReturnType(compiler, compiler->info.compMethodInfo->args.retTypeClass);
+ }
+
+ unsigned regCount = retTypeDesc.GetReturnRegCount();
+
+ // Only the x86 and x64 Unix ABIs allow multi-reg returns, and the
+ // number of result regs should equal MAX_RET_REG_COUNT.
+ assert(regCount == MAX_RET_REG_COUNT);
+
+ for (unsigned i = 0; i < regCount; ++i)
+ {
+ gcInfo.gcMarkRegPtrVal(retTypeDesc.GetABIReturnReg(i), retTypeDesc.GetReturnRegType(i));
+ }
+ }
+ else if (compiler->compMethodReturnsRetBufAddr())
+ {
+ // This is for returning in an implicit RetBuf.
+ // If the address of the buffer is returned in REG_INTRET, mark the content of INTRET as ByRef.
+
+ // In case the return is in an implicit RetBuf, the native return type should be a struct
+ assert(varTypeIsStruct(compiler->info.compRetNativeType));
+
+ gcInfo.gcMarkRegPtrVal(REG_INTRET, TYP_BYREF);
+ }
+ // ... all other cases.
+ else
+ {
+#ifdef _TARGET_AMD64_
+ // For x64, structs that are not returned in registers are always
+ // returned in implicit RetBuf. If we reached here, we should not have
+ // a RetBuf and the return type should not be a struct.
+ assert(compiler->info.compRetBuffArg == BAD_VAR_NUM);
+ assert(!varTypeIsStruct(compiler->info.compRetNativeType));
+#endif // _TARGET_AMD64_
+
+ // For x86 Windows we can't make such assertions since we generate code for returning of
+ // the RetBuf in REG_INTRET only when the ProfilerHook is enabled. Otherwise
+ // compRetNativeType could be TYP_STRUCT.
+ gcInfo.gcMarkRegPtrVal(REG_INTRET, compiler->info.compRetNativeType);
+ }
+ }
+
+ regNumber regGSCheck;
+ if (!pushReg)
+ {
+ // Non-tail call: we can use any callee-trash register that is not
+ // a return register and does not contain the 'this' pointer (which must be kept alive),
+ // since we are generating the GS cookie check after a GT_RETURN block.
+ // Note: On Amd64 System V RDX is an arg register - REG_ARG_2 - as well
+ // as return register for two-register-returned structs.
+ if (compiler->lvaKeepAliveAndReportThis() && compiler->lvaTable[compiler->info.compThisArg].lvRegister &&
+ (compiler->lvaTable[compiler->info.compThisArg].lvRegNum == REG_ARG_0))
+ {
+ regGSCheck = REG_ARG_1;
+ }
+ else
+ {
+ regGSCheck = REG_ARG_0;
+ }
+ }
+ else
+ {
+#ifdef _TARGET_X86_
+ NYI_X86("Tail calls from methods that need GS check");
+ regGSCheck = REG_NA;
+#else // !_TARGET_X86_
+ // Tail calls from methods that need GS check: We need to preserve registers while
+ // emitting GS cookie check for a tail prefixed call or a jmp. To emit GS cookie
+ // check, we might need a register. This won't be an issue for jmp calls for the
+ // reason mentioned below (see comment starting with "Jmp Calls:").
+ //
+ // The following are the possible solutions in case of tail prefixed calls:
+ // 1) Use R11 - ignore tail prefix on calls that need to pass a param in R11 when
+ // present in methods that require GS cookie check. Rest of the tail calls that
+ // do not require R11 will be honored.
+ // 2) Internal register - GT_CALL node reserves an internal register and emits GS
+ // cookie check as part of tail call codegen. GenExitCode() needs to special case
+ // fast tail calls implemented as epilog+jmp or such tail calls should always get
+ // dispatched via helper.
+ // 3) Materialize the GS cookie check as a separate node hanging off the GT_CALL node in
+ // the right execution order during rationalization.
+ //
+ // There are two calls that use R11: VSD and calli pinvokes with cookie param. Tail
+ // prefix on pinvokes is ignored. That is, options 2 and 3 will allow tail prefixed
+ // VSD calls from methods that need GS check.
+ //
+ // Tail prefixed calls: Right now for Jit64 compat, method requiring GS cookie check
+ // ignores tail prefix. In future, if we intend to support tail calls from such a method,
+ // consider one of the options mentioned above. For now adding an assert that we don't
+ // expect to see a tail call in a method that requires GS check.
+ noway_assert(!compiler->compTailCallUsed);
+
+ // Jmp calls: these specify a method handle, which the JIT uses to query the VM for the
+ // entry point address; hence a jmp call can be neither a VSD call nor a PInvoke calli
+ // with a cookie parameter. Therefore, in the case of jmp calls it is safe to use R11.
+ regGSCheck = REG_R11;
+#endif // !_TARGET_X86_
+ }
+
+ if (compiler->gsGlobalSecurityCookieAddr == nullptr)
+ {
+ // If GS cookie value fits within 32-bits we can use 'cmp mem64, imm32'.
+ // Otherwise, load the value into a reg and use 'cmp mem64, reg64'.
+ if ((int)compiler->gsGlobalSecurityCookieVal != (ssize_t)compiler->gsGlobalSecurityCookieVal)
+ {
+ genSetRegToIcon(regGSCheck, compiler->gsGlobalSecurityCookieVal, TYP_I_IMPL);
+ getEmitter()->emitIns_S_R(INS_cmp, EA_PTRSIZE, regGSCheck, compiler->lvaGSSecurityCookie, 0);
+ }
+ else
+ {
+ getEmitter()->emitIns_S_I(INS_cmp, EA_PTRSIZE, compiler->lvaGSSecurityCookie, 0,
+ (int)compiler->gsGlobalSecurityCookieVal);
+ }
+ }
+ else
+ {
+ // Ngen case - GS cookie value needs to be accessed through an indirection.
+ instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, regGSCheck, (ssize_t)compiler->gsGlobalSecurityCookieAddr);
+ getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), EA_PTRSIZE, regGSCheck, regGSCheck, 0);
+ getEmitter()->emitIns_S_R(INS_cmp, EA_PTRSIZE, regGSCheck, compiler->lvaGSSecurityCookie, 0);
+ }
+
+ BasicBlock* gsCheckBlk = genCreateTempLabel();
+ emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED);
+ inst_JMP(jmpEqual, gsCheckBlk);
+ genEmitHelperCall(CORINFO_HELP_FAIL_FAST, 0, EA_UNKNOWN);
+ genDefineTempLabel(gsCheckBlk);
+}
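Stripped of register selection and encodings, the emitted check reduces to this sketch (hypothetical helper; the real code calls the CORINFO_HELP_FAIL_FAST helper on mismatch):

#include <cstddef>

// Sketch only: compare the cookie stored in the frame against the expected value,
// which is either an immediate (JIT case) or loaded through gsGlobalSecurityCookieAddr (NGen case).
bool GsCookieIntactSketch(size_t frameCookie, const size_t* cookieAddr, size_t cookieVal)
{
    size_t expected = (cookieAddr != nullptr) ? *cookieAddr : cookieVal;
    return frameCookie == expected; // false => a buffer overrun; the method must fail fast
}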
+
+/*****************************************************************************
+ *
+ * Generate code for all the basic blocks in the function.
+ */
+
+void CodeGen::genCodeForBBlist()
+{
+ unsigned varNum;
+ LclVarDsc* varDsc;
+
+ unsigned savedStkLvl;
+
+#ifdef DEBUG
+ genInterruptibleUsed = true;
+
+ // You have to be careful if you create basic blocks from now on
+ compiler->fgSafeBasicBlockCreation = false;
+
+ // This stress mode is not compatible with fully interruptible GC
+ if (genInterruptible && compiler->opts.compStackCheckOnCall)
+ {
+ compiler->opts.compStackCheckOnCall = false;
+ }
+
+ // This stress mode is not compatible with fully interruptible GC
+ if (genInterruptible && compiler->opts.compStackCheckOnRet)
+ {
+ compiler->opts.compStackCheckOnRet = false;
+ }
+#endif // DEBUG
+
+ // Prepare the blocks for exception handling codegen: mark the blocks that need labels.
+ genPrepForEHCodegen();
+
+ assert(!compiler->fgFirstBBScratch ||
+ compiler->fgFirstBB == compiler->fgFirstBBScratch); // compiler->fgFirstBBScratch has to be first.
+
+ /* Initialize the spill tracking logic */
+
+ regSet.rsSpillBeg();
+
+#ifdef DEBUGGING_SUPPORT
+ /* Initialize the line# tracking logic */
+
+ if (compiler->opts.compScopeInfo)
+ {
+ siInit();
+ }
+#endif
+
+ // The current implementation of switch tables requires the first block to have a label so it
+ // can generate offsets to the switch label targets.
+ // TODO-XArch-CQ: remove this when switches have been re-implemented to not use this.
+ if (compiler->fgHasSwitch)
+ {
+ compiler->fgFirstBB->bbFlags |= BBF_JMP_TARGET;
+ }
+
+ genPendingCallLabel = nullptr;
+
+ /* Initialize the pointer tracking code */
+
+ gcInfo.gcRegPtrSetInit();
+ gcInfo.gcVarPtrSetInit();
+
+ /* If any arguments live in registers, mark those regs as such */
+
+ for (varNum = 0, varDsc = compiler->lvaTable; varNum < compiler->lvaCount; varNum++, varDsc++)
+ {
+ /* Is this variable a parameter assigned to a register? */
+
+ if (!varDsc->lvIsParam || !varDsc->lvRegister)
+ {
+ continue;
+ }
+
+ /* Is the argument live on entry to the method? */
+
+ if (!VarSetOps::IsMember(compiler, compiler->fgFirstBB->bbLiveIn, varDsc->lvVarIndex))
+ {
+ continue;
+ }
+
+ /* Is this a floating-point argument? */
+
+ if (varDsc->IsFloatRegType())
+ {
+ continue;
+ }
+
+ noway_assert(!varTypeIsFloating(varDsc->TypeGet()));
+
+ /* Mark the register as holding the variable */
+
+ regTracker.rsTrackRegLclVar(varDsc->lvRegNum, varNum);
+ }
+
+ unsigned finallyNesting = 0;
+
+ // Make sure a set is allocated for compiler->compCurLife (in the long case), so we can set it to empty without
+ // allocation at the start of each basic block.
+ VarSetOps::AssignNoCopy(compiler, compiler->compCurLife, VarSetOps::MakeEmpty(compiler));
+
+ /*-------------------------------------------------------------------------
+ *
+ * Walk the basic blocks and generate code for each one
+ *
+ */
+
+ BasicBlock* block;
+ BasicBlock* lblk; /* previous block */
+
+ for (lblk = nullptr, block = compiler->fgFirstBB; block != nullptr; lblk = block, block = block->bbNext)
+ {
+#ifdef DEBUG
+ if (compiler->verbose)
+ {
+ printf("\n=============== Generating ");
+ block->dspBlockHeader(compiler, true, true);
+ compiler->fgDispBBLiveness(block);
+ }
+#endif // DEBUG
+
+ // Figure out which registers hold variables on entry to this block
+
+ regSet.ClearMaskVars();
+ gcInfo.gcRegGCrefSetCur = RBM_NONE;
+ gcInfo.gcRegByrefSetCur = RBM_NONE;
+
+ compiler->m_pLinearScan->recordVarLocationsAtStartOfBB(block);
+
+ genUpdateLife(block->bbLiveIn);
+
+ // Even if liveness didn't change, we need to update the registers containing GC references.
+ // genUpdateLife will update the registers live due to liveness changes. But what about registers that didn't
+ // change? We cleared them out above. Maybe we should just not clear them out, but update the ones that change
+ // here. That would require handling the changes in recordVarLocationsAtStartOfBB().
+
+ regMaskTP newLiveRegSet = RBM_NONE;
+ regMaskTP newRegGCrefSet = RBM_NONE;
+ regMaskTP newRegByrefSet = RBM_NONE;
+#ifdef DEBUG
+ VARSET_TP VARSET_INIT_NOCOPY(removedGCVars, VarSetOps::MakeEmpty(compiler));
+ VARSET_TP VARSET_INIT_NOCOPY(addedGCVars, VarSetOps::MakeEmpty(compiler));
+#endif
+ VARSET_ITER_INIT(compiler, iter, block->bbLiveIn, varIndex);
+ while (iter.NextElem(compiler, &varIndex))
+ {
+ unsigned varNum = compiler->lvaTrackedToVarNum[varIndex];
+ LclVarDsc* varDsc = &(compiler->lvaTable[varNum]);
+
+ if (varDsc->lvIsInReg())
+ {
+ newLiveRegSet |= varDsc->lvRegMask();
+ if (varDsc->lvType == TYP_REF)
+ {
+ newRegGCrefSet |= varDsc->lvRegMask();
+ }
+ else if (varDsc->lvType == TYP_BYREF)
+ {
+ newRegByrefSet |= varDsc->lvRegMask();
+ }
+#ifdef DEBUG
+ if (verbose && VarSetOps::IsMember(compiler, gcInfo.gcVarPtrSetCur, varIndex))
+ {
+ VarSetOps::AddElemD(compiler, removedGCVars, varIndex);
+ }
+#endif // DEBUG
+ VarSetOps::RemoveElemD(compiler, gcInfo.gcVarPtrSetCur, varIndex);
+ }
+ else if (compiler->lvaIsGCTracked(varDsc))
+ {
+#ifdef DEBUG
+ if (verbose && !VarSetOps::IsMember(compiler, gcInfo.gcVarPtrSetCur, varIndex))
+ {
+ VarSetOps::AddElemD(compiler, addedGCVars, varIndex);
+ }
+#endif // DEBUG
+ VarSetOps::AddElemD(compiler, gcInfo.gcVarPtrSetCur, varIndex);
+ }
+ }
+
+ regSet.rsMaskVars = newLiveRegSet;
+
+#ifdef DEBUG
+ if (compiler->verbose)
+ {
+ if (!VarSetOps::IsEmpty(compiler, addedGCVars))
+ {
+ printf("\t\t\t\t\t\t\tAdded GCVars: ");
+ dumpConvertedVarSet(compiler, addedGCVars);
+ printf("\n");
+ }
+ if (!VarSetOps::IsEmpty(compiler, removedGCVars))
+ {
+ printf("\t\t\t\t\t\t\tRemoved GCVars: ");
+ dumpConvertedVarSet(compiler, removedGCVars);
+ printf("\n");
+ }
+ }
+#endif // DEBUG
+
+ gcInfo.gcMarkRegSetGCref(newRegGCrefSet DEBUGARG(true));
+ gcInfo.gcMarkRegSetByref(newRegByrefSet DEBUGARG(true));
+
+ /* Blocks with handlerGetsXcptnObj()==true use GT_CATCH_ARG to
+ represent the exception object (TYP_REF).
+ We mark REG_EXCEPTION_OBJECT as holding a GC object on entry
+ to the block, it will be the first thing evaluated
+ (thanks to GTF_ORDER_SIDEEFF).
+ */
+
+ if (handlerGetsXcptnObj(block->bbCatchTyp))
+ {
+ for (GenTree* node : LIR::AsRange(block))
+ {
+ if (node->OperGet() == GT_CATCH_ARG)
+ {
+ gcInfo.gcMarkRegSetGCref(RBM_EXCEPTION_OBJECT);
+ break;
+ }
+ }
+ }
+
+ /* Start a new code output block */
+
+ genUpdateCurrentFunclet(block);
+
+ if (genAlignLoops && block->bbFlags & BBF_LOOP_HEAD)
+ {
+ getEmitter()->emitLoopAlign();
+ }
+
+#ifdef DEBUG
+ if (compiler->opts.dspCode)
+ {
+ printf("\n L_M%03u_BB%02u:\n", Compiler::s_compMethodsCount, block->bbNum);
+ }
+#endif
+
+ block->bbEmitCookie = nullptr;
+
+ if (block->bbFlags & (BBF_JMP_TARGET | BBF_HAS_LABEL))
+ {
+ /* Mark a label and update the current set of live GC refs */
+
+ block->bbEmitCookie = getEmitter()->emitAddLabel(gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur,
+ gcInfo.gcRegByrefSetCur, FALSE);
+ }
+
+ if (block == compiler->fgFirstColdBlock)
+ {
+#ifdef DEBUG
+ if (compiler->verbose)
+ {
+ printf("\nThis is the start of the cold region of the method\n");
+ }
+#endif
+ // We should never have a block that falls through into the Cold section
+ noway_assert(!lblk->bbFallsThrough());
+
+ // We require the block that starts the Cold section to have a label
+ noway_assert(block->bbEmitCookie);
+ getEmitter()->emitSetFirstColdIGCookie(block->bbEmitCookie);
+ }
+
+ /* Both stacks are always empty on entry to a basic block */
+
+ genStackLevel = 0;
+
+ savedStkLvl = genStackLevel;
+
+ /* Tell everyone which basic block we're working on */
+
+ compiler->compCurBB = block;
+
+#ifdef DEBUGGING_SUPPORT
+ siBeginBlock(block);
+
+ // BBF_INTERNAL blocks don't correspond to any single IL instruction.
+ if (compiler->opts.compDbgInfo && (block->bbFlags & BBF_INTERNAL) &&
+ !compiler->fgBBisScratch(block)) // If the block is the distinguished first scratch block, then no need to
+ // emit a NO_MAPPING entry, immediately after the prolog.
+ {
+ genIPmappingAdd((IL_OFFSETX)ICorDebugInfo::NO_MAPPING, true);
+ }
+
+ bool firstMapping = true;
+#endif // DEBUGGING_SUPPORT
+
+ /*---------------------------------------------------------------------
+ *
+ * Generate code for each statement-tree in the block
+ *
+ */
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#if FEATURE_EH_FUNCLETS
+ if (block->bbFlags & BBF_FUNCLET_BEG)
+ {
+ genReserveFuncletProlog(block);
+ }
+#endif // FEATURE_EH_FUNCLETS
+
+ // Clear compCurStmt and compCurLifeTree.
+ compiler->compCurStmt = nullptr;
+ compiler->compCurLifeTree = nullptr;
+
+        // Traverse the block in linear order, generating code for each node as we
+        // encounter it.
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef DEBUGGING_SUPPORT
+ IL_OFFSETX currentILOffset = BAD_IL_OFFSET;
+#endif
+ for (GenTree* node : LIR::AsRange(block).NonPhiNodes())
+ {
+#ifdef DEBUGGING_SUPPORT
+ // Do we have a new IL offset?
+ if (node->OperGet() == GT_IL_OFFSET)
+ {
+ genEnsureCodeEmitted(currentILOffset);
+ currentILOffset = node->gtStmt.gtStmtILoffsx;
+ genIPmappingAdd(currentILOffset, firstMapping);
+ firstMapping = false;
+ }
+#endif // DEBUGGING_SUPPORT
+
+#ifdef DEBUG
+ if (node->OperGet() == GT_IL_OFFSET)
+ {
+ noway_assert(node->gtStmt.gtStmtLastILoffs <= compiler->info.compILCodeSize ||
+ node->gtStmt.gtStmtLastILoffs == BAD_IL_OFFSET);
+
+ if (compiler->opts.dspCode && compiler->opts.dspInstrs &&
+ node->gtStmt.gtStmtLastILoffs != BAD_IL_OFFSET)
+ {
+ while (genCurDispOffset <= node->gtStmt.gtStmtLastILoffs)
+ {
+ genCurDispOffset += dumpSingleInstr(compiler->info.compCode, genCurDispOffset, "> ");
+ }
+ }
+ }
+#endif // DEBUG
+
+ genCodeForTreeNode(node);
+ if (node->gtHasReg() && node->gtLsraInfo.isLocalDefUse)
+ {
+ genConsumeReg(node);
+ }
+ } // end for each node in block
+
+#ifdef DEBUG
+ // The following set of register spill checks and GC pointer tracking checks used to be
+ // performed at statement boundaries. Now, with LIR, there are no statements, so they are
+ // performed at the end of each block.
+ // TODO: could these checks be performed more frequently? E.g., at each location where
+ // the register allocator says there are no live non-variable registers. Perhaps this could
+ // be done by (a) keeping a running count of live non-variable registers by using
+ // gtLsraInfo.srcCount and gtLsraInfo.dstCount to decrement and increment the count, respectively,
+ // and running the checks when the count is zero. Or, (b) use the map maintained by LSRA
+ // (operandToLocationInfoMap) to mark a node somehow when, after the execution of that node,
+ // there will be no live non-variable registers.
+
+ regSet.rsSpillChk();
+
+ /* Make sure we didn't bungle pointer register tracking */
+
+ regMaskTP ptrRegs = gcInfo.gcRegGCrefSetCur | gcInfo.gcRegByrefSetCur;
+ regMaskTP nonVarPtrRegs = ptrRegs & ~regSet.rsMaskVars;
+
+ // If return is a GC-type, clear it. Note that if a common
+ // epilog is generated (genReturnBB) it has a void return
+ // even though we might return a ref. We can't use the compRetType
+ // as the determiner because something we are tracking as a byref
+        // might be used as a return value of an int function (which is legal)
+ GenTree* blockLastNode = block->lastNode();
+ if ((blockLastNode != nullptr) && (blockLastNode->gtOper == GT_RETURN) &&
+ (varTypeIsGC(compiler->info.compRetType) ||
+ (blockLastNode->gtOp.gtOp1 != nullptr && varTypeIsGC(blockLastNode->gtOp.gtOp1->TypeGet()))))
+ {
+ nonVarPtrRegs &= ~RBM_INTRET;
+ }
+
+ if (nonVarPtrRegs)
+ {
+ printf("Regset after BB%02u gcr=", block->bbNum);
+ printRegMaskInt(gcInfo.gcRegGCrefSetCur & ~regSet.rsMaskVars);
+ compiler->getEmitter()->emitDispRegSet(gcInfo.gcRegGCrefSetCur & ~regSet.rsMaskVars);
+ printf(", byr=");
+ printRegMaskInt(gcInfo.gcRegByrefSetCur & ~regSet.rsMaskVars);
+ compiler->getEmitter()->emitDispRegSet(gcInfo.gcRegByrefSetCur & ~regSet.rsMaskVars);
+ printf(", regVars=");
+ printRegMaskInt(regSet.rsMaskVars);
+ compiler->getEmitter()->emitDispRegSet(regSet.rsMaskVars);
+ printf("\n");
+ }
+
+ noway_assert(nonVarPtrRegs == RBM_NONE);
+#endif // DEBUG
+
+#if defined(DEBUG) && defined(LATE_DISASM) && defined(_TARGET_AMD64_)
+ if (block->bbNext == nullptr)
+ {
+ // Unit testing of the AMD64 emitter: generate a bunch of instructions into the last block
+ // (it's as good as any, but better than the prolog, which can only be a single instruction
+ // group) then use COMPlus_JitLateDisasm=* to see if the late disassembler
+ // thinks the instructions are the same as we do.
+ genAmd64EmitterUnitTests();
+ }
+#endif // defined(DEBUG) && defined(LATE_DISASM) && defined(_TARGET_AMD64_)
+
+#ifdef DEBUGGING_SUPPORT
+ // It is possible to reach the end of the block without generating code for the current IL offset.
+ // For example, if the following IR ends the current block, no code will have been generated for
+ // offset 21:
+ //
+ // ( 0, 0) [000040] ------------ il_offset void IL offset: 21
+ //
+ // N001 ( 0, 0) [000039] ------------ nop void
+ //
+ // This can lead to problems when debugging the generated code. To prevent these issues, make sure
+ // we've generated code for the last IL offset we saw in the block.
+ genEnsureCodeEmitted(currentILOffset);
+
+ if (compiler->opts.compScopeInfo && (compiler->info.compVarScopesCount > 0))
+ {
+ siEndBlock(block);
+
+ /* Is this the last block, and are there any open scopes left ? */
+
+ bool isLastBlockProcessed = (block->bbNext == nullptr);
+ if (block->isBBCallAlwaysPair())
+ {
+ isLastBlockProcessed = (block->bbNext->bbNext == nullptr);
+ }
+
+ if (isLastBlockProcessed && siOpenScopeList.scNext)
+ {
+ /* This assert no longer holds, because we may insert a throw
+ block to demarcate the end of a try or finally region when they
+ are at the end of the method. It would be nice if we could fix
+ our code so that this throw block will no longer be necessary. */
+
+ // noway_assert(block->bbCodeOffsEnd != compiler->info.compILCodeSize);
+
+ siCloseAllOpenScopes();
+ }
+ }
+
+#endif // DEBUGGING_SUPPORT
+
+ genStackLevel -= savedStkLvl;
+
+#ifdef DEBUG
+ // compCurLife should be equal to the liveOut set, except that we don't keep
+ // it up to date for vars that are not register candidates
+ // (it would be nice to have a xor set function)
+
+ VARSET_TP VARSET_INIT_NOCOPY(extraLiveVars, VarSetOps::Diff(compiler, block->bbLiveOut, compiler->compCurLife));
+ VarSetOps::UnionD(compiler, extraLiveVars, VarSetOps::Diff(compiler, compiler->compCurLife, block->bbLiveOut));
+ VARSET_ITER_INIT(compiler, extraLiveVarIter, extraLiveVars, extraLiveVarIndex);
+ while (extraLiveVarIter.NextElem(compiler, &extraLiveVarIndex))
+ {
+ unsigned varNum = compiler->lvaTrackedToVarNum[extraLiveVarIndex];
+ LclVarDsc* varDsc = compiler->lvaTable + varNum;
+ assert(!varDsc->lvIsRegCandidate());
+ }
+#endif
+
+ /* Both stacks should always be empty on exit from a basic block */
+ noway_assert(genStackLevel == 0);
+
+#ifdef _TARGET_AMD64_
+ // On AMD64, we need to generate a NOP after a call that is the last instruction of the block, in several
+ // situations, to support proper exception handling semantics. This is mostly to ensure that when the stack
+ // walker computes an instruction pointer for a frame, that instruction pointer is in the correct EH region.
+ // The document "X64 and ARM ABIs.docx" has more details. The situations:
+ // 1. If the call instruction is in a different EH region as the instruction that follows it.
+ // 2. If the call immediately precedes an OS epilog. (Note that what the JIT or VM consider an epilog might
+ // be slightly different from what the OS considers an epilog, and it is the OS-reported epilog that matters
+ // here.)
+ // We handle case #1 here, and case #2 in the emitter.
+ if (getEmitter()->emitIsLastInsCall())
+ {
+ // Ok, the last instruction generated is a call instruction. Do any of the other conditions hold?
+ // Note: we may be generating a few too many NOPs for the case of call preceding an epilog. Technically,
+ // if the next block is a BBJ_RETURN, an epilog will be generated, but there may be some instructions
+ // generated before the OS epilog starts, such as a GS cookie check.
+ if ((block->bbNext == nullptr) || !BasicBlock::sameEHRegion(block, block->bbNext))
+ {
+ // We only need the NOP if we're not going to generate any more code as part of the block end.
+
+ switch (block->bbJumpKind)
+ {
+ case BBJ_ALWAYS:
+ case BBJ_THROW:
+ case BBJ_CALLFINALLY:
+ case BBJ_EHCATCHRET:
+ // We're going to generate more code below anyway, so no need for the NOP.
+
+ case BBJ_RETURN:
+ case BBJ_EHFINALLYRET:
+ case BBJ_EHFILTERRET:
+ // These are the "epilog follows" case, handled in the emitter.
+
+ break;
+
+ case BBJ_NONE:
+ if (block->bbNext == nullptr)
+ {
+                            // Call immediately before the end of the code; we should never get here.
+ instGen(INS_BREAKPOINT); // This should never get executed
+ }
+ else
+ {
+ // We need the NOP
+ instGen(INS_nop);
+ }
+ break;
+
+ case BBJ_COND:
+ case BBJ_SWITCH:
+ // These can't have a call as the last instruction!
+
+ default:
+ noway_assert(!"Unexpected bbJumpKind");
+ break;
+ }
+ }
+ }
+#endif // _TARGET_AMD64_
+
+ /* Do we need to generate a jump or return? */
+
+ switch (block->bbJumpKind)
+ {
+ case BBJ_ALWAYS:
+ inst_JMP(EJ_jmp, block->bbJumpDest);
+ break;
+
+ case BBJ_RETURN:
+ genExitCode(block);
+ break;
+
+ case BBJ_THROW:
+ // If we have a throw at the end of a function or funclet, we need to emit another instruction
+ // afterwards to help the OS unwinder determine the correct context during unwind.
+ // We insert an unexecuted breakpoint instruction in several situations
+ // following a throw instruction:
+ // 1. If the throw is the last instruction of the function or funclet. This helps
+ // the OS unwinder determine the correct context during an unwind from the
+ // thrown exception.
+                //    2. If this is the last block of the hot section.
+ // 3. If the subsequent block is a special throw block.
+ // 4. On AMD64, if the next block is in a different EH region.
+ if ((block->bbNext == nullptr) || (block->bbNext->bbFlags & BBF_FUNCLET_BEG) ||
+ !BasicBlock::sameEHRegion(block, block->bbNext) ||
+ (!isFramePointerUsed() && compiler->fgIsThrowHlpBlk(block->bbNext)) ||
+ block->bbNext == compiler->fgFirstColdBlock)
+ {
+ instGen(INS_BREAKPOINT); // This should never get executed
+ }
+
+ break;
+
+ case BBJ_CALLFINALLY:
+
+#if FEATURE_EH_FUNCLETS
+
+ // Generate a call to the finally, like this:
+ // mov rcx,qword ptr [rbp + 20H] // Load rcx with PSPSym
+ // call finally-funclet
+ // jmp finally-return // Only for non-retless finally calls
+ // The jmp can be a NOP if we're going to the next block.
+ // If we're generating code for the main function (not a funclet), and there is no localloc,
+ // then RSP at this point is the same value as that stored in the PSPsym. So just copy RSP
+ // instead of loading the PSPSym in this case.
+
+ if (!compiler->compLocallocUsed && (compiler->funCurrentFunc()->funKind == FUNC_ROOT))
+ {
+ inst_RV_RV(INS_mov, REG_ARG_0, REG_SPBASE, TYP_I_IMPL);
+ }
+ else
+ {
+ getEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL), EA_PTRSIZE, REG_ARG_0, compiler->lvaPSPSym, 0);
+ }
+ getEmitter()->emitIns_J(INS_call, block->bbJumpDest);
+
+ if (block->bbFlags & BBF_RETLESS_CALL)
+ {
+ // We have a retless call, and the last instruction generated was a call.
+ // If the next block is in a different EH region (or is the end of the code
+ // block), then we need to generate a breakpoint here (since it will never
+ // get executed) to get proper unwind behavior.
+
+ if ((block->bbNext == nullptr) || !BasicBlock::sameEHRegion(block, block->bbNext))
+ {
+ instGen(INS_BREAKPOINT); // This should never get executed
+ }
+ }
+ else
+ {
+ // Because of the way the flowgraph is connected, the liveness info for this one instruction
+ // after the call is not (can not be) correct in cases where a variable has a last use in the
+ // handler. So turn off GC reporting for this single instruction.
+ getEmitter()->emitDisableGC();
+
+ // Now go to where the finally funclet needs to return to.
+ if (block->bbNext->bbJumpDest == block->bbNext->bbNext)
+ {
+ // Fall-through.
+ // TODO-XArch-CQ: Can we get rid of this instruction, and just have the call return directly
+ // to the next instruction? This would depend on stack walking from within the finally
+ // handler working without this instruction being in this special EH region.
+ instGen(INS_nop);
+ }
+ else
+ {
+ inst_JMP(EJ_jmp, block->bbNext->bbJumpDest);
+ }
+
+ getEmitter()->emitEnableGC();
+ }
+
+#else // !FEATURE_EH_FUNCLETS
+
+ // If we are about to invoke a finally locally from a try block, we have to set the ShadowSP slot
+ // corresponding to the finally's nesting level. When invoked in response to an exception, the
+ // EE does this.
+ //
+ // We have a BBJ_CALLFINALLY followed by a BBJ_ALWAYS.
+ //
+ // We will emit :
+ // mov [ebp - (n + 1)], 0
+ // mov [ebp - n ], 0xFC
+ // push &step
+ // jmp finallyBlock
+ // ...
+ // step:
+ // mov [ebp - n ], 0
+ // jmp leaveTarget
+ // ...
+ // leaveTarget:
+
+ noway_assert(isFramePointerUsed());
+
+ // Get the nesting level which contains the finally
+ compiler->fgGetNestingLevel(block, &finallyNesting);
+
+ // The last slot is reserved for ICodeManager::FixContext(ppEndRegion)
+ unsigned filterEndOffsetSlotOffs;
+ filterEndOffsetSlotOffs =
+ (unsigned)(compiler->lvaLclSize(compiler->lvaShadowSPslotsVar) - TARGET_POINTER_SIZE);
+
+ unsigned curNestingSlotOffs;
+ curNestingSlotOffs = (unsigned)(filterEndOffsetSlotOffs - ((finallyNesting + 1) * TARGET_POINTER_SIZE));
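+                // Each EH nesting level owns one pointer-sized ShadowSP slot, counted downwards
+                // from the slot reserved for FixContext.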
+
+ // Zero out the slot for the next nesting level
+ instGen_Store_Imm_Into_Lcl(TYP_I_IMPL, EA_PTRSIZE, 0, compiler->lvaShadowSPslotsVar,
+ curNestingSlotOffs - TARGET_POINTER_SIZE);
+ instGen_Store_Imm_Into_Lcl(TYP_I_IMPL, EA_PTRSIZE, LCL_FINALLY_MARK, compiler->lvaShadowSPslotsVar,
+ curNestingSlotOffs);
+
+ // Now push the address where the finally funclet should return to directly.
+ if (!(block->bbFlags & BBF_RETLESS_CALL))
+ {
+ assert(block->isBBCallAlwaysPair());
+ getEmitter()->emitIns_J(INS_push_hide, block->bbNext->bbJumpDest);
+ }
+ else
+ {
+                    // EE expects a DWORD, so we give it 0
+ inst_IV(INS_push_hide, 0);
+ }
+
+ // Jump to the finally BB
+ inst_JMP(EJ_jmp, block->bbJumpDest);
+
+#endif // !FEATURE_EH_FUNCLETS
+
+ // The BBJ_ALWAYS is used because the BBJ_CALLFINALLY can't point to the
+ // jump target using bbJumpDest - that is already used to point
+ // to the finally block. So just skip past the BBJ_ALWAYS unless the
+ // block is RETLESS.
+ if (!(block->bbFlags & BBF_RETLESS_CALL))
+ {
+ assert(block->isBBCallAlwaysPair());
+
+ lblk = block;
+ block = block->bbNext;
+ }
+
+ break;
+
+#if FEATURE_EH_FUNCLETS
+
+ case BBJ_EHCATCHRET:
+ // Set RAX to the address the VM should return to after the catch.
+ // Generate a RIP-relative
+ // lea reg, [rip + disp32] ; the RIP is implicit
+                // which will be position-independent.
+ getEmitter()->emitIns_R_L(INS_lea, EA_PTR_DSP_RELOC, block->bbJumpDest, REG_INTRET);
+ __fallthrough;
+
+ case BBJ_EHFINALLYRET:
+ case BBJ_EHFILTERRET:
+ genReserveFuncletEpilog(block);
+ break;
+
+#else // !FEATURE_EH_FUNCLETS
+
+ case BBJ_EHCATCHRET:
+ noway_assert(!"Unexpected BBJ_EHCATCHRET"); // not used on x86
+
+ case BBJ_EHFINALLYRET:
+ case BBJ_EHFILTERRET:
+ {
+ // The last statement of the block must be a GT_RETFILT, which has already been generated.
+ assert(block->lastNode() != nullptr);
+ assert(block->lastNode()->OperGet() == GT_RETFILT);
+
+ if (block->bbJumpKind == BBJ_EHFINALLYRET)
+ {
+ assert(block->lastNode()->gtOp.gtOp1 == nullptr); // op1 == nullptr means endfinally
+
+ // Return using a pop-jmp sequence. As the "try" block calls
+ // the finally with a jmp, this leaves the x86 call-ret stack
+                    // balanced in the normal flow path.
+
+ noway_assert(isFramePointerRequired());
+ inst_RV(INS_pop_hide, REG_EAX, TYP_I_IMPL);
+ inst_RV(INS_i_jmp, REG_EAX, TYP_I_IMPL);
+ }
+ else
+ {
+ assert(block->bbJumpKind == BBJ_EHFILTERRET);
+
+ // The return value has already been computed.
+ instGen_Return(0);
+ }
+ }
+ break;
+
+#endif // !FEATURE_EH_FUNCLETS
+
+ case BBJ_NONE:
+ case BBJ_COND:
+ case BBJ_SWITCH:
+ break;
+
+ default:
+ noway_assert(!"Unexpected bbJumpKind");
+ break;
+ }
+
+#ifdef DEBUG
+ compiler->compCurBB = nullptr;
+#endif
+
+ } //------------------ END-FOR each block of the method -------------------
+
+ /* Nothing is live at this point */
+ genUpdateLife(VarSetOps::MakeEmpty(compiler));
+
+ /* Finalize the spill tracking logic */
+
+ regSet.rsSpillEnd();
+
+ /* Finalize the temp tracking logic */
+
+ compiler->tmpEnd();
+
+#ifdef DEBUG
+ if (compiler->verbose)
+ {
+ printf("\n# ");
+ printf("compCycleEstimate = %6d, compSizeEstimate = %5d ", compiler->compCycleEstimate,
+ compiler->compSizeEstimate);
+ printf("%s\n", compiler->info.compFullName);
+ }
+#endif
+}
+
+// return the child that has the same reg as the dst (if any)
+// other child returned (out param) in 'other'
+GenTree* sameRegAsDst(GenTree* tree, GenTree*& other /*out*/)
+{
+ if (tree->gtRegNum == REG_NA)
+ {
+ other = nullptr;
+ return nullptr;
+ }
+
+ GenTreePtr op1 = tree->gtOp.gtOp1;
+ GenTreePtr op2 = tree->gtOp.gtOp2;
+ if (op1->gtRegNum == tree->gtRegNum)
+ {
+ other = op2;
+ return op1;
+ }
+ if (op2->gtRegNum == tree->gtRegNum)
+ {
+ other = op1;
+ return op2;
+ }
+ else
+ {
+ other = nullptr;
+ return nullptr;
+ }
+}
+
+// Move an immediate value into an integer register
+
+void CodeGen::instGen_Set_Reg_To_Imm(emitAttr size, regNumber reg, ssize_t imm, insFlags flags)
+{
+ // reg cannot be a FP register
+ assert(!genIsValidFloatReg(reg));
+
+ if (!compiler->opts.compReloc)
+ {
+ size = EA_SIZE(size); // Strip any Reloc flags from size if we aren't doing relocs
+ }
+
+ if ((imm == 0) && !EA_IS_RELOC(size))
+ {
+ instGen_Set_Reg_To_Zero(size, reg, flags);
+ }
+ else
+ {
+ if (genDataIndirAddrCanBeEncodedAsPCRelOffset(imm))
+ {
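+            // A RIP-relative lea is sufficient when the address is within disp32 range of the code,
+            // avoiding the larger encoding of a mov with a 64-bit immediate.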
+ getEmitter()->emitIns_R_AI(INS_lea, EA_PTR_DSP_RELOC, reg, imm);
+ }
+ else
+ {
+ getEmitter()->emitIns_R_I(INS_mov, size, reg, imm);
+ }
+ }
+ regTracker.rsTrackRegIntCns(reg, imm);
+}
+
+/***********************************************************************************
+ *
+ * Generate code to set a register 'targetReg' of type 'targetType' to the constant
+ * specified by the constant (GT_CNS_INT or GT_CNS_DBL) in 'tree'. This does not call
+ * genProduceReg() on the target register.
+ */
+void CodeGen::genSetRegToConst(regNumber targetReg, var_types targetType, GenTreePtr tree)
+{
+
+ switch (tree->gtOper)
+ {
+ case GT_CNS_INT:
+ {
+ // relocatable values tend to come down as a CNS_INT of native int type
+ // so the line between these two opcodes is kind of blurry
+ GenTreeIntConCommon* con = tree->AsIntConCommon();
+ ssize_t cnsVal = con->IconValue();
+
+ if (con->ImmedValNeedsReloc(compiler))
+ {
+ instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, targetReg, cnsVal);
+ regTracker.rsTrackRegTrash(targetReg);
+ }
+ else
+ {
+ genSetRegToIcon(targetReg, cnsVal, targetType);
+ }
+ }
+ break;
+
+ case GT_CNS_DBL:
+ {
+ double constValue = tree->gtDblCon.gtDconVal;
+
+ // Make sure we use "xorpd reg, reg" only for +ve zero constant (0.0) and not for -ve zero (-0.0)
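+            // Compare the bit pattern rather than the value, since -0.0 compares equal to 0.0
+            // but must not take the xor path.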
+ if (*(__int64*)&constValue == 0)
+ {
+ // A faster/smaller way to generate 0
+ instruction ins = genGetInsForOper(GT_XOR, targetType);
+ inst_RV_RV(ins, targetReg, targetReg, targetType);
+ }
+ else
+ {
+ GenTreePtr cns;
+ if (targetType == TYP_FLOAT)
+ {
+ float f = forceCastToFloat(constValue);
+ cns = genMakeConst(&f, targetType, tree, false);
+ }
+ else
+ {
+ cns = genMakeConst(&constValue, targetType, tree, true);
+ }
+
+ inst_RV_TT(ins_Load(targetType), targetReg, cns);
+ }
+ }
+ break;
+
+ default:
+ unreached();
+ }
+}
+
+// Generate code to get the high N bits of a N*N=2N bit multiplication result
+void CodeGen::genCodeForMulHi(GenTreeOp* treeNode)
+{
+ assert(!(treeNode->gtFlags & GTF_UNSIGNED));
+ assert(!treeNode->gtOverflowEx());
+
+ regNumber targetReg = treeNode->gtRegNum;
+ var_types targetType = treeNode->TypeGet();
+ emitter* emit = getEmitter();
+ emitAttr size = emitTypeSize(treeNode);
+ GenTree* op1 = treeNode->gtOp.gtOp1;
+ GenTree* op2 = treeNode->gtOp.gtOp2;
+
+ // to get the high bits of the multiply, we are constrained to using the
+ // 1-op form: RDX:RAX = RAX * rm
+ // The 3-op form (Rx=Ry*Rz) does not support it.
+
+ genConsumeOperands(treeNode->AsOp());
+
+ GenTree* regOp = op1;
+ GenTree* rmOp = op2;
+
+ // Set rmOp to the contained memory operand (if any)
+ //
+ if (op1->isContained() || (!op2->isContained() && (op2->gtRegNum == targetReg)))
+ {
+ regOp = op2;
+ rmOp = op1;
+ }
+ assert(!regOp->isContained());
+
+ // Setup targetReg when neither of the source operands was a matching register
+ if (regOp->gtRegNum != targetReg)
+ {
+ inst_RV_RV(ins_Copy(targetType), targetReg, regOp->gtRegNum, targetType);
+ }
+
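+    // The one-operand form of imul leaves the low half of the product in RAX and the high half in RDX.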
+ emit->emitInsBinary(INS_imulEAX, size, treeNode, rmOp);
+
+ // Move the result to the desired register, if necessary
+ if (targetReg != REG_RDX)
+ {
+ inst_RV_RV(INS_mov, targetReg, REG_RDX, targetType);
+ }
+}
+
+// generate code for a DIV or MOD operation
+//
+void CodeGen::genCodeForDivMod(GenTreeOp* treeNode)
+{
+ GenTree* dividend = treeNode->gtOp1;
+ GenTree* divisor = treeNode->gtOp2;
+ genTreeOps oper = treeNode->OperGet();
+ emitAttr size = emitTypeSize(treeNode);
+ regNumber targetReg = treeNode->gtRegNum;
+ var_types targetType = treeNode->TypeGet();
+ emitter* emit = getEmitter();
+
+ // dividend is not contained.
+ assert(!dividend->isContained());
+
+ genConsumeOperands(treeNode->AsOp());
+ if (varTypeIsFloating(targetType))
+ {
+        // The divisor is either not contained or, if contained, is a memory op.
+        // Note that a reg-optional operand is treated as a memory op
+ // if no register is allocated to it.
+ assert(!divisor->isContained() || divisor->isMemoryOp() || divisor->IsCnsFltOrDbl() ||
+ divisor->IsRegOptional());
+
+ // Floating point div/rem operation
+ assert(oper == GT_DIV || oper == GT_MOD);
+
+ if (dividend->gtRegNum == targetReg)
+ {
+ emit->emitInsBinary(genGetInsForOper(treeNode->gtOper, targetType), size, treeNode, divisor);
+ }
+ else if (!divisor->isContained() && divisor->gtRegNum == targetReg)
+ {
+ // It is not possible to generate 2-operand divss or divsd where reg2 = reg1 / reg2
+ // because divss/divsd reg1, reg2 will over-write reg1. Therefore, in case of AMD64
+ // LSRA has to make sure that such a register assignment is not generated for floating
+ // point div/rem operations.
+ noway_assert(
+ !"GT_DIV/GT_MOD (float): case of reg2 = reg1 / reg2, LSRA should never generate such a reg assignment");
+ }
+ else
+ {
+ inst_RV_RV(ins_Copy(targetType), targetReg, dividend->gtRegNum, targetType);
+ emit->emitInsBinary(genGetInsForOper(treeNode->gtOper, targetType), size, treeNode, divisor);
+ }
+ }
+ else
+ {
+ // dividend must be in RAX
+ if (dividend->gtRegNum != REG_RAX)
+ {
+ inst_RV_RV(INS_mov, REG_RAX, dividend->gtRegNum, targetType);
+ }
+
+ // zero or sign extend rax to rdx
+ if (oper == GT_UMOD || oper == GT_UDIV)
+ {
+ instGen_Set_Reg_To_Zero(EA_PTRSIZE, REG_EDX);
+ }
+ else
+ {
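+            // Sign-extend RAX into RDX so that RDX:RAX forms the full-width dividend expected by idiv.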
+ emit->emitIns(INS_cdq, size);
+            // the cdq instruction writes RDX, so clear the gcInfo for RDX
+ gcInfo.gcMarkRegSetNpt(RBM_RDX);
+ }
+
+ // Perform the 'targetType' (64-bit or 32-bit) divide instruction
+ instruction ins;
+ if (oper == GT_UMOD || oper == GT_UDIV)
+ {
+ ins = INS_div;
+ }
+ else
+ {
+ ins = INS_idiv;
+ }
+
+ emit->emitInsBinary(ins, size, treeNode, divisor);
+
+ // DIV/IDIV instructions always store the quotient in RAX and the remainder in RDX.
+ // Move the result to the desired register, if necessary
+ if (oper == GT_DIV || oper == GT_UDIV)
+ {
+ if (targetReg != REG_RAX)
+ {
+ inst_RV_RV(INS_mov, targetReg, REG_RAX, targetType);
+ }
+ }
+ else
+ {
+ assert((oper == GT_MOD) || (oper == GT_UMOD));
+ if (targetReg != REG_RDX)
+ {
+ inst_RV_RV(INS_mov, targetReg, REG_RDX, targetType);
+ }
+ }
+ }
+ genProduceReg(treeNode);
+}
+
+//------------------------------------------------------------------------
+// genCodeForBinary: Generate code for many binary arithmetic operators
+// The caller is expected to have called genConsumeOperands() before invoking this method.
+//
+// Arguments:
+// treeNode - The binary operation for which we are generating code.
+//
+// Return Value:
+// None.
+//
+// Notes:
+// Mul and div variants have special constraints on x64 so are not handled here.
+//    See the assert below for the operators that are handled.
+
+void CodeGen::genCodeForBinary(GenTree* treeNode)
+{
+ const genTreeOps oper = treeNode->OperGet();
+ regNumber targetReg = treeNode->gtRegNum;
+ var_types targetType = treeNode->TypeGet();
+ emitter* emit = getEmitter();
+
+#if defined(_TARGET_64BIT_)
+ assert(oper == GT_OR || oper == GT_XOR || oper == GT_AND || oper == GT_ADD || oper == GT_SUB);
+#else // !defined(_TARGET_64BIT_)
+ assert(oper == GT_OR || oper == GT_XOR || oper == GT_AND || oper == GT_ADD_LO || oper == GT_ADD_HI ||
+ oper == GT_SUB_LO || oper == GT_SUB_HI || oper == GT_MUL_HI || oper == GT_DIV_HI || oper == GT_MOD_HI ||
+ oper == GT_ADD || oper == GT_SUB);
+#endif // !defined(_TARGET_64BIT_)
+
+ GenTreePtr op1 = treeNode->gtGetOp1();
+ GenTreePtr op2 = treeNode->gtGetOp2();
+
+ // Commutative operations can mark op1 as contained to generate "op reg, memop/immed"
+ if (op1->isContained())
+ {
+ assert(treeNode->OperIsCommutative());
+ assert(op1->isMemoryOp() || op1->IsCnsNonZeroFltOrDbl() || op1->IsIntCnsFitsInI32() || op1->IsRegOptional());
+
+ op1 = treeNode->gtGetOp2();
+ op2 = treeNode->gtGetOp1();
+ }
+
+ instruction ins = genGetInsForOper(treeNode->OperGet(), targetType);
+
+ // The arithmetic node must be sitting in a register (since it's not contained)
+ noway_assert(targetReg != REG_NA);
+
+ regNumber op1reg = op1->isContained() ? REG_NA : op1->gtRegNum;
+ regNumber op2reg = op2->isContained() ? REG_NA : op2->gtRegNum;
+
+ GenTreePtr dst;
+ GenTreePtr src;
+
+ // This is the case of reg1 = reg1 op reg2
+ // We're ready to emit the instruction without any moves
+ if (op1reg == targetReg)
+ {
+ dst = op1;
+ src = op2;
+ }
+ // We have reg1 = reg2 op reg1
+ // In order for this operation to be correct
+ // we need that op is a commutative operation so
+ // we can convert it into reg1 = reg1 op reg2 and emit
+ // the same code as above
+ else if (op2reg == targetReg)
+ {
+ noway_assert(GenTree::OperIsCommutative(oper));
+ dst = op2;
+ src = op1;
+ }
+ // now we know there are 3 different operands so attempt to use LEA
+ else if (oper == GT_ADD && !varTypeIsFloating(treeNode) && !treeNode->gtOverflowEx() // LEA does not set flags
+ && (op2->isContainedIntOrIImmed() || !op2->isContained()))
+ {
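+        // LEA computes the sum directly into targetReg without clobbering either source operand.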
+ if (op2->isContainedIntOrIImmed())
+ {
+ emit->emitIns_R_AR(INS_lea, emitTypeSize(treeNode), targetReg, op1reg,
+ (int)op2->AsIntConCommon()->IconValue());
+ }
+ else
+ {
+ assert(op2reg != REG_NA);
+ emit->emitIns_R_ARX(INS_lea, emitTypeSize(treeNode), targetReg, op1reg, op2reg, 1, 0);
+ }
+ genProduceReg(treeNode);
+ return;
+ }
+ // dest, op1 and op2 registers are different:
+ // reg3 = reg1 op reg2
+ // We can implement this by issuing a mov:
+ // reg3 = reg1
+ // reg3 = reg3 op reg2
+ else
+ {
+ inst_RV_RV(ins_Copy(targetType), targetReg, op1reg, targetType);
+ regTracker.rsTrackRegCopy(targetReg, op1reg);
+ gcInfo.gcMarkRegPtrVal(targetReg, targetType);
+ dst = treeNode;
+ src = op2;
+ }
+
+ // try to use an inc or dec
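+    // (inc/dec have a smaller encoding than add/sub with an immediate of 1 or -1)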
+ if (oper == GT_ADD && !varTypeIsFloating(treeNode) && src->isContainedIntOrIImmed() && !treeNode->gtOverflowEx())
+ {
+ if (src->IsIntegralConst(1))
+ {
+ emit->emitIns_R(INS_inc, emitTypeSize(treeNode), targetReg);
+ genProduceReg(treeNode);
+ return;
+ }
+ else if (src->IsIntegralConst(-1))
+ {
+ emit->emitIns_R(INS_dec, emitTypeSize(treeNode), targetReg);
+ genProduceReg(treeNode);
+ return;
+ }
+ }
+ regNumber r = emit->emitInsBinary(ins, emitTypeSize(treeNode), dst, src);
+ noway_assert(r == targetReg);
+
+ if (treeNode->gtOverflowEx())
+ {
+#if !defined(_TARGET_64BIT_)
+ assert(oper == GT_ADD || oper == GT_SUB || oper == GT_ADD_HI || oper == GT_SUB_HI);
+#else
+ assert(oper == GT_ADD || oper == GT_SUB);
+#endif
+ genCheckOverflow(treeNode);
+ }
+ genProduceReg(treeNode);
+}
+
+//------------------------------------------------------------------------
+// isStructReturn: Returns whether the 'treeNode' is returning a struct.
+//
+// Arguments:
+// treeNode - The tree node to evaluate whether is a struct return.
+//
+// Return Value:
+//    For AMD64 *nix: returns true if 'treeNode' is a GT_RETURN node of struct type.
+// Otherwise returns false.
+// For other platforms always returns false.
+//
+bool CodeGen::isStructReturn(GenTreePtr treeNode)
+{
+    // This method could be called for a 'treeNode' of GT_RETFILT or GT_RETURN.
+    // For GT_RETFILT, the return is always a bool (for a filter) or
+    // void (for the end of a finally block).
+ noway_assert(treeNode->OperGet() == GT_RETURN || treeNode->OperGet() == GT_RETFILT);
+ if (treeNode->OperGet() != GT_RETURN)
+ {
+ return false;
+ }
+
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ return varTypeIsStruct(treeNode);
+#else // !FEATURE_UNIX_AMD64_STRUCT_PASSING
+ assert(!varTypeIsStruct(treeNode));
+ return false;
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+}
+
+//------------------------------------------------------------------------
+// genStructReturn: Generates code for returning a struct.
+//
+// Arguments:
+// treeNode - The GT_RETURN tree node.
+//
+// Return Value:
+// None
+//
+// Assumption:
+// op1 of GT_RETURN node is either GT_LCL_VAR or multi-reg GT_CALL
+void CodeGen::genStructReturn(GenTreePtr treeNode)
+{
+ assert(treeNode->OperGet() == GT_RETURN);
+ GenTreePtr op1 = treeNode->gtGetOp1();
+
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ if (op1->OperGet() == GT_LCL_VAR)
+ {
+ GenTreeLclVarCommon* lclVar = op1->AsLclVarCommon();
+ LclVarDsc* varDsc = &(compiler->lvaTable[lclVar->gtLclNum]);
+ assert(varDsc->lvIsMultiRegRet);
+
+ ReturnTypeDesc retTypeDesc;
+ retTypeDesc.InitializeStructReturnType(compiler, varDsc->lvVerTypeInfo.GetClassHandle());
+ unsigned regCount = retTypeDesc.GetReturnRegCount();
+ assert(regCount == MAX_RET_REG_COUNT);
+
+ if (varTypeIsEnregisterableStruct(op1))
+ {
+ // Right now the only enregistrable structs supported are SIMD vector types.
+ assert(varTypeIsSIMD(op1));
+ assert(!op1->isContained());
+
+ // This is a case of operand is in a single reg and needs to be
+ // returned in multiple ABI return registers.
+ regNumber opReg = genConsumeReg(op1);
+ regNumber reg0 = retTypeDesc.GetABIReturnReg(0);
+ regNumber reg1 = retTypeDesc.GetABIReturnReg(1);
+
+ if (opReg != reg0 && opReg != reg1)
+ {
+ // Operand reg is different from return regs.
+                // Copy opReg to reg0 and let it be handled by one of the
+ // two cases below.
+ inst_RV_RV(ins_Copy(TYP_DOUBLE), reg0, opReg, TYP_DOUBLE);
+ opReg = reg0;
+ }
+
+ if (opReg == reg0)
+ {
+ assert(opReg != reg1);
+
+ // reg0 - already has required 8-byte in bit position [63:0].
+ // reg1 = opReg.
+ // swap upper and lower 8-bytes of reg1 so that desired 8-byte is in bit position [63:0].
+ inst_RV_RV(ins_Copy(TYP_DOUBLE), reg1, opReg, TYP_DOUBLE);
+ }
+ else
+ {
+ assert(opReg == reg1);
+
+ // reg0 = opReg.
+ // swap upper and lower 8-bytes of reg1 so that desired 8-byte is in bit position [63:0].
+ inst_RV_RV(ins_Copy(TYP_DOUBLE), reg0, opReg, TYP_DOUBLE);
+ }
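+            // shufpd with immediate 0x01 swaps the upper and lower 8 bytes of reg1.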
+ inst_RV_RV_IV(INS_shufpd, EA_16BYTE, reg1, reg1, 0x01);
+ }
+ else
+ {
+ assert(op1->isContained());
+
+ // Copy var on stack into ABI return registers
+ int offset = 0;
+ for (unsigned i = 0; i < regCount; ++i)
+ {
+ var_types type = retTypeDesc.GetReturnRegType(i);
+ regNumber reg = retTypeDesc.GetABIReturnReg(i);
+ getEmitter()->emitIns_R_S(ins_Load(type), emitTypeSize(type), reg, lclVar->gtLclNum, offset);
+ offset += genTypeSize(type);
+ }
+ }
+ }
+ else
+ {
+ assert(op1->IsMultiRegCall() || op1->IsCopyOrReloadOfMultiRegCall());
+
+ genConsumeRegs(op1);
+
+ GenTree* actualOp1 = op1->gtSkipReloadOrCopy();
+ GenTreeCall* call = actualOp1->AsCall();
+ ReturnTypeDesc* retTypeDesc = call->GetReturnTypeDesc();
+ unsigned regCount = retTypeDesc->GetReturnRegCount();
+ assert(regCount == MAX_RET_REG_COUNT);
+
+ // Handle circular dependency between call allocated regs and ABI return regs.
+ //
+ // It is possible under LSRA stress that originally allocated regs of call node,
+ // say rax and rdx, are spilled and reloaded to rdx and rax respectively. But
+        // GT_RETURN needs to move values as follows: rdx->rax, rax->rdx. A similar
+        // kind of circular dependency could arise between the xmm0 and xmm1 return regs.
+ // Codegen is expected to handle such circular dependency.
+ //
+ var_types regType0 = retTypeDesc->GetReturnRegType(0);
+ regNumber returnReg0 = retTypeDesc->GetABIReturnReg(0);
+ regNumber allocatedReg0 = call->GetRegNumByIdx(0);
+
+ var_types regType1 = retTypeDesc->GetReturnRegType(1);
+ regNumber returnReg1 = retTypeDesc->GetABIReturnReg(1);
+ regNumber allocatedReg1 = call->GetRegNumByIdx(1);
+
+ if (op1->IsCopyOrReload())
+ {
+ // GT_COPY/GT_RELOAD will have valid reg for those positions
+ // that need to be copied or reloaded.
+ regNumber reloadReg = op1->AsCopyOrReload()->GetRegNumByIdx(0);
+ if (reloadReg != REG_NA)
+ {
+ allocatedReg0 = reloadReg;
+ }
+
+ reloadReg = op1->AsCopyOrReload()->GetRegNumByIdx(1);
+ if (reloadReg != REG_NA)
+ {
+ allocatedReg1 = reloadReg;
+ }
+ }
+
+ if (allocatedReg0 == returnReg1 && allocatedReg1 == returnReg0)
+ {
+ // Circular dependency - swap allocatedReg0 and allocatedReg1
+ if (varTypeIsFloating(regType0))
+ {
+ assert(varTypeIsFloating(regType1));
+
+ // The fastest way to swap two XMM regs is using PXOR
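+                // Three pxor instructions implement the classic xor-swap, exchanging the two
+                // registers without a temporary.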
+ inst_RV_RV(INS_pxor, allocatedReg0, allocatedReg1, TYP_DOUBLE);
+ inst_RV_RV(INS_pxor, allocatedReg1, allocatedReg0, TYP_DOUBLE);
+ inst_RV_RV(INS_pxor, allocatedReg0, allocatedReg1, TYP_DOUBLE);
+ }
+ else
+ {
+ assert(varTypeIsIntegral(regType0));
+ assert(varTypeIsIntegral(regType1));
+ inst_RV_RV(INS_xchg, allocatedReg1, allocatedReg0, TYP_I_IMPL);
+ }
+ }
+ else if (allocatedReg1 == returnReg0)
+ {
+ // Change the order of moves to correctly handle dependency.
+ if (allocatedReg1 != returnReg1)
+ {
+ inst_RV_RV(ins_Copy(regType1), returnReg1, allocatedReg1, regType1);
+ }
+
+ if (allocatedReg0 != returnReg0)
+ {
+ inst_RV_RV(ins_Copy(regType0), returnReg0, allocatedReg0, regType0);
+ }
+ }
+ else
+ {
+ // No circular dependency case.
+ if (allocatedReg0 != returnReg0)
+ {
+ inst_RV_RV(ins_Copy(regType0), returnReg0, allocatedReg0, regType0);
+ }
+
+ if (allocatedReg1 != returnReg1)
+ {
+ inst_RV_RV(ins_Copy(regType1), returnReg1, allocatedReg1, regType1);
+ }
+ }
+ }
+#else
+ unreached();
+#endif
+}
+
+//------------------------------------------------------------------------
+// genReturn: Generates code for return statement.
+// In case of struct return, delegates to the genStructReturn method.
+//
+// Arguments:
+// treeNode - The GT_RETURN or GT_RETFILT tree node.
+//
+// Return Value:
+// None
+//
+void CodeGen::genReturn(GenTreePtr treeNode)
+{
+ assert(treeNode->OperGet() == GT_RETURN || treeNode->OperGet() == GT_RETFILT);
+ GenTreePtr op1 = treeNode->gtGetOp1();
+ var_types targetType = treeNode->TypeGet();
+
+#ifdef DEBUG
+ if (targetType == TYP_VOID)
+ {
+ assert(op1 == nullptr);
+ }
+#endif
+
+#ifdef _TARGET_X86_
+ if (treeNode->TypeGet() == TYP_LONG)
+ {
+ assert(op1 != nullptr);
+ noway_assert(op1->OperGet() == GT_LONG);
+ GenTree* loRetVal = op1->gtGetOp1();
+ GenTree* hiRetVal = op1->gtGetOp2();
+ noway_assert((loRetVal->gtRegNum != REG_NA) && (hiRetVal->gtRegNum != REG_NA));
+
+ genConsumeReg(loRetVal);
+ genConsumeReg(hiRetVal);
+ if (loRetVal->gtRegNum != REG_LNGRET_LO)
+ {
+ inst_RV_RV(ins_Copy(targetType), REG_LNGRET_LO, loRetVal->gtRegNum, TYP_INT);
+ }
+ if (hiRetVal->gtRegNum != REG_LNGRET_HI)
+ {
+ inst_RV_RV(ins_Copy(targetType), REG_LNGRET_HI, hiRetVal->gtRegNum, TYP_INT);
+ }
+ }
+ else
+#endif // _TARGET_X86_
+ {
+ if (isStructReturn(treeNode))
+ {
+ genStructReturn(treeNode);
+ }
+ else if (targetType != TYP_VOID)
+ {
+ assert(op1 != nullptr);
+ noway_assert(op1->gtRegNum != REG_NA);
+
+ // !! NOTE !! genConsumeReg will clear op1 as GC ref after it has
+ // consumed a reg for the operand. This is because the variable
+ // is dead after return. But we are issuing more instructions
+ // like "profiler leave callback" after this consumption. So
+ // if you are issuing more instructions after this point,
+ // remember to keep the variable live up until the new method
+ // exit point where it is actually dead.
+ genConsumeReg(op1);
+
+ regNumber retReg = varTypeIsFloating(treeNode) ? REG_FLOATRET : REG_INTRET;
+#ifdef _TARGET_X86_
+ if (varTypeIsFloating(treeNode))
+ {
+ // Spill the return value register from an XMM register to the stack, then load it on the x87 stack.
+ // If it already has a home location, use that. Otherwise, we need a temp.
+ if (genIsRegCandidateLocal(op1) && compiler->lvaTable[op1->gtLclVarCommon.gtLclNum].lvOnFrame)
+ {
+ // Store local variable to its home location, if necessary.
+ if ((op1->gtFlags & GTF_REG_VAL) != 0)
+ {
+ op1->gtFlags &= ~GTF_REG_VAL;
+ inst_TT_RV(ins_Store(op1->gtType,
+ compiler->isSIMDTypeLocalAligned(op1->gtLclVarCommon.gtLclNum)),
+ op1, op1->gtRegNum);
+ }
+ // Now, load it to the fp stack.
+ getEmitter()->emitIns_S(INS_fld, emitTypeSize(op1), op1->AsLclVarCommon()->gtLclNum, 0);
+ }
+ else
+ {
+ // Spill the value, which should be in a register, then load it to the fp stack.
+ // TODO-X86-CQ: Deal with things that are already in memory (don't call genConsumeReg yet).
+ op1->gtFlags |= GTF_SPILL;
+ regSet.rsSpillTree(op1->gtRegNum, op1);
+ op1->gtFlags |= GTF_SPILLED;
+ op1->gtFlags &= ~GTF_SPILL;
+
+ TempDsc* t = regSet.rsUnspillInPlace(op1, op1->gtRegNum);
+ inst_FS_ST(INS_fld, emitActualTypeSize(op1->gtType), t, 0);
+ op1->gtFlags &= ~GTF_SPILLED;
+ compiler->tmpRlsTemp(t);
+ }
+ }
+ else
+#endif // _TARGET_X86_
+ {
+ if (op1->gtRegNum != retReg)
+ {
+ inst_RV_RV(ins_Copy(targetType), retReg, op1->gtRegNum, targetType);
+ }
+ }
+ }
+ }
+
+#ifdef PROFILING_SUPPORTED
+ // !! Note !!
+ // TODO-AMD64-Unix: If the profiler hook is implemented on *nix, make sure for 2 register returned structs
+    // the RAX and RDX need to be kept alive. Make the necessary changes in lowerxarch.cpp
+ // in the handling of the GT_RETURN statement.
+ // Such structs containing GC pointers need to be handled by calling gcInfo.gcMarkRegSetNpt
+ // for the return registers containing GC refs.
+
+ // There will be a single return block while generating profiler ELT callbacks.
+ //
+ // Reason for not materializing Leave callback as a GT_PROF_HOOK node after GT_RETURN:
+    // In the flowgraph and other places we assert that the last node of a block marked as
+    // BBJ_RETURN is either a GT_RETURN or GT_JMP or a tail call. It would be nice to
+ // maintain such an invariant irrespective of whether profiler hook needed or not.
+ // Also, there is not much to be gained by materializing it as an explicit node.
+ if (compiler->compCurBB == compiler->genReturnBB)
+ {
+ // !! NOTE !!
+ // Since we are invalidating the assumption that we would slip into the epilog
+ // right after the "return", we need to preserve the return reg's GC state
+ // across the call until actual method return.
+ if (varTypeIsGC(compiler->info.compRetType))
+ {
+ gcInfo.gcMarkRegPtrVal(REG_INTRET, compiler->info.compRetType);
+ }
+
+ genProfilingLeaveCallback();
+
+ if (varTypeIsGC(compiler->info.compRetType))
+ {
+ gcInfo.gcMarkRegSetNpt(REG_INTRET);
+ }
+ }
+#endif
+}
+
+/*****************************************************************************
+ *
+ * Generate code for a single node in the tree.
+ * Preconditions: All operands have been evaluated
+ *
+ */
+void CodeGen::genCodeForTreeNode(GenTreePtr treeNode)
+{
+ regNumber targetReg;
+#if !defined(_TARGET_64BIT_)
+ if (treeNode->TypeGet() == TYP_LONG)
+ {
+ // All long enregistered nodes will have been decomposed into their
+ // constituent lo and hi nodes.
+ targetReg = REG_NA;
+ }
+ else
+#endif // !defined(_TARGET_64BIT_)
+ {
+ targetReg = treeNode->gtRegNum;
+ }
+ var_types targetType = treeNode->TypeGet();
+ emitter* emit = getEmitter();
+
+#ifdef DEBUG
+ // Validate that all the operands for the current node are consumed in order.
+ // This is important because LSRA ensures that any necessary copies will be
+ // handled correctly.
+ lastConsumedNode = nullptr;
+ if (compiler->verbose)
+ {
+ unsigned seqNum = treeNode->gtSeqNum; // Useful for setting a conditional break in Visual Studio
+ printf("Generating: ");
+ compiler->gtDispTree(treeNode, nullptr, nullptr, true);
+ }
+#endif // DEBUG
+
+ // Is this a node whose value is already in a register? LSRA denotes this by
+ // setting the GTF_REUSE_REG_VAL flag.
+ if (treeNode->IsReuseRegVal())
+ {
+ // For now, this is only used for constant nodes.
+ assert((treeNode->OperIsConst()));
+ JITDUMP(" TreeNode is marked ReuseReg\n");
+ return;
+ }
+
+ // contained nodes are part of their parents for codegen purposes
+ // ex : immediates, most LEAs
+ if (treeNode->isContained())
+ {
+ return;
+ }
+
+ switch (treeNode->gtOper)
+ {
+ case GT_START_NONGC:
+ getEmitter()->emitDisableGC();
+ break;
+
+ case GT_PROF_HOOK:
+#ifdef PROFILING_SUPPORTED
+ // We should be seeing this only if profiler hook is needed
+ noway_assert(compiler->compIsProfilerHookNeeded());
+
+ // Right now this node is used only for tail calls. In future if
+ // we intend to use it for Enter or Leave hooks, add a data member
+ // to this node indicating the kind of profiler hook. For example,
+ // helper number can be used.
+ genProfilingLeaveCallback(CORINFO_HELP_PROF_FCN_TAILCALL);
+#endif // PROFILING_SUPPORTED
+ break;
+
+ case GT_LCLHEAP:
+ genLclHeap(treeNode);
+ break;
+
+ case GT_CNS_INT:
+#ifdef _TARGET_X86_
+ NYI_IF(treeNode->IsIconHandle(GTF_ICON_TLS_HDL), "TLS constants");
+#endif // _TARGET_X86_
+ __fallthrough;
+
+ case GT_CNS_DBL:
+ genSetRegToConst(targetReg, targetType, treeNode);
+ genProduceReg(treeNode);
+ break;
+
+ case GT_NEG:
+ case GT_NOT:
+ if (varTypeIsFloating(targetType))
+ {
+ assert(treeNode->gtOper == GT_NEG);
+ genSSE2BitwiseOp(treeNode);
+ }
+ else
+ {
+ GenTreePtr operand = treeNode->gtGetOp1();
+ assert(!operand->isContained());
+ regNumber operandReg = genConsumeReg(operand);
+
+ if (operandReg != targetReg)
+ {
+ inst_RV_RV(INS_mov, targetReg, operandReg, targetType);
+ }
+
+ instruction ins = genGetInsForOper(treeNode->OperGet(), targetType);
+ inst_RV(ins, targetReg, targetType);
+ }
+ genProduceReg(treeNode);
+ break;
+
+ case GT_OR:
+ case GT_XOR:
+ case GT_AND:
+ assert(varTypeIsIntegralOrI(treeNode));
+ __fallthrough;
+
+#if !defined(_TARGET_64BIT_)
+ case GT_ADD_LO:
+ case GT_ADD_HI:
+ case GT_SUB_LO:
+ case GT_SUB_HI:
+#endif // !defined(_TARGET_64BIT_)
+ case GT_ADD:
+ case GT_SUB:
+ genConsumeOperands(treeNode->AsOp());
+ genCodeForBinary(treeNode);
+ break;
+
+ case GT_LSH:
+ case GT_RSH:
+ case GT_RSZ:
+ case GT_ROL:
+ case GT_ROR:
+ genCodeForShift(treeNode);
+ // genCodeForShift() calls genProduceReg()
+ break;
+
+ case GT_CAST:
+#if !defined(_TARGET_64BIT_)
+ // We will NYI in DecomposeNode() if we are cast TO a long type, but we do not
+ // yet support casting FROM a long type either, and that's simpler to catch
+ // here.
+ NYI_IF(varTypeIsLong(treeNode->gtOp.gtOp1), "Casts from TYP_LONG");
+#endif // !defined(_TARGET_64BIT_)
+
+ if (varTypeIsFloating(targetType) && varTypeIsFloating(treeNode->gtOp.gtOp1))
+ {
+ // Casts float/double <--> double/float
+ genFloatToFloatCast(treeNode);
+ }
+ else if (varTypeIsFloating(treeNode->gtOp.gtOp1))
+ {
+ // Casts float/double --> int32/int64
+ genFloatToIntCast(treeNode);
+ }
+ else if (varTypeIsFloating(targetType))
+ {
+ // Casts int32/uint32/int64/uint64 --> float/double
+ genIntToFloatCast(treeNode);
+ }
+ else
+ {
+ // Casts int <--> int
+ genIntToIntCast(treeNode);
+ }
+ // The per-case functions call genProduceReg()
+ break;
+
+ case GT_LCL_VAR:
+ {
+ // lcl_vars are not defs
+ assert((treeNode->gtFlags & GTF_VAR_DEF) == 0);
+
+ GenTreeLclVarCommon* lcl = treeNode->AsLclVarCommon();
+ bool isRegCandidate = compiler->lvaTable[lcl->gtLclNum].lvIsRegCandidate();
+
+ if (isRegCandidate && !(treeNode->gtFlags & GTF_VAR_DEATH))
+ {
+ assert((treeNode->InReg()) || (treeNode->gtFlags & GTF_SPILLED));
+ }
+
+ // If this is a register candidate that has been spilled, genConsumeReg() will
+ // reload it at the point of use. Otherwise, if it's not in a register, we load it here.
+
+ if (!treeNode->InReg() && !(treeNode->gtFlags & GTF_SPILLED))
+ {
+ assert(!isRegCandidate);
+
+ emit->emitIns_R_S(ins_Load(treeNode->TypeGet(), compiler->isSIMDTypeLocalAligned(lcl->gtLclNum)),
+ emitTypeSize(treeNode), treeNode->gtRegNum, lcl->gtLclNum, 0);
+ genProduceReg(treeNode);
+ }
+ }
+ break;
+
+ case GT_LCL_FLD_ADDR:
+ case GT_LCL_VAR_ADDR:
+ // Address of a local var. This by itself should never be allocated a register.
+ // If it is worth storing the address in a register then it should be cse'ed into
+ // a temp and that would be allocated a register.
+ noway_assert(targetType == TYP_BYREF);
+ noway_assert(!treeNode->InReg());
+
+ inst_RV_TT(INS_lea, targetReg, treeNode, 0, EA_BYREF);
+ genProduceReg(treeNode);
+ break;
+
+ case GT_LCL_FLD:
+ {
+ noway_assert(targetType != TYP_STRUCT);
+ noway_assert(treeNode->gtRegNum != REG_NA);
+
+#ifdef FEATURE_SIMD
+ // Loading of TYP_SIMD12 (i.e. Vector3) field
+ if (treeNode->TypeGet() == TYP_SIMD12)
+ {
+ genLoadLclFldTypeSIMD12(treeNode);
+ break;
+ }
+#endif
+
+ emitAttr size = emitTypeSize(targetType);
+ unsigned offs = treeNode->gtLclFld.gtLclOffs;
+ unsigned varNum = treeNode->gtLclVarCommon.gtLclNum;
+ assert(varNum < compiler->lvaCount);
+
+ emit->emitIns_R_S(ins_Move_Extend(targetType, treeNode->InReg()), size, targetReg, varNum, offs);
+ }
+ genProduceReg(treeNode);
+ break;
+
+ case GT_STORE_LCL_FLD:
+ {
+ noway_assert(targetType != TYP_STRUCT);
+ noway_assert(!treeNode->InReg());
+ assert(!varTypeIsFloating(targetType) || (targetType == treeNode->gtGetOp1()->TypeGet()));
+
+#ifdef FEATURE_SIMD
+ // storing of TYP_SIMD12 (i.e. Vector3) field
+ if (treeNode->TypeGet() == TYP_SIMD12)
+ {
+ genStoreLclFldTypeSIMD12(treeNode);
+ break;
+ }
+#endif
+ GenTreePtr op1 = treeNode->gtGetOp1();
+ genConsumeRegs(op1);
+ emit->emitInsBinary(ins_Store(targetType), emitTypeSize(treeNode), treeNode, op1);
+ }
+ break;
+
+ case GT_STORE_LCL_VAR:
+ {
+ GenTreePtr op1 = treeNode->gtGetOp1();
+
+ // var = call, where call returns a multi-reg return value
+ // case is handled separately.
+ if (op1->gtSkipReloadOrCopy()->IsMultiRegCall())
+ {
+ genMultiRegCallStoreToLocal(treeNode);
+ }
+ else
+ {
+ noway_assert(targetType != TYP_STRUCT);
+ assert(!varTypeIsFloating(targetType) || (targetType == treeNode->gtGetOp1()->TypeGet()));
+
+ unsigned lclNum = treeNode->AsLclVarCommon()->gtLclNum;
+ LclVarDsc* varDsc = &(compiler->lvaTable[lclNum]);
+
+ // Ensure that lclVar nodes are typed correctly.
+ assert(!varDsc->lvNormalizeOnStore() || treeNode->TypeGet() == genActualType(varDsc->TypeGet()));
+
+#if !defined(_TARGET_64BIT_)
+ if (treeNode->TypeGet() == TYP_LONG)
+ {
+ genStoreLongLclVar(treeNode);
+ break;
+ }
+#endif // !defined(_TARGET_64BIT_)
+
+#ifdef FEATURE_SIMD
+ if (varTypeIsSIMD(targetType) && (targetReg != REG_NA) && op1->IsCnsIntOrI())
+ {
+ // This is only possible for a zero-init.
+ noway_assert(op1->IsIntegralConst(0));
+ genSIMDZero(targetType, varDsc->lvBaseType, targetReg);
+ genProduceReg(treeNode);
+ break;
+ }
+#endif // FEATURE_SIMD
+
+ genConsumeRegs(op1);
+
+ if (treeNode->gtRegNum == REG_NA)
+ {
+ // stack store
+ emit->emitInsMov(ins_Store(targetType, compiler->isSIMDTypeLocalAligned(lclNum)),
+ emitTypeSize(targetType), treeNode);
+ varDsc->lvRegNum = REG_STK;
+ }
+ else
+ {
+ bool containedOp1 = op1->isContained();
+ // Look for the case where we have a constant zero which we've marked for reuse,
+ // but which isn't actually in the register we want. In that case, it's better to create
+ // zero in the target register, because an xor is smaller than a copy. Note that we could
+ // potentially handle this in the register allocator, but we can't always catch it there
+ // because the target may not have a register allocated for it yet.
+ if (!containedOp1 && (op1->gtRegNum != treeNode->gtRegNum) &&
+ (op1->IsIntegralConst(0) || op1->IsFPZero()))
+ {
+ op1->gtRegNum = REG_NA;
+ op1->ResetReuseRegVal();
+ containedOp1 = true;
+ }
+
+ if (containedOp1)
+ {
+ // Currently, we assume that the contained source of a GT_STORE_LCL_VAR writing to a register
+ // must be a constant. However, in the future we might want to support a contained memory op.
+ // This is a bit tricky because we have to decide it's contained before register allocation,
+ // and this would be a case where, once that's done, we need to mark that node as always
+ // requiring a register - which we always assume now anyway, but once we "optimize" that
+ // we'll have to take cases like this into account.
+ assert((op1->gtRegNum == REG_NA) && op1->OperIsConst());
+ genSetRegToConst(treeNode->gtRegNum, targetType, op1);
+ }
+ else if (op1->gtRegNum != treeNode->gtRegNum)
+ {
+ assert(op1->gtRegNum != REG_NA);
+ emit->emitInsBinary(ins_Move_Extend(targetType, true), emitTypeSize(treeNode), treeNode, op1);
+ }
+ }
+ }
+
+ if (treeNode->gtRegNum != REG_NA)
+ {
+ genProduceReg(treeNode);
+ }
+ }
+ break;
+
+ case GT_RETFILT:
+ // A void GT_RETFILT is the end of a finally. For non-void filter returns we need to load the result in
+ // the return register, if it's not already there. The processing is the same as GT_RETURN.
+ if (targetType != TYP_VOID)
+ {
+ // For filters, the IL spec says the result is type int32. Further, the only specified legal values
+ // are 0 or 1, with the use of other values "undefined".
+ assert(targetType == TYP_INT);
+ }
+
+ __fallthrough;
+
+ case GT_RETURN:
+ genReturn(treeNode);
+ break;
+
+ case GT_LEA:
+ {
+ // if we are here, it is the case where there is an LEA that cannot
+ // be folded into a parent instruction
+ GenTreeAddrMode* lea = treeNode->AsAddrMode();
+ genLeaInstruction(lea);
+ }
+ // genLeaInstruction calls genProduceReg()
+ break;
+
+ case GT_IND:
+#ifdef FEATURE_SIMD
+ // Handling of Vector3 type values loaded through indirection.
+ if (treeNode->TypeGet() == TYP_SIMD12)
+ {
+ genLoadIndTypeSIMD12(treeNode);
+ break;
+ }
+#endif // FEATURE_SIMD
+
+ genConsumeAddress(treeNode->AsIndir()->Addr());
+ emit->emitInsMov(ins_Load(treeNode->TypeGet()), emitTypeSize(treeNode), treeNode);
+ genProduceReg(treeNode);
+ break;
+
+ case GT_MULHI:
+ genCodeForMulHi(treeNode->AsOp());
+ genProduceReg(treeNode);
+ break;
+
+ case GT_MUL:
+ {
+ instruction ins;
+ emitAttr size = emitTypeSize(treeNode);
+ bool isUnsignedMultiply = ((treeNode->gtFlags & GTF_UNSIGNED) != 0);
+ bool requiresOverflowCheck = treeNode->gtOverflowEx();
+
+ GenTree* op1 = treeNode->gtGetOp1();
+ GenTree* op2 = treeNode->gtGetOp2();
+
+ // there are 3 forms of x64 multiply:
+            //    1-op form with 128-bit result: RDX:RAX = RAX * rm
+ // 2-op form: reg *= rm
+ // 3-op form: reg = rm * imm
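+            // For illustration (using RAX/RCX as example registers), these correspond to
+            // instructions like:
+            //    mul  rcx          ; 1-op form:  RDX:RAX = RAX * RCX
+            //    imul rax, rcx     ; 2-op form:  RAX = RAX * RCX
+            //    imul rax, rcx, 7  ; 3-op form:  RAX = RCX * 7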
+
+ genConsumeOperands(treeNode->AsOp());
+
+ // This matches the 'mul' lowering in Lowering::SetMulOpCounts()
+ //
+ // immOp :: Only one operand can be an immediate
+ // rmOp :: Only one operand can be a memory op.
+ // regOp :: A register op (especially the operand that matches 'targetReg')
+ // (can be nullptr when we have both a memory op and an immediate op)
+
+ GenTree* immOp = nullptr;
+ GenTree* rmOp = op1;
+ GenTree* regOp;
+
+ if (op2->isContainedIntOrIImmed())
+ {
+ immOp = op2;
+ }
+ else if (op1->isContainedIntOrIImmed())
+ {
+ immOp = op1;
+ rmOp = op2;
+ }
+
+ if (immOp != nullptr)
+ {
+ // This must be a non-floating point operation.
+ assert(!varTypeIsFloating(treeNode));
+
+ // CQ: When possible use LEA for mul by imm 3, 5 or 9
+ ssize_t imm = immOp->AsIntConCommon()->IconValue();
+
+ if (!requiresOverflowCheck && !rmOp->isContained() && ((imm == 3) || (imm == 5) || (imm == 9)))
+ {
+ // We will use the LEA instruction to perform this multiply
+ // Note that an LEA with base=x, index=x and scale=(imm-1) computes x*imm when imm=3,5 or 9.
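+                // For example, with rmOp in RCX, targetReg == RAX and imm == 5, this emits:
+                //    lea rax, [rcx + rcx*4]    ; rax = rcx * 5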
+ unsigned int scale = (unsigned int)(imm - 1);
+ getEmitter()->emitIns_R_ARX(INS_lea, size, targetReg, rmOp->gtRegNum, rmOp->gtRegNum, scale, 0);
+ }
+ else
+ {
+ // use the 3-op form with immediate
+ ins = getEmitter()->inst3opImulForReg(targetReg);
+ emit->emitInsBinary(ins, size, rmOp, immOp);
+ }
+ }
+ else // we have no contained immediate operand
+ {
+ regOp = op1;
+ rmOp = op2;
+
+ regNumber mulTargetReg = targetReg;
+ if (isUnsignedMultiply && requiresOverflowCheck)
+ {
+ ins = INS_mulEAX;
+ mulTargetReg = REG_RAX;
+ }
+ else
+ {
+ ins = genGetInsForOper(GT_MUL, targetType);
+ }
+
+            // Set rmOp to the contained memory operand (if any)
+ // or set regOp to the op2 when it has the matching target register for our multiply op
+ //
+ if (op1->isContained() || (!op2->isContained() && (op2->gtRegNum == mulTargetReg)))
+ {
+ regOp = op2;
+ rmOp = op1;
+ }
+ assert(!regOp->isContained());
+
+ // Setup targetReg when neither of the source operands was a matching register
+ if (regOp->gtRegNum != mulTargetReg)
+ {
+ inst_RV_RV(ins_Copy(targetType), mulTargetReg, regOp->gtRegNum, targetType);
+ }
+
+ emit->emitInsBinary(ins, size, treeNode, rmOp);
+
+ // Move the result to the desired register, if necessary
+ if ((ins == INS_mulEAX) && (targetReg != REG_RAX))
+ {
+ inst_RV_RV(INS_mov, targetReg, REG_RAX, targetType);
+ }
+ }
+
+ if (requiresOverflowCheck)
+ {
+ // Overflow checking is only used for non-floating point types
+ noway_assert(!varTypeIsFloating(treeNode));
+
+ genCheckOverflow(treeNode);
+ }
+ }
+ genProduceReg(treeNode);
+ break;
+
+ case GT_MOD:
+ case GT_UDIV:
+ case GT_UMOD:
+ // We shouldn't be seeing GT_MOD on float/double args as it should get morphed into a
+ // helper call by front-end. Similarly we shouldn't be seeing GT_UDIV and GT_UMOD
+ // on float/double args.
+ noway_assert(!varTypeIsFloating(treeNode));
+ __fallthrough;
+
+ case GT_DIV:
+ genCodeForDivMod(treeNode->AsOp());
+ break;
+
+ case GT_INTRINSIC:
+ genIntrinsic(treeNode);
+ break;
+
+#ifdef FEATURE_SIMD
+ case GT_SIMD:
+ genSIMDIntrinsic(treeNode->AsSIMD());
+ break;
+#endif // FEATURE_SIMD
+
+ case GT_CKFINITE:
+ genCkfinite(treeNode);
+ break;
+
+ case GT_EQ:
+ case GT_NE:
+ case GT_LT:
+ case GT_LE:
+ case GT_GE:
+ case GT_GT:
+ {
+ // TODO-XArch-CQ: Check if we can use the currently set flags.
+ // TODO-XArch-CQ: Check for the case where we can simply transfer the carry bit to a register
+ // (signed < or >= where targetReg != REG_NA)
+
+ GenTreePtr op1 = treeNode->gtGetOp1();
+ var_types op1Type = op1->TypeGet();
+
+ if (varTypeIsFloating(op1Type))
+ {
+ genCompareFloat(treeNode);
+ }
+#if !defined(_TARGET_64BIT_)
+ // X86 Long comparison
+ else if (varTypeIsLong(op1Type))
+ {
+ // When not materializing the result in a register, the compare logic is generated
+ // when we generate the GT_JTRUE.
+ if (treeNode->gtRegNum != REG_NA)
+ {
+ genCompareLong(treeNode);
+ }
+ else
+ {
+ // We generate the compare when we generate the GT_JTRUE, but we need to consume
+ // the operands now.
+ genConsumeOperands(treeNode->AsOp());
+ }
+ }
+#endif // !defined(_TARGET_64BIT_)
+ else
+ {
+ genCompareInt(treeNode);
+ }
+ }
+ break;
+
+ case GT_JTRUE:
+ {
+ GenTree* cmp = treeNode->gtOp.gtOp1;
+
+ assert(cmp->OperIsCompare());
+ assert(compiler->compCurBB->bbJumpKind == BBJ_COND);
+
+#if !defined(_TARGET_64BIT_)
+ // For long compares, we emit special logic
+ if (varTypeIsLong(cmp->gtGetOp1()))
+ {
+ genJTrueLong(cmp);
+ }
+ else
+#endif
+ {
+ // Get the "kind" and type of the comparison. Note that whether it is an unsigned cmp
+ // is governed by a flag NOT by the inherent type of the node
+ // TODO-XArch-CQ: Check if we can use the currently set flags.
+ emitJumpKind jumpKind[2];
+ bool branchToTrueLabel[2];
+ genJumpKindsForTree(cmp, jumpKind, branchToTrueLabel);
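+                // For example, an ordered floating-point GT_EQ is roughly expanded as:
+                //    jpe skipLabel       ; unordered -> treat as "not taken"
+                //    je  <bbJumpDest>    ; ordered and equal -> branch to the true label
+                //  skipLabel: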
+
+ BasicBlock* skipLabel = nullptr;
+ if (jumpKind[0] != EJ_NONE)
+ {
+ BasicBlock* jmpTarget;
+ if (branchToTrueLabel[0])
+ {
+ jmpTarget = compiler->compCurBB->bbJumpDest;
+ }
+ else
+ {
+ // This case arises only for ordered GT_EQ right now
+ assert((cmp->gtOper == GT_EQ) && ((cmp->gtFlags & GTF_RELOP_NAN_UN) == 0));
+ skipLabel = genCreateTempLabel();
+ jmpTarget = skipLabel;
+ }
+
+ inst_JMP(jumpKind[0], jmpTarget);
+ }
+
+ if (jumpKind[1] != EJ_NONE)
+ {
+ // the second conditional branch always has to be to the true label
+ assert(branchToTrueLabel[1]);
+ inst_JMP(jumpKind[1], compiler->compCurBB->bbJumpDest);
+ }
+
+ if (skipLabel != nullptr)
+ {
+ genDefineTempLabel(skipLabel);
+ }
+ }
+ }
+ break;
+
+ case GT_RETURNTRAP:
+ {
+ // this is nothing but a conditional call to CORINFO_HELP_STOP_FOR_GC
+ // based on the contents of 'data'
+
+ GenTree* data = treeNode->gtOp.gtOp1;
+ genConsumeRegs(data);
+ GenTreeIntCon cns = intForm(TYP_INT, 0);
+ emit->emitInsBinary(INS_cmp, emitTypeSize(TYP_INT), data, &cns);
+
+ BasicBlock* skipLabel = genCreateTempLabel();
+
+ emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED);
+ inst_JMP(jmpEqual, skipLabel);
+
+ // emit the call to the EE-helper that stops for GC (or other reasons)
+ assert(treeNode->gtRsvdRegs != RBM_NONE);
+ assert(genCountBits(treeNode->gtRsvdRegs) == 1);
+ regNumber tmpReg = genRegNumFromMask(treeNode->gtRsvdRegs);
+ assert(genIsValidIntReg(tmpReg));
+
+ genEmitHelperCall(CORINFO_HELP_STOP_FOR_GC, 0, EA_UNKNOWN, tmpReg);
+ genDefineTempLabel(skipLabel);
+ }
+ break;
+
+ case GT_STOREIND:
+ genStoreInd(treeNode);
+ break;
+
+ case GT_COPY:
+ // This is handled at the time we call genConsumeReg() on the GT_COPY
+ break;
+
+ case GT_SWAP:
+ {
+ // Swap is only supported for lclVar operands that are enregistered
+ // We do not consume or produce any registers. Both operands remain enregistered.
+ // However, the gc-ness may change.
+ assert(genIsRegCandidateLocal(treeNode->gtOp.gtOp1) && genIsRegCandidateLocal(treeNode->gtOp.gtOp2));
+
+ GenTreeLclVarCommon* lcl1 = treeNode->gtOp.gtOp1->AsLclVarCommon();
+ LclVarDsc* varDsc1 = &(compiler->lvaTable[lcl1->gtLclNum]);
+ var_types type1 = varDsc1->TypeGet();
+ GenTreeLclVarCommon* lcl2 = treeNode->gtOp.gtOp2->AsLclVarCommon();
+ LclVarDsc* varDsc2 = &(compiler->lvaTable[lcl2->gtLclNum]);
+ var_types type2 = varDsc2->TypeGet();
+
+ // We must have both int or both fp regs
+ assert(!varTypeIsFloating(type1) || varTypeIsFloating(type2));
+
+ // FP swap is not yet implemented (and should have NYI'd in LSRA)
+ assert(!varTypeIsFloating(type1));
+
+ regNumber oldOp1Reg = lcl1->gtRegNum;
+ regMaskTP oldOp1RegMask = genRegMask(oldOp1Reg);
+ regNumber oldOp2Reg = lcl2->gtRegNum;
+ regMaskTP oldOp2RegMask = genRegMask(oldOp2Reg);
+
+ // We don't call genUpdateVarReg because we don't have a tree node with the new register.
+ varDsc1->lvRegNum = oldOp2Reg;
+ varDsc2->lvRegNum = oldOp1Reg;
+
+ // Do the xchg
+ emitAttr size = EA_PTRSIZE;
+ if (varTypeGCtype(type1) != varTypeGCtype(type2))
+ {
+ // If the type specified to the emitter is a GC type, it will swap the GC-ness of the registers.
+ // Otherwise it will leave them alone, which is correct if they have the same GC-ness.
+ size = EA_GCREF;
+ }
+ inst_RV_RV(INS_xchg, oldOp1Reg, oldOp2Reg, TYP_I_IMPL, size);
+
+ // Update the gcInfo.
+            // Manually remove these regs from the gc sets (mostly to avoid confusing duplicative dump output)
+ gcInfo.gcRegByrefSetCur &= ~(oldOp1RegMask | oldOp2RegMask);
+ gcInfo.gcRegGCrefSetCur &= ~(oldOp1RegMask | oldOp2RegMask);
+
+ // gcMarkRegPtrVal will do the appropriate thing for non-gc types.
+ // It will also dump the updates.
+ gcInfo.gcMarkRegPtrVal(oldOp2Reg, type1);
+ gcInfo.gcMarkRegPtrVal(oldOp1Reg, type2);
+ }
+ break;
+
+ case GT_LIST:
+ case GT_ARGPLACE:
+ // Nothing to do
+ break;
+
+ case GT_PUTARG_STK:
+ genPutArgStk(treeNode);
+ break;
+
+ case GT_PUTARG_REG:
+ {
+#ifndef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ noway_assert(targetType != TYP_STRUCT);
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+ // commas show up here commonly, as part of a nullchk operation
+ GenTree* op1 = treeNode->gtOp.gtOp1;
+ // If child node is not already in the register we need, move it
+ genConsumeReg(op1);
+ if (treeNode->gtRegNum != op1->gtRegNum)
+ {
+ inst_RV_RV(ins_Copy(targetType), treeNode->gtRegNum, op1->gtRegNum, targetType);
+ }
+ genProduceReg(treeNode);
+ }
+ break;
+
+ case GT_CALL:
+ genCallInstruction(treeNode);
+ break;
+
+ case GT_JMP:
+ genJmpMethod(treeNode);
+ break;
+
+ case GT_LOCKADD:
+ case GT_XCHG:
+ case GT_XADD:
+ genLockedInstructions(treeNode);
+ break;
+
+ case GT_MEMORYBARRIER:
+ instGen_MemoryBarrier();
+ break;
+
+ case GT_CMPXCHG:
+ {
+ GenTreePtr location = treeNode->gtCmpXchg.gtOpLocation; // arg1
+ GenTreePtr value = treeNode->gtCmpXchg.gtOpValue; // arg2
+ GenTreePtr comparand = treeNode->gtCmpXchg.gtOpComparand; // arg3
+
+ assert(location->gtRegNum != REG_NA && location->gtRegNum != REG_RAX);
+ assert(value->gtRegNum != REG_NA && value->gtRegNum != REG_RAX);
+
+ genConsumeReg(location);
+ genConsumeReg(value);
+ genConsumeReg(comparand);
+ // comparand goes to RAX;
+ // Note that we must issue this move after the genConsumeRegs(), in case any of the above
+ // have a GT_COPY from RAX.
+ if (comparand->gtRegNum != REG_RAX)
+ {
+ inst_RV_RV(ins_Copy(comparand->TypeGet()), REG_RAX, comparand->gtRegNum, comparand->TypeGet());
+ }
+
+ // location is Rm
+ instGen(INS_lock);
+
+ emit->emitIns_AR_R(INS_cmpxchg, emitTypeSize(targetType), value->gtRegNum, location->gtRegNum, 0);
+
+ // Result is in RAX
+ if (targetReg != REG_RAX)
+ {
+ inst_RV_RV(ins_Copy(targetType), targetReg, REG_RAX, targetType);
+ }
+ }
+ genProduceReg(treeNode);
+ break;
+
+ case GT_RELOAD:
+ // do nothing - reload is just a marker.
+ // The parent node will call genConsumeReg on this which will trigger the unspill of this node's child
+ // into the register specified in this node.
+ break;
+
+ case GT_NOP:
+ break;
+
+ case GT_NO_OP:
+ if (treeNode->gtFlags & GTF_NO_OP_NO)
+ {
+ noway_assert(!"GTF_NO_OP_NO should not be set");
+ }
+ else
+ {
+ getEmitter()->emitIns_Nop(1);
+ }
+ break;
+
+ case GT_ARR_BOUNDS_CHECK:
+#ifdef FEATURE_SIMD
+ case GT_SIMD_CHK:
+#endif // FEATURE_SIMD
+ genRangeCheck(treeNode);
+ break;
+
+ case GT_PHYSREG:
+ if (treeNode->gtRegNum != treeNode->AsPhysReg()->gtSrcReg)
+ {
+ inst_RV_RV(INS_mov, treeNode->gtRegNum, treeNode->AsPhysReg()->gtSrcReg, targetType);
+
+ genTransferRegGCState(treeNode->gtRegNum, treeNode->AsPhysReg()->gtSrcReg);
+ }
+ genProduceReg(treeNode);
+ break;
+
+ case GT_PHYSREGDST:
+ break;
+
+ case GT_NULLCHECK:
+ {
+ assert(!treeNode->gtOp.gtOp1->isContained());
+ regNumber reg = genConsumeReg(treeNode->gtOp.gtOp1);
+ emit->emitIns_AR_R(INS_cmp, EA_4BYTE, reg, reg, 0);
+ }
+ break;
+
+ case GT_CATCH_ARG:
+
+ noway_assert(handlerGetsXcptnObj(compiler->compCurBB->bbCatchTyp));
+
+ /* Catch arguments get passed in a register. genCodeForBBlist()
+ would have marked it as holding a GC object, but not used. */
+
+ noway_assert(gcInfo.gcRegGCrefSetCur & RBM_EXCEPTION_OBJECT);
+ genConsumeReg(treeNode);
+ break;
+
+#if !FEATURE_EH_FUNCLETS
+ case GT_END_LFIN:
+
+ // Have to clear the ShadowSP of the nesting level which encloses the finally. Generates:
+ // mov dword ptr [ebp-0xC], 0 // for some slot of the ShadowSP local var
+
+ unsigned finallyNesting;
+ finallyNesting = treeNode->gtVal.gtVal1;
+ noway_assert(treeNode->gtVal.gtVal1 < compiler->compHndBBtabCount);
+ noway_assert(finallyNesting < compiler->compHndBBtabCount);
+
+ // The last slot is reserved for ICodeManager::FixContext(ppEndRegion)
+ unsigned filterEndOffsetSlotOffs;
+ PREFIX_ASSUME(compiler->lvaLclSize(compiler->lvaShadowSPslotsVar) >
+ TARGET_POINTER_SIZE); // below doesn't underflow.
+ filterEndOffsetSlotOffs =
+ (unsigned)(compiler->lvaLclSize(compiler->lvaShadowSPslotsVar) - TARGET_POINTER_SIZE);
+
+ unsigned curNestingSlotOffs;
+ curNestingSlotOffs = filterEndOffsetSlotOffs - ((finallyNesting + 1) * TARGET_POINTER_SIZE);
+ instGen_Store_Imm_Into_Lcl(TYP_I_IMPL, EA_PTRSIZE, 0, compiler->lvaShadowSPslotsVar, curNestingSlotOffs);
+ break;
+#endif // !FEATURE_EH_FUNCLETS
+
+ case GT_PINVOKE_PROLOG:
+ noway_assert(((gcInfo.gcRegGCrefSetCur | gcInfo.gcRegByrefSetCur) & ~fullIntArgRegMask()) == 0);
+
+ // the runtime side requires the codegen here to be consistent
+ emit->emitDisableRandomNops();
+ break;
+
+ case GT_LABEL:
+ genPendingCallLabel = genCreateTempLabel();
+ treeNode->gtLabel.gtLabBB = genPendingCallLabel;
+ emit->emitIns_R_L(INS_lea, EA_PTR_DSP_RELOC, genPendingCallLabel, treeNode->gtRegNum);
+ break;
+
+ case GT_STORE_OBJ:
+ if (treeNode->OperIsCopyBlkOp() && !treeNode->AsBlk()->gtBlkOpGcUnsafe)
+ {
+ assert(treeNode->AsObj()->gtGcPtrCount != 0);
+ genCodeForCpObj(treeNode->AsObj());
+ break;
+ }
+ __fallthrough;
+
+ case GT_STORE_DYN_BLK:
+ case GT_STORE_BLK:
+ genCodeForStoreBlk(treeNode->AsBlk());
+ break;
+
+ case GT_JMPTABLE:
+ genJumpTable(treeNode);
+ break;
+
+ case GT_SWITCH_TABLE:
+ genTableBasedSwitch(treeNode);
+ break;
+
+ case GT_ARR_INDEX:
+ genCodeForArrIndex(treeNode->AsArrIndex());
+ break;
+
+ case GT_ARR_OFFSET:
+ genCodeForArrOffset(treeNode->AsArrOffs());
+ break;
+
+ case GT_CLS_VAR_ADDR:
+ getEmitter()->emitIns_R_C(INS_lea, EA_PTRSIZE, targetReg, treeNode->gtClsVar.gtClsVarHnd, 0);
+ genProduceReg(treeNode);
+ break;
+
+#if !defined(_TARGET_64BIT_)
+ case GT_LONG:
+ assert(!treeNode->isContained());
+ genConsumeRegs(treeNode);
+ break;
+#endif
+
+ case GT_IL_OFFSET:
+ // Do nothing; these nodes are simply markers for debug info.
+ break;
+
+ default:
+ {
+#ifdef DEBUG
+ char message[256];
+ sprintf(message, "Unimplemented node type %s\n", GenTree::NodeName(treeNode->OperGet()));
+#endif
+ assert(!"Unknown node in codegen");
+ }
+ break;
+ }
+}
+
+//----------------------------------------------------------------------------------
+// genMultiRegCallStoreToLocal: store multi-reg return value of a call node to a local
+//
+// Arguments:
+// treeNode - Gentree of GT_STORE_LCL_VAR
+//
+// Return Value:
+// None
+//
+// Assumption:
+// The child of store is a multi-reg call node.
+// genProduceReg() on treeNode is made by caller of this routine.
+//
+void CodeGen::genMultiRegCallStoreToLocal(GenTreePtr treeNode)
+{
+ assert(treeNode->OperGet() == GT_STORE_LCL_VAR);
+
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ // Structs of size >=9 and <=16 are returned in two return registers on x64 Unix.
+ assert(varTypeIsStruct(treeNode));
+
+ // Assumption: current x64 Unix implementation requires that a multi-reg struct
+ // var in 'var = call' is flagged as lvIsMultiRegRet to prevent it from
+ // being struct promoted.
+ unsigned lclNum = treeNode->AsLclVarCommon()->gtLclNum;
+ LclVarDsc* varDsc = &(compiler->lvaTable[lclNum]);
+ noway_assert(varDsc->lvIsMultiRegRet);
+
+ GenTree* op1 = treeNode->gtGetOp1();
+ GenTree* actualOp1 = op1->gtSkipReloadOrCopy();
+ GenTreeCall* call = actualOp1->AsCall();
+ assert(call->HasMultiRegRetVal());
+
+ genConsumeRegs(op1);
+
+ ReturnTypeDesc* retTypeDesc = call->GetReturnTypeDesc();
+ assert(retTypeDesc->GetReturnRegCount() == MAX_RET_REG_COUNT);
+ unsigned regCount = retTypeDesc->GetReturnRegCount();
+
+ if (treeNode->gtRegNum != REG_NA)
+ {
+ // Right now the only enregistrable structs supported are SIMD types.
+ assert(varTypeIsSIMD(treeNode));
+ assert(varTypeIsFloating(retTypeDesc->GetReturnRegType(0)));
+ assert(varTypeIsFloating(retTypeDesc->GetReturnRegType(1)));
+
+        // This is the case where the two 8-byte halves that comprise the operand
+        // are in two different xmm registers and need to be assembled into a
+        // single xmm register.
+ regNumber targetReg = treeNode->gtRegNum;
+ regNumber reg0 = call->GetRegNumByIdx(0);
+ regNumber reg1 = call->GetRegNumByIdx(1);
+
+ if (op1->IsCopyOrReload())
+ {
+ // GT_COPY/GT_RELOAD will have valid reg for those positions
+ // that need to be copied or reloaded.
+ regNumber reloadReg = op1->AsCopyOrReload()->GetRegNumByIdx(0);
+ if (reloadReg != REG_NA)
+ {
+ reg0 = reloadReg;
+ }
+
+ reloadReg = op1->AsCopyOrReload()->GetRegNumByIdx(1);
+ if (reloadReg != REG_NA)
+ {
+ reg1 = reloadReg;
+ }
+ }
+
+ if (targetReg != reg0 && targetReg != reg1)
+ {
+            // Copy reg0 into targetReg and let it be handled by one
+            // of the cases below.
+ inst_RV_RV(ins_Copy(TYP_DOUBLE), targetReg, reg0, TYP_DOUBLE);
+ targetReg = reg0;
+ }
+
+ if (targetReg == reg0)
+ {
+            // targetReg[63:0] = targetReg[63:0]
+ // targetReg[127:64] = reg1[127:64]
+ inst_RV_RV_IV(INS_shufpd, EA_16BYTE, targetReg, reg1, 0x00);
+ }
+ else
+ {
+ assert(targetReg == reg1);
+
+ // We need two shuffles to achieve this
+ // First:
+            // targetReg[63:0] = targetReg[63:0]
+ // targetReg[127:64] = reg0[63:0]
+ //
+ // Second:
+            // targetReg[63:0] = targetReg[127:64]
+ // targetReg[127:64] = targetReg[63:0]
+ //
+            // Essentially, copy the low 8 bytes from reg0 to the high 8 bytes of targetReg,
+            // and then swap the low and high 8 bytes of targetReg to have them
+            // rearranged in the right order.
+ inst_RV_RV_IV(INS_shufpd, EA_16BYTE, targetReg, reg0, 0x00);
+ inst_RV_RV_IV(INS_shufpd, EA_16BYTE, targetReg, targetReg, 0x01);
+ }
+ }
+ else
+ {
+ // Stack store
+ int offset = 0;
+ for (unsigned i = 0; i < regCount; ++i)
+ {
+ var_types type = retTypeDesc->GetReturnRegType(i);
+ regNumber reg = call->GetRegNumByIdx(i);
+ if (op1->IsCopyOrReload())
+ {
+ // GT_COPY/GT_RELOAD will have valid reg for those positions
+ // that need to be copied or reloaded.
+ regNumber reloadReg = op1->AsCopyOrReload()->GetRegNumByIdx(i);
+ if (reloadReg != REG_NA)
+ {
+ reg = reloadReg;
+ }
+ }
+
+ assert(reg != REG_NA);
+ getEmitter()->emitIns_S_R(ins_Store(type), emitTypeSize(type), reg, lclNum, offset);
+ offset += genTypeSize(type);
+ }
+
+ varDsc->lvRegNum = REG_STK;
+ }
+#elif defined(_TARGET_X86_)
+ // Longs are returned in two return registers on x86.
+ assert(varTypeIsLong(treeNode));
+
+ // Assumption: current x86 implementation requires that a multi-reg long
+ // var in 'var = call' is flagged as lvIsMultiRegRet to prevent it from
+ // being promoted.
+ unsigned lclNum = treeNode->AsLclVarCommon()->gtLclNum;
+ LclVarDsc* varDsc = &(compiler->lvaTable[lclNum]);
+ noway_assert(varDsc->lvIsMultiRegRet);
+
+ GenTree* op1 = treeNode->gtGetOp1();
+ GenTree* actualOp1 = op1->gtSkipReloadOrCopy();
+ GenTreeCall* call = actualOp1->AsCall();
+ assert(call->HasMultiRegRetVal());
+
+ genConsumeRegs(op1);
+
+ ReturnTypeDesc* retTypeDesc = call->GetReturnTypeDesc();
+ unsigned regCount = retTypeDesc->GetReturnRegCount();
+ assert(regCount == MAX_RET_REG_COUNT);
+
+ // Stack store
+ int offset = 0;
+ for (unsigned i = 0; i < regCount; ++i)
+ {
+ var_types type = retTypeDesc->GetReturnRegType(i);
+ regNumber reg = call->GetRegNumByIdx(i);
+ if (op1->IsCopyOrReload())
+ {
+ // GT_COPY/GT_RELOAD will have valid reg for those positions
+ // that need to be copied or reloaded.
+ regNumber reloadReg = op1->AsCopyOrReload()->GetRegNumByIdx(i);
+ if (reloadReg != REG_NA)
+ {
+ reg = reloadReg;
+ }
+ }
+
+ assert(reg != REG_NA);
+ getEmitter()->emitIns_S_R(ins_Store(type), emitTypeSize(type), reg, lclNum, offset);
+ offset += genTypeSize(type);
+ }
+
+ varDsc->lvRegNum = REG_STK;
+#else // !FEATURE_UNIX_AMD64_STRUCT_PASSING && !_TARGET_X86_
+ assert(!"Unreached");
+#endif // !FEATURE_UNIX_AMD64_STRUCT_PASSING && !_TARGET_X86_
+}
+
+//------------------------------------------------------------------------
+// genLclHeap: Generate code for localloc.
+//
+// Arguments:
+// tree - the localloc tree to generate.
+//
+// Notes:
+// Note that for x86, we don't track ESP movements while generating the localloc code.
+// The ESP tracking is used to report stack pointer-relative GC info, which is not
+// interesting while doing the localloc construction. Also, for functions with localloc,
+// we have EBP frames, and EBP-relative locals, and ESP-relative accesses only for function
+// call arguments. We store the ESP after the localloc is complete in the LocAllocSP
+// variable. This variable is implicitly reported to the VM in the GC info (its position
+// is defined by convention relative to other items), and is used by the GC to find the
+// "base" stack pointer in functions with localloc.
+//
+void CodeGen::genLclHeap(GenTreePtr tree)
+{
+ assert(tree->OperGet() == GT_LCLHEAP);
+ assert(compiler->compLocallocUsed);
+
+ GenTreePtr size = tree->gtOp.gtOp1;
+ noway_assert((genActualType(size->gtType) == TYP_INT) || (genActualType(size->gtType) == TYP_I_IMPL));
+
+ regNumber targetReg = tree->gtRegNum;
+ regMaskTP tmpRegsMask = tree->gtRsvdRegs;
+ regNumber regCnt = REG_NA;
+ var_types type = genActualType(size->gtType);
+ emitAttr easz = emitTypeSize(type);
+ BasicBlock* endLabel = nullptr;
+
+#ifdef DEBUG
+ // Verify ESP
+ if (compiler->opts.compStackCheckOnRet)
+ {
+ noway_assert(compiler->lvaReturnEspCheck != 0xCCCCCCCC &&
+ compiler->lvaTable[compiler->lvaReturnEspCheck].lvDoNotEnregister &&
+ compiler->lvaTable[compiler->lvaReturnEspCheck].lvOnFrame);
+ getEmitter()->emitIns_S_R(INS_cmp, EA_PTRSIZE, REG_SPBASE, compiler->lvaReturnEspCheck, 0);
+
+ BasicBlock* esp_check = genCreateTempLabel();
+ emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED);
+ inst_JMP(jmpEqual, esp_check);
+ getEmitter()->emitIns(INS_BREAKPOINT);
+ genDefineTempLabel(esp_check);
+ }
+#endif
+
+ noway_assert(isFramePointerUsed()); // localloc requires Frame Pointer to be established since SP changes
+ noway_assert(genStackLevel == 0); // Can't have anything on the stack
+
+ unsigned stackAdjustment = 0;
+ BasicBlock* loop = nullptr;
+
+    // Compute the amount of memory to allocate, properly rounded up to STACK_ALIGN.
+ size_t amount = 0;
+ if (size->IsCnsIntOrI())
+ {
+ // If size is a constant, then it must be contained.
+ assert(size->isContained());
+
+ // If amount is zero then return null in targetReg
+ amount = size->gtIntCon.gtIconVal;
+ if (amount == 0)
+ {
+ instGen_Set_Reg_To_Zero(EA_PTRSIZE, targetReg);
+ goto BAILOUT;
+ }
+
+        // 'amount' is the total number of bytes to localloc, rounded up to a STACK_ALIGN boundary
+ amount = AlignUp(amount, STACK_ALIGN);
+ }
+ else
+ {
+ // The localloc requested memory size is non-constant.
+
+ // Put the size value in targetReg. If it is zero, bail out by returning null in targetReg.
+ genConsumeRegAndCopy(size, targetReg);
+ endLabel = genCreateTempLabel();
+ getEmitter()->emitIns_R_R(INS_test, easz, targetReg, targetReg);
+ inst_JMP(EJ_je, endLabel);
+
+ // Compute the size of the block to allocate and perform alignment.
+ // If compInitMem=true, we can reuse targetReg as regcnt,
+ // since we don't need any internal registers.
+ if (compiler->info.compInitMem)
+ {
+ assert(genCountBits(tmpRegsMask) == 0);
+ regCnt = targetReg;
+ }
+ else
+ {
+ assert(genCountBits(tmpRegsMask) >= 1);
+ regMaskTP regCntMask = genFindLowestBit(tmpRegsMask);
+ tmpRegsMask &= ~regCntMask;
+ regCnt = genRegNumFromMask(regCntMask);
+ if (regCnt != targetReg)
+ {
+ // Above, we put the size in targetReg. Now, copy it to our new temp register if necessary.
+ inst_RV_RV(INS_mov, regCnt, targetReg, size->TypeGet());
+ }
+ }
+
+ // Round up the number of bytes to allocate to a STACK_ALIGN boundary. This is done
+ // by code like:
+ // add reg, 15
+ // and reg, -16
+ // However, in the initialized memory case, we need the count of STACK_ALIGN-sized
+ // elements, not a byte count, after the alignment. So instead of the "and", which
+ // becomes unnecessary, generate a shift, e.g.:
+ // add reg, 15
+ // shr reg, 4
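+        // For example, on AMD64 (STACK_ALIGN == 16, compInitMem == true) a request of 20 bytes
+        // becomes (20 + 15) >> 4 == 2 loop iterations, i.e. 32 bytes are allocated and zeroed.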
+
+ inst_RV_IV(INS_add, regCnt, STACK_ALIGN - 1, emitActualTypeSize(type));
+
+ if (compiler->info.compInitMem)
+ {
+ // Convert the count from a count of bytes to a loop count. We will loop once per
+ // stack alignment size, so each loop will zero 4 bytes on x86 and 16 bytes on x64.
+ // Note that we zero a single reg-size word per iteration on x86, and 2 reg-size
+ // words per iteration on x64. We will shift off all the stack alignment bits
+ // added above, so there is no need for an 'and' instruction.
+
+ // --- shr regCnt, 2 (or 4) ---
+ inst_RV_SH(INS_SHIFT_RIGHT_LOGICAL, EA_PTRSIZE, regCnt, STACK_ALIGN_SHIFT_ALL);
+ }
+ else
+ {
+ // Otherwise, mask off the low bits to align the byte count.
+ inst_RV_IV(INS_AND, regCnt, ~(STACK_ALIGN - 1), emitActualTypeSize(type));
+ }
+ }
+
+#if FEATURE_FIXED_OUT_ARGS
+ // If we have an outgoing arg area then we must adjust the SP by popping off the
+ // outgoing arg area. We will restore it right before we return from this method.
+ //
+ // Localloc returns stack space that aligned to STACK_ALIGN bytes. The following
+ // are the cases that need to be handled:
+ // i) Method has out-going arg area.
+ // It is guaranteed that size of out-going arg area is STACK_ALIGN'ed (see fgMorphArgs).
+ // Therefore, we will pop off the out-going arg area from RSP before allocating the localloc space.
+ // ii) Method has no out-going arg area.
+ // Nothing to pop off from the stack.
+ if (compiler->lvaOutgoingArgSpaceSize > 0)
+ {
+ assert((compiler->lvaOutgoingArgSpaceSize % STACK_ALIGN) == 0); // This must be true for the stack to remain
+ // aligned
+ inst_RV_IV(INS_add, REG_SPBASE, compiler->lvaOutgoingArgSpaceSize, EA_PTRSIZE);
+ stackAdjustment += compiler->lvaOutgoingArgSpaceSize;
+ }
+#endif
+
+ if (size->IsCnsIntOrI())
+ {
+ // We should reach here only for non-zero, constant size allocations.
+ assert(amount > 0);
+ assert((amount % STACK_ALIGN) == 0);
+ assert((amount % REGSIZE_BYTES) == 0);
+
+ // For small allocations we will generate up to six push 0 inline
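+        // For example, on AMD64 an aligned request of 32 bytes emits four "push 0"
+        // (push_hide) instructions; anything needing more than six pushes falls
+        // through to the paths below.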
+ size_t cntRegSizedWords = amount / REGSIZE_BYTES;
+ if (cntRegSizedWords <= 6)
+ {
+ for (; cntRegSizedWords != 0; cntRegSizedWords--)
+ {
+ inst_IV(INS_push_hide, 0); // push_hide means don't track the stack
+ }
+ goto ALLOC_DONE;
+ }
+
+ bool doNoInitLessThanOnePageAlloc =
+ !compiler->info.compInitMem && (amount < compiler->eeGetPageSize()); // must be < not <=
+
+#ifdef _TARGET_X86_
+ bool needRegCntRegister = true;
+#else // !_TARGET_X86_
+ bool needRegCntRegister = !doNoInitLessThanOnePageAlloc;
+#endif // !_TARGET_X86_
+
+ if (needRegCntRegister)
+ {
+ // If compInitMem=true, we can reuse targetReg as regcnt.
+ // Since size is a constant, regCnt is not yet initialized.
+ assert(regCnt == REG_NA);
+ if (compiler->info.compInitMem)
+ {
+ assert(genCountBits(tmpRegsMask) == 0);
+ regCnt = targetReg;
+ }
+ else
+ {
+ assert(genCountBits(tmpRegsMask) >= 1);
+ regMaskTP regCntMask = genFindLowestBit(tmpRegsMask);
+ tmpRegsMask &= ~regCntMask;
+ regCnt = genRegNumFromMask(regCntMask);
+ }
+ }
+
+ if (doNoInitLessThanOnePageAlloc)
+ {
+ // Since the size is less than a page, simply adjust ESP.
+ // ESP might already be in the guard page, so we must touch it BEFORE
+ // the alloc, not after.
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef _TARGET_X86_
+ // For x86, we don't want to use "sub ESP" because we don't want the emitter to track the adjustment
+ // to ESP. So do the work in the count register.
+ // TODO-CQ: manipulate ESP directly, to share code, reduce #ifdefs, and improve CQ. This would require
+ // creating a way to temporarily turn off the emitter's tracking of ESP, maybe marking instrDescs as "don't
+ // track".
+ inst_RV_RV(INS_mov, regCnt, REG_SPBASE, TYP_I_IMPL);
+ getEmitter()->emitIns_AR_R(INS_TEST, EA_4BYTE, REG_SPBASE, REG_SPBASE, 0);
+ inst_RV_IV(INS_sub, regCnt, amount, EA_PTRSIZE);
+ inst_RV_RV(INS_mov, REG_SPBASE, regCnt, TYP_I_IMPL);
+#else // !_TARGET_X86_
+ getEmitter()->emitIns_AR_R(INS_TEST, EA_4BYTE, REG_SPBASE, REG_SPBASE, 0);
+ inst_RV_IV(INS_sub, REG_SPBASE, amount, EA_PTRSIZE);
+#endif // !_TARGET_X86_
+
+ goto ALLOC_DONE;
+ }
+
+ // else, "mov regCnt, amount"
+
+ if (compiler->info.compInitMem)
+ {
+ // When initializing memory, we want 'amount' to be the loop count.
+ assert((amount % STACK_ALIGN) == 0);
+ amount /= STACK_ALIGN;
+ }
+
+ genSetRegToIcon(regCnt, amount, ((int)amount == amount) ? TYP_INT : TYP_LONG);
+ }
+
+ loop = genCreateTempLabel();
+ if (compiler->info.compInitMem)
+ {
+ // At this point 'regCnt' is set to the number of loop iterations for this loop, if each
+ // iteration zeros (and subtracts from the stack pointer) STACK_ALIGN bytes.
+ // Since we have to zero out the allocated memory AND ensure that RSP is always valid
+ // by tickling the pages, we will just push 0's on the stack.
+
+ assert(genIsValidIntReg(regCnt));
+
+ // Loop:
+ genDefineTempLabel(loop);
+
+#if defined(_TARGET_AMD64_)
+ // Push two 8-byte zeros. This matches the 16-byte STACK_ALIGN value.
+ static_assert_no_msg(STACK_ALIGN == (REGSIZE_BYTES * 2));
+ inst_IV(INS_push_hide, 0); // --- push 8-byte 0
+ inst_IV(INS_push_hide, 0); // --- push 8-byte 0
+#elif defined(_TARGET_X86_)
+ // Push a single 4-byte zero. This matches the 4-byte STACK_ALIGN value.
+ static_assert_no_msg(STACK_ALIGN == REGSIZE_BYTES);
+ inst_IV(INS_push_hide, 0); // --- push 4-byte 0
+#endif // _TARGET_X86_
+
+ // Decrement the loop counter and loop if not done.
+ inst_RV(INS_dec, regCnt, TYP_I_IMPL);
+ inst_JMP(EJ_jne, loop);
+ }
+ else
+ {
+ // At this point 'regCnt' is set to the total number of bytes to localloc.
+ //
+ // We don't need to zero out the allocated memory. However, we do have
+ // to tickle the pages to ensure that ESP is always valid and is
+ // in sync with the "stack guard page". Note that in the worst
+ // case ESP is on the last byte of the guard page. Thus you must
+        // touch ESP+0 first, not ESP+0x1000.
+ //
+ // Another subtlety is that you don't want ESP to be exactly on the
+ // boundary of the guard page because PUSH is predecrement, thus
+ // call setup would not touch the guard page but just beyond it
+ //
+ // Note that we go through a few hoops so that ESP never points to
+ // illegal pages at any time during the tickling process
+ //
+ // neg REGCNT
+ // add REGCNT, ESP // reg now holds ultimate ESP
+        //      jb loop                  // result is smaller than original ESP (no wrap around)
+        //      xor REGCNT, REGCNT       // Overflow, pick lowest possible number
+ // loop:
+ // test ESP, [ESP+0] // tickle the page
+ // mov REGTMP, ESP
+ // sub REGTMP, PAGE_SIZE
+ // mov ESP, REGTMP
+ // cmp ESP, REGCNT
+ // jae loop
+ //
+ // mov ESP, REG
+ // end:
+ inst_RV(INS_NEG, regCnt, TYP_I_IMPL);
+ inst_RV_RV(INS_add, regCnt, REG_SPBASE, TYP_I_IMPL);
+ inst_JMP(EJ_jb, loop);
+
+ instGen_Set_Reg_To_Zero(EA_PTRSIZE, regCnt);
+
+ genDefineTempLabel(loop);
+
+ // Tickle the decremented value, and move back to ESP,
+ // note that it has to be done BEFORE the update of ESP since
+ // ESP might already be on the guard page. It is OK to leave
+ // the final value of ESP on the guard page
+ getEmitter()->emitIns_AR_R(INS_TEST, EA_4BYTE, REG_SPBASE, REG_SPBASE, 0);
+
+ // This is a harmless trick to avoid the emitter trying to track the
+ // decrement of the ESP - we do the subtraction in another reg instead
+ // of adjusting ESP directly.
+ assert(tmpRegsMask != RBM_NONE);
+ assert(genCountBits(tmpRegsMask) == 1);
+ regNumber regTmp = genRegNumFromMask(tmpRegsMask);
+
+ inst_RV_RV(INS_mov, regTmp, REG_SPBASE, TYP_I_IMPL);
+ inst_RV_IV(INS_sub, regTmp, compiler->eeGetPageSize(), EA_PTRSIZE);
+ inst_RV_RV(INS_mov, REG_SPBASE, regTmp, TYP_I_IMPL);
+
+ inst_RV_RV(INS_cmp, REG_SPBASE, regCnt, TYP_I_IMPL);
+ inst_JMP(EJ_jae, loop);
+
+ // Move the final value to ESP
+ inst_RV_RV(INS_mov, REG_SPBASE, regCnt);
+ }
+
+ALLOC_DONE:
+ // Re-adjust SP to allocate out-going arg area
+ if (stackAdjustment > 0)
+ {
+ assert((stackAdjustment % STACK_ALIGN) == 0); // This must be true for the stack to remain aligned
+ inst_RV_IV(INS_sub, REG_SPBASE, stackAdjustment, EA_PTRSIZE);
+ }
+
+ // Return the stackalloc'ed address in result register.
+ // TargetReg = RSP + stackAdjustment.
+ getEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, targetReg, REG_SPBASE, stackAdjustment);
+
+ if (endLabel != nullptr)
+ {
+ genDefineTempLabel(endLabel);
+ }
+
+BAILOUT:
+
+ // Write the lvaLocAllocSPvar stack frame slot
+ noway_assert(compiler->lvaLocAllocSPvar != BAD_VAR_NUM);
+ getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_SPBASE, compiler->lvaLocAllocSPvar, 0);
+
+#if STACK_PROBES
+ if (compiler->opts.compNeedStackProbes)
+ {
+ genGenerateStackProbe();
+ }
+#endif
+
+#ifdef DEBUG
+ // Update new ESP
+ if (compiler->opts.compStackCheckOnRet)
+ {
+ noway_assert(compiler->lvaReturnEspCheck != 0xCCCCCCCC &&
+ compiler->lvaTable[compiler->lvaReturnEspCheck].lvDoNotEnregister &&
+ compiler->lvaTable[compiler->lvaReturnEspCheck].lvOnFrame);
+ getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_SPBASE, compiler->lvaReturnEspCheck, 0);
+ }
+#endif
+
+ genProduceReg(tree);
+}
+
+void CodeGen::genCodeForStoreBlk(GenTreeBlk* storeBlkNode)
+{
+ if (storeBlkNode->gtBlkOpGcUnsafe)
+ {
+ getEmitter()->emitDisableGC();
+ }
+ bool isCopyBlk = storeBlkNode->OperIsCopyBlkOp();
+
+ switch (storeBlkNode->gtBlkOpKind)
+ {
+#ifdef _TARGET_AMD64_
+ case GenTreeBlk::BlkOpKindHelper:
+ if (isCopyBlk)
+ {
+ genCodeForCpBlk(storeBlkNode);
+ }
+ else
+ {
+ genCodeForInitBlk(storeBlkNode);
+ }
+ break;
+#endif // _TARGET_AMD64_
+ case GenTreeBlk::BlkOpKindRepInstr:
+ if (isCopyBlk)
+ {
+ genCodeForCpBlkRepMovs(storeBlkNode);
+ }
+ else
+ {
+ genCodeForInitBlkRepStos(storeBlkNode);
+ }
+ break;
+ case GenTreeBlk::BlkOpKindUnroll:
+ if (isCopyBlk)
+ {
+ genCodeForCpBlkUnroll(storeBlkNode);
+ }
+ else
+ {
+ genCodeForInitBlkUnroll(storeBlkNode);
+ }
+ break;
+ default:
+ unreached();
+ }
+ if (storeBlkNode->gtBlkOpGcUnsafe)
+ {
+ getEmitter()->emitEnableGC();
+ }
+}
+
+// Generate code for InitBlk using rep stos.
+// Preconditions:
+// The size of the buffers must be a constant and also less than INITBLK_STOS_LIMIT bytes.
+//    For any size larger than that, we'll use the helper even if both the
+//    fill byte and the size are integer constants.
+void CodeGen::genCodeForInitBlkRepStos(GenTreeBlk* initBlkNode)
+{
+ // Make sure we got the arguments of the initblk/initobj operation in the right registers
+ unsigned size = initBlkNode->Size();
+ GenTreePtr dstAddr = initBlkNode->Addr();
+ GenTreePtr initVal = initBlkNode->Data();
+
+#ifdef DEBUG
+ assert(!dstAddr->isContained());
+ assert(!initVal->isContained());
+#ifdef _TARGET_AMD64_
+ assert(size != 0);
+#endif
+ if (initVal->IsCnsIntOrI())
+ {
+#ifdef _TARGET_AMD64_
+ assert(size > CPBLK_UNROLL_LIMIT && size < CPBLK_MOVS_LIMIT);
+#else
+ assert(size > CPBLK_UNROLL_LIMIT);
+#endif
+ }
+
+#endif // DEBUG
+
+ genConsumeBlockOp(initBlkNode, REG_RDI, REG_RAX, REG_RCX);
+ instGen(INS_r_stosb);
+}
+
+// Generate code for InitBlk by performing a loop unroll
+// Preconditions:
+// a) Both the size and fill byte value are integer constants.
+// b) The size of the struct to initialize is smaller than INITBLK_UNROLL_LIMIT bytes.
+//
+void CodeGen::genCodeForInitBlkUnroll(GenTreeBlk* initBlkNode)
+{
+ // Make sure we got the arguments of the initblk/initobj operation in the right registers
+ unsigned size = initBlkNode->Size();
+ GenTreePtr dstAddr = initBlkNode->Addr();
+ GenTreePtr initVal = initBlkNode->Data();
+
+ assert(!dstAddr->isContained());
+ assert(!initVal->isContained());
+ assert(size != 0);
+ assert(size <= INITBLK_UNROLL_LIMIT);
+ assert(initVal->gtSkipReloadOrCopy()->IsCnsIntOrI());
+
+ emitter* emit = getEmitter();
+
+ genConsumeOperands(initBlkNode);
+
+ // If the initVal was moved, or spilled and reloaded to a different register,
+ // get the original initVal from below the GT_RELOAD, but only after capturing the valReg,
+ // which needs to be the new register.
+ regNumber valReg = initVal->gtRegNum;
+ initVal = initVal->gtSkipReloadOrCopy();
+
+ unsigned offset = 0;
+
+ // Perform an unroll using SSE2 loads and stores.
+ if (size >= XMM_REGSIZE_BYTES)
+ {
+ regNumber tmpReg = genRegNumFromMask(initBlkNode->gtRsvdRegs);
+
+#ifdef DEBUG
+ assert(initBlkNode->gtRsvdRegs != RBM_NONE);
+ assert(genCountBits(initBlkNode->gtRsvdRegs) == 1);
+ assert(genIsValidFloatReg(tmpReg));
+#endif // DEBUG
+
+ if (initVal->gtIntCon.gtIconVal != 0)
+ {
+ emit->emitIns_R_R(INS_mov_i2xmm, EA_PTRSIZE, tmpReg, valReg);
+ emit->emitIns_R_R(INS_punpckldq, EA_8BYTE, tmpReg, tmpReg);
+#ifdef _TARGET_X86_
+ // For x86, we need one more to convert it from 8 bytes to 16 bytes.
+ emit->emitIns_R_R(INS_punpckldq, EA_8BYTE, tmpReg, tmpReg);
+#endif // _TARGET_X86_
+ }
+ else
+ {
+ emit->emitIns_R_R(INS_xorpd, EA_8BYTE, tmpReg, tmpReg);
+ }
+
+ // Determine how many 16 byte slots we're going to fill using SSE movs.
+ size_t slots = size / XMM_REGSIZE_BYTES;
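+        // For example, size == 40 gives two 16-byte movdqu stores (32 bytes); the
+        // remaining 8 bytes are written by the tail code below.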
+
+ while (slots-- > 0)
+ {
+ emit->emitIns_AR_R(INS_movdqu, EA_8BYTE, tmpReg, dstAddr->gtRegNum, offset);
+ offset += XMM_REGSIZE_BYTES;
+ }
+ }
+
+ // Fill the remainder (or a < 16 byte sized struct)
+ if ((size & 8) != 0)
+ {
+#ifdef _TARGET_X86_
+ // TODO-X86-CQ: [1091735] Revisit block ops codegen. One example: use movq for 8 byte movs.
+ emit->emitIns_AR_R(INS_mov, EA_4BYTE, valReg, dstAddr->gtRegNum, offset);
+ offset += 4;
+ emit->emitIns_AR_R(INS_mov, EA_4BYTE, valReg, dstAddr->gtRegNum, offset);
+ offset += 4;
+#else // !_TARGET_X86_
+ emit->emitIns_AR_R(INS_mov, EA_8BYTE, valReg, dstAddr->gtRegNum, offset);
+ offset += 8;
+#endif // !_TARGET_X86_
+ }
+ if ((size & 4) != 0)
+ {
+ emit->emitIns_AR_R(INS_mov, EA_4BYTE, valReg, dstAddr->gtRegNum, offset);
+ offset += 4;
+ }
+ if ((size & 2) != 0)
+ {
+ emit->emitIns_AR_R(INS_mov, EA_2BYTE, valReg, dstAddr->gtRegNum, offset);
+ offset += 2;
+ }
+ if ((size & 1) != 0)
+ {
+ emit->emitIns_AR_R(INS_mov, EA_1BYTE, valReg, dstAddr->gtRegNum, offset);
+ }
+}
+
+// Generates code for InitBlk by calling the VM memset helper function.
+// Preconditions:
+// a) The size argument of the InitBlk is not an integer constant.
+// b) The size argument of the InitBlk is >= INITBLK_STOS_LIMIT bytes.
+void CodeGen::genCodeForInitBlk(GenTreeBlk* initBlkNode)
+{
+#ifdef _TARGET_AMD64_
+ // Make sure we got the arguments of the initblk operation in the right registers
+ unsigned blockSize = initBlkNode->Size();
+ GenTreePtr dstAddr = initBlkNode->Addr();
+ GenTreePtr initVal = initBlkNode->Data();
+
+ assert(!dstAddr->isContained());
+ assert(!initVal->isContained());
+
+ if (blockSize != 0)
+ {
+ assert(blockSize >= CPBLK_MOVS_LIMIT);
+ }
+
+ genConsumeBlockOp(initBlkNode, REG_ARG_0, REG_ARG_1, REG_ARG_2);
+
+ genEmitHelperCall(CORINFO_HELP_MEMSET, 0, EA_UNKNOWN);
+#else // !_TARGET_AMD64_
+ NYI_X86("Helper call for InitBlk");
+#endif // !_TARGET_AMD64_
+}
+
+// Generate code for a load from some address + offset
+// baseNode: tree node which can be either a local address or arbitrary node
+// offset: distance from the baseNode from which to load
+void CodeGen::genCodeForLoadOffset(instruction ins, emitAttr size, regNumber dst, GenTree* baseNode, unsigned offset)
+{
+ emitter* emit = getEmitter();
+
+ if (baseNode->OperIsLocalAddr())
+ {
+ if (baseNode->gtOper == GT_LCL_FLD_ADDR)
+ {
+ offset += baseNode->gtLclFld.gtLclOffs;
+ }
+ emit->emitIns_R_S(ins, size, dst, baseNode->gtLclVarCommon.gtLclNum, offset);
+ }
+ else
+ {
+ emit->emitIns_R_AR(ins, size, dst, baseNode->gtRegNum, offset);
+ }
+}
+
+//------------------------------------------------------------------------
+// genCodeForStoreOffset: Generate code to store a reg to [base + offset].
+//
+// Arguments:
+// ins - the instruction to generate.
+// size - the size that needs to be stored.
+// src - the register which needs to be stored.
+// baseNode - the base, relative to which to store the src register.
+// offset - the offset that is added to the baseNode to calculate the address to store into.
+//
+void CodeGen::genCodeForStoreOffset(instruction ins, emitAttr size, regNumber src, GenTree* baseNode, unsigned offset)
+{
+ emitter* emit = getEmitter();
+
+ if (baseNode->OperIsLocalAddr())
+ {
+ if (baseNode->gtOper == GT_LCL_FLD_ADDR)
+ {
+ offset += baseNode->gtLclFld.gtLclOffs;
+ }
+
+ emit->emitIns_S_R(ins, size, src, baseNode->AsLclVarCommon()->GetLclNum(), offset);
+ }
+ else
+ {
+ emit->emitIns_AR_R(ins, size, src, baseNode->gtRegNum, offset);
+ }
+}
+
+// Generates CpBlk code by performing a loop unroll
+// Preconditions:
+// The size argument of the CpBlk node is a constant and <= 64 bytes.
+// This may seem small but covers >95% of the cases in several framework assemblies.
+//
+void CodeGen::genCodeForCpBlkUnroll(GenTreeBlk* cpBlkNode)
+{
+ // Make sure we got the arguments of the cpblk operation in the right registers
+ unsigned size = cpBlkNode->Size();
+ GenTreePtr dstAddr = cpBlkNode->Addr();
+ GenTreePtr source = cpBlkNode->Data();
+ GenTreePtr srcAddr = nullptr;
+ assert(size <= CPBLK_UNROLL_LIMIT);
+
+ emitter* emit = getEmitter();
+
+ if (source->gtOper == GT_IND)
+ {
+ srcAddr = source->gtGetOp1();
+ if (!srcAddr->isContained())
+ {
+ genConsumeReg(srcAddr);
+ }
+ }
+ else
+ {
+ noway_assert(source->IsLocal());
+ // TODO-Cleanup: Consider making the addrForm() method in Rationalize public, e.g. in GenTree.
+ // OR: transform source to GT_IND(GT_LCL_VAR_ADDR)
+ if (source->OperGet() == GT_LCL_VAR)
+ {
+ source->SetOper(GT_LCL_VAR_ADDR);
+ }
+ else
+ {
+ assert(source->OperGet() == GT_LCL_FLD);
+ source->SetOper(GT_LCL_FLD_ADDR);
+ }
+ srcAddr = source;
+ }
+
+ if (!dstAddr->isContained())
+ {
+ genConsumeReg(dstAddr);
+ }
+
+ unsigned offset = 0;
+
+    // If the size of this struct is 16 bytes or larger, use SSE2
+    // to do 16-byte-at-a-time loads and stores.
+
+ if (size >= XMM_REGSIZE_BYTES)
+ {
+ assert(cpBlkNode->gtRsvdRegs != RBM_NONE);
+ regNumber xmmReg = genRegNumFromMask(cpBlkNode->gtRsvdRegs & RBM_ALLFLOAT);
+ assert(genIsValidFloatReg(xmmReg));
+ size_t slots = size / XMM_REGSIZE_BYTES;
+
+ // TODO: In the below code the load and store instructions are for 16 bytes, but the
+ // type is EA_8BYTE. The movdqa/u are 16 byte instructions, so it works, but
+ // this probably needs to be changed.
+ while (slots-- > 0)
+ {
+ // Load
+ genCodeForLoadOffset(INS_movdqu, EA_8BYTE, xmmReg, srcAddr, offset);
+ // Store
+ genCodeForStoreOffset(INS_movdqu, EA_8BYTE, xmmReg, dstAddr, offset);
+ offset += XMM_REGSIZE_BYTES;
+ }
+ }
+
+ // Fill the remainder (15 bytes or less) if there's one.
+ if ((size & 0xf) != 0)
+ {
+ // Grab the integer temp register to emit the remaining loads and stores.
+ regNumber tmpReg = genRegNumFromMask(cpBlkNode->gtRsvdRegs & RBM_ALLINT);
+
+ if ((size & 8) != 0)
+ {
+#ifdef _TARGET_X86_
+ // TODO-X86-CQ: [1091735] Revisit block ops codegen. One example: use movq for 8 byte movs.
+ for (unsigned savedOffs = offset; offset < savedOffs + 8; offset += 4)
+ {
+ genCodeForLoadOffset(INS_mov, EA_4BYTE, tmpReg, srcAddr, offset);
+ genCodeForStoreOffset(INS_mov, EA_4BYTE, tmpReg, dstAddr, offset);
+ }
+#else // !_TARGET_X86_
+ genCodeForLoadOffset(INS_mov, EA_8BYTE, tmpReg, srcAddr, offset);
+ genCodeForStoreOffset(INS_mov, EA_8BYTE, tmpReg, dstAddr, offset);
+ offset += 8;
+#endif // !_TARGET_X86_
+ }
+ if ((size & 4) != 0)
+ {
+ genCodeForLoadOffset(INS_mov, EA_4BYTE, tmpReg, srcAddr, offset);
+ genCodeForStoreOffset(INS_mov, EA_4BYTE, tmpReg, dstAddr, offset);
+ offset += 4;
+ }
+ if ((size & 2) != 0)
+ {
+ genCodeForLoadOffset(INS_mov, EA_2BYTE, tmpReg, srcAddr, offset);
+ genCodeForStoreOffset(INS_mov, EA_2BYTE, tmpReg, dstAddr, offset);
+ offset += 2;
+ }
+ if ((size & 1) != 0)
+ {
+ genCodeForLoadOffset(INS_mov, EA_1BYTE, tmpReg, srcAddr, offset);
+ genCodeForStoreOffset(INS_mov, EA_1BYTE, tmpReg, dstAddr, offset);
+ }
+ }
+}
+
+// Generate code for CpBlk by using rep movs
+// Preconditions:
+// The size argument of the CpBlk is a constant and is between
+// CPBLK_UNROLL_LIMIT and CPBLK_MOVS_LIMIT bytes.
+void CodeGen::genCodeForCpBlkRepMovs(GenTreeBlk* cpBlkNode)
+{
+ // Make sure we got the arguments of the cpblk operation in the right registers
+ unsigned size = cpBlkNode->Size();
+ GenTreePtr dstAddr = cpBlkNode->Addr();
+ GenTreePtr source = cpBlkNode->Data();
+ GenTreePtr srcAddr = nullptr;
+
+#ifdef DEBUG
+ assert(!dstAddr->isContained());
+ assert(source->isContained());
+
+#ifdef _TARGET_X86_
+ if (size == 0)
+ {
+ noway_assert(cpBlkNode->OperGet() == GT_STORE_DYN_BLK);
+ }
+ else
+#endif
+ {
+#ifdef _TARGET_AMD64_
+ assert(size > CPBLK_UNROLL_LIMIT && size < CPBLK_MOVS_LIMIT);
+#else
+ assert(size > CPBLK_UNROLL_LIMIT);
+#endif
+ }
+#endif // DEBUG
+
+ genConsumeBlockOp(cpBlkNode, REG_RDI, REG_RSI, REG_RCX);
+ instGen(INS_r_movsb);
+}
+
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+
+//---------------------------------------------------------------------------------------------------------------//
+// genStructPutArgUnroll: Generates code for passing a struct arg on stack by value using loop unrolling.
+//
+// Arguments:
+// putArgNode - the PutArgStk tree.
+// baseVarNum - the base var number, relative to which the by-val struct will be copied on the stack.
+//
+// TODO-Amd64-Unix: Try to share code with copyblk.
+// Need refactoring of copyblk before it could be used for putarg_stk.
+//    The difference for now is that a putarg_stk contains its children, while cpblk does not.
+// This creates differences in code. After some significant refactoring it could be reused.
+//
+void CodeGen::genStructPutArgUnroll(GenTreePutArgStk* putArgNode, unsigned baseVarNum)
+{
+ // We will never call this method for SIMD types, which are stored directly
+ // in genPutStructArgStk().
+ noway_assert(putArgNode->TypeGet() == TYP_STRUCT);
+
+ // Make sure we got the arguments of the cpblk operation in the right registers
+ GenTreePtr dstAddr = putArgNode;
+ GenTreePtr src = putArgNode->gtOp.gtOp1;
+
+ size_t size = putArgNode->getArgSize();
+ assert(size <= CPBLK_UNROLL_LIMIT);
+
+ emitter* emit = getEmitter();
+ unsigned putArgOffset = putArgNode->getArgOffset();
+
+ assert(src->isContained());
+
+ assert(src->gtOper == GT_OBJ);
+
+ if (!src->gtOp.gtOp1->isContained())
+ {
+ genConsumeReg(src->gtOp.gtOp1);
+ }
+
+ unsigned offset = 0;
+
+    // If the size of this struct is 16 bytes or larger, use SSE2
+    // to do 16-byte-at-a-time loads and stores.
+ if (size >= XMM_REGSIZE_BYTES)
+ {
+ assert(putArgNode->gtRsvdRegs != RBM_NONE);
+ regNumber xmmReg = genRegNumFromMask(putArgNode->gtRsvdRegs & RBM_ALLFLOAT);
+ assert(genIsValidFloatReg(xmmReg));
+ size_t slots = size / XMM_REGSIZE_BYTES;
+
+ assert(putArgNode->gtGetOp1()->isContained());
+ assert(putArgNode->gtGetOp1()->gtOp.gtOper == GT_OBJ);
+
+ // TODO: In the below code the load and store instructions are for 16 bytes, but the
+ // type is EA_8BYTE. The movdqa/u are 16 byte instructions, so it works, but
+ // this probably needs to be changed.
+ while (slots-- > 0)
+ {
+ // Load
+ genCodeForLoadOffset(INS_movdqu, EA_8BYTE, xmmReg, src->gtGetOp1(),
+ offset); // Load the address of the child of the Obj node.
+
+ // Store
+ emit->emitIns_S_R(INS_movdqu, EA_8BYTE, xmmReg, baseVarNum, putArgOffset + offset);
+
+ offset += XMM_REGSIZE_BYTES;
+ }
+ }
+
+ // Fill the remainder (15 bytes or less) if there's one.
+ if ((size & 0xf) != 0)
+ {
+ // Grab the integer temp register to emit the remaining loads and stores.
+ regNumber tmpReg = genRegNumFromMask(putArgNode->gtRsvdRegs & RBM_ALLINT);
+ assert(genIsValidIntReg(tmpReg));
+
+ if ((size & 8) != 0)
+ {
+ genCodeForLoadOffset(INS_mov, EA_8BYTE, tmpReg, src->gtOp.gtOp1, offset);
+
+ emit->emitIns_S_R(INS_mov, EA_8BYTE, tmpReg, baseVarNum, putArgOffset + offset);
+
+ offset += 8;
+ }
+
+ if ((size & 4) != 0)
+ {
+ genCodeForLoadOffset(INS_mov, EA_4BYTE, tmpReg, src->gtOp.gtOp1, offset);
+
+ emit->emitIns_S_R(INS_mov, EA_4BYTE, tmpReg, baseVarNum, putArgOffset + offset);
+
+ offset += 4;
+ }
+
+ if ((size & 2) != 0)
+ {
+ genCodeForLoadOffset(INS_mov, EA_2BYTE, tmpReg, src->gtOp.gtOp1, offset);
+
+ emit->emitIns_S_R(INS_mov, EA_2BYTE, tmpReg, baseVarNum, putArgOffset + offset);
+
+ offset += 2;
+ }
+
+ if ((size & 1) != 0)
+ {
+ genCodeForLoadOffset(INS_mov, EA_1BYTE, tmpReg, src->gtOp.gtOp1, offset);
+ emit->emitIns_S_R(INS_mov, EA_1BYTE, tmpReg, baseVarNum, putArgOffset + offset);
+ }
+ }
+}
+
+//------------------------------------------------------------------------
+// genStructPutArgRepMovs: Generates code for passing a struct arg by value on stack using Rep Movs.
+//
+// Arguments:
+// putArgNode - the PutArgStk tree.
+// baseVarNum - the base var number, relative to which the by-val struct bits will go.
+//
+// Preconditions:
+// The size argument of the PutArgStk (for structs) is a constant and is between
+// CPBLK_UNROLL_LIMIT and CPBLK_MOVS_LIMIT bytes.
+//
+void CodeGen::genStructPutArgRepMovs(GenTreePutArgStk* putArgNode, unsigned baseVarNum)
+{
+ assert(putArgNode->TypeGet() == TYP_STRUCT);
+ assert(putArgNode->getArgSize() > CPBLK_UNROLL_LIMIT);
+ assert(baseVarNum != BAD_VAR_NUM);
+
+ // Make sure we got the arguments of the cpblk operation in the right registers
+ GenTreePtr dstAddr = putArgNode;
+ GenTreePtr srcAddr = putArgNode->gtGetOp1();
+
+ // Validate state.
+ assert(putArgNode->gtRsvdRegs == (RBM_RDI | RBM_RCX | RBM_RSI));
+ assert(srcAddr->isContained());
+
+ genConsumePutStructArgStk(putArgNode, REG_RDI, REG_RSI, REG_RCX, baseVarNum);
+ instGen(INS_r_movsb);
+}
+
+//------------------------------------------------------------------------
+// If any Vector3 args are on the stack and they are not passed by reference, the upper 32 bits
+// must be cleared to zero. The native compiler doesn't clear the upper bits
+// and there is no way to know whether the caller is native or not. So the upper
+// 32 bits of a Vector3 argument on the stack are always cleared to zero.
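+// For example, for a stack-passed Vector3 the prolog zeroes the 4 bytes at offset
+// 0xC (genTypeSize(TYP_FLOAT) * 3) of the argument's home slot.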
+#ifdef FEATURE_SIMD
+void CodeGen::genClearStackVec3ArgUpperBits()
+{
+#ifdef DEBUG
+ if (verbose)
+ printf("*************** In genClearStackVec3ArgUpperBits()\n");
+#endif
+
+ assert(compiler->compGeneratingProlog);
+
+ for (unsigned varNum = 0; varNum < compiler->info.compArgsCount; varNum++)
+ {
+ LclVarDsc* varDsc = &(compiler->lvaTable[varNum]);
+ assert(varDsc->lvIsParam);
+
+        // Does the var have a SIMD12 type?
+ if (varDsc->lvType != TYP_SIMD12)
+ {
+ continue;
+ }
+
+ if (!varDsc->lvIsRegArg)
+ {
+ // Clear the upper 32 bits by mov dword ptr [V_ARG_BASE+0xC], 0
+ getEmitter()->emitIns_S_I(ins_Store(TYP_INT), EA_4BYTE, varNum, genTypeSize(TYP_FLOAT) * 3, 0);
+ }
+ else
+ {
+            // Assume that for x64 Linux, an argument is either fully in registers
+            // or fully on the stack.
+ regNumber argReg = varDsc->GetOtherArgReg();
+
+ // Clear the upper 32 bits by two shift instructions.
+ // argReg = argReg << 96
+ getEmitter()->emitIns_R_I(INS_pslldq, emitActualTypeSize(TYP_SIMD12), argReg, 12);
+ // argReg = argReg >> 96
+ getEmitter()->emitIns_R_I(INS_psrldq, emitActualTypeSize(TYP_SIMD12), argReg, 12);
+ }
+ }
+}
+#endif // FEATURE_SIMD
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+
+// Generate code for CpObj nodes which copy structs that have interleaved
+// GC pointers.
+// This will generate a sequence of movsq instructions for the runs of non-gc members,
+// and calls to the CORINFO_HELP_ASSIGN_BYREF helper for the GC pointer fields.
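+// For example, a 4-slot struct whose GC layout is { non-gc, non-gc, gc, non-gc }, with a
+// destination not known to be on the stack, roughly emits:
+//    movsq
+//    movsq
+//    call CORINFO_HELP_ASSIGN_BYREF
+//    movsq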
+void CodeGen::genCodeForCpObj(GenTreeObj* cpObjNode)
+{
+ // Make sure we got the arguments of the cpobj operation in the right registers
+ GenTreePtr dstAddr = cpObjNode->Addr();
+ GenTreePtr source = cpObjNode->Data();
+ GenTreePtr srcAddr = nullptr;
+ bool sourceIsLocal = false;
+
+ assert(source->isContained());
+ if (source->gtOper == GT_IND)
+ {
+ srcAddr = source->gtGetOp1();
+ assert(!srcAddr->isContained());
+ }
+ else
+ {
+ noway_assert(source->IsLocal());
+ sourceIsLocal = true;
+ // TODO: Consider making the addrForm() method in Rationalize public, e.g. in GenTree.
+ // OR: transform source to GT_IND(GT_LCL_VAR_ADDR)
+ if (source->OperGet() == GT_LCL_VAR)
+ {
+ source->SetOper(GT_LCL_VAR_ADDR);
+ }
+ else
+ {
+ assert(source->OperGet() == GT_LCL_FLD);
+ source->SetOper(GT_LCL_FLD_ADDR);
+ }
+ srcAddr = source;
+ }
+
+ bool dstOnStack = dstAddr->OperIsLocalAddr();
+
+#ifdef DEBUG
+ bool isRepMovsqUsed = false;
+
+ assert(!dstAddr->isContained());
+
+ // If the GenTree node has data about GC pointers, this means we're dealing
+ // with CpObj, so this requires special logic.
+ assert(cpObjNode->gtGcPtrCount > 0);
+
+ // MovSq instruction is used for copying non-gcref fields and it needs
+ // src = RSI and dst = RDI.
+ // Either these registers must not contain lclVars, or they must be dying or marked for spill.
+ // This is because these registers are incremented as we go through the struct.
+ GenTree* actualSrcAddr = srcAddr->gtSkipReloadOrCopy();
+ GenTree* actualDstAddr = dstAddr->gtSkipReloadOrCopy();
+ unsigned srcLclVarNum = BAD_VAR_NUM;
+ unsigned dstLclVarNum = BAD_VAR_NUM;
+ bool isSrcAddrLiveOut = false;
+ bool isDstAddrLiveOut = false;
+ if (genIsRegCandidateLocal(actualSrcAddr))
+ {
+ srcLclVarNum = actualSrcAddr->AsLclVarCommon()->gtLclNum;
+ isSrcAddrLiveOut = ((actualSrcAddr->gtFlags & (GTF_VAR_DEATH | GTF_SPILL)) == 0);
+ }
+ if (genIsRegCandidateLocal(actualDstAddr))
+ {
+ dstLclVarNum = actualDstAddr->AsLclVarCommon()->gtLclNum;
+ isDstAddrLiveOut = ((actualDstAddr->gtFlags & (GTF_VAR_DEATH | GTF_SPILL)) == 0);
+ }
+ assert((actualSrcAddr->gtRegNum != REG_RSI) || !isSrcAddrLiveOut ||
+ ((srcLclVarNum == dstLclVarNum) && !isDstAddrLiveOut));
+ assert((actualDstAddr->gtRegNum != REG_RDI) || !isDstAddrLiveOut ||
+ ((srcLclVarNum == dstLclVarNum) && !isSrcAddrLiveOut));
+#endif // DEBUG
+
+ // Consume these registers.
+ // They may now contain gc pointers (depending on their type; gcMarkRegPtrVal will "do the right thing").
+ if (sourceIsLocal)
+ {
+ inst_RV_TT(INS_lea, REG_RSI, source, 0, EA_BYREF);
+ genConsumeBlockOp(cpObjNode, REG_RDI, REG_NA, REG_NA);
+ }
+ else
+ {
+ genConsumeBlockOp(cpObjNode, REG_RDI, REG_RSI, REG_NA);
+ }
+ gcInfo.gcMarkRegPtrVal(REG_RSI, srcAddr->TypeGet());
+ gcInfo.gcMarkRegPtrVal(REG_RDI, dstAddr->TypeGet());
+
+ unsigned slots = cpObjNode->gtSlots;
+
+ // If we can prove it's on the stack we don't need to use the write barrier.
+ if (dstOnStack)
+ {
+ if (slots >= CPOBJ_NONGC_SLOTS_LIMIT)
+ {
+#ifdef DEBUG
+ // If the destination of the CpObj is on the stack
+ // make sure we allocated RCX to emit rep movsq.
+ regNumber tmpReg = genRegNumFromMask(cpObjNode->gtRsvdRegs & RBM_ALLINT);
+ assert(tmpReg == REG_RCX);
+ isRepMovsqUsed = true;
+#endif // DEBUG
+
+ getEmitter()->emitIns_R_I(INS_mov, EA_4BYTE, REG_RCX, slots);
+ instGen(INS_r_movsq);
+ }
+ else
+ {
+ // For small structs, it's better to emit a sequence of movsq than to
+ // emit a rep movsq instruction.
+ while (slots > 0)
+ {
+ instGen(INS_movsq);
+ slots--;
+ }
+ }
+ }
+ else
+ {
+ BYTE* gcPtrs = cpObjNode->gtGcPtrs;
+ unsigned gcPtrCount = cpObjNode->gtGcPtrCount;
+
+ unsigned i = 0;
+ while (i < slots)
+ {
+ switch (gcPtrs[i])
+ {
+ case TYPE_GC_NONE:
+ // Let's see if we can use rep movsq instead of a sequence of movsq instructions
+ // to save cycles and code size.
+ {
+ unsigned nonGcSlotCount = 0;
+
+ do
+ {
+ nonGcSlotCount++;
+ i++;
+ } while (i < slots && gcPtrs[i] == TYPE_GC_NONE);
+
+ // If we have a very small contiguous non-gc region, it's better just to
+ // emit a sequence of movsq instructions
+ if (nonGcSlotCount < CPOBJ_NONGC_SLOTS_LIMIT)
+ {
+ while (nonGcSlotCount > 0)
+ {
+ instGen(INS_movsq);
+ nonGcSlotCount--;
+ }
+ }
+ else
+ {
+#ifdef DEBUG
+ // Otherwise, we can save code-size and improve CQ by emitting
+ // rep movsq
+ regNumber tmpReg = genRegNumFromMask(cpObjNode->gtRsvdRegs & RBM_ALLINT);
+ assert(tmpReg == REG_RCX);
+ isRepMovsqUsed = true;
+#endif // DEBUG
+ getEmitter()->emitIns_R_I(INS_mov, EA_4BYTE, REG_RCX, nonGcSlotCount);
+ instGen(INS_r_movsq);
+ }
+ }
+ break;
+ default:
+ // We have a GC pointer, call the memory barrier.
+ genEmitHelperCall(CORINFO_HELP_ASSIGN_BYREF, 0, EA_PTRSIZE);
+ gcPtrCount--;
+ i++;
+ }
+ }
+
+ assert(gcPtrCount == 0);
+ }
+
+ // Clear the gcInfo for RSI and RDI.
+ // While we normally update GC info prior to the last instruction that uses them,
+ // these actually live into the helper call.
+ gcInfo.gcMarkRegSetNpt(RBM_RSI);
+ gcInfo.gcMarkRegSetNpt(RBM_RDI);
+}
+
+// Generate code for a CpBlk node by means of the VM memcpy helper call.
+// Preconditions (one of the following holds):
+// a) The size argument of the CpBlk is not an integer constant, or
+// b) The size argument is a constant but is larger than CPBLK_MOVS_LIMIT bytes.
+void CodeGen::genCodeForCpBlk(GenTreeBlk* cpBlkNode)
+{
+#ifdef _TARGET_AMD64_
+ // Make sure we got the arguments of the cpblk operation in the right registers
+ unsigned blockSize = cpBlkNode->Size();
+ GenTreePtr dstAddr = cpBlkNode->Addr();
+ GenTreePtr source = cpBlkNode->Data();
+ GenTreePtr srcAddr = nullptr;
+
+ // Size goes in arg2
+ if (blockSize != 0)
+ {
+ assert(blockSize >= CPBLK_MOVS_LIMIT);
+ assert((cpBlkNode->gtRsvdRegs & RBM_ARG_2) != 0);
+ }
+ else
+ {
+ noway_assert(cpBlkNode->gtOper == GT_STORE_DYN_BLK);
+ }
+
+ // Source address goes in arg1
+ if (source->gtOper == GT_IND)
+ {
+ srcAddr = source->gtGetOp1();
+ assert(!srcAddr->isContained());
+ }
+ else
+ {
+ noway_assert(source->IsLocal());
+ assert((cpBlkNode->gtRsvdRegs & RBM_ARG_1) != 0);
+ inst_RV_TT(INS_lea, REG_ARG_1, source, 0, EA_BYREF);
+ }
+
+ genConsumeBlockOp(cpBlkNode, REG_ARG_0, REG_ARG_1, REG_ARG_2);
+
+ genEmitHelperCall(CORINFO_HELP_MEMCPY, 0, EA_UNKNOWN);
+#else // !_TARGET_AMD64_
+ noway_assert(false && "Helper call for CpBlk is not needed.");
+#endif // !_TARGET_AMD64_
+}
+
+// generate code to do a switch statement based on a table of ip-relative offsets
+void CodeGen::genTableBasedSwitch(GenTree* treeNode)
+{
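+    // The emitted sequence is roughly:
+    //    mov  baseReg, dword ptr [baseReg + idxReg*4] ; load the ip-relative offset from the table
+    //    lea  tmpReg, [fgFirstBB]                     ; absolute address of the start of the method
+    //    add  baseReg, tmpReg                         ; compute the absolute target address
+    //    jmp  baseReg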
+ genConsumeOperands(treeNode->AsOp());
+ regNumber idxReg = treeNode->gtOp.gtOp1->gtRegNum;
+ regNumber baseReg = treeNode->gtOp.gtOp2->gtRegNum;
+
+ regNumber tmpReg = genRegNumFromMask(treeNode->gtRsvdRegs);
+
+ // load the ip-relative offset (which is relative to start of fgFirstBB)
+ getEmitter()->emitIns_R_ARX(INS_mov, EA_4BYTE, baseReg, baseReg, idxReg, 4, 0);
+
+ // add it to the absolute address of fgFirstBB
+ compiler->fgFirstBB->bbFlags |= BBF_JMP_TARGET;
+ getEmitter()->emitIns_R_L(INS_lea, EA_PTR_DSP_RELOC, compiler->fgFirstBB, tmpReg);
+ getEmitter()->emitIns_R_R(INS_add, EA_PTRSIZE, baseReg, tmpReg);
+ // jmp baseReg
+ getEmitter()->emitIns_R(INS_i_jmp, emitTypeSize(TYP_I_IMPL), baseReg);
+}
+
+// emits the table and an instruction to get the address of the first element
+void CodeGen::genJumpTable(GenTree* treeNode)
+{
+ noway_assert(compiler->compCurBB->bbJumpKind == BBJ_SWITCH);
+ assert(treeNode->OperGet() == GT_JMPTABLE);
+
+ unsigned jumpCount = compiler->compCurBB->bbJumpSwt->bbsCount;
+ BasicBlock** jumpTable = compiler->compCurBB->bbJumpSwt->bbsDstTab;
+ unsigned jmpTabOffs;
+ unsigned jmpTabBase;
+
+ jmpTabBase = getEmitter()->emitBBTableDataGenBeg(jumpCount, true);
+
+ jmpTabOffs = 0;
+
+ JITDUMP("\n J_M%03u_DS%02u LABEL DWORD\n", Compiler::s_compMethodsCount, jmpTabBase);
+
+ for (unsigned i = 0; i < jumpCount; i++)
+ {
+ BasicBlock* target = *jumpTable++;
+ noway_assert(target->bbFlags & BBF_JMP_TARGET);
+
+ JITDUMP(" DD L_M%03u_BB%02u\n", Compiler::s_compMethodsCount, target->bbNum);
+
+ getEmitter()->emitDataGenData(i, target);
+    }
+
+ getEmitter()->emitDataGenEnd();
+
+ // Access to inline data is 'abstracted' by a special type of static member
+ // (produced by eeFindJitDataOffs) which the emitter recognizes as being a reference
+ // to constant data, not a real static field.
+ getEmitter()->emitIns_R_C(INS_lea, emitTypeSize(TYP_I_IMPL), treeNode->gtRegNum,
+ compiler->eeFindJitDataOffs(jmpTabBase), 0);
+ genProduceReg(treeNode);
+}
+
+// generate code for the locked operations:
+// GT_LOCKADD, GT_XCHG, GT_XADD
+void CodeGen::genLockedInstructions(GenTree* treeNode)
+{
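+    // Each of these operates on [addr] in place:
+    //    GT_LOCKADD:  lock add  [addr], data
+    //    GT_XCHG:          xchg [addr], data  (the lock prefix is implicit for xchg)
+    //    GT_XADD:     lock xadd [addr], data
+    // xchg and xadd also leave the original memory value in the data/target register.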
+ GenTree* data = treeNode->gtOp.gtOp2;
+ GenTree* addr = treeNode->gtOp.gtOp1;
+ regNumber targetReg = treeNode->gtRegNum;
+ regNumber dataReg = data->gtRegNum;
+ regNumber addrReg = addr->gtRegNum;
+ instruction ins;
+
+ // all of these nodes implicitly do an indirection on op1
+ // so create a temporary node to feed into the pattern matching
+ GenTreeIndir i = indirForm(data->TypeGet(), addr);
+ genConsumeReg(addr);
+
+ // The register allocator should have extended the lifetime of the address
+ // so that it is not used as the target.
+ noway_assert(addrReg != targetReg);
+
+ // If data is a lclVar that's not a last use, we'd better have allocated a register
+ // for the result (except in the case of GT_LOCKADD which does not produce a register result).
+ assert(targetReg != REG_NA || treeNode->OperGet() == GT_LOCKADD || !genIsRegCandidateLocal(data) ||
+ (data->gtFlags & GTF_VAR_DEATH) != 0);
+
+ genConsumeIfReg(data);
+ if (targetReg != REG_NA && dataReg != REG_NA && dataReg != targetReg)
+ {
+ inst_RV_RV(ins_Copy(data->TypeGet()), targetReg, dataReg);
+ data->gtRegNum = targetReg;
+
+ // TODO-XArch-Cleanup: Consider whether it is worth it, for debugging purposes, to restore the
+ // original gtRegNum on data, after calling emitInsBinary below.
+ }
+ switch (treeNode->OperGet())
+ {
+ case GT_LOCKADD:
+ instGen(INS_lock);
+ ins = INS_add;
+ break;
+ case GT_XCHG:
+ // lock is implied by xchg
+ ins = INS_xchg;
+ break;
+ case GT_XADD:
+ instGen(INS_lock);
+ ins = INS_xadd;
+ break;
+ default:
+ unreached();
+ }
+ getEmitter()->emitInsBinary(ins, emitTypeSize(data), &i, data);
+
+ if (treeNode->gtRegNum != REG_NA)
+ {
+ genProduceReg(treeNode);
+ }
+}
+
+// generate code for BoundsCheck nodes
+void CodeGen::genRangeCheck(GenTreePtr oper)
+{
+#ifdef FEATURE_SIMD
+ noway_assert(oper->OperGet() == GT_ARR_BOUNDS_CHECK || oper->OperGet() == GT_SIMD_CHK);
+#else // !FEATURE_SIMD
+ noway_assert(oper->OperGet() == GT_ARR_BOUNDS_CHECK);
+#endif // !FEATURE_SIMD
+
+ GenTreeBoundsChk* bndsChk = oper->AsBoundsChk();
+
+ GenTreePtr arrLen = bndsChk->gtArrLen;
+ GenTreePtr arrIndex = bndsChk->gtIndex;
+ GenTreePtr arrRef = nullptr;
+ int lenOffset = 0;
+
+ GenTree * src1, *src2;
+ emitJumpKind jmpKind;
+
+ genConsumeRegs(arrLen);
+ genConsumeRegs(arrIndex);
+
+ if (arrIndex->isContainedIntOrIImmed())
+ {
+ // arrIndex is a contained constant. In this case
+ // we will generate one of the following
+ // cmp [mem], immed (if arrLen is a memory op)
+ // cmp reg, immed (if arrLen is in a reg)
+ //
+        // That is, arrLen cannot be a contained immed.
+ assert(!arrLen->isContainedIntOrIImmed());
+
+ src1 = arrLen;
+ src2 = arrIndex;
+ jmpKind = EJ_jbe;
+ }
+ else
+ {
+ // arrIndex could either be a contained memory op or a reg
+ // In this case we will generate one of the following
+ // cmp [mem], immed (if arrLen is a constant)
+ // cmp [mem], reg (if arrLen is in a reg)
+ // cmp reg, immed (if arrIndex is in a reg)
+        //      cmp reg1, reg2      (if arrIndex is in reg1)
+ // cmp reg, [mem] (if arrLen is a memory op)
+ //
+        // That is, only one of arrIndex or arrLen can be a memory op.
+ assert(!arrIndex->isContainedMemoryOp() || !arrLen->isContainedMemoryOp());
+
+ src1 = arrIndex;
+ src2 = arrLen;
+ jmpKind = EJ_jae;
+ }
+
+ var_types bndsChkType = src2->TypeGet();
+#ifdef DEBUG
+ // Bounds checks can only be 32 or 64 bit sized comparisons.
+ assert(bndsChkType == TYP_INT || bndsChkType == TYP_LONG);
+
+    // The type of the bounds check should always be wide enough to compare against the index.
+ assert(emitTypeSize(bndsChkType) >= emitTypeSize(src1->TypeGet()));
+#endif // DEBUG
+
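+    // Note that jbe/jae are unsigned comparisons, so a negative index (viewed as unsigned) compares
+    // above any valid length and therefore also takes the range-check-failure path.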
+ getEmitter()->emitInsBinary(INS_cmp, emitTypeSize(bndsChkType), src1, src2);
+ genJumpToThrowHlpBlk(jmpKind, bndsChk->gtThrowKind, bndsChk->gtIndRngFailBB);
+}
+
+//------------------------------------------------------------------------
+// genOffsetOfMDArrayLowerBound: Returns the offset from the Array object to the
+// lower bound for the given dimension.
+//
+// Arguments:
+// elemType - the element type of the array
+// rank - the rank of the array
+// dimension - the dimension for which the lower bound offset will be returned.
+//
+// Return Value:
+// The offset.
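+//
+// Notes:
+//    As implied by this computation (and by genOffsetOfMDArrayDimensionSize below), the per-dimension
+//    lengths are laid out first, starting at eeGetArrayDataOffset(elemType), followed by the
+//    per-dimension lower bounds; hence the (dimension + rank) scaling used here.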
+
+unsigned CodeGen::genOffsetOfMDArrayLowerBound(var_types elemType, unsigned rank, unsigned dimension)
+{
+ // Note that the lower bound and length fields of the Array object are always TYP_INT, even on 64-bit targets.
+ return compiler->eeGetArrayDataOffset(elemType) + genTypeSize(TYP_INT) * (dimension + rank);
+}
+
+//------------------------------------------------------------------------
+// genOffsetOfMDArrayDimensionSize: Returns the offset from the Array object to the
+// size for the given dimension.
+//
+// Arguments:
+// elemType - the element type of the array
+// rank - the rank of the array
+//    dimension - the dimension for which the size will be returned.
+//
+// Return Value:
+// The offset.
+
+unsigned CodeGen::genOffsetOfMDArrayDimensionSize(var_types elemType, unsigned rank, unsigned dimension)
+{
+ // Note that the lower bound and length fields of the Array object are always TYP_INT, even on 64-bit targets.
+ return compiler->eeGetArrayDataOffset(elemType) + genTypeSize(TYP_INT) * dimension;
+}
+
+//------------------------------------------------------------------------
+// genCodeForArrIndex: Generates code to bounds check the index for one dimension of an array reference,
+// producing the effective index by subtracting the lower bound.
+//
+// Arguments:
+// arrIndex - the node for which we're generating code
+//
+// Return Value:
+// None.
+//
+
+void CodeGen::genCodeForArrIndex(GenTreeArrIndex* arrIndex)
+{
+ GenTreePtr arrObj = arrIndex->ArrObj();
+ GenTreePtr indexNode = arrIndex->IndexExpr();
+
+ regNumber arrReg = genConsumeReg(arrObj);
+ regNumber indexReg = genConsumeReg(indexNode);
+ regNumber tgtReg = arrIndex->gtRegNum;
+
+ unsigned dim = arrIndex->gtCurrDim;
+ unsigned rank = arrIndex->gtArrRank;
+ var_types elemType = arrIndex->gtArrElemType;
+
+ noway_assert(tgtReg != REG_NA);
+
+ // Subtract the lower bound for this dimension.
+ // TODO-XArch-CQ: make this contained if it's an immediate that fits.
+ if (tgtReg != indexReg)
+ {
+ inst_RV_RV(INS_mov, tgtReg, indexReg, indexNode->TypeGet());
+ }
+ getEmitter()->emitIns_R_AR(INS_sub, emitActualTypeSize(TYP_INT), tgtReg, arrReg,
+ genOffsetOfMDArrayLowerBound(elemType, rank, dim));
+ getEmitter()->emitIns_R_AR(INS_cmp, emitActualTypeSize(TYP_INT), tgtReg, arrReg,
+ genOffsetOfMDArrayDimensionSize(elemType, rank, dim));
+ genJumpToThrowHlpBlk(EJ_jae, SCK_RNGCHK_FAIL);
+
+ genProduceReg(arrIndex);
+}
+
+//------------------------------------------------------------------------
+// genCodeForArrOffset: Generates code to compute the flattened array offset for
+// one dimension of an array reference:
+// result = (prevDimOffset * dimSize) + effectiveIndex
+// where dimSize is obtained from the arrObj operand
+//
+// Arguments:
+// arrOffset - the node for which we're generating code
+//
+// Return Value:
+// None.
+//
+// Notes:
+// dimSize and effectiveIndex are always non-negative, the former by design,
+// and the latter because it has been normalized to be zero-based.
+
+void CodeGen::genCodeForArrOffset(GenTreeArrOffs* arrOffset)
+{
+ GenTreePtr offsetNode = arrOffset->gtOffset;
+ GenTreePtr indexNode = arrOffset->gtIndex;
+ GenTreePtr arrObj = arrOffset->gtArrObj;
+
+ regNumber tgtReg = arrOffset->gtRegNum;
+
+ noway_assert(tgtReg != REG_NA);
+
+ unsigned dim = arrOffset->gtCurrDim;
+ unsigned rank = arrOffset->gtArrRank;
+ var_types elemType = arrOffset->gtArrElemType;
+
+ // We will use a temp register for the offset*scale+effectiveIndex computation.
+ regMaskTP tmpRegMask = arrOffset->gtRsvdRegs;
+ regNumber tmpReg = genRegNumFromMask(tmpRegMask);
+
+ // First, consume the operands in the correct order.
+ regNumber offsetReg = REG_NA;
+ if (!offsetNode->IsIntegralConst(0))
+ {
+ offsetReg = genConsumeReg(offsetNode);
+ }
+ else
+ {
+ assert(offsetNode->isContained());
+ }
+ regNumber indexReg = genConsumeReg(indexNode);
+ // Although arrReg may not be used in the constant-index case, if we have generated
+ // the value into a register, we must consume it, otherwise we will fail to end the
+ // live range of the gc ptr.
+ // TODO-CQ: Currently arrObj will always have a register allocated to it.
+ // We could avoid allocating a register for it, which would be of value if the arrObj
+ // is an on-stack lclVar.
+ regNumber arrReg = REG_NA;
+ if (arrObj->gtHasReg())
+ {
+ arrReg = genConsumeReg(arrObj);
+ }
+
+ if (!offsetNode->IsIntegralConst(0))
+ {
+ // Evaluate tgtReg = offsetReg*dim_size + indexReg.
+ // tmpReg is used to load dim_size and the result of the multiplication.
+ // Note that dim_size will never be negative.
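+        //
+        // The sequence below is:
+        //    mov  tmpReg, dword ptr [arrReg + dimSizeOffset]
+        //    imul tmpReg, offsetReg
+        //    add  the product and indexReg together into tgtReg (using tmpReg directly when it is the target)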
+
+ getEmitter()->emitIns_R_AR(INS_mov, emitActualTypeSize(TYP_INT), tmpReg, arrReg,
+ genOffsetOfMDArrayDimensionSize(elemType, rank, dim));
+ inst_RV_RV(INS_imul, tmpReg, offsetReg);
+
+ if (tmpReg == tgtReg)
+ {
+ inst_RV_RV(INS_add, tmpReg, indexReg);
+ }
+ else
+ {
+ if (indexReg != tgtReg)
+ {
+ inst_RV_RV(INS_mov, tgtReg, indexReg, TYP_I_IMPL);
+ }
+ inst_RV_RV(INS_add, tgtReg, tmpReg);
+ }
+ }
+ else
+ {
+ if (indexReg != tgtReg)
+ {
+ inst_RV_RV(INS_mov, tgtReg, indexReg, TYP_INT);
+ }
+ }
+ genProduceReg(arrOffset);
+}
+
+// make a temporary indir we can feed to pattern matching routines
+// in cases where we don't want to instantiate all the indirs that happen
+//
+GenTreeIndir CodeGen::indirForm(var_types type, GenTree* base)
+{
+ GenTreeIndir i(GT_IND, type, base, nullptr);
+ i.gtRegNum = REG_NA;
+    // It has to be non-null (because contained nodes can't be the last node in a block),
+    // but we don't want it to be a valid pointer.
+ i.gtNext = (GenTree*)(-1);
+ return i;
+}
+
+// make a temporary int we can feed to pattern matching routines
+// in cases where we don't want to instantiate an actual node
+//
+GenTreeIntCon CodeGen::intForm(var_types type, ssize_t value)
+{
+ GenTreeIntCon i(type, value);
+ i.gtRegNum = REG_NA;
+    // It has to be non-null (because contained nodes can't be the last node in a block),
+    // but we don't want it to be a valid pointer.
+ i.gtNext = (GenTree*)(-1);
+ return i;
+}
+
+instruction CodeGen::genGetInsForOper(genTreeOps oper, var_types type)
+{
+ instruction ins;
+
+ // Operations on SIMD vectors shouldn't come this path
+ assert(!varTypeIsSIMD(type));
+ if (varTypeIsFloating(type))
+ {
+ return ins_MathOp(oper, type);
+ }
+
+ switch (oper)
+ {
+ case GT_ADD:
+ ins = INS_add;
+ break;
+ case GT_AND:
+ ins = INS_and;
+ break;
+ case GT_LSH:
+ ins = INS_shl;
+ break;
+ case GT_MUL:
+ ins = INS_imul;
+ break;
+ case GT_NEG:
+ ins = INS_neg;
+ break;
+ case GT_NOT:
+ ins = INS_not;
+ break;
+ case GT_OR:
+ ins = INS_or;
+ break;
+ case GT_ROL:
+ ins = INS_rol;
+ break;
+ case GT_ROR:
+ ins = INS_ror;
+ break;
+ case GT_RSH:
+ ins = INS_sar;
+ break;
+ case GT_RSZ:
+ ins = INS_shr;
+ break;
+ case GT_SUB:
+ ins = INS_sub;
+ break;
+ case GT_XOR:
+ ins = INS_xor;
+ break;
+#if !defined(_TARGET_64BIT_)
+ case GT_ADD_LO:
+ ins = INS_add;
+ break;
+ case GT_ADD_HI:
+ ins = INS_adc;
+ break;
+ case GT_SUB_LO:
+ ins = INS_sub;
+ break;
+ case GT_SUB_HI:
+ ins = INS_sbb;
+ break;
+#endif // !defined(_TARGET_64BIT_)
+ default:
+ unreached();
+ break;
+ }
+ return ins;
+}
+
+//------------------------------------------------------------------------
+// genCodeForShift: Generates the code sequence for a GenTree node that
+// represents a bit shift or rotate operation (<<, >>, >>>, rol, ror).
+//
+// Arguments:
+// tree - the bit shift node (that specifies the type of bit shift to perform).
+//
+// Assumptions:
+// a) All GenTrees are register allocated.
+// b) The shift-by-amount in tree->gtOp.gtOp2 is either a contained constant or
+// it's a register-allocated expression. If it is in a register that is
+// not RCX, it will be moved to RCX (so RCX better not be in use!).
+//
+void CodeGen::genCodeForShift(GenTreePtr tree)
+{
+ // Only the non-RMW case here.
+ assert(tree->OperIsShiftOrRotate());
+ assert(!tree->gtOp.gtOp1->isContained());
+ assert(tree->gtRegNum != REG_NA);
+
+ genConsumeOperands(tree->AsOp());
+
+ var_types targetType = tree->TypeGet();
+ instruction ins = genGetInsForOper(tree->OperGet(), targetType);
+
+ GenTreePtr operand = tree->gtGetOp1();
+ regNumber operandReg = operand->gtRegNum;
+
+ GenTreePtr shiftBy = tree->gtGetOp2();
+ if (shiftBy->isContainedIntOrIImmed())
+ {
+ // First, move the operand to the destination register and
+ // later on perform the shift in-place.
+ // (LSRA will try to avoid this situation through preferencing.)
+ if (tree->gtRegNum != operandReg)
+ {
+ inst_RV_RV(INS_mov, tree->gtRegNum, operandReg, targetType);
+ }
+
+ int shiftByValue = (int)shiftBy->AsIntConCommon()->IconValue();
+ inst_RV_SH(ins, emitTypeSize(tree), tree->gtRegNum, shiftByValue);
+ }
+ else
+ {
+ // We must have the number of bits to shift stored in ECX, since we constrained this node to
+ // sit in ECX. In case this didn't happen, LSRA expects the code generator to move it since it's a single
+ // register destination requirement.
+ regNumber shiftReg = shiftBy->gtRegNum;
+ if (shiftReg != REG_RCX)
+ {
+ // Issue the mov to RCX:
+ inst_RV_RV(INS_mov, REG_RCX, shiftReg, shiftBy->TypeGet());
+ }
+
+ // The operand to be shifted must not be in ECX
+ noway_assert(operandReg != REG_RCX);
+
+ if (tree->gtRegNum != operandReg)
+ {
+ inst_RV_RV(INS_mov, tree->gtRegNum, operandReg, targetType);
+ }
+ inst_RV_CL(ins, tree->gtRegNum, targetType);
+ }
+
+ genProduceReg(tree);
+}
+
+//------------------------------------------------------------------------
+// genCodeForShiftRMW: Generates the code sequence for a GT_STOREIND GenTree node that
+// represents a RMW bit shift or rotate operation (<<, >>, >>>, rol, ror), for example:
+// GT_STOREIND( AddressTree, GT_SHL( Ind ( AddressTree ), Operand ) )
+//
+// Arguments:
+// storeIndNode: the GT_STOREIND node.
+//
+void CodeGen::genCodeForShiftRMW(GenTreeStoreInd* storeInd)
+{
+ GenTree* data = storeInd->Data();
+ GenTree* addr = storeInd->Addr();
+
+ assert(data->OperIsShiftOrRotate());
+
+ // This function only handles the RMW case.
+ assert(data->gtOp.gtOp1->isContained());
+ assert(data->gtOp.gtOp1->isIndir());
+ assert(Lowering::IndirsAreEquivalent(data->gtOp.gtOp1, storeInd));
+ assert(data->gtRegNum == REG_NA);
+
+ var_types targetType = data->TypeGet();
+ genTreeOps oper = data->OperGet();
+ instruction ins = genGetInsForOper(oper, targetType);
+ emitAttr attr = EA_ATTR(genTypeSize(targetType));
+
+ GenTree* shiftBy = data->gtOp.gtOp2;
+ if (shiftBy->isContainedIntOrIImmed())
+ {
+ int shiftByValue = (int)shiftBy->AsIntConCommon()->IconValue();
+ ins = genMapShiftInsToShiftByConstantIns(ins, shiftByValue);
+ if (shiftByValue == 1)
+ {
+ // There is no source in this case, as the shift by count is embedded in the instruction opcode itself.
+ getEmitter()->emitInsRMW(ins, attr, storeInd);
+ }
+ else
+ {
+ getEmitter()->emitInsRMW(ins, attr, storeInd, shiftBy);
+ }
+ }
+ else
+ {
+ // We must have the number of bits to shift stored in ECX, since we constrained this node to
+ // sit in ECX. In case this didn't happen, LSRA expects the code generator to move it since it's a single
+ // register destination requirement.
+ regNumber shiftReg = shiftBy->gtRegNum;
+ if (shiftReg != REG_RCX)
+ {
+ // Issue the mov to RCX:
+ inst_RV_RV(INS_mov, REG_RCX, shiftReg, shiftBy->TypeGet());
+ }
+
+ // The shiftBy operand is implicit, so call the unary version of emitInsRMW.
+ getEmitter()->emitInsRMW(ins, attr, storeInd);
+ }
+}
+
+void CodeGen::genUnspillRegIfNeeded(GenTree* tree)
+{
+ regNumber dstReg = tree->gtRegNum;
+ GenTree* unspillTree = tree;
+
+ if (tree->gtOper == GT_RELOAD)
+ {
+ unspillTree = tree->gtOp.gtOp1;
+ }
+
+ if ((unspillTree->gtFlags & GTF_SPILLED) != 0)
+ {
+ if (genIsRegCandidateLocal(unspillTree))
+ {
+ // Reset spilled flag, since we are going to load a local variable from its home location.
+ unspillTree->gtFlags &= ~GTF_SPILLED;
+
+ GenTreeLclVarCommon* lcl = unspillTree->AsLclVarCommon();
+ LclVarDsc* varDsc = &compiler->lvaTable[lcl->gtLclNum];
+
+ // Load local variable from its home location.
+ // In most cases the tree type will indicate the correct type to use for the load.
+ // However, if it is NOT a normalizeOnLoad lclVar (i.e. NOT a small int that always gets
+ // widened when loaded into a register), and its size is not the same as genActualType of
+ // the type of the lclVar, then we need to change the type of the tree node when loading.
+ // This situation happens due to "optimizations" that avoid a cast and
+ // simply retype the node when using long type lclVar as an int.
+ // While loading the int in that case would work for this use of the lclVar, if it is
+ // later used as a long, we will have incorrectly truncated the long.
+ // In the normalizeOnLoad case ins_Load will return an appropriate sign- or zero-
+ // extending load.
+
+ var_types treeType = unspillTree->TypeGet();
+ if (treeType != genActualType(varDsc->lvType) && !varTypeIsGC(treeType) && !varDsc->lvNormalizeOnLoad())
+ {
+ assert(!varTypeIsGC(varDsc));
+ var_types spillType = genActualType(varDsc->lvType);
+ unspillTree->gtType = spillType;
+ inst_RV_TT(ins_Load(spillType, compiler->isSIMDTypeLocalAligned(lcl->gtLclNum)), dstReg, unspillTree);
+ unspillTree->gtType = treeType;
+ }
+ else
+ {
+ inst_RV_TT(ins_Load(treeType, compiler->isSIMDTypeLocalAligned(lcl->gtLclNum)), dstReg, unspillTree);
+ }
+
+ unspillTree->SetInReg();
+
+ // TODO-Review: We would like to call:
+ // genUpdateRegLife(varDsc, /*isBorn*/ true, /*isDying*/ false DEBUGARG(tree));
+ // instead of the following code, but this ends up hitting this assert:
+ // assert((regSet.rsMaskVars & regMask) == 0);
+ // due to issues with LSRA resolution moves.
+ // So, just force it for now. This probably indicates a condition that creates a GC hole!
+ //
+ // Extra note: I think we really want to call something like gcInfo.gcUpdateForRegVarMove,
+ // because the variable is not really going live or dead, but that method is somewhat poorly
+ // factored because it, in turn, updates rsMaskVars which is part of RegSet not GCInfo.
+ // TODO-Cleanup: This code exists in other CodeGen*.cpp files, and should be moved to CodeGenCommon.cpp.
+
+ // Don't update the variable's location if we are just re-spilling it again.
+
+ if ((unspillTree->gtFlags & GTF_SPILL) == 0)
+ {
+ genUpdateVarReg(varDsc, tree);
+#ifdef DEBUG
+ if (VarSetOps::IsMember(compiler, gcInfo.gcVarPtrSetCur, varDsc->lvVarIndex))
+ {
+ JITDUMP("\t\t\t\t\t\t\tRemoving V%02u from gcVarPtrSetCur\n", lcl->gtLclNum);
+ }
+#endif // DEBUG
+ VarSetOps::RemoveElemD(compiler, gcInfo.gcVarPtrSetCur, varDsc->lvVarIndex);
+
+#ifdef DEBUG
+ if (compiler->verbose)
+ {
+ printf("\t\t\t\t\t\t\tV%02u in reg ", lcl->gtLclNum);
+ varDsc->PrintVarReg();
+ printf(" is becoming live ");
+ compiler->printTreeID(unspillTree);
+ printf("\n");
+ }
+#endif // DEBUG
+
+ regSet.AddMaskVars(genGetRegMask(varDsc));
+ }
+
+ gcInfo.gcMarkRegPtrVal(dstReg, unspillTree->TypeGet());
+ }
+ else if (unspillTree->IsMultiRegCall())
+ {
+ GenTreeCall* call = unspillTree->AsCall();
+ ReturnTypeDesc* retTypeDesc = call->GetReturnTypeDesc();
+ unsigned regCount = retTypeDesc->GetReturnRegCount();
+ GenTreeCopyOrReload* reloadTree = nullptr;
+ if (tree->OperGet() == GT_RELOAD)
+ {
+ reloadTree = tree->AsCopyOrReload();
+ }
+
+            // In the case of a multi-reg call node, the GTF_SPILLED flag on it indicates that
+            // one or more of its result regs are spilled. The call node needs to be
+            // queried to know which specific result regs need to be unspilled.
+ for (unsigned i = 0; i < regCount; ++i)
+ {
+ unsigned flags = call->GetRegSpillFlagByIdx(i);
+ if ((flags & GTF_SPILLED) != 0)
+ {
+ var_types dstType = retTypeDesc->GetReturnRegType(i);
+ regNumber unspillTreeReg = call->GetRegNumByIdx(i);
+
+ if (reloadTree != nullptr)
+ {
+ dstReg = reloadTree->GetRegNumByIdx(i);
+ if (dstReg == REG_NA)
+ {
+ dstReg = unspillTreeReg;
+ }
+ }
+ else
+ {
+ dstReg = unspillTreeReg;
+ }
+
+ TempDsc* t = regSet.rsUnspillInPlace(call, unspillTreeReg, i);
+ getEmitter()->emitIns_R_S(ins_Load(dstType), emitActualTypeSize(dstType), dstReg, t->tdTempNum(),
+ 0);
+ compiler->tmpRlsTemp(t);
+ gcInfo.gcMarkRegPtrVal(dstReg, dstType);
+ }
+ }
+
+ unspillTree->gtFlags &= ~GTF_SPILLED;
+ unspillTree->SetInReg();
+ }
+ else
+ {
+ TempDsc* t = regSet.rsUnspillInPlace(unspillTree, unspillTree->gtRegNum);
+ getEmitter()->emitIns_R_S(ins_Load(unspillTree->gtType), emitActualTypeSize(unspillTree->TypeGet()), dstReg,
+ t->tdTempNum(), 0);
+ compiler->tmpRlsTemp(t);
+
+ unspillTree->gtFlags &= ~GTF_SPILLED;
+ unspillTree->SetInReg();
+ gcInfo.gcMarkRegPtrVal(dstReg, unspillTree->TypeGet());
+ }
+ }
+}
+
+// Do liveness update for a subnode that is being consumed by codegen,
+// including the logic for reload in case it is needed, and also take care
+// of locating the value in the desired register.
+void CodeGen::genConsumeRegAndCopy(GenTree* tree, regNumber needReg)
+{
+ if (needReg == REG_NA)
+ {
+ return;
+ }
+ regNumber treeReg = genConsumeReg(tree);
+ if (treeReg != needReg)
+ {
+ inst_RV_RV(INS_mov, needReg, treeReg, tree->TypeGet());
+ }
+}
+
+void CodeGen::genRegCopy(GenTree* treeNode)
+{
+ assert(treeNode->OperGet() == GT_COPY);
+ GenTree* op1 = treeNode->gtOp.gtOp1;
+
+ if (op1->IsMultiRegCall())
+ {
+ genConsumeReg(op1);
+
+ GenTreeCopyOrReload* copyTree = treeNode->AsCopyOrReload();
+ GenTreeCall* call = op1->AsCall();
+ ReturnTypeDesc* retTypeDesc = call->GetReturnTypeDesc();
+ unsigned regCount = retTypeDesc->GetReturnRegCount();
+
+ for (unsigned i = 0; i < regCount; ++i)
+ {
+ var_types type = retTypeDesc->GetReturnRegType(i);
+ regNumber fromReg = call->GetRegNumByIdx(i);
+ regNumber toReg = copyTree->GetRegNumByIdx(i);
+
+            // A multi-reg GT_COPY node will have a valid reg only for those
+            // positions whose corresponding result reg of the call node needs
+            // to be copied.
+ if (toReg != REG_NA)
+ {
+ assert(toReg != fromReg);
+ inst_RV_RV(ins_Copy(type), toReg, fromReg, type);
+ }
+ }
+ }
+ else
+ {
+ var_types targetType = treeNode->TypeGet();
+ regNumber targetReg = treeNode->gtRegNum;
+ assert(targetReg != REG_NA);
+
+ // Check whether this node and the node from which we're copying the value have
+ // different register types. This can happen if (currently iff) we have a SIMD
+ // vector type that fits in an integer register, in which case it is passed as
+ // an argument, or returned from a call, in an integer register and must be
+ // copied if it's in an xmm register.
+
+ bool srcFltReg = (varTypeIsFloating(op1) || varTypeIsSIMD(op1));
+ bool tgtFltReg = (varTypeIsFloating(treeNode) || varTypeIsSIMD(treeNode));
+ if (srcFltReg != tgtFltReg)
+ {
+ instruction ins;
+ regNumber fpReg;
+ regNumber intReg;
+ if (tgtFltReg)
+ {
+ ins = ins_CopyIntToFloat(op1->TypeGet(), treeNode->TypeGet());
+ fpReg = targetReg;
+ intReg = op1->gtRegNum;
+ }
+ else
+ {
+ ins = ins_CopyFloatToInt(op1->TypeGet(), treeNode->TypeGet());
+ intReg = targetReg;
+ fpReg = op1->gtRegNum;
+ }
+ inst_RV_RV(ins, fpReg, intReg, targetType);
+ }
+ else
+ {
+ inst_RV_RV(ins_Copy(targetType), targetReg, genConsumeReg(op1), targetType);
+ }
+
+ if (op1->IsLocal())
+ {
+ // The lclVar will never be a def.
+ // If it is a last use, the lclVar will be killed by genConsumeReg(), as usual, and genProduceReg will
+ // appropriately set the gcInfo for the copied value.
+ // If not, there are two cases we need to handle:
+ // - If this is a TEMPORARY copy (indicated by the GTF_VAR_DEATH flag) the variable
+ // will remain live in its original register.
+ // genProduceReg() will appropriately set the gcInfo for the copied value,
+ // and genConsumeReg will reset it.
+ // - Otherwise, we need to update register info for the lclVar.
+
+ GenTreeLclVarCommon* lcl = op1->AsLclVarCommon();
+ assert((lcl->gtFlags & GTF_VAR_DEF) == 0);
+
+ if ((lcl->gtFlags & GTF_VAR_DEATH) == 0 && (treeNode->gtFlags & GTF_VAR_DEATH) == 0)
+ {
+ LclVarDsc* varDsc = &compiler->lvaTable[lcl->gtLclNum];
+
+ // If we didn't just spill it (in genConsumeReg, above), then update the register info
+ if (varDsc->lvRegNum != REG_STK)
+ {
+ // The old location is dying
+ genUpdateRegLife(varDsc, /*isBorn*/ false, /*isDying*/ true DEBUGARG(op1));
+
+ gcInfo.gcMarkRegSetNpt(genRegMask(op1->gtRegNum));
+
+ genUpdateVarReg(varDsc, treeNode);
+
+ // The new location is going live
+ genUpdateRegLife(varDsc, /*isBorn*/ true, /*isDying*/ false DEBUGARG(treeNode));
+ }
+ }
+ }
+ }
+
+ genProduceReg(treeNode);
+}
+
+// Check that registers are consumed in the right order for the current node being generated.
+#ifdef DEBUG
+void CodeGen::genCheckConsumeNode(GenTree* treeNode)
+{
+ // GT_PUTARG_REG is consumed out of order.
+ if (treeNode->gtSeqNum != 0 && treeNode->OperGet() != GT_PUTARG_REG)
+ {
+ if (lastConsumedNode != nullptr)
+ {
+ if (treeNode == lastConsumedNode)
+ {
+ if (verbose)
+ {
+ printf("Node was consumed twice:\n ");
+ compiler->gtDispTree(treeNode, nullptr, nullptr, true);
+ }
+ }
+ else
+ {
+ if (verbose && (lastConsumedNode->gtSeqNum > treeNode->gtSeqNum))
+ {
+ printf("Nodes were consumed out-of-order:\n");
+ compiler->gtDispTree(lastConsumedNode, nullptr, nullptr, true);
+ compiler->gtDispTree(treeNode, nullptr, nullptr, true);
+ }
+ // assert(lastConsumedNode->gtSeqNum < treeNode->gtSeqNum);
+ }
+ }
+ lastConsumedNode = treeNode;
+ }
+}
+#endif // DEBUG
+
+//--------------------------------------------------------------------
+// genConsumeReg: Do liveness update for a subnode that is being
+// consumed by codegen.
+//
+// Arguments:
+// tree - GenTree node
+//
+// Return Value:
+// Returns the reg number of tree.
+// In case of multi-reg call node returns the first reg number
+// of the multi-reg return.
+regNumber CodeGen::genConsumeReg(GenTree* tree)
+{
+ if (tree->OperGet() == GT_COPY)
+ {
+ genRegCopy(tree);
+ }
+
+ // Handle the case where we have a lclVar that needs to be copied before use (i.e. because it
+ // interferes with one of the other sources (or the target, if it's a "delayed use" register)).
+ // TODO-Cleanup: This is a special copyReg case in LSRA - consider eliminating these and
+ // always using GT_COPY to make the lclVar location explicit.
+ // Note that we have to do this before calling genUpdateLife because otherwise if we spill it
+ // the lvRegNum will be set to REG_STK and we will lose track of what register currently holds
+ // the lclVar (normally when a lclVar is spilled it is then used from its former register
+ // location, which matches the gtRegNum on the node).
+ // (Note that it doesn't matter if we call this before or after genUnspillRegIfNeeded
+ // because if it's on the stack it will always get reloaded into tree->gtRegNum).
+ if (genIsRegCandidateLocal(tree))
+ {
+ GenTreeLclVarCommon* lcl = tree->AsLclVarCommon();
+ LclVarDsc* varDsc = &compiler->lvaTable[lcl->GetLclNum()];
+ if (varDsc->lvRegNum != REG_STK && varDsc->lvRegNum != tree->gtRegNum)
+ {
+ inst_RV_RV(INS_mov, tree->gtRegNum, varDsc->lvRegNum);
+ }
+ }
+
+ genUnspillRegIfNeeded(tree);
+
+ // genUpdateLife() will also spill local var if marked as GTF_SPILL by calling CodeGen::genSpillVar
+ genUpdateLife(tree);
+
+ assert(tree->gtHasReg());
+
+ // there are three cases where consuming a reg means clearing the bit in the live mask
+ // 1. it was not produced by a local
+ // 2. it was produced by a local that is going dead
+ // 3. it was produced by a local that does not live in that reg (like one allocated on the stack)
+
+ if (genIsRegCandidateLocal(tree))
+ {
+ GenTreeLclVarCommon* lcl = tree->AsLclVarCommon();
+ LclVarDsc* varDsc = &compiler->lvaTable[lcl->GetLclNum()];
+ assert(varDsc->lvLRACandidate);
+
+ if ((tree->gtFlags & GTF_VAR_DEATH) != 0)
+ {
+ gcInfo.gcMarkRegSetNpt(genRegMask(varDsc->lvRegNum));
+ }
+ else if (varDsc->lvRegNum == REG_STK)
+ {
+ // We have loaded this into a register only temporarily
+ gcInfo.gcMarkRegSetNpt(genRegMask(tree->gtRegNum));
+ }
+ }
+ else
+ {
+ gcInfo.gcMarkRegSetNpt(tree->gtGetRegMask());
+ }
+
+ genCheckConsumeNode(tree);
+ return tree->gtRegNum;
+}
+
+// Do liveness update for an address tree: one of GT_LEA, GT_LCL_VAR, or GT_CNS_INT (for call indirect).
+void CodeGen::genConsumeAddress(GenTree* addr)
+{
+ if (addr->OperGet() == GT_LEA)
+ {
+ genConsumeAddrMode(addr->AsAddrMode());
+ }
+ else if (!addr->isContained())
+ {
+ genConsumeReg(addr);
+ }
+}
+
+// do liveness update for a subnode that is being consumed by codegen
+void CodeGen::genConsumeAddrMode(GenTreeAddrMode* addr)
+{
+ genConsumeOperands(addr);
+}
+
+void CodeGen::genConsumeRegs(GenTree* tree)
+{
+#if !defined(_TARGET_64BIT_)
+ if (tree->OperGet() == GT_LONG)
+ {
+ genConsumeRegs(tree->gtGetOp1());
+ genConsumeRegs(tree->gtGetOp2());
+ return;
+ }
+#endif // !defined(_TARGET_64BIT_)
+
+ if (tree->isContained())
+ {
+ if (tree->isContainedSpillTemp())
+ {
+            // Spill temps are untracked, so there is no need to update liveness.
+ }
+ else if (tree->isIndir())
+ {
+ genConsumeAddress(tree->AsIndir()->Addr());
+ }
+ else if (tree->OperGet() == GT_AND)
+ {
+ // This is the special contained GT_AND that we created in Lowering::LowerCmp()
+ // Now we need to consume the operands of the GT_AND node.
+ genConsumeOperands(tree->AsOp());
+ }
+ else if (tree->OperGet() == GT_LCL_VAR)
+ {
+ // A contained lcl var must be living on stack and marked as reg optional.
+ unsigned varNum = tree->AsLclVarCommon()->GetLclNum();
+ LclVarDsc* varDsc = compiler->lvaTable + varNum;
+
+ noway_assert(varDsc->lvRegNum == REG_STK);
+ noway_assert(tree->IsRegOptional());
+
+ // Update the life of reg optional lcl var.
+ genUpdateLife(tree);
+ }
+ else
+ {
+ assert(tree->OperIsLeaf());
+ }
+ }
+ else
+ {
+ genConsumeReg(tree);
+ }
+}
+
+//------------------------------------------------------------------------
+// genConsumeOperands: Do liveness update for the operands of a unary or binary tree
+//
+// Arguments:
+// tree - the GenTreeOp whose operands will have their liveness updated.
+//
+// Return Value:
+// None.
+//
+// Notes:
+// Note that this logic is localized here because we must do the liveness update in
+// the correct execution order. This is important because we may have two operands
+// that involve the same lclVar, and if one is marked "lastUse" we must handle it
+// after the first.
+
+void CodeGen::genConsumeOperands(GenTreeOp* tree)
+{
+ GenTree* firstOp = tree->gtOp1;
+ GenTree* secondOp = tree->gtOp2;
+ if ((tree->gtFlags & GTF_REVERSE_OPS) != 0)
+ {
+ assert(secondOp != nullptr);
+ firstOp = secondOp;
+ secondOp = tree->gtOp1;
+ }
+ if (firstOp != nullptr)
+ {
+ genConsumeRegs(firstOp);
+ }
+ if (secondOp != nullptr)
+ {
+ genConsumeRegs(secondOp);
+ }
+}
+
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+//------------------------------------------------------------------------
+// genConsumePutStructArgStk: Do liveness update for the operands of a PutArgStk node.
+//                            Also loads the addresses of the src/dst for the rep mov
+//                            operation into the right registers.
+//
+// Arguments:
+// putArgNode - the PUTARG_STK tree.
+// dstReg - the dstReg for the rep move operation.
+// srcReg - the srcReg for the rep move operation.
+// sizeReg - the sizeReg for the rep move operation.
+// baseVarNum - the varnum for the local used for placing the "by-value" args on the stack.
+//
+// Return Value:
+// None.
+//
+// Note: sizeReg can be REG_NA when this function is used to consume the dstReg and srcReg
+//       for copying a struct with references onto the stack.
+// The source address/offset is determined from the address on the GT_OBJ node, while
+// the destination address is the address contained in 'baseVarNum' plus the offset
+// provided in the 'putArgNode'.
+
+void CodeGen::genConsumePutStructArgStk(
+ GenTreePutArgStk* putArgNode, regNumber dstReg, regNumber srcReg, regNumber sizeReg, unsigned baseVarNum)
+{
+ assert(varTypeIsStruct(putArgNode));
+ assert(baseVarNum != BAD_VAR_NUM);
+
+ // The putArgNode children are always contained. We should not consume any registers.
+ assert(putArgNode->gtGetOp1()->isContained());
+
+ GenTree* dstAddr = putArgNode;
+
+ // Get the source address.
+ GenTree* src = putArgNode->gtGetOp1();
+ assert((src->gtOper == GT_OBJ) || ((src->gtOper == GT_IND && varTypeIsSIMD(src))));
+ GenTree* srcAddr = src->gtGetOp1();
+
+ size_t size = putArgNode->getArgSize();
+
+ assert(dstReg != REG_NA);
+ assert(srcReg != REG_NA);
+
+ // Consume the registers only if they are not contained or set to REG_NA.
+ if (srcAddr->gtRegNum != REG_NA)
+ {
+ genConsumeReg(srcAddr);
+ }
+
+ // If the op1 is already in the dstReg - nothing to do.
+ // Otherwise load the op1 (GT_ADDR) into the dstReg to copy the struct on the stack by value.
+ if (dstAddr->gtRegNum != dstReg)
+ {
+        // Generate an LEA instruction to load the address of the outgoing arg slot (the base var plus the
+        // SlotNum offset), or of the incoming arg area for tail calls, into RDI.
+ // Destination is always local (on the stack) - use EA_PTRSIZE.
+ getEmitter()->emitIns_R_S(INS_lea, EA_PTRSIZE, dstReg, baseVarNum, putArgNode->getArgOffset());
+ }
+
+ if (srcAddr->gtRegNum != srcReg)
+ {
+ if (srcAddr->OperIsLocalAddr())
+ {
+ // The OperLocalAddr is always contained.
+ assert(srcAddr->isContained());
+ GenTreeLclVarCommon* lclNode = srcAddr->AsLclVarCommon();
+
+ // Generate LEA instruction to load the LclVar address in RSI.
+ // Source is known to be on the stack. Use EA_PTRSIZE.
+ unsigned int offset = 0;
+ if (srcAddr->OperGet() == GT_LCL_FLD_ADDR)
+ {
+ offset = srcAddr->AsLclFld()->gtLclOffs;
+ }
+ getEmitter()->emitIns_R_S(INS_lea, EA_PTRSIZE, srcReg, lclNode->gtLclNum, offset);
+ }
+ else
+ {
+ assert(srcAddr->gtRegNum != REG_NA);
+ // Source is not known to be on the stack. Use EA_BYREF.
+ getEmitter()->emitIns_R_R(INS_mov, EA_BYREF, srcReg, srcAddr->gtRegNum);
+ }
+ }
+
+ if (sizeReg != REG_NA)
+ {
+ inst_RV_IV(INS_mov, sizeReg, size, EA_8BYTE);
+ }
+}
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+
+//------------------------------------------------------------------------
+// genConsumeBlockSize: Ensure that the block size is in the given register
+//
+// Arguments:
+// blkNode - The block node
+// sizeReg - The register into which the block's size should go
+//
+
+void CodeGen::genConsumeBlockSize(GenTreeBlk* blkNode, regNumber sizeReg)
+{
+ unsigned blockSize = blkNode->Size();
+ if (sizeReg != REG_NA)
+ {
+ if (blockSize != 0)
+ {
+ assert(blkNode->gtRsvdRegs == genRegMask(sizeReg));
+ genSetRegToIcon(sizeReg, blockSize);
+ }
+ else
+ {
+ noway_assert(blkNode->gtOper == GT_STORE_DYN_BLK);
+ genConsumeRegAndCopy(blkNode->AsDynBlk()->gtDynamicSize, sizeReg);
+ }
+ }
+}
+
+//------------------------------------------------------------------------
+// genConsumeBlockDst: Ensure that the block destination address is in its
+// allocated register.
+// Arguments:
+// blkNode - The block node
+//
+
+void CodeGen::genConsumeBlockDst(GenTreeBlk* blkNode)
+{
+ GenTree* dstAddr = blkNode->Addr();
+ genConsumeReg(dstAddr);
+}
+
+//------------------------------------------------------------------------
+// genConsumeBlockSrc: Ensure that the block source address is in its
+// allocated register if it is non-local.
+// Arguments:
+// blkNode - The block node
+//
+// Return Value:
+// Returns the source address node, if it is non-local,
+// and nullptr otherwise.
+
+GenTree* CodeGen::genConsumeBlockSrc(GenTreeBlk* blkNode)
+{
+ GenTree* src = blkNode->Data();
+ if (blkNode->OperIsCopyBlkOp())
+ {
+ // For a CopyBlk we need the address of the source.
+ if (src->OperGet() == GT_IND)
+ {
+ src = src->gtOp.gtOp1;
+ }
+ else
+ {
+ // This must be a local.
+ // For this case, there is no source address register, as it is a
+ // stack-based address.
+ assert(src->OperIsLocal());
+ return nullptr;
+ }
+ }
+ genConsumeReg(src);
+ return src;
+}
+
+//------------------------------------------------------------------------
+// genConsumeBlockOp: Ensure that the block's operands are enregistered
+// as needed.
+// Arguments:
+// blkNode - The block node
+//
+// Notes:
+// This ensures that the operands are consumed in the proper order to
+// obey liveness modeling.
+
+void CodeGen::genConsumeBlockOp(GenTreeBlk* blkNode, regNumber dstReg, regNumber srcReg, regNumber sizeReg)
+{
+ // We have to consume the registers, and perform any copies, in the actual execution order.
+    // The nominal order is: dst, src, size. However, this may have been changed
+ // with reverse flags on the blkNode and the setting of gtEvalSizeFirst in the case of a dynamic
+ // block size.
+ // Note that the register allocator ensures that the registers ON THE NODES will not interfere
+ // with one another if consumed (i.e. reloaded or moved to their ASSIGNED reg) in execution order.
+ // Further, it ensures that they will not interfere with one another if they are then copied
+ // to the REQUIRED register (if a fixed register requirement) in execution order. This requires,
+ // then, that we first consume all the operands, then do any necessary moves.
+
+ GenTree* dstAddr = blkNode->Addr();
+ GenTree* src = nullptr;
+ unsigned blockSize = blkNode->Size();
+ GenTree* size = nullptr;
+ bool evalSizeFirst = true;
+
+ if (blkNode->OperGet() == GT_STORE_DYN_BLK)
+ {
+ evalSizeFirst = blkNode->AsDynBlk()->gtEvalSizeFirst;
+ size = blkNode->AsDynBlk()->gtDynamicSize;
+ }
+
+    // First, consume all the sources in order.
+ if (evalSizeFirst)
+ {
+ genConsumeBlockSize(blkNode, sizeReg);
+ }
+ if (blkNode->IsReverseOp())
+ {
+ src = genConsumeBlockSrc(blkNode);
+ genConsumeBlockDst(blkNode);
+ }
+ else
+ {
+ genConsumeBlockDst(blkNode);
+ src = genConsumeBlockSrc(blkNode);
+ }
+ if (!evalSizeFirst)
+ {
+ genConsumeBlockSize(blkNode, sizeReg);
+ }
+ // Next, perform any necessary moves.
+ if (evalSizeFirst && (size != nullptr) && (size->gtRegNum != sizeReg))
+ {
+ inst_RV_RV(INS_mov, sizeReg, size->gtRegNum, size->TypeGet());
+ }
+ if (blkNode->IsReverseOp())
+ {
+ if ((src != nullptr) && (src->gtRegNum != srcReg))
+ {
+ inst_RV_RV(INS_mov, srcReg, src->gtRegNum, src->TypeGet());
+ }
+ if (dstAddr->gtRegNum != dstReg)
+ {
+ inst_RV_RV(INS_mov, dstReg, dstAddr->gtRegNum, dstAddr->TypeGet());
+ }
+ }
+ else
+ {
+ if (dstAddr->gtRegNum != dstReg)
+ {
+ inst_RV_RV(INS_mov, dstReg, dstAddr->gtRegNum, dstAddr->TypeGet());
+ }
+ if ((src != nullptr) && (src->gtRegNum != srcReg))
+ {
+ inst_RV_RV(INS_mov, srcReg, src->gtRegNum, src->TypeGet());
+ }
+ }
+ if (!evalSizeFirst && size != nullptr && (size->gtRegNum != sizeReg))
+ {
+ inst_RV_RV(INS_mov, sizeReg, size->gtRegNum, size->TypeGet());
+ }
+}
+
+//-------------------------------------------------------------------------
+// genProduceReg: do liveness update for register produced by the current
+// node in codegen.
+//
+// Arguments:
+// tree - Gentree node
+//
+// Return Value:
+// None.
+void CodeGen::genProduceReg(GenTree* tree)
+{
+ if (tree->gtFlags & GTF_SPILL)
+ {
+        // Code for a GT_COPY node gets generated as part of consuming regs by its parent.
+        // A GT_COPY node in turn produces a reg result and should never be marked to
+        // spill.
+        //
+        // Similarly, a GT_RELOAD node gets generated as part of consuming regs by its
+        // parent and should never be marked for spilling.
+ noway_assert(!tree->IsCopyOrReload());
+
+ if (genIsRegCandidateLocal(tree))
+ {
+ // Store local variable to its home location.
+ tree->gtFlags &= ~GTF_REG_VAL;
+ // Ensure that lclVar stores are typed correctly.
+ unsigned varNum = tree->gtLclVarCommon.gtLclNum;
+ assert(!compiler->lvaTable[varNum].lvNormalizeOnStore() ||
+ (tree->TypeGet() == genActualType(compiler->lvaTable[varNum].TypeGet())));
+ inst_TT_RV(ins_Store(tree->gtType, compiler->isSIMDTypeLocalAligned(varNum)), tree, tree->gtRegNum);
+ }
+ else
+ {
+ // In case of multi-reg call node, spill flag on call node
+ // indicates that one or more of its allocated regs need to
+ // be spilled. Call node needs to be further queried to
+ // know which of its result regs needs to be spilled.
+ if (tree->IsMultiRegCall())
+ {
+ GenTreeCall* call = tree->AsCall();
+ ReturnTypeDesc* retTypeDesc = call->GetReturnTypeDesc();
+ unsigned regCount = retTypeDesc->GetReturnRegCount();
+
+ for (unsigned i = 0; i < regCount; ++i)
+ {
+ unsigned flags = call->GetRegSpillFlagByIdx(i);
+ if ((flags & GTF_SPILL) != 0)
+ {
+ regNumber reg = call->GetRegNumByIdx(i);
+ call->SetInReg();
+ regSet.rsSpillTree(reg, call, i);
+ gcInfo.gcMarkRegSetNpt(genRegMask(reg));
+ }
+ }
+ }
+ else
+ {
+ tree->SetInReg();
+ regSet.rsSpillTree(tree->gtRegNum, tree);
+ gcInfo.gcMarkRegSetNpt(genRegMask(tree->gtRegNum));
+ }
+
+ tree->gtFlags |= GTF_SPILLED;
+ tree->gtFlags &= ~GTF_SPILL;
+
+ return;
+ }
+ }
+
+ genUpdateLife(tree);
+
+ // If we've produced a register, mark it as a pointer, as needed.
+ if (tree->gtHasReg())
+ {
+ // We only mark the register in the following cases:
+ // 1. It is not a register candidate local. In this case, we're producing a
+ // register from a local, but the local is not a register candidate. Thus,
+ // we must be loading it as a temp register, and any "last use" flag on
+ // the register wouldn't be relevant.
+ // 2. The register candidate local is going dead. There's no point to mark
+ // the register as live, with a GC pointer, if the variable is dead.
+ if (!genIsRegCandidateLocal(tree) || ((tree->gtFlags & GTF_VAR_DEATH) == 0))
+ {
+            // A multi-reg call node will produce more than one register result.
+            // Mark all the regs produced by the call node.
+ if (tree->IsMultiRegCall())
+ {
+ GenTreeCall* call = tree->AsCall();
+ ReturnTypeDesc* retTypeDesc = call->GetReturnTypeDesc();
+ unsigned regCount = retTypeDesc->GetReturnRegCount();
+
+ for (unsigned i = 0; i < regCount; ++i)
+ {
+ regNumber reg = call->GetRegNumByIdx(i);
+ var_types type = retTypeDesc->GetReturnRegType(i);
+ gcInfo.gcMarkRegPtrVal(reg, type);
+ }
+ }
+ else if (tree->IsCopyOrReloadOfMultiRegCall())
+ {
+                // We should never see a reload of a multi-reg call here
+                // because GT_RELOAD gets generated in the reg-consuming path.
+ noway_assert(tree->OperGet() == GT_COPY);
+
+ // A multi-reg GT_COPY node produces those regs to which
+ // copy has taken place.
+ GenTreeCopyOrReload* copy = tree->AsCopyOrReload();
+ GenTreeCall* call = copy->gtGetOp1()->AsCall();
+ ReturnTypeDesc* retTypeDesc = call->GetReturnTypeDesc();
+ unsigned regCount = retTypeDesc->GetReturnRegCount();
+
+ for (unsigned i = 0; i < regCount; ++i)
+ {
+ var_types type = retTypeDesc->GetReturnRegType(i);
+ regNumber fromReg = call->GetRegNumByIdx(i);
+ regNumber toReg = copy->GetRegNumByIdx(i);
+
+ if (toReg != REG_NA)
+ {
+ gcInfo.gcMarkRegPtrVal(toReg, type);
+ }
+ }
+ }
+ else
+ {
+ gcInfo.gcMarkRegPtrVal(tree->gtRegNum, tree->TypeGet());
+ }
+ }
+ }
+ tree->SetInReg();
+}
+
+// transfer gc/byref status of src reg to dst reg
+void CodeGen::genTransferRegGCState(regNumber dst, regNumber src)
+{
+ regMaskTP srcMask = genRegMask(src);
+ regMaskTP dstMask = genRegMask(dst);
+
+ if (gcInfo.gcRegGCrefSetCur & srcMask)
+ {
+ gcInfo.gcMarkRegSetGCref(dstMask);
+ }
+ else if (gcInfo.gcRegByrefSetCur & srcMask)
+ {
+ gcInfo.gcMarkRegSetByref(dstMask);
+ }
+ else
+ {
+ gcInfo.gcMarkRegSetNpt(dstMask);
+ }
+}
+
+// generates an ip-relative call or indirect call via reg ('call reg')
+// pass in 'addr' for a relative call or 'base' for a indirect register call
+// methHnd - optional, only used for pretty printing
+// retSize - emitter type of return for GC purposes, should be EA_BYREF, EA_GCREF, or EA_PTRSIZE(not GC)
+void CodeGen::genEmitCall(int callType,
+ CORINFO_METHOD_HANDLE methHnd,
+ INDEBUG_LDISASM_COMMA(CORINFO_SIG_INFO* sigInfo) void* addr X86_ARG(ssize_t argSize),
+ emitAttr retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(emitAttr secondRetSize),
+ IL_OFFSETX ilOffset,
+ regNumber base,
+ bool isJump,
+ bool isNoGC)
+{
+#if !defined(_TARGET_X86_)
+ ssize_t argSize = 0;
+#endif // !defined(_TARGET_X86_)
+ getEmitter()->emitIns_Call(emitter::EmitCallType(callType), methHnd, INDEBUG_LDISASM_COMMA(sigInfo) addr, argSize,
+ retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize), gcInfo.gcVarPtrSetCur,
+ gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur, ilOffset, base, REG_NA, 0, 0, isJump,
+ emitter::emitNoGChelper(compiler->eeGetHelperNum(methHnd)));
+}
+
+// generates an indirect call via addressing mode (call []) given an indir node
+// methHnd - optional, only used for pretty printing
+// retSize - emitter type of return for GC purposes, should be EA_BYREF, EA_GCREF, or EA_PTRSIZE(not GC)
+void CodeGen::genEmitCall(int callType,
+ CORINFO_METHOD_HANDLE methHnd,
+ INDEBUG_LDISASM_COMMA(CORINFO_SIG_INFO* sigInfo) GenTreeIndir* indir X86_ARG(ssize_t argSize),
+ emitAttr retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(emitAttr secondRetSize),
+ IL_OFFSETX ilOffset)
+{
+#if !defined(_TARGET_X86_)
+ ssize_t argSize = 0;
+#endif // !defined(_TARGET_X86_)
+ genConsumeAddress(indir->Addr());
+
+ getEmitter()->emitIns_Call(emitter::EmitCallType(callType), methHnd, INDEBUG_LDISASM_COMMA(sigInfo) nullptr,
+ argSize, retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize),
+ gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur, ilOffset,
+ indir->Base() ? indir->Base()->gtRegNum : REG_NA,
+ indir->Index() ? indir->Index()->gtRegNum : REG_NA, indir->Scale(), indir->Offset());
+}
+
+//------------------------------------------------------------------------
+// genStoreInd: Generate code for a GT_STOREIND node.
+//
+// Arguments:
+// treeNode - The GT_STOREIND node for which to generate code.
+//
+// Return Value:
+// none
+
+void CodeGen::genStoreInd(GenTreePtr node)
+{
+ assert(node->OperGet() == GT_STOREIND);
+
+#ifdef FEATURE_SIMD
+ // Storing Vector3 of size 12 bytes through indirection
+ if (node->TypeGet() == TYP_SIMD12)
+ {
+ genStoreIndTypeSIMD12(node);
+ return;
+ }
+#endif // FEATURE_SIMD
+
+ GenTreeStoreInd* storeInd = node->AsStoreInd();
+ GenTree* data = storeInd->Data();
+ GenTree* addr = storeInd->Addr();
+ var_types targetType = storeInd->TypeGet();
+
+ assert(!varTypeIsFloating(targetType) || (targetType == data->TypeGet()));
+
+ GCInfo::WriteBarrierForm writeBarrierForm = gcInfo.gcIsWriteBarrierCandidate(storeInd, data);
+ if (writeBarrierForm != GCInfo::WBF_NoBarrier)
+ {
+ // data and addr must be in registers.
+ // Consume both registers so that any copies of interfering registers are taken care of.
+ genConsumeOperands(storeInd->AsOp());
+
+ if (genEmitOptimizedGCWriteBarrier(writeBarrierForm, addr, data))
+ {
+ return;
+ }
+
+ // At this point, we should not have any interference.
+ // That is, 'data' must not be in REG_ARG_0, as that is where 'addr' must go.
+ noway_assert(data->gtRegNum != REG_ARG_0);
+
+ // addr goes in REG_ARG_0
+ if (addr->gtRegNum != REG_ARG_0)
+ {
+ inst_RV_RV(INS_mov, REG_ARG_0, addr->gtRegNum, addr->TypeGet());
+ }
+
+ // data goes in REG_ARG_1
+ if (data->gtRegNum != REG_ARG_1)
+ {
+ inst_RV_RV(INS_mov, REG_ARG_1, data->gtRegNum, data->TypeGet());
+ }
+
+ genGCWriteBarrier(storeInd, writeBarrierForm);
+ }
+ else
+ {
+ bool reverseOps = ((storeInd->gtFlags & GTF_REVERSE_OPS) != 0);
+ bool dataIsUnary = false;
+ bool isRMWMemoryOp = storeInd->IsRMWMemoryOp();
+ GenTree* rmwSrc = nullptr;
+
+ // We must consume the operands in the proper execution order, so that liveness is
+ // updated appropriately.
+ if (!reverseOps)
+ {
+ genConsumeAddress(addr);
+ }
+
+        // If storeInd represents an RMW memory op, then its data is a non-leaf node marked as contained,
+        // and the non-indir operand of data is the source of the RMW memory op.
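+        // For example, for [addr] = [addr] + icon we emit a single "add dword ptr [addr], icon"
+        // rather than a separate load, add, and store.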
+ if (isRMWMemoryOp)
+ {
+ assert(data->isContained() && !data->OperIsLeaf());
+
+ GenTreePtr rmwDst = nullptr;
+
+ dataIsUnary = (GenTree::OperIsUnary(data->OperGet()) != 0);
+ if (!dataIsUnary)
+ {
+ if (storeInd->IsRMWDstOp1())
+ {
+ rmwDst = data->gtGetOp1();
+ rmwSrc = data->gtGetOp2();
+ }
+ else
+ {
+ assert(storeInd->IsRMWDstOp2());
+ rmwDst = data->gtGetOp2();
+ rmwSrc = data->gtGetOp1();
+ }
+
+ genConsumeRegs(rmwSrc);
+ }
+ else
+ {
+                // *(p) = oper *(p): Here addr = p, rmwSrc = rmwDst = *(p), i.e. GT_IND(p)
+ // For unary RMW ops, src and dst of RMW memory op is the same. Lower
+ // clears operand counts on rmwSrc and we don't need to perform a
+ // genConsumeReg() on it.
+ assert(storeInd->IsRMWDstOp1());
+ rmwSrc = data->gtGetOp1();
+ rmwDst = data->gtGetOp1();
+ assert(rmwSrc->isContained());
+ }
+
+ assert(rmwSrc != nullptr);
+ assert(rmwDst != nullptr);
+ assert(Lowering::IndirsAreEquivalent(rmwDst, storeInd));
+ }
+ else
+ {
+ genConsumeRegs(data);
+ }
+
+ if (reverseOps)
+ {
+ genConsumeAddress(addr);
+ }
+
+ if (isRMWMemoryOp)
+ {
+ if (dataIsUnary)
+ {
+ // generate code for unary RMW memory ops like neg/not
+ getEmitter()->emitInsRMW(genGetInsForOper(data->OperGet(), data->TypeGet()), emitTypeSize(storeInd),
+ storeInd);
+ }
+ else
+ {
+ if (data->OperIsShiftOrRotate())
+ {
+ // Generate code for shift RMW memory ops.
+ // The data address needs to be op1 (it must be [addr] = [addr] <shift> <amount>, not [addr] =
+ // <amount> <shift> [addr]).
+ assert(storeInd->IsRMWDstOp1());
+ assert(rmwSrc == data->gtGetOp2());
+ genCodeForShiftRMW(storeInd);
+ }
+ else
+ {
+ // generate code for remaining binary RMW memory ops like add/sub/and/or/xor
+ getEmitter()->emitInsRMW(genGetInsForOper(data->OperGet(), data->TypeGet()), emitTypeSize(storeInd),
+ storeInd, rmwSrc);
+ }
+ }
+ }
+ else
+ {
+ getEmitter()->emitInsMov(ins_Store(data->TypeGet()), emitTypeSize(storeInd), storeInd);
+ }
+ }
+}
+
+//------------------------------------------------------------------------
+// genEmitOptimizedGCWriteBarrier: Generate write barrier store using the optimized
+// helper functions.
+//
+// Arguments:
+// writeBarrierForm - the write barrier form to use
+// addr - the address at which to do the store
+// data - the data to store
+//
+// Return Value:
+// true if an optimized write barrier form was used, false if not. If this
+// function returns false, the caller must emit a "standard" write barrier.
+
+bool CodeGen::genEmitOptimizedGCWriteBarrier(GCInfo::WriteBarrierForm writeBarrierForm, GenTree* addr, GenTree* data)
+{
+ assert(writeBarrierForm != GCInfo::WBF_NoBarrier);
+
+#if defined(_TARGET_X86_) && NOGC_WRITE_BARRIERS
+ bool useOptimizedWriteBarriers = true;
+
+#ifdef DEBUG
+ useOptimizedWriteBarriers =
+ (writeBarrierForm != GCInfo::WBF_NoBarrier_CheckNotHeapInDebug); // This one is always a call to a C++ method.
+#endif
+
+ if (!useOptimizedWriteBarriers)
+ {
+ return false;
+ }
+
+ const static int regToHelper[2][8] = {
+ // If the target is known to be in managed memory
+ {
+ CORINFO_HELP_ASSIGN_REF_EAX, CORINFO_HELP_ASSIGN_REF_ECX, -1, CORINFO_HELP_ASSIGN_REF_EBX, -1,
+ CORINFO_HELP_ASSIGN_REF_EBP, CORINFO_HELP_ASSIGN_REF_ESI, CORINFO_HELP_ASSIGN_REF_EDI,
+ },
+
+ // Don't know if the target is in managed memory
+ {
+ CORINFO_HELP_CHECKED_ASSIGN_REF_EAX, CORINFO_HELP_CHECKED_ASSIGN_REF_ECX, -1,
+ CORINFO_HELP_CHECKED_ASSIGN_REF_EBX, -1, CORINFO_HELP_CHECKED_ASSIGN_REF_EBP,
+ CORINFO_HELP_CHECKED_ASSIGN_REF_ESI, CORINFO_HELP_CHECKED_ASSIGN_REF_EDI,
+ },
+ };
+
+ noway_assert(regToHelper[0][REG_EAX] == CORINFO_HELP_ASSIGN_REF_EAX);
+ noway_assert(regToHelper[0][REG_ECX] == CORINFO_HELP_ASSIGN_REF_ECX);
+ noway_assert(regToHelper[0][REG_EBX] == CORINFO_HELP_ASSIGN_REF_EBX);
+ noway_assert(regToHelper[0][REG_ESP] == -1);
+ noway_assert(regToHelper[0][REG_EBP] == CORINFO_HELP_ASSIGN_REF_EBP);
+ noway_assert(regToHelper[0][REG_ESI] == CORINFO_HELP_ASSIGN_REF_ESI);
+ noway_assert(regToHelper[0][REG_EDI] == CORINFO_HELP_ASSIGN_REF_EDI);
+
+ noway_assert(regToHelper[1][REG_EAX] == CORINFO_HELP_CHECKED_ASSIGN_REF_EAX);
+ noway_assert(regToHelper[1][REG_ECX] == CORINFO_HELP_CHECKED_ASSIGN_REF_ECX);
+ noway_assert(regToHelper[1][REG_EBX] == CORINFO_HELP_CHECKED_ASSIGN_REF_EBX);
+ noway_assert(regToHelper[1][REG_ESP] == -1);
+ noway_assert(regToHelper[1][REG_EBP] == CORINFO_HELP_CHECKED_ASSIGN_REF_EBP);
+ noway_assert(regToHelper[1][REG_ESI] == CORINFO_HELP_CHECKED_ASSIGN_REF_ESI);
+ noway_assert(regToHelper[1][REG_EDI] == CORINFO_HELP_CHECKED_ASSIGN_REF_EDI);
+
+ regNumber reg = data->gtRegNum;
+ noway_assert((reg != REG_ESP) && (reg != REG_WRITE_BARRIER));
+
+ // Generate the following code:
+ // lea edx, addr
+ // call write_barrier_helper_reg
+
+    // addr goes in REG_WRITE_BARRIER
+    if (addr->gtRegNum != REG_WRITE_BARRIER) // REVIEW: can it ever not already be in this register?
+ {
+ inst_RV_RV(INS_mov, REG_WRITE_BARRIER, addr->gtRegNum, addr->TypeGet());
+ }
+
+ unsigned tgtAnywhere = 0;
+ if (writeBarrierForm != GCInfo::WBF_BarrierUnchecked)
+ {
+ tgtAnywhere = 1;
+ }
+
+ // We might want to call a modified version of genGCWriteBarrier() to get the benefit of
+ // the FEATURE_COUNT_GC_WRITE_BARRIERS code there, but that code doesn't look like it works
+ // with rationalized RyuJIT IR. So, for now, just emit the helper call directly here.
+
+ genEmitHelperCall(regToHelper[tgtAnywhere][reg],
+ 0, // argSize
+ EA_PTRSIZE); // retSize
+
+ return true;
+#else // !defined(_TARGET_X86_) || !NOGC_WRITE_BARRIERS
+ return false;
+#endif // !defined(_TARGET_X86_) || !NOGC_WRITE_BARRIERS
+}
+
+// Produce code for a GT_CALL node
+void CodeGen::genCallInstruction(GenTreePtr node)
+{
+ GenTreeCall* call = node->AsCall();
+ assert(call->gtOper == GT_CALL);
+
+ gtCallTypes callType = (gtCallTypes)call->gtCallType;
+
+ IL_OFFSETX ilOffset = BAD_IL_OFFSET;
+
+ // all virtuals should have been expanded into a control expression
+ assert(!call->IsVirtual() || call->gtControlExpr || call->gtCallAddr);
+
+ // Consume all the arg regs
+ for (GenTreePtr list = call->gtCallLateArgs; list; list = list->MoveNext())
+ {
+ assert(list->IsList());
+
+ GenTreePtr argNode = list->Current();
+
+ fgArgTabEntryPtr curArgTabEntry = compiler->gtArgEntryByNode(call, argNode->gtSkipReloadOrCopy());
+ assert(curArgTabEntry);
+
+ if (curArgTabEntry->regNum == REG_STK)
+ {
+ continue;
+ }
+
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ // Deal with multi register passed struct args.
+ if (argNode->OperGet() == GT_LIST)
+ {
+ GenTreeArgList* argListPtr = argNode->AsArgList();
+ unsigned iterationNum = 0;
+ for (; argListPtr != nullptr; argListPtr = argListPtr->Rest(), iterationNum++)
+ {
+ GenTreePtr putArgRegNode = argListPtr->gtOp.gtOp1;
+ assert(putArgRegNode->gtOper == GT_PUTARG_REG);
+ regNumber argReg = REG_NA;
+
+ if (iterationNum == 0)
+ {
+ argReg = curArgTabEntry->regNum;
+ }
+ else
+ {
+ assert(iterationNum == 1);
+ argReg = curArgTabEntry->otherRegNum;
+ }
+
+ genConsumeReg(putArgRegNode);
+
+ // Validate the putArgRegNode has the right type.
+ assert(putArgRegNode->TypeGet() ==
+ compiler->GetTypeFromClassificationAndSizes(curArgTabEntry->structDesc
+ .eightByteClassifications[iterationNum],
+ curArgTabEntry->structDesc
+ .eightByteSizes[iterationNum]));
+ if (putArgRegNode->gtRegNum != argReg)
+ {
+ inst_RV_RV(ins_Move_Extend(putArgRegNode->TypeGet(), putArgRegNode->InReg()), argReg,
+ putArgRegNode->gtRegNum);
+ }
+ }
+ }
+ else
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+ {
+ regNumber argReg = curArgTabEntry->regNum;
+ genConsumeReg(argNode);
+ if (argNode->gtRegNum != argReg)
+ {
+ inst_RV_RV(ins_Move_Extend(argNode->TypeGet(), argNode->InReg()), argReg, argNode->gtRegNum);
+ }
+ }
+
+#if FEATURE_VARARG
+        // In the case of a varargs call, the ABI dictates that if we have floating point args,
+        // we must pass the enregistered arguments in both the integer and floating point
+        // registers, so let's do that.
+ if (call->IsVarargs() && varTypeIsFloating(argNode))
+ {
+ regNumber targetReg = compiler->getCallArgIntRegister(argNode->gtRegNum);
+ instruction ins = ins_CopyFloatToInt(argNode->TypeGet(), TYP_LONG);
+ inst_RV_RV(ins, argNode->gtRegNum, targetReg);
+ }
+#endif // FEATURE_VARARG
+ }
+
+#if defined(_TARGET_X86_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ // The call will pop its arguments.
+ // for each putarg_stk:
+ ssize_t stackArgBytes = 0;
+ GenTreePtr args = call->gtCallArgs;
+ while (args)
+ {
+ GenTreePtr arg = args->gtOp.gtOp1;
+ if (arg->OperGet() != GT_ARGPLACE && !(arg->gtFlags & GTF_LATE_ARG))
+ {
+#if defined(_TARGET_X86_)
+ assert((arg->OperGet() == GT_PUTARG_STK) || (arg->OperGet() == GT_LONG));
+ if (arg->OperGet() == GT_LONG)
+ {
+ assert((arg->gtGetOp1()->OperGet() == GT_PUTARG_STK) && (arg->gtGetOp2()->OperGet() == GT_PUTARG_STK));
+ }
+#endif // defined(_TARGET_X86_)
+
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ if (genActualType(arg->TypeGet()) == TYP_STRUCT)
+ {
+ assert(arg->OperGet() == GT_PUTARG_STK);
+
+ GenTreeObj* obj = arg->gtGetOp1()->AsObj();
+ stackArgBytes = compiler->info.compCompHnd->getClassSize(obj->gtClass);
+ }
+ else
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+
+ stackArgBytes += genTypeSize(genActualType(arg->TypeGet()));
+ }
+ args = args->gtOp.gtOp2;
+ }
+#endif // defined(_TARGET_X86_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+
+ // Insert a null check on "this" pointer if asked.
+ if (call->NeedsNullCheck())
+ {
+ const regNumber regThis = genGetThisArgReg(call);
+ getEmitter()->emitIns_AR_R(INS_cmp, EA_4BYTE, regThis, regThis, 0);
+ }
+
+ // Either gtControlExpr != null or gtCallAddr != null or it is a direct non-virtual call to a user or helper method.
+ CORINFO_METHOD_HANDLE methHnd;
+ GenTree* target = call->gtControlExpr;
+ if (callType == CT_INDIRECT)
+ {
+ assert(target == nullptr);
+ target = call->gtCall.gtCallAddr;
+ methHnd = nullptr;
+ }
+ else
+ {
+ methHnd = call->gtCallMethHnd;
+ }
+
+ CORINFO_SIG_INFO* sigInfo = nullptr;
+#ifdef DEBUG
+ // Pass the call signature information down into the emitter so the emitter can associate
+ // native call sites with the signatures they were generated from.
+ if (callType != CT_HELPER)
+ {
+ sigInfo = call->callSig;
+ }
+#endif // DEBUG
+
+    // If this is a fast tail call, then we are done. In this case we have set up the args (both reg args
+    // and stack args in the incoming arg area) and the call target in rax. The epilog sequence will
+    // generate "jmp rax".
+ if (call->IsFastTailCall())
+ {
+ // Don't support fast tail calling JIT helpers
+ assert(callType != CT_HELPER);
+
+ // Fast tail calls materialize call target either in gtControlExpr or in gtCallAddr.
+ assert(target != nullptr);
+
+ genConsumeReg(target);
+ if (target->gtRegNum != REG_RAX)
+ {
+ inst_RV_RV(INS_mov, REG_RAX, target->gtRegNum);
+ }
+ return;
+ }
+
+    // For a pinvoke to unmanaged code we emit a label to clear
+ // the GC pointer state before the callsite.
+ // We can't utilize the typical lazy killing of GC pointers
+ // at (or inside) the callsite.
+ if (call->IsUnmanaged())
+ {
+ genDefineTempLabel(genCreateTempLabel());
+ }
+
+ // Determine return value size(s).
+ ReturnTypeDesc* retTypeDesc = call->GetReturnTypeDesc();
+ emitAttr retSize = EA_PTRSIZE;
+ emitAttr secondRetSize = EA_UNKNOWN;
+
+ if (call->HasMultiRegRetVal())
+ {
+ retSize = emitTypeSize(retTypeDesc->GetReturnRegType(0));
+ secondRetSize = emitTypeSize(retTypeDesc->GetReturnRegType(1));
+ }
+ else
+ {
+ assert(!varTypeIsStruct(call));
+
+ if (call->gtType == TYP_REF || call->gtType == TYP_ARRAY)
+ {
+ retSize = EA_GCREF;
+ }
+ else if (call->gtType == TYP_BYREF)
+ {
+ retSize = EA_BYREF;
+ }
+ }
+
+ bool fPossibleSyncHelperCall = false;
+ CorInfoHelpFunc helperNum = CORINFO_HELP_UNDEF;
+
+#ifdef DEBUGGING_SUPPORT
+ // We need to propagate the IL offset information to the call instruction, so we can emit
+ // an IL to native mapping record for the call, to support managed return value debugging.
+ // We don't want tail call helper calls that were converted from normal calls to get a record,
+ // so we skip this hash table lookup logic in that case.
+ if (compiler->opts.compDbgInfo && compiler->genCallSite2ILOffsetMap != nullptr && !call->IsTailCall())
+ {
+ (void)compiler->genCallSite2ILOffsetMap->Lookup(call, &ilOffset);
+ }
+#endif // DEBUGGING_SUPPORT
+
+#if defined(_TARGET_X86_)
+ // If the callee pops the arguments, we pass a positive value as the argSize, and the emitter will
+ // adjust its stack level accordingly.
+ // If the caller needs to explicitly pop its arguments, we must pass a negative value, and then do the
+ // pop when we're done.
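+    //
+    // For example (a sketch): with 8 bytes of outgoing stack args, a callee-pops call passes
+    // argSizeForEmitter = 8 and the emitter adjusts its stack level at the call site; a caller-pops
+    // (GTF_CALL_POP_ARGS) call passes -8, and we pop the args ourselves via genAdjustSP() after
+    // the call (see the GTF_CALL_POP_ARGS check near the end of this function).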
+ ssize_t argSizeForEmitter = stackArgBytes;
+ if ((call->gtFlags & GTF_CALL_POP_ARGS) != 0)
+ {
+ argSizeForEmitter = -stackArgBytes;
+ }
+
+#endif // defined(_TARGET_X86_)
+
+ if (target != nullptr)
+ {
+ if (target->isContainedIndir())
+ {
+ if (target->AsIndir()->HasBase() && target->AsIndir()->Base()->isContainedIntOrIImmed())
+ {
+ // Note that if gtControlExpr is an indir of an absolute address, we mark it as
+ // contained only if it can be encoded as PC-relative offset.
+ assert(target->AsIndir()->Base()->AsIntConCommon()->FitsInAddrBase(compiler));
+
+ genEmitCall(emitter::EC_FUNC_TOKEN_INDIR, methHnd,
+ INDEBUG_LDISASM_COMMA(sigInfo)(void*) target->AsIndir()
+ ->Base()
+ ->AsIntConCommon()
+ ->IconValue() X86_ARG(argSizeForEmitter),
+ retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize), ilOffset);
+ }
+ else
+ {
+ genEmitCall(emitter::EC_INDIR_ARD, methHnd,
+ INDEBUG_LDISASM_COMMA(sigInfo) target->AsIndir() X86_ARG(argSizeForEmitter),
+ retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize), ilOffset);
+ }
+ }
+ else
+ {
+ // We have already generated code for gtControlExpr evaluating it into a register.
+ // We just need to emit "call reg" in this case.
+ assert(genIsValidIntReg(target->gtRegNum));
+ genEmitCall(emitter::EC_INDIR_R, methHnd,
+ INDEBUG_LDISASM_COMMA(sigInfo) nullptr // addr
+ X86_ARG(argSizeForEmitter),
+ retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize), ilOffset, genConsumeReg(target));
+ }
+ }
+#ifdef FEATURE_READYTORUN_COMPILER
+ else if (call->gtEntryPoint.addr != nullptr)
+ {
+ genEmitCall((call->gtEntryPoint.accessType == IAT_VALUE) ? emitter::EC_FUNC_TOKEN
+ : emitter::EC_FUNC_TOKEN_INDIR,
+ methHnd, INDEBUG_LDISASM_COMMA(sigInfo)(void*) call->gtEntryPoint.addr X86_ARG(argSizeForEmitter),
+ retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize), ilOffset);
+ }
+#endif
+ else
+ {
+ // Generate a direct call to a non-virtual user defined or helper method
+ assert(callType == CT_HELPER || callType == CT_USER_FUNC);
+
+ void* addr = nullptr;
+ if (callType == CT_HELPER)
+ {
+ // Direct call to a helper method.
+ helperNum = compiler->eeGetHelperNum(methHnd);
+ noway_assert(helperNum != CORINFO_HELP_UNDEF);
+
+ void* pAddr = nullptr;
+ addr = compiler->compGetHelperFtn(helperNum, (void**)&pAddr);
+
+ if (addr == nullptr)
+ {
+ addr = pAddr;
+ }
+
+ // tracking of region protected by the monitor in synchronized methods
+ if (compiler->info.compFlags & CORINFO_FLG_SYNCH)
+ {
+ fPossibleSyncHelperCall = true;
+ }
+ }
+ else
+ {
+ // Direct call to a non-virtual user function.
+ addr = call->gtDirectCallAddress;
+ }
+
+ // Non-virtual direct calls to known addresses
+ genEmitCall(emitter::EC_FUNC_TOKEN, methHnd, INDEBUG_LDISASM_COMMA(sigInfo) addr X86_ARG(argSizeForEmitter),
+ retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize), ilOffset);
+ }
+
+ // if it was a pinvoke we may have needed to get the address of a label
+ if (genPendingCallLabel)
+ {
+ assert(call->IsUnmanaged());
+ genDefineTempLabel(genPendingCallLabel);
+ genPendingCallLabel = nullptr;
+ }
+
+#if defined(_TARGET_X86_)
+ // The call will pop its arguments.
+ genStackLevel -= stackArgBytes;
+#endif // defined(_TARGET_X86_)
+
+ // Update GC info:
+ // All Callee arg registers are trashed and no longer contain any GC pointers.
+ // TODO-XArch-Bug?: As a matter of fact shouldn't we be killing all of callee trashed regs here?
+ // For now we will assert that other than arg regs gc ref/byref set doesn't contain any other
+ // registers from RBM_CALLEE_TRASH.
+ assert((gcInfo.gcRegGCrefSetCur & (RBM_CALLEE_TRASH & ~RBM_ARG_REGS)) == 0);
+ assert((gcInfo.gcRegByrefSetCur & (RBM_CALLEE_TRASH & ~RBM_ARG_REGS)) == 0);
+ gcInfo.gcRegGCrefSetCur &= ~RBM_ARG_REGS;
+ gcInfo.gcRegByrefSetCur &= ~RBM_ARG_REGS;
+
+ var_types returnType = call->TypeGet();
+ if (returnType != TYP_VOID)
+ {
+#ifdef _TARGET_X86_
+ if (varTypeIsFloating(returnType))
+ {
+ // Spill the value from the fp stack.
+ // Then, load it into the target register.
+ call->gtFlags |= GTF_SPILL;
+ regSet.rsSpillFPStack(call);
+ call->gtFlags |= GTF_SPILLED;
+ call->gtFlags &= ~GTF_SPILL;
+ }
+ else
+#endif // _TARGET_X86_
+ {
+ regNumber returnReg;
+
+ if (call->HasMultiRegRetVal())
+ {
+ assert(retTypeDesc != nullptr);
+ unsigned regCount = retTypeDesc->GetReturnRegCount();
+
+ // If regs allocated to call node are different from ABI return
+ // regs in which the call has returned its result, move the result
+ // to regs allocated to call node.
+ for (unsigned i = 0; i < regCount; ++i)
+ {
+ var_types regType = retTypeDesc->GetReturnRegType(i);
+ returnReg = retTypeDesc->GetABIReturnReg(i);
+ regNumber allocatedReg = call->GetRegNumByIdx(i);
+ if (returnReg != allocatedReg)
+ {
+ inst_RV_RV(ins_Copy(regType), allocatedReg, returnReg, regType);
+ }
+ }
+
+#ifdef FEATURE_SIMD
+ // A Vector3 return value is stored in xmm0 and xmm1.
+ // RyuJIT assumes that the upper unused bits of xmm1 are cleared but
+ // the native compiler doesn't guarantee it.
+ if (returnType == TYP_SIMD12)
+ {
+ returnReg = retTypeDesc->GetABIReturnReg(1);
+ // Clear the upper 32 bits by two shift instructions.
+ // retReg = retReg << 96
+ // retReg = retReg >> 96
+ getEmitter()->emitIns_R_I(INS_pslldq, emitActualTypeSize(TYP_SIMD12), returnReg, 12);
+ getEmitter()->emitIns_R_I(INS_psrldq, emitActualTypeSize(TYP_SIMD12), returnReg, 12);
+ }
+#endif // FEATURE_SIMD
+ }
+ else
+ {
+#ifdef _TARGET_X86_
+ if (call->IsHelperCall(compiler, CORINFO_HELP_INIT_PINVOKE_FRAME))
+ {
+ // The x86 CORINFO_HELP_INIT_PINVOKE_FRAME helper uses a custom calling convention that returns with
+ // TCB in REG_PINVOKE_TCB. AMD64/ARM64 use the standard calling convention. fgMorphCall() sets the
+ // correct argument registers.
+ returnReg = REG_PINVOKE_TCB;
+ }
+ else
+#endif // _TARGET_X86_
+ if (varTypeIsFloating(returnType))
+ {
+ returnReg = REG_FLOATRET;
+ }
+ else
+ {
+ returnReg = REG_INTRET;
+ }
+
+ if (call->gtRegNum != returnReg)
+ {
+ inst_RV_RV(ins_Copy(returnType), call->gtRegNum, returnReg, returnType);
+ }
+ }
+
+ genProduceReg(call);
+ }
+ }
+
+ // If there is nothing next, that means the result is thrown away, so this value is not live.
+ // However, for minopts or debuggable code, we keep it live to support managed return value debugging.
+ if ((call->gtNext == nullptr) && !compiler->opts.MinOpts() && !compiler->opts.compDbgCode)
+ {
+ gcInfo.gcMarkRegSetNpt(RBM_INTRET);
+ }
+
+#if defined(_TARGET_X86_)
+ //-------------------------------------------------------------------------
+ // Create a label for tracking of region protected by the monitor in synchronized methods.
+ // This needs to be here, rather than above where fPossibleSyncHelperCall is set,
+ // so the GC state vars have been updated before creating the label.
+
+ if (fPossibleSyncHelperCall)
+ {
+ switch (helperNum)
+ {
+ case CORINFO_HELP_MON_ENTER:
+ case CORINFO_HELP_MON_ENTER_STATIC:
+ noway_assert(compiler->syncStartEmitCookie == NULL);
+ compiler->syncStartEmitCookie =
+ getEmitter()->emitAddLabel(gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur);
+ noway_assert(compiler->syncStartEmitCookie != NULL);
+ break;
+ case CORINFO_HELP_MON_EXIT:
+ case CORINFO_HELP_MON_EXIT_STATIC:
+ noway_assert(compiler->syncEndEmitCookie == NULL);
+ compiler->syncEndEmitCookie =
+ getEmitter()->emitAddLabel(gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur);
+ noway_assert(compiler->syncEndEmitCookie != NULL);
+ break;
+ default:
+ break;
+ }
+ }
+
+ // Is the caller supposed to pop the arguments?
+ if (((call->gtFlags & GTF_CALL_POP_ARGS) != 0) && (stackArgBytes != 0))
+ {
+ genAdjustSP(stackArgBytes);
+ }
+#endif // _TARGET_X86_
+}
+
+// Produce code for a GT_JMP node.
+// The arguments of the caller need to be transferred to the callee before exiting the caller.
+// The actual jump to the callee is generated as part of the caller's epilog sequence.
+// Therefore the codegen for GT_JMP is to ensure that the callee arguments are correctly set up.
+void CodeGen::genJmpMethod(GenTreePtr jmp)
+{
+ assert(jmp->OperGet() == GT_JMP);
+ assert(compiler->compJmpOpUsed);
+
+ // If no arguments, nothing to do
+ if (compiler->info.compArgsCount == 0)
+ {
+ return;
+ }
+
+ // Make sure register arguments are in their initial registers
+ // and stack arguments are put back as well.
+ unsigned varNum;
+ LclVarDsc* varDsc;
+
+    // First move any enregistered stack arguments back to the stack.
+    // At the same time, any reg arg not in the correct register is moved back to its stack location.
+    //
+    // We are not strictly required to spill reg args that are not in the desired register for a jmp call,
+    // but that would require us to deal with circularity while moving values around. Spilling
+    // to the stack keeps the implementation simple, which is not a bad trade-off given that jmp calls
+    // are not frequent.
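+    //
+    // For instance (a sketch), a register argument that is currently live in some register other than
+    // its incoming arg register is simply stored back to its home stack slot here:
+    //     mov  [V02's stack home], currentReg
+    // and is then reloaded into the incoming arg register by the second pass further below.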
+ for (varNum = 0; (varNum < compiler->info.compArgsCount); varNum++)
+ {
+ varDsc = compiler->lvaTable + varNum;
+
+ if (varDsc->lvPromoted)
+ {
+ noway_assert(varDsc->lvFieldCnt == 1); // We only handle one field here
+
+ unsigned fieldVarNum = varDsc->lvFieldLclStart;
+ varDsc = compiler->lvaTable + fieldVarNum;
+ }
+ noway_assert(varDsc->lvIsParam);
+
+ if (varDsc->lvIsRegArg && (varDsc->lvRegNum != REG_STK))
+ {
+            // Skip reg args that are already in the right register for the jmp call.
+ // If not, we will spill such args to their stack locations.
+ //
+ // If we need to generate a tail call profiler hook, then spill all
+ // arg regs to free them up for the callback.
+ if (!compiler->compIsProfilerHookNeeded() && (varDsc->lvRegNum == varDsc->lvArgReg))
+ {
+ continue;
+ }
+ }
+ else if (varDsc->lvRegNum == REG_STK)
+ {
+            // Skip args that are currently living on the stack.
+ continue;
+ }
+
+ // If we came here it means either a reg argument not in the right register or
+ // a stack argument currently living in a register. In either case the following
+ // assert should hold.
+ assert(varDsc->lvRegNum != REG_STK);
+
+ var_types loadType = varDsc->lvaArgType();
+ getEmitter()->emitIns_S_R(ins_Store(loadType), emitTypeSize(loadType), varDsc->lvRegNum, varNum, 0);
+
+ // Update lvRegNum life and GC info to indicate lvRegNum is dead and varDsc stack slot is going live.
+ // Note that we cannot modify varDsc->lvRegNum here because another basic block may not be expecting it.
+ // Therefore manually update life of varDsc->lvRegNum.
+ regMaskTP tempMask = varDsc->lvRegMask();
+ regSet.RemoveMaskVars(tempMask);
+ gcInfo.gcMarkRegSetNpt(tempMask);
+ if (compiler->lvaIsGCTracked(varDsc))
+ {
+#ifdef DEBUG
+ if (!VarSetOps::IsMember(compiler, gcInfo.gcVarPtrSetCur, varDsc->lvVarIndex))
+ {
+ JITDUMP("\t\t\t\t\t\t\tVar V%02u becoming live\n", varNum);
+ }
+ else
+ {
+ JITDUMP("\t\t\t\t\t\t\tVar V%02u continuing live\n", varNum);
+ }
+#endif // DEBUG
+
+ VarSetOps::AddElemD(compiler, gcInfo.gcVarPtrSetCur, varDsc->lvVarIndex);
+ }
+ }
+
+#ifdef PROFILING_SUPPORTED
+ // At this point all arg regs are free.
+ // Emit tail call profiler callback.
+ genProfilingLeaveCallback(CORINFO_HELP_PROF_FCN_TAILCALL);
+#endif
+
+    // Next, move any register arguments that are not currently in their incoming arg registers back into them.
+ regMaskTP fixedIntArgMask = RBM_NONE; // tracks the int arg regs occupying fixed args in case of a vararg method.
+ unsigned firstArgVarNum = BAD_VAR_NUM; // varNum of the first argument in case of a vararg method.
+ for (varNum = 0; (varNum < compiler->info.compArgsCount); varNum++)
+ {
+ varDsc = compiler->lvaTable + varNum;
+ if (varDsc->lvPromoted)
+ {
+ noway_assert(varDsc->lvFieldCnt == 1); // We only handle one field here
+
+ unsigned fieldVarNum = varDsc->lvFieldLclStart;
+ varDsc = compiler->lvaTable + fieldVarNum;
+ }
+ noway_assert(varDsc->lvIsParam);
+
+ // Skip if arg not passed in a register.
+ if (!varDsc->lvIsRegArg)
+ {
+ continue;
+ }
+
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ if (varTypeIsStruct(varDsc))
+ {
+ CORINFO_CLASS_HANDLE typeHnd = varDsc->lvVerTypeInfo.GetClassHandle();
+ assert(typeHnd != nullptr);
+
+ SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc;
+ compiler->eeGetSystemVAmd64PassStructInRegisterDescriptor(typeHnd, &structDesc);
+ assert(structDesc.passedInRegisters);
+
+ unsigned __int8 offset0 = 0;
+ unsigned __int8 offset1 = 0;
+ var_types type0 = TYP_UNKNOWN;
+ var_types type1 = TYP_UNKNOWN;
+
+ // Get the eightbyte data
+ compiler->GetStructTypeOffset(structDesc, &type0, &type1, &offset0, &offset1);
+
+ // Move the values into the right registers.
+ //
+
+ // Update varDsc->lvArgReg and lvOtherArgReg life and GC Info to indicate varDsc stack slot is dead and
+ // argReg is going live. Note that we cannot modify varDsc->lvRegNum and lvOtherArgReg here because another
+ // basic block may not be expecting it. Therefore manually update life of argReg. Note that GT_JMP marks
+            // the end of the basic block, after which reg life and gc info will be recomputed for the new block in
+ // genCodeForBBList().
+ if (type0 != TYP_UNKNOWN)
+ {
+ getEmitter()->emitIns_R_S(ins_Load(type0), emitTypeSize(type0), varDsc->lvArgReg, varNum, offset0);
+ regSet.rsMaskVars |= genRegMask(varDsc->lvArgReg);
+ gcInfo.gcMarkRegPtrVal(varDsc->lvArgReg, type0);
+ }
+
+ if (type1 != TYP_UNKNOWN)
+ {
+ getEmitter()->emitIns_R_S(ins_Load(type1), emitTypeSize(type1), varDsc->lvOtherArgReg, varNum, offset1);
+ regSet.rsMaskVars |= genRegMask(varDsc->lvOtherArgReg);
+ gcInfo.gcMarkRegPtrVal(varDsc->lvOtherArgReg, type1);
+ }
+
+ if (varDsc->lvTracked)
+ {
+ VarSetOps::RemoveElemD(compiler, gcInfo.gcVarPtrSetCur, varDsc->lvVarIndex);
+ }
+ }
+ else
+#endif // !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ {
+ // Register argument
+ noway_assert(isRegParamType(genActualType(varDsc->TypeGet())));
+
+ // Is register argument already in the right register?
+ // If not load it from its stack location.
+ var_types loadType = varDsc->lvaArgType();
+ regNumber argReg = varDsc->lvArgReg; // incoming arg register
+
+ if (varDsc->lvRegNum != argReg)
+ {
+ assert(genIsValidReg(argReg));
+ getEmitter()->emitIns_R_S(ins_Load(loadType), emitTypeSize(loadType), argReg, varNum, 0);
+
+ // Update argReg life and GC Info to indicate varDsc stack slot is dead and argReg is going live.
+ // Note that we cannot modify varDsc->lvRegNum here because another basic block may not be expecting it.
+ // Therefore manually update life of argReg. Note that GT_JMP marks the end of the basic block
+                // after which reg life and gc info will be recomputed for the new block in genCodeForBBList().
+ regSet.AddMaskVars(genRegMask(argReg));
+ gcInfo.gcMarkRegPtrVal(argReg, loadType);
+ if (compiler->lvaIsGCTracked(varDsc))
+ {
+#ifdef DEBUG
+ if (VarSetOps::IsMember(compiler, gcInfo.gcVarPtrSetCur, varDsc->lvVarIndex))
+ {
+ JITDUMP("\t\t\t\t\t\t\tVar V%02u becoming dead\n", varNum);
+ }
+ else
+ {
+ JITDUMP("\t\t\t\t\t\t\tVar V%02u continuing dead\n", varNum);
+ }
+#endif // DEBUG
+
+ VarSetOps::RemoveElemD(compiler, gcInfo.gcVarPtrSetCur, varDsc->lvVarIndex);
+ }
+ }
+ }
+
+#if FEATURE_VARARG && defined(_TARGET_AMD64_)
+ // In case of a jmp call to a vararg method also pass the float/double arg in the corresponding int arg
+ // register. This is due to the AMD64 ABI which requires floating point values passed to varargs functions to
+ // be passed in both integer and floating point registers. It doesn't apply to x86, which passes floating point
+ // values on the stack.
+ if (compiler->info.compIsVarArgs)
+ {
+ regNumber intArgReg;
+ var_types loadType = varDsc->lvaArgType();
+ regNumber argReg = varDsc->lvArgReg; // incoming arg register
+
+ if (varTypeIsFloating(loadType))
+ {
+ intArgReg = compiler->getCallArgIntRegister(argReg);
+ instruction ins = ins_CopyFloatToInt(loadType, TYP_LONG);
+ inst_RV_RV(ins, argReg, intArgReg, loadType);
+ }
+ else
+ {
+ intArgReg = argReg;
+ }
+
+ fixedIntArgMask |= genRegMask(intArgReg);
+
+ if (intArgReg == REG_ARG_0)
+ {
+ assert(firstArgVarNum == BAD_VAR_NUM);
+ firstArgVarNum = varNum;
+ }
+ }
+#endif // FEATURE_VARARG
+ }
+
+#if FEATURE_VARARG && defined(_TARGET_AMD64_)
+ // Jmp call to a vararg method - if the method has fewer than 4 fixed arguments,
+ // load the remaining arg registers (both int and float) from the corresponding
+    // shadow stack slots. This is because we don't know the number and type
+    // of non-fixed params passed by the caller, so we have to assume the worst case
+    // of the caller passing float/double args in both the int and float arg regs.
+ //
+ // This doesn't apply to x86, which doesn't pass floating point values in floating
+ // point registers.
+ //
+    // The caller could have passed gc-ref/byref type var args. Since these are var args,
+    // the callee has no way of knowing their gc-ness. Therefore, mark the region that loads
+    // the remaining arg registers from shadow stack slots as non-gc interruptible.
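+    //
+    // Roughly, for each remaining fixed arg register this emits (a sketch; the exact int-to-float copy
+    // instruction comes from ins_CopyIntToFloat):
+    //     mov   argReg, [firstArgVarNum's shadow slot + argOffset]   ; reload the possible int arg
+    //     mov*  floatReg, argReg                                     ; mirror it into the matching float arg reg
+    // with GC tracking disabled (emitDisableGC/emitEnableGC) around the whole run of loads.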
+ if (fixedIntArgMask != RBM_NONE)
+ {
+ assert(compiler->info.compIsVarArgs);
+ assert(firstArgVarNum != BAD_VAR_NUM);
+
+ regMaskTP remainingIntArgMask = RBM_ARG_REGS & ~fixedIntArgMask;
+ if (remainingIntArgMask != RBM_NONE)
+ {
+ instruction insCopyIntToFloat = ins_CopyIntToFloat(TYP_LONG, TYP_DOUBLE);
+ getEmitter()->emitDisableGC();
+ for (int argNum = 0, argOffset = 0; argNum < MAX_REG_ARG; ++argNum)
+ {
+ regNumber argReg = intArgRegs[argNum];
+ regMaskTP argRegMask = genRegMask(argReg);
+
+ if ((remainingIntArgMask & argRegMask) != 0)
+ {
+ remainingIntArgMask &= ~argRegMask;
+ getEmitter()->emitIns_R_S(INS_mov, EA_8BYTE, argReg, firstArgVarNum, argOffset);
+
+ // also load it in corresponding float arg reg
+ regNumber floatReg = compiler->getCallArgFloatRegister(argReg);
+ inst_RV_RV(insCopyIntToFloat, floatReg, argReg);
+ }
+
+ argOffset += REGSIZE_BYTES;
+ }
+ getEmitter()->emitEnableGC();
+ }
+ }
+#endif // FEATURE_VARARG
+}
+
+// produce code for a GT_LEA subnode
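+// For example (a rough sketch), a GT_LEA with both a base and an index, scale 4 and offset 16,
+// produces:
+//     lea  targetReg, [baseReg + indexReg*4 + 16]
+// while a base-only or index-only address mode simply drops the missing component.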
+void CodeGen::genLeaInstruction(GenTreeAddrMode* lea)
+{
+ emitAttr size = emitTypeSize(lea);
+ genConsumeOperands(lea);
+
+ if (lea->Base() && lea->Index())
+ {
+ regNumber baseReg = lea->Base()->gtRegNum;
+ regNumber indexReg = lea->Index()->gtRegNum;
+ getEmitter()->emitIns_R_ARX(INS_lea, size, lea->gtRegNum, baseReg, indexReg, lea->gtScale, lea->gtOffset);
+ }
+ else if (lea->Base())
+ {
+ getEmitter()->emitIns_R_AR(INS_lea, size, lea->gtRegNum, lea->Base()->gtRegNum, lea->gtOffset);
+ }
+ else if (lea->Index())
+ {
+ getEmitter()->emitIns_R_ARX(INS_lea, size, lea->gtRegNum, REG_NA, lea->Index()->gtRegNum, lea->gtScale,
+ lea->gtOffset);
+ }
+
+ genProduceReg(lea);
+}
+
+//-------------------------------------------------------------------------------------------
+// genJumpKindsForTree: Determine the number and kinds of conditional branches
+// necessary to implement the given GT_CMP node
+//
+// Arguments:
+// cmpTree - (input) The GenTree node that is used to set the Condition codes
+// - The GenTree Relop node that was used to set the Condition codes
+// jmpKind[2] - (output) One or two conditional branch instructions
+// jmpToTrueLabel[2] - (output) When true we branch to the true case
+// When false we create a second label and branch to the false case
+//                          Only GT_EQ for floating point compares can have a false value.
+//
+// Return Value:
+// Sets the proper values into the array elements of jmpKind[] and jmpToTrueLabel[]
+//
+// Assumptions:
+// At least one conditional branch instruction will be returned.
+// Typically only one conditional branch is needed
+// and the second jmpKind[] value is set to EJ_NONE
+//
+// Notes:
+// jmpToTrueLabel[i]= true implies branch when the compare operation is true.
+// jmpToTrueLabel[i]= false implies branch when the compare operation is false.
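+//
+// Example (from the floating point switch below):
+//    An ordered GT_EQ produces jmpKind = { EJ_jpe, EJ_je } with jmpToTrueLabel = { false, true },
+//    i.e. branch to the false label when PF=1, otherwise branch to the true label on equality.
+//    Most other cases need only a single jump, leaving jmpKind[1] as EJ_NONE.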
+//-------------------------------------------------------------------------------------------
+
+// static
+void CodeGen::genJumpKindsForTree(GenTreePtr cmpTree, emitJumpKind jmpKind[2], bool jmpToTrueLabel[2])
+{
+ // Except for BEQ (= ordered GT_EQ) both jumps are to the true label.
+ jmpToTrueLabel[0] = true;
+ jmpToTrueLabel[1] = true;
+
+ // For integer comparisons just use genJumpKindForOper
+ if (!varTypeIsFloating(cmpTree->gtOp.gtOp1->gtEffectiveVal()))
+ {
+ CompareKind compareKind = ((cmpTree->gtFlags & GTF_UNSIGNED) != 0) ? CK_UNSIGNED : CK_SIGNED;
+ jmpKind[0] = genJumpKindForOper(cmpTree->gtOper, compareKind);
+ jmpKind[1] = EJ_NONE;
+ }
+ else
+ {
+ assert(cmpTree->OperIsCompare());
+
+ // For details on how we arrived at this mapping, see the comment block in genCodeForTreeNode()
+        // while generating code for compare operators (e.g. GT_EQ etc).
+ if ((cmpTree->gtFlags & GTF_RELOP_NAN_UN) != 0)
+ {
+            // Must branch if we have a NaN, unordered
+ switch (cmpTree->gtOper)
+ {
+ case GT_LT:
+ case GT_GT:
+ jmpKind[0] = EJ_jb;
+ jmpKind[1] = EJ_NONE;
+ break;
+
+ case GT_LE:
+ case GT_GE:
+ jmpKind[0] = EJ_jbe;
+ jmpKind[1] = EJ_NONE;
+ break;
+
+ case GT_NE:
+ jmpKind[0] = EJ_jpe;
+ jmpKind[1] = EJ_jne;
+ break;
+
+ case GT_EQ:
+ jmpKind[0] = EJ_je;
+ jmpKind[1] = EJ_NONE;
+ break;
+
+ default:
+ unreached();
+ }
+ }
+ else // ((cmpTree->gtFlags & GTF_RELOP_NAN_UN) == 0)
+ {
+            // Do not branch if we have a NaN, unordered
+ switch (cmpTree->gtOper)
+ {
+ case GT_LT:
+ case GT_GT:
+ jmpKind[0] = EJ_ja;
+ jmpKind[1] = EJ_NONE;
+ break;
+
+ case GT_LE:
+ case GT_GE:
+ jmpKind[0] = EJ_jae;
+ jmpKind[1] = EJ_NONE;
+ break;
+
+ case GT_NE:
+ jmpKind[0] = EJ_jne;
+ jmpKind[1] = EJ_NONE;
+ break;
+
+ case GT_EQ:
+ jmpKind[0] = EJ_jpe;
+ jmpKind[1] = EJ_je;
+ jmpToTrueLabel[0] = false;
+ break;
+
+ default:
+ unreached();
+ }
+ }
+ }
+}
+
+#if !defined(_TARGET_64BIT_)
+//------------------------------------------------------------------------
+// genJumpKindsForTreeLongHi: Generate the jump types for compare
+// operators of the high parts of a compare with long type operands
+// on x86 for the case where the rel-op result needs to be materialized into a
+// register.
+//
+// Arguments:
+// cmpTree - The GT_CMP node
+// jmpKind - Return array of jump kinds
+// jmpToTrueLabel - Return array of if the jump is going to true label
+//
+// Return Value:
+// None.
+//
+void CodeGen::genJumpKindsForTreeLongHi(GenTreePtr cmpTree, emitJumpKind jmpKind[2])
+{
+ assert(cmpTree->OperIsCompare());
+ CompareKind compareKind = ((cmpTree->gtFlags & GTF_UNSIGNED) != 0) ? CK_UNSIGNED : CK_SIGNED;
+
+ switch (cmpTree->gtOper)
+ {
+ case GT_LT:
+ case GT_LE:
+ if (compareKind == CK_SIGNED)
+ {
+ jmpKind[0] = EJ_jl;
+ jmpKind[1] = EJ_jg;
+ }
+ else
+ {
+ jmpKind[0] = EJ_jb;
+ jmpKind[1] = EJ_ja;
+ }
+ break;
+
+ case GT_GT:
+ case GT_GE:
+ if (compareKind == CK_SIGNED)
+ {
+ jmpKind[0] = EJ_jg;
+ jmpKind[1] = EJ_jl;
+ }
+ else
+ {
+ jmpKind[0] = EJ_ja;
+ jmpKind[1] = EJ_jb;
+ }
+ break;
+
+ case GT_EQ:
+ // GT_EQ will not jump to the true label if the hi parts are equal
+ jmpKind[0] = EJ_NONE;
+ jmpKind[1] = EJ_jne;
+ break;
+
+ case GT_NE:
+ // GT_NE will always jump to the true label if the high parts are not equal
+ jmpKind[0] = EJ_jne;
+ jmpKind[1] = EJ_NONE;
+ break;
+
+ default:
+ unreached();
+ }
+}
+
+//------------------------------------------------------------------------
+// genCompareLong: Generate code for comparing two longs on x86 when the result of the compare
+// is manifested in a register.
+//
+// Arguments:
+// treeNode - the compare tree
+//
+// Return Value:
+// None.
+// Comments:
+// For long compares, we need to compare the high parts of operands first, then the low parts.
+// If the high compare is false, we do not need to compare the low parts. For less than and
+// greater than, if the high compare is true, we can assume the entire compare is true. For
+// compares that are realized in a register, we will generate:
+//
+// Opcode x86 equivalent Comment
+// ------ -------------- -------
+// GT_EQ cmp hiOp1,hiOp2 If any part is not equal, the entire compare
+// jne label is false.
+// cmp loOp1,loOp2
+// label: sete
+//
+// GT_NE cmp hiOp1,hiOp2 If any part is not equal, the entire compare
+// jne label is true.
+// cmp loOp1,loOp2
+// label: setne
+//
+// GT_LT; unsigned cmp hiOp1,hiOp2 If hiOp1 is not equal to hiOp2, the flags are set
+// jne label correctly and we do not need to check lo. Otherwise,
+// cmp loOp1,loOp2 we need to compare the lo halves
+// label: setb
+//
+// GT_LE; unsigned cmp hiOp1,hiOp2 If hiOp1 is not equal to hiOp2, the flags are set
+// jne label correctly and we do not need to check lo. Otherwise,
+// cmp loOp1,loOp2 we need to compare the lo halves
+// label: setbe
+//
+// GT_GT; unsigned cmp hiOp1,hiOp2 If hiOp1 is not equal to hiOp2, the flags are set
+// jne label correctly and we do not need to check lo. Otherwise,
+// cmp loOp1,loOp2 we need to compare the lo halves
+// label: seta
+//
+// GT_GE; unsigned cmp hiOp1,hiOp2 If hiOp1 is not equal to hiOp2, the flags are set
+// jne label correctly and we do not need to check lo. Otherwise,
+// cmp loOp1,loOp2 we need to compare the lo halves
+// label: setae
+//
+// For signed long comparisons, we need additional labels, as we need to use signed conditions on the
+// "set" instruction:
+//
+// GT_LT; signed cmp hiOp1,hiOp2 If hiOp1 is not equal to hiOp2, the flags are set
+// jne labelHi correctly and we do not need to check lo. Otherwise,
+// cmp loOp1,loOp2 we need to compare the lo halves
+// setb Unsigned set for lo compare
+// jmp labelFinal
+// labelHi: setl Signed set for high compare
+// labelFinal:
+//
+// GT_LE; signed cmp hiOp1,hiOp2 If hiOp1 is not equal to hiOp2, the flags are set
+// jne labelHi correctly and we do not need to check lo. Otherwise,
+// cmp loOp1,loOp2 we need to compare the lo halves
+//                              setbe                          Unsigned set for lo compare
+// jmp labelFinal
+// labelHi: setle Signed set for hi compare
+// labelFinal:
+//
+// GT_GT; signed cmp hiOp1,hiOp2 If hiOp1 is not equal to hiOp2, the flags are set
+// jne labelHi correctly and we do not need to check lo. Otherwise,
+// cmp loOp1,loOp2 we need to compare the lo halves
+// seta Unsigned set for lo compare
+// jmp labelFinal
+// labelHi: setg Signed set for high compare
+//                 labelFinal:
+//
+// GT_GE; signed cmp hiOp1,hiOp2 If hiOp1 is not equal to hiOp2, the flags are set
+// jne labelHi correctly and we do not need to check lo. Otherwise,
+// cmp loOp1,loOp2 we need to compare the lo halves
+// setae Unsigned set for lo compare
+// jmp labelFinal
+// labelHi: setge Signed set for hi compare
+// labelFinal:
+//
+// TODO-X86-CQ: Check if hi or lo parts of op2 are 0 and change the compare to a test.
+void CodeGen::genCompareLong(GenTreePtr treeNode)
+{
+ assert(treeNode->OperIsCompare());
+
+ GenTreeOp* tree = treeNode->AsOp();
+ GenTreePtr op1 = tree->gtOp1;
+ GenTreePtr op2 = tree->gtOp2;
+
+ assert(varTypeIsLong(op1->TypeGet()));
+ assert(varTypeIsLong(op2->TypeGet()));
+
+ regNumber targetReg = treeNode->gtRegNum;
+
+ genConsumeOperands(tree);
+
+ assert(targetReg != REG_NA);
+
+ GenTreePtr loOp1 = op1->gtGetOp1();
+ GenTreePtr hiOp1 = op1->gtGetOp2();
+ GenTreePtr loOp2 = op2->gtGetOp1();
+ GenTreePtr hiOp2 = op2->gtGetOp2();
+
+ // Create compare for the high parts
+ instruction ins = INS_cmp;
+ var_types cmpType = TYP_INT;
+ emitAttr cmpAttr = emitTypeSize(cmpType);
+
+ // Emit the compare instruction
+ getEmitter()->emitInsBinary(ins, cmpAttr, hiOp1, hiOp2);
+
+ // Generate the first jump for the high compare
+ CompareKind compareKind = ((tree->gtFlags & GTF_UNSIGNED) != 0) ? CK_UNSIGNED : CK_SIGNED;
+
+ BasicBlock* labelHi = genCreateTempLabel();
+ BasicBlock* labelFinal = genCreateTempLabel();
+
+ if (compareKind == CK_SIGNED && (tree->gtOper != GT_NE && tree->gtOper != GT_EQ))
+ {
+ // If we are doing a signed comparison, we need to do a signed set if the high compare is true,
+ // but an unsigned set if we fall through to the low compare. If we have a GT_NE or GT_EQ, we do not
+ // need to worry about the sign of the comparison, so we can use the simplified case.
+
+ // We only have to check for equality for the hi comparison. If they are not equal, then the set will
+ // do the right thing. If they are equal, we have to check the lo halves.
+ inst_JMP(EJ_jne, labelHi);
+
+ // Emit the comparison. Perform the set for the lo. Jump to labelFinal
+ getEmitter()->emitInsBinary(ins, cmpAttr, loOp1, loOp2);
+
+ // The low set must be unsigned
+ emitJumpKind jumpKindLo = genJumpKindForOper(tree->gtOper, CK_UNSIGNED);
+
+ inst_SET(jumpKindLo, targetReg);
+ // Set the higher bytes to 0
+ inst_RV_RV(ins_Move_Extend(TYP_UBYTE, true), targetReg, targetReg, TYP_UBYTE, emitTypeSize(TYP_UBYTE));
+ genProduceReg(tree);
+
+ inst_JMP(EJ_jmp, labelFinal);
+
+ // Define the label for hi jump target here. If we have jumped here, we want to set
+ // the target register based on the jump kind of the actual compare type.
+
+ genDefineTempLabel(labelHi);
+ inst_SET(genJumpKindForOper(tree->gtOper, compareKind), targetReg);
+
+ // Set the higher bytes to 0
+ inst_RV_RV(ins_Move_Extend(TYP_UBYTE, true), targetReg, targetReg, TYP_UBYTE, emitTypeSize(TYP_UBYTE));
+ genProduceReg(tree);
+
+ genDefineTempLabel(labelFinal);
+ }
+ else
+ {
+ // If the compare is unsigned, or if the sign doesn't change the set instruction, we can use
+ // the same set logic for both the hi and lo compare, so we don't need to jump to a high label,
+ // we can just jump to the set that the lo compare will use.
+
+ // We only have to check for equality for the hi comparison. If they are not equal, then the set will
+ // do the right thing. If they are equal, we have to check the lo halves.
+ inst_JMP(EJ_jne, labelFinal);
+
+ // Emit the comparison
+ getEmitter()->emitInsBinary(ins, cmpAttr, loOp1, loOp2);
+
+ // Define the label for hi jump target here. If we have jumped here, we want to set
+ // the target register based on the jump kind of the lower half (the actual compare
+ // type). If we have fallen through, then we are doing a normal int compare for the
+ // lower parts
+
+ genDefineTempLabel(labelFinal);
+
+ // The low set must be unsigned
+ emitJumpKind jumpKindLo = genJumpKindForOper(tree->gtOper, CK_UNSIGNED);
+
+ inst_SET(jumpKindLo, targetReg);
+ // Set the higher bytes to 0
+ inst_RV_RV(ins_Move_Extend(TYP_UBYTE, true), targetReg, targetReg, TYP_UBYTE, emitTypeSize(TYP_UBYTE));
+ genProduceReg(tree);
+ }
+}
+
+//------------------------------------------------------------------------
+// genJTrueLong: Generate code for comparing two longs on x86 for the case where the result
+// is not manifested in a register.
+//
+// Arguments:
+// treeNode - the compare tree
+//
+// Return Value:
+// None.
+// Comments:
+// For long compares, we need to compare the high parts of operands first, then the low parts.
+// We only have to do the low compare if the high parts of the operands are equal.
+//
+// In the case where the result of a rel-op is not realized in a register, we generate:
+//
+// Opcode x86 equivalent Comment
+// ------ -------------- -------
+//
+// GT_LT; unsigned cmp hiOp1,hiOp2
+// jb trueLabel
+// ja falseLabel
+// cmp loOp1,loOp2
+// jb trueLabel
+// falseLabel:
+//
+// GT_LE; unsigned cmp hiOp1,hiOp2
+// jb trueLabel
+// ja falseLabel
+// cmp loOp1,loOp2
+// jbe trueLabel
+// falseLabel:
+//
+// GT_GT; unsigned cmp hiOp1,hiOp2
+// ja trueLabel
+// jb falseLabel
+// cmp loOp1,loOp2
+// ja trueLabel
+// falseLabel:
+//
+// GT_GE; unsigned cmp hiOp1,hiOp2
+// ja trueLabel
+// jb falseLabel
+// cmp loOp1,loOp2
+// jae trueLabel
+// falseLabel:
+//
+// GT_LT; signed cmp hiOp1,hiOp2
+// jl trueLabel
+// jg falseLabel
+// cmp loOp1,loOp2
+// jb trueLabel
+// falseLabel:
+//
+// GT_LE; signed cmp hiOp1,hiOp2
+// jl trueLabel
+// jg falseLabel
+// cmp loOp1,loOp2
+// jbe trueLabel
+// falseLabel:
+//
+// GT_GT; signed cmp hiOp1,hiOp2
+// jg trueLabel
+// jl falseLabel
+// cmp loOp1,loOp2
+// ja trueLabel
+// falseLabel:
+//
+// GT_GE; signed cmp hiOp1,hiOp2
+// jg trueLabel
+// jl falseLabel
+// cmp loOp1,loOp2
+// jae trueLabel
+// falseLabel:
+//
+// GT_EQ; cmp hiOp1,hiOp2
+// jne falseLabel
+// cmp loOp1,loOp2
+// je trueLabel
+// falseLabel:
+//
+// GT_NE; cmp hiOp1,hiOp2
+// jne labelTrue
+// cmp loOp1,loOp2
+// jne trueLabel
+// falseLabel:
+//
+// TODO-X86-CQ: Check if hi or lo parts of op2 are 0 and change the compare to a test.
+void CodeGen::genJTrueLong(GenTreePtr treeNode)
+{
+ assert(treeNode->OperIsCompare());
+
+ GenTreeOp* tree = treeNode->AsOp();
+ GenTreePtr op1 = tree->gtOp1;
+ GenTreePtr op2 = tree->gtOp2;
+
+ assert(varTypeIsLong(op1->TypeGet()));
+ assert(varTypeIsLong(op2->TypeGet()));
+
+ regNumber targetReg = treeNode->gtRegNum;
+
+ assert(targetReg == REG_NA);
+
+ GenTreePtr loOp1 = op1->gtGetOp1();
+ GenTreePtr hiOp1 = op1->gtGetOp2();
+ GenTreePtr loOp2 = op2->gtGetOp1();
+ GenTreePtr hiOp2 = op2->gtGetOp2();
+
+ // Emit the compare instruction
+ getEmitter()->emitInsBinary(INS_cmp, EA_4BYTE, hiOp1, hiOp2);
+
+ // Generate the first jump for the high compare
+ CompareKind compareKind = ((tree->gtFlags & GTF_UNSIGNED) != 0) ? CK_UNSIGNED : CK_SIGNED;
+
+ // TODO-X86-CQ: If the next block is a BBJ_ALWAYS, we can set falseLabel = compiler->compCurBB->bbNext->bbJumpDest.
+ BasicBlock* falseLabel = genCreateTempLabel();
+
+ emitJumpKind jumpKindHi[2];
+
+ // Generate the jumps for the high compare
+ genJumpKindsForTreeLongHi(tree, jumpKindHi);
+
+ BasicBlock* trueLabel = compiler->compCurBB->bbJumpDest;
+
+ if (jumpKindHi[0] != EJ_NONE)
+ {
+ inst_JMP(jumpKindHi[0], trueLabel);
+ }
+
+ if (jumpKindHi[1] != EJ_NONE)
+ {
+ inst_JMP(jumpKindHi[1], falseLabel);
+ }
+
+ // The low jump must be unsigned
+ emitJumpKind jumpKindLo = genJumpKindForOper(tree->gtOper, CK_UNSIGNED);
+
+ // Emit the comparison and the jump to the trueLabel
+ getEmitter()->emitInsBinary(INS_cmp, EA_4BYTE, loOp1, loOp2);
+
+ inst_JMP(jumpKindLo, trueLabel);
+
+ // Generate falseLabel, which is the false path. We will jump here if the high compare is false
+ // or fall through if the low compare is false.
+ genDefineTempLabel(falseLabel);
+}
+#endif //! defined(_TARGET_64BIT_)
+
+//------------------------------------------------------------------------
+// genCompareFloat: Generate code for comparing two floating point values
+//
+// Arguments:
+// treeNode - the compare tree
+//
+// Return Value:
+// None.
+// Comments:
+//    SSE2 instruction ucomis[s|d] performs an unordered comparison and
+//    updates the rFLAGS register as follows.
+// Result of compare ZF PF CF
+// ----------------- ------------
+// Unordered 1 1 1 <-- this result implies one of operands of compare is a NAN.
+// Greater 0 0 0
+// Less Than 0 0 1
+// Equal 1 0 0
+//
+// From the above table the following equalities follow. As per ECMA spec *.UN opcodes perform
+// unordered comparison of floating point values. That is *.UN comparisons result in true when
+// one of the operands is a NaN whereas ordered comparisons results in false.
+//
+// Opcode Amd64 equivalent Comment
+// ------ ----------------- --------
+// BLT.UN(a,b) ucomis[s|d] a, b Jb branches if CF=1, which means either a<b or unordered from the above
+// jb table
+//
+// BLT(a,b) ucomis[s|d] b, a Ja branches if CF=0 and ZF=0, which means b>a that in turn implies a<b
+// ja
+//
+// BGT.UN(a,b) ucomis[s|d] b, a branch if b<a or unordered ==> branch if a>b or unordered
+// jb
+//
+// BGT(a, b) ucomis[s|d] a, b branch if a>b
+// ja
+//
+// BLE.UN(a,b) ucomis[s|d] a, b jbe branches if CF=1 or ZF=1, which implies a<=b or unordered
+// jbe
+//
+//    BLE(a,b)       ucomis[s|d] b, a        jae branches if CF=0, which means b>=a, i.e. a<=b
+// jae
+//
+// BGE.UN(a,b) ucomis[s|d] b, a branch if b<=a or unordered ==> branch if a>=b or unordered
+// jbe
+//
+// BGE(a,b) ucomis[s|d] a, b branch if a>=b
+// jae
+//
+// BEQ.UN(a,b) ucomis[s|d] a, b branch if a==b or unordered. There is no BEQ.UN opcode in ECMA spec.
+//                   je                      This case is given for completeness, in case the JIT generates such
+// a gentree internally.
+//
+// BEQ(a,b) ucomis[s|d] a, b From the above table, PF=0 and ZF=1 corresponds to a==b.
+// jpe L1
+// je <true label>
+// L1:
+//
+// BNE(a,b) ucomis[s|d] a, b branch if a!=b. There is no BNE opcode in ECMA spec. This case is
+//                   jne                     given for completeness, in case the JIT generates such a gentree
+// internally.
+//
+// BNE.UN(a,b) ucomis[s|d] a, b From the above table, PF=1 or ZF=0 implies unordered or a!=b
+// jpe <true label>
+// jne <true label>
+//
+//    As we can see from the above equalities, the operands of a compare operator need to be
+//    reversed in the case of BLT/CLT, BGT.UN/CGT.UN, BLE/CLE, BGE.UN/CGE.UN.
+void CodeGen::genCompareFloat(GenTreePtr treeNode)
+{
+ assert(treeNode->OperIsCompare());
+
+ GenTreeOp* tree = treeNode->AsOp();
+ GenTreePtr op1 = tree->gtOp1;
+ GenTreePtr op2 = tree->gtOp2;
+ var_types op1Type = op1->TypeGet();
+ var_types op2Type = op2->TypeGet();
+
+ genConsumeOperands(tree);
+
+ assert(varTypeIsFloating(op1Type));
+ assert(op1Type == op2Type);
+
+ regNumber targetReg = treeNode->gtRegNum;
+ instruction ins;
+ emitAttr cmpAttr;
+
+ bool reverseOps;
+ if ((tree->gtFlags & GTF_RELOP_NAN_UN) != 0)
+ {
+ // Unordered comparison case
+ reverseOps = (tree->gtOper == GT_GT || tree->gtOper == GT_GE);
+ }
+ else
+ {
+ reverseOps = (tree->gtOper == GT_LT || tree->gtOper == GT_LE);
+ }
+
+ if (reverseOps)
+ {
+ GenTreePtr tmp = op1;
+ op1 = op2;
+ op2 = tmp;
+ }
+
+ ins = ins_FloatCompare(op1Type);
+ cmpAttr = emitTypeSize(op1Type);
+
+ getEmitter()->emitInsBinary(ins, cmpAttr, op1, op2);
+
+ // Are we evaluating this into a register?
+ if (targetReg != REG_NA)
+ {
+ genSetRegToCond(targetReg, tree);
+ genProduceReg(tree);
+ }
+}
+
+//------------------------------------------------------------------------
+// genCompareInt: Generate code for comparing ints or, on amd64, longs.
+//
+// Arguments:
+// treeNode - the compare tree
+//
+// Return Value:
+// None.
+void CodeGen::genCompareInt(GenTreePtr treeNode)
+{
+ assert(treeNode->OperIsCompare());
+
+ GenTreeOp* tree = treeNode->AsOp();
+ GenTreePtr op1 = tree->gtOp1;
+ GenTreePtr op2 = tree->gtOp2;
+ var_types op1Type = op1->TypeGet();
+ var_types op2Type = op2->TypeGet();
+
+ genConsumeOperands(tree);
+
+ instruction ins;
+ emitAttr cmpAttr;
+
+ regNumber targetReg = treeNode->gtRegNum;
+ assert(!op1->isContainedIntOrIImmed()); // We no longer support swapping op1 and op2 to generate cmp reg, imm
+ assert(!varTypeIsFloating(op2Type));
+
+#ifdef _TARGET_X86_
+ assert(!varTypeIsLong(op1Type) && !varTypeIsLong(op2Type));
+#endif // _TARGET_X86_
+
+ // By default we use an int32 sized cmp instruction
+ //
+ ins = INS_cmp;
+ var_types cmpType = TYP_INT;
+
+ // In the if/then/else statement below we may change the
+ // 'cmpType' and/or 'ins' to generate a smaller instruction
+
+ // Are we comparing two values that are the same size?
+ //
+ if (genTypeSize(op1Type) == genTypeSize(op2Type))
+ {
+ if (op1Type == op2Type)
+ {
+ // If both types are exactly the same we can use that type
+ cmpType = op1Type;
+ }
+ else if (genTypeSize(op1Type) == 8)
+ {
+ // If we have two different int64 types we need to use a long compare
+ cmpType = TYP_LONG;
+ }
+
+ cmpAttr = emitTypeSize(cmpType);
+ }
+ else // Here we know that (op1Type != op2Type)
+ {
+ // Do we have a short compare against a constant in op2?
+ //
+ // We checked for this case in LowerCmp() and if we can perform a small
+ // compare immediate we labeled this compare with a GTF_RELOP_SMALL
+ // and for unsigned small non-equality compares the GTF_UNSIGNED flag.
+ //
+ if (op2->isContainedIntOrIImmed() && ((tree->gtFlags & GTF_RELOP_SMALL) != 0))
+ {
+ assert(varTypeIsSmall(op1Type));
+ cmpType = op1Type;
+ }
+#ifdef _TARGET_AMD64_
+ else // compare two different sized operands
+ {
+ // For this case we don't want any memory operands, only registers or immediates
+ //
+ assert(!op1->isContainedMemoryOp());
+ assert(!op2->isContainedMemoryOp());
+
+            // Check for the case where one operand is an int64 type.
+            // Lower should have placed the 32-bit operand in a register;
+            // for signed comparisons we will sign-extend the 32-bit value in place.
+ //
+ bool op1Is64Bit = (genTypeSize(op1Type) == 8);
+ bool op2Is64Bit = (genTypeSize(op2Type) == 8);
+ if (op1Is64Bit)
+ {
+ cmpType = TYP_LONG;
+ if (!(tree->gtFlags & GTF_UNSIGNED) && !op2Is64Bit)
+ {
+ assert(op2->gtRegNum != REG_NA);
+ inst_RV_RV(INS_movsxd, op2->gtRegNum, op2->gtRegNum, op2Type);
+ }
+ }
+ else if (op2Is64Bit)
+ {
+ cmpType = TYP_LONG;
+ if (!(tree->gtFlags & GTF_UNSIGNED) && !op1Is64Bit)
+ {
+ assert(op1->gtRegNum != REG_NA);
+ }
+ }
+ }
+#endif // _TARGET_AMD64_
+
+ cmpAttr = emitTypeSize(cmpType);
+ }
+
+ // See if we can generate a "test" instruction instead of a "cmp".
+ // For this to generate the correct conditional branch we must have
+ // a compare against zero.
+ //
+ if (op2->IsIntegralConst(0))
+ {
+ if (op1->isContained())
+ {
+ // op1 can be a contained memory op
+ // or the special contained GT_AND that we created in Lowering::LowerCmp()
+ //
+ if ((op1->OperGet() == GT_AND))
+ {
+ noway_assert(op1->gtOp.gtOp2->isContainedIntOrIImmed());
+
+ ins = INS_test; // we will generate "test andOp1, andOp2CnsVal"
+ op2 = op1->gtOp.gtOp2; // must assign op2 before we overwrite op1
+ op1 = op1->gtOp.gtOp1; // overwrite op1
+
+ if (op1->isContainedMemoryOp())
+ {
+                    // use the size of andOp1 if it is a contained memory op.
+ cmpAttr = emitTypeSize(op1->TypeGet());
+ }
+ // fallthrough to emit->emitInsBinary(ins, cmpAttr, op1, op2);
+ }
+ }
+ else // op1 is not contained thus it must be in a register
+ {
+ ins = INS_test;
+ op2 = op1; // we will generate "test reg1,reg1"
+ // fallthrough to emit->emitInsBinary(ins, cmpAttr, op1, op2);
+ }
+ }
+
+ getEmitter()->emitInsBinary(ins, cmpAttr, op1, op2);
+
+ // Are we evaluating this into a register?
+ if (targetReg != REG_NA)
+ {
+ genSetRegToCond(targetReg, tree);
+ genProduceReg(tree);
+ }
+}
+
+//-------------------------------------------------------------------------------------------
+// genSetRegToCond: Set a register 'dstReg' to the appropriate one or zero value
+// corresponding to a binary Relational operator result.
+//
+// Arguments:
+// dstReg - The target register to set to 1 or 0
+// tree - The GenTree Relop node that was used to set the Condition codes
+//
+// Return Value: none
+//
+// Notes:
+//    A full 64-bit value of either 1 or 0 is set up in 'dstReg'
+//-------------------------------------------------------------------------------------------
+
+void CodeGen::genSetRegToCond(regNumber dstReg, GenTreePtr tree)
+{
+ noway_assert((genRegMask(dstReg) & RBM_BYTE_REGS) != 0);
+
+ emitJumpKind jumpKind[2];
+ bool branchToTrueLabel[2];
+ genJumpKindsForTree(tree, jumpKind, branchToTrueLabel);
+
+ if (jumpKind[1] == EJ_NONE)
+ {
+ // Set (lower byte of) reg according to the flags
+ inst_SET(jumpKind[0], dstReg);
+ }
+ else
+ {
+#ifdef DEBUG
+        // jmpKind[1] != EJ_NONE implies BEQ and BNE.UN of floating point values.
+ // These are represented by two conditions.
+ if (tree->gtOper == GT_EQ)
+ {
+ // This must be an ordered comparison.
+ assert((tree->gtFlags & GTF_RELOP_NAN_UN) == 0);
+ }
+ else
+ {
+ // This must be BNE.UN
+ assert((tree->gtOper == GT_NE) && ((tree->gtFlags & GTF_RELOP_NAN_UN) != 0));
+ }
+#endif
+
+ // Here is the sample code generated in each case:
+ // BEQ == cmp, jpe <false label>, je <true label>
+        // That is, to materialize the comparison, reg needs to be set if PF=0 and ZF=1
+ // setnp reg // if (PF==0) reg = 1 else reg = 0
+ // jpe L1 // Jmp if PF==1
+ // sete reg
+ // L1:
+ //
+ // BNE.UN == cmp, jpe <true label>, jne <true label>
+        // That is, to materialize the comparison, reg needs to be set if either PF=1 or ZF=0;
+ // setp reg
+ // jpe L1
+ // setne reg
+ // L1:
+
+ // reverse the jmpkind condition before setting dstReg if it is to false label.
+ inst_SET(branchToTrueLabel[0] ? jumpKind[0] : emitter::emitReverseJumpKind(jumpKind[0]), dstReg);
+
+ BasicBlock* label = genCreateTempLabel();
+ inst_JMP(jumpKind[0], label);
+
+ // second branch is always to true label
+ assert(branchToTrueLabel[1]);
+ inst_SET(jumpKind[1], dstReg);
+ genDefineTempLabel(label);
+ }
+
+ var_types treeType = tree->TypeGet();
+ if (treeType == TYP_INT || treeType == TYP_LONG)
+ {
+ // Set the higher bytes to 0
+ inst_RV_RV(ins_Move_Extend(TYP_UBYTE, true), dstReg, dstReg, TYP_UBYTE, emitTypeSize(TYP_UBYTE));
+ }
+ else
+ {
+ noway_assert(treeType == TYP_BYTE);
+ }
+}
+
+//------------------------------------------------------------------------
+// genIntToIntCast: Generate code for an integer cast
+// This method handles integer overflow checking casts
+// as well as ordinary integer casts.
+//
+// Arguments:
+// treeNode - The GT_CAST node
+//
+// Return Value:
+// None.
+//
+// Assumptions:
+// The treeNode is not a contained node and must have an assigned register.
+// For a signed convert from byte, the source must be in a byte-addressable register.
+// Neither the source nor target type can be a floating point type.
+//
+// TODO-XArch-CQ: Allow castOp to be a contained node without an assigned register.
+// TODO: refactor to use getCastDescription
+//
+void CodeGen::genIntToIntCast(GenTreePtr treeNode)
+{
+ assert(treeNode->OperGet() == GT_CAST);
+
+ GenTreePtr castOp = treeNode->gtCast.CastOp();
+ regNumber targetReg = treeNode->gtRegNum;
+ regNumber sourceReg = castOp->gtRegNum;
+ var_types dstType = treeNode->CastToType();
+ bool isUnsignedDst = varTypeIsUnsigned(dstType);
+ var_types srcType = genActualType(castOp->TypeGet());
+ bool isUnsignedSrc = varTypeIsUnsigned(srcType);
+
+ // if necessary, force the srcType to unsigned when the GT_UNSIGNED flag is set
+ if (!isUnsignedSrc && (treeNode->gtFlags & GTF_UNSIGNED) != 0)
+ {
+ srcType = genUnsignedType(srcType);
+ isUnsignedSrc = true;
+ }
+
+ bool requiresOverflowCheck = false;
+ bool needAndAfter = false;
+
+ assert(genIsValidIntReg(targetReg));
+ assert(genIsValidIntReg(sourceReg));
+
+ instruction ins = INS_invalid;
+ emitAttr size = EA_UNKNOWN;
+
+ if (genTypeSize(srcType) < genTypeSize(dstType))
+ {
+ // Widening cast
+
+ // Is this an Overflow checking cast?
+ // We only need to handle one case, as the other casts can never overflow.
+ // cast from TYP_INT to TYP_ULONG
+ //
+ if (treeNode->gtOverflow() && (srcType == TYP_INT) && (dstType == TYP_ULONG))
+ {
+ requiresOverflowCheck = true;
+ size = EA_ATTR(genTypeSize(srcType));
+ ins = INS_mov;
+ }
+ else
+ {
+ // we need the source size
+ size = EA_ATTR(genTypeSize(srcType));
+ noway_assert(size < EA_PTRSIZE);
+
+ ins = ins_Move_Extend(srcType, castOp->InReg());
+
+ /*
+ Special case: ins_Move_Extend assumes the destination type is no bigger
+ than TYP_INT. movsx and movzx can already extend all the way to
+                64-bit, and a regular 32-bit mov clears the high 32 bits (like the non-existent movzxd),
+ but for a sign extension from TYP_INT to TYP_LONG, we need to use movsxd opcode.
+ */
+ if (!isUnsignedSrc && !isUnsignedDst && (size == EA_4BYTE) && (genTypeSize(dstType) > EA_4BYTE))
+ {
+#ifdef _TARGET_X86_
+ NYI_X86("Cast to 64 bit for x86/RyuJIT");
+#else // !_TARGET_X86_
+ ins = INS_movsxd;
+#endif // !_TARGET_X86_
+ }
+
+ /*
+ Special case: for a cast of byte to char we first
+ have to expand the byte (w/ sign extension), then
+ mask off the high bits.
+ Use 'movsx' followed by 'and'
+ */
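+            // For example (a sketch), a TYP_BYTE -> TYP_CHAR cast becomes:
+            //     movsx targetReg, <byte source>     ; sign-extend the byte
+            //     and   targetReg, 0xFFFF            ; then mask to 16 bits (emitted later when needAndAfter is set)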
+ if (!isUnsignedSrc && isUnsignedDst && (genTypeSize(dstType) < EA_4BYTE))
+ {
+ noway_assert(genTypeSize(dstType) == EA_2BYTE && size == EA_1BYTE);
+ needAndAfter = true;
+ }
+ }
+ }
+ else
+ {
+ // Narrowing cast, or sign-changing cast
+ noway_assert(genTypeSize(srcType) >= genTypeSize(dstType));
+
+ // Is this an Overflow checking cast?
+ if (treeNode->gtOverflow())
+ {
+ requiresOverflowCheck = true;
+ size = EA_ATTR(genTypeSize(srcType));
+ ins = INS_mov;
+ }
+ else
+ {
+ size = EA_ATTR(genTypeSize(dstType));
+ ins = ins_Move_Extend(dstType, castOp->InReg());
+ }
+ }
+
+ noway_assert(ins != INS_invalid);
+
+ genConsumeReg(castOp);
+
+ if (requiresOverflowCheck)
+ {
+ ssize_t typeMin = 0;
+ ssize_t typeMax = 0;
+ ssize_t typeMask = 0;
+ bool needScratchReg = false;
+ bool signCheckOnly = false;
+
+ /* Do we need to compare the value, or just check masks */
+
+ switch (dstType)
+ {
+ case TYP_BYTE:
+ typeMask = ssize_t((int)0xFFFFFF80);
+ typeMin = SCHAR_MIN;
+ typeMax = SCHAR_MAX;
+ break;
+
+ case TYP_UBYTE:
+ typeMask = ssize_t((int)0xFFFFFF00L);
+ break;
+
+ case TYP_SHORT:
+ typeMask = ssize_t((int)0xFFFF8000);
+ typeMin = SHRT_MIN;
+ typeMax = SHRT_MAX;
+ break;
+
+ case TYP_CHAR:
+ typeMask = ssize_t((int)0xFFFF0000L);
+ break;
+
+ case TYP_INT:
+ if (srcType == TYP_UINT)
+ {
+ signCheckOnly = true;
+ }
+ else
+ {
+ typeMask = 0xFFFFFFFF80000000LL;
+ typeMin = INT_MIN;
+ typeMax = INT_MAX;
+ }
+ break;
+
+ case TYP_UINT:
+ if (srcType == TYP_INT)
+ {
+ signCheckOnly = true;
+ }
+ else
+ {
+ needScratchReg = true;
+ }
+ break;
+
+ case TYP_LONG:
+ noway_assert(srcType == TYP_ULONG);
+ signCheckOnly = true;
+ break;
+
+ case TYP_ULONG:
+ noway_assert((srcType == TYP_LONG) || (srcType == TYP_INT));
+ signCheckOnly = true;
+ break;
+
+ default:
+ NO_WAY("Unknown type");
+ return;
+ }
+
+ if (signCheckOnly)
+ {
+ // We only need to check for a negative value in sourceReg
+ inst_RV_IV(INS_cmp, sourceReg, 0, size);
+ genJumpToThrowHlpBlk(EJ_jl, SCK_OVERFLOW);
+ }
+ else
+ {
+ regNumber tmpReg = REG_NA;
+
+ if (needScratchReg)
+ {
+ // We need an additional temp register
+ // Make sure we have exactly one allocated.
+ assert(treeNode->gtRsvdRegs != RBM_NONE);
+ assert(genCountBits(treeNode->gtRsvdRegs) == 1);
+ tmpReg = genRegNumFromMask(treeNode->gtRsvdRegs);
+ }
+
+ // When we are converting from unsigned or to unsigned, we
+ // will only have to check for any bits set using 'typeMask'
+ if (isUnsignedSrc || isUnsignedDst)
+ {
+ if (needScratchReg)
+ {
+ inst_RV_RV(INS_mov, tmpReg, sourceReg, TYP_LONG); // Move the 64-bit value to a writeable temp reg
+ inst_RV_SH(INS_SHIFT_RIGHT_LOGICAL, size, tmpReg, 32); // Shift right by 32 bits
+                    genJumpToThrowHlpBlk(EJ_jne, SCK_OVERFLOW); // Throw if the shifted result is non-zero
+ }
+ else
+ {
+ noway_assert(typeMask != 0);
+ inst_RV_IV(INS_TEST, sourceReg, typeMask, size);
+ genJumpToThrowHlpBlk(EJ_jne, SCK_OVERFLOW);
+ }
+ }
+ else
+ {
+ // For a narrowing signed cast
+ //
+ // We must check the value is in a signed range.
+
+ // Compare with the MAX
+
+ noway_assert((typeMin != 0) && (typeMax != 0));
+
+ inst_RV_IV(INS_cmp, sourceReg, typeMax, size);
+ genJumpToThrowHlpBlk(EJ_jg, SCK_OVERFLOW);
+
+ // Compare with the MIN
+
+ inst_RV_IV(INS_cmp, sourceReg, typeMin, size);
+ genJumpToThrowHlpBlk(EJ_jl, SCK_OVERFLOW);
+ }
+ }
+
+ if (targetReg != sourceReg
+#ifdef _TARGET_AMD64_
+ // On amd64, we can hit this path for a same-register
+ // 4-byte to 8-byte widening conversion, and need to
+ // emit the instruction to set the high bits correctly.
+ || (EA_ATTR(genTypeSize(dstType)) == EA_8BYTE && EA_ATTR(genTypeSize(srcType)) == EA_4BYTE)
+#endif // _TARGET_AMD64_
+ )
+ inst_RV_RV(ins, targetReg, sourceReg, srcType, size);
+ }
+ else // non-overflow checking cast
+ {
+ noway_assert(size < EA_PTRSIZE || srcType == dstType);
+
+ // We may have code transformations that result in casts where srcType is the same as dstType.
+ // e.g. Bug 824281, in which a comma is split by the rationalizer, leaving an assignment of a
+ // long constant to a long lclVar.
+ if (srcType == dstType)
+ {
+ ins = INS_mov;
+ }
+ /* Is the value sitting in a non-byte-addressable register? */
+ else if (castOp->InReg() && (size == EA_1BYTE) && !isByteReg(sourceReg))
+ {
+ if (isUnsignedDst)
+ {
+ // for unsigned values we can AND, so it need not be a byte register
+ ins = INS_AND;
+ }
+ else
+ {
+ // Move the value into a byte register
+ noway_assert(!"Signed byte convert from non-byte-addressable register");
+ }
+
+            /* Generate "mov targetReg, castOp->gtReg" */
+ if (targetReg != sourceReg)
+ {
+ inst_RV_RV(INS_mov, targetReg, sourceReg, srcType);
+ }
+ }
+
+ if (ins == INS_AND)
+ {
+ noway_assert((needAndAfter == false) && isUnsignedDst);
+
+            /* Generate "and reg, MASK" */
+ unsigned fillPattern;
+ if (size == EA_1BYTE)
+ {
+ fillPattern = 0xff;
+ }
+ else if (size == EA_2BYTE)
+ {
+ fillPattern = 0xffff;
+ }
+ else
+ {
+ fillPattern = 0xffffffff;
+ }
+
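+            // For example, with the source in esi (which has no byte form in 32-bit
+            // code), a cast to an unsigned byte becomes "mov eax, esi" followed by
+            // "and eax, 0xFF" rather than a movzx from a byte register
+            // (register names are illustrative only).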
+ inst_RV_IV(INS_AND, targetReg, fillPattern, EA_4BYTE);
+ }
+#ifdef _TARGET_AMD64_
+ else if (ins == INS_movsxd)
+ {
+ noway_assert(!needAndAfter);
+ inst_RV_RV(ins, targetReg, sourceReg, srcType, size);
+ }
+#endif // _TARGET_AMD64_
+ else if (ins == INS_mov)
+ {
+ noway_assert(!needAndAfter);
+ if (targetReg != sourceReg
+#ifdef _TARGET_AMD64_
+ // On amd64, 'mov' is the opcode used to zero-extend from
+ // 4 bytes to 8 bytes.
+ || (EA_ATTR(genTypeSize(dstType)) == EA_8BYTE && EA_ATTR(genTypeSize(srcType)) == EA_4BYTE)
+#endif // _TARGET_AMD64_
+ )
+ {
+ inst_RV_RV(ins, targetReg, sourceReg, srcType, size);
+ }
+ }
+ else
+ {
+ noway_assert(ins == INS_movsx || ins == INS_movzx);
+
+            /* Generate "mov targetReg, castOp->gtReg" */
+ inst_RV_RV(ins, targetReg, sourceReg, srcType, size);
+
+ /* Mask off high bits for cast from byte to char */
+ if (needAndAfter)
+ {
+ noway_assert(genTypeSize(dstType) == 2 && ins == INS_movsx);
+ inst_RV_IV(INS_AND, targetReg, 0xFFFF, EA_4BYTE);
+ }
+ }
+ }
+
+ genProduceReg(treeNode);
+}
+
+//------------------------------------------------------------------------
+// genFloatToFloatCast: Generate code for a cast between float and double
+//
+// Arguments:
+// treeNode - The GT_CAST node
+//
+// Return Value:
+// None.
+//
+// Assumptions:
+// Cast is a non-overflow conversion.
+// The treeNode must have an assigned register.
+// The cast is between float and double or vice versa.
+//
+void CodeGen::genFloatToFloatCast(GenTreePtr treeNode)
+{
+ // float <--> double conversions are always non-overflow ones
+ assert(treeNode->OperGet() == GT_CAST);
+ assert(!treeNode->gtOverflow());
+
+ regNumber targetReg = treeNode->gtRegNum;
+ assert(genIsValidFloatReg(targetReg));
+
+ GenTreePtr op1 = treeNode->gtOp.gtOp1;
+#ifdef DEBUG
+ // If not contained, must be a valid float reg.
+ if (!op1->isContained())
+ {
+ assert(genIsValidFloatReg(op1->gtRegNum));
+ }
+#endif
+
+ var_types dstType = treeNode->CastToType();
+ var_types srcType = op1->TypeGet();
+ assert(varTypeIsFloating(srcType) && varTypeIsFloating(dstType));
+
+ genConsumeOperands(treeNode->AsOp());
+ if (srcType == dstType && targetReg == op1->gtRegNum)
+ {
+        // source and destination types are the same and also reside in the same register.
+ // we just need to consume and produce the reg in this case.
+ ;
+ }
+ else
+ {
+ instruction ins = ins_FloatConv(dstType, srcType);
+ getEmitter()->emitInsBinary(ins, emitTypeSize(dstType), treeNode, op1);
+ }
+
+ genProduceReg(treeNode);
+}
+
+//------------------------------------------------------------------------
+// genIntToFloatCast: Generate code to cast an int/long to float/double
+//
+// Arguments:
+// treeNode - The GT_CAST node
+//
+// Return Value:
+// None.
+//
+// Assumptions:
+// Cast is a non-overflow conversion.
+// The treeNode must have an assigned register.
+// SrcType= int32/uint32/int64/uint64 and DstType=float/double.
+//
+void CodeGen::genIntToFloatCast(GenTreePtr treeNode)
+{
+ // int type --> float/double conversions are always non-overflow ones
+ assert(treeNode->OperGet() == GT_CAST);
+ assert(!treeNode->gtOverflow());
+
+ regNumber targetReg = treeNode->gtRegNum;
+ assert(genIsValidFloatReg(targetReg));
+
+ GenTreePtr op1 = treeNode->gtOp.gtOp1;
+#ifdef DEBUG
+ if (!op1->isContained())
+ {
+ assert(genIsValidIntReg(op1->gtRegNum));
+ }
+#endif
+
+ var_types dstType = treeNode->CastToType();
+ var_types srcType = op1->TypeGet();
+ assert(!varTypeIsFloating(srcType) && varTypeIsFloating(dstType));
+
+#if !defined(_TARGET_64BIT_)
+ NYI_IF(varTypeIsLong(srcType), "Conversion from long to float");
+#endif // !defined(_TARGET_64BIT_)
+
+ // Since xarch emitter doesn't handle reporting gc-info correctly while casting away gc-ness we
+ // ensure srcType of a cast is non gc-type. Codegen should never see BYREF as source type except
+ // for GT_LCL_VAR_ADDR and GT_LCL_FLD_ADDR that represent stack addresses and can be considered
+ // as TYP_I_IMPL. In all other cases where src operand is a gc-type and not known to be on stack,
+ // Front-end (see fgMorphCast()) ensures this by assigning gc-type local to a non gc-type
+ // temp and using temp as operand of cast operation.
+ if (srcType == TYP_BYREF)
+ {
+ noway_assert(op1->OperGet() == GT_LCL_VAR_ADDR || op1->OperGet() == GT_LCL_FLD_ADDR);
+ srcType = TYP_I_IMPL;
+ }
+
+ // force the srcType to unsigned if GT_UNSIGNED flag is set
+ if (treeNode->gtFlags & GTF_UNSIGNED)
+ {
+ srcType = genUnsignedType(srcType);
+ }
+
+ noway_assert(!varTypeIsGC(srcType));
+
+    // We should never see a srcType whose size is neither sizeof(int) nor sizeof(long).
+ // For conversions from byte/sbyte/int16/uint16 to float/double, we would expect
+ // either the front-end or lowering phase to have generated two levels of cast.
+ // The first one is for widening smaller int type to int32 and the second one is
+ // to the float/double.
+ emitAttr srcSize = EA_ATTR(genTypeSize(srcType));
+ noway_assert((srcSize == EA_ATTR(genTypeSize(TYP_INT))) || (srcSize == EA_ATTR(genTypeSize(TYP_LONG))));
+
+ // Also we don't expect to see uint32 -> float/double and uint64 -> float conversions
+    // here since they should have been lowered appropriately.
+ noway_assert(srcType != TYP_UINT);
+ noway_assert((srcType != TYP_ULONG) || (dstType != TYP_FLOAT));
+
+ // To convert int to a float/double, cvtsi2ss/sd SSE2 instruction is used
+    // which does a partial write to the lower 4/8 bytes of the xmm register, keeping the
+    // upper bytes unmodified. If "cvtsi2ss/sd xmmReg, r32/r64" occurs inside a loop,
+    // the partial write could introduce a false dependency and could cause a stall
+    // if there are further uses of xmmReg. We have such a case occurring with a
+    // customer-reported version of the SpectralNorm benchmark, resulting in a 2x perf
+    // regression. To avoid the false dependency, we emit "xorps xmmReg, xmmReg" before the
+ // cvtsi2ss/sd instruction.
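+    // As a sketch, converting an int in eax to a double in xmm0 emits:
+    //     xorps    xmm0, xmm0   ; break the false dependency on xmm0
+    //     cvtsi2sd xmm0, eax
+    // (register names are illustrative only).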
+
+ genConsumeOperands(treeNode->AsOp());
+ getEmitter()->emitIns_R_R(INS_xorps, EA_4BYTE, treeNode->gtRegNum, treeNode->gtRegNum);
+
+ // Note that here we need to specify srcType that will determine
+ // the size of source reg/mem operand and rex.w prefix.
+ instruction ins = ins_FloatConv(dstType, TYP_INT);
+ getEmitter()->emitInsBinary(ins, emitTypeSize(srcType), treeNode, op1);
+
+ // Handle the case of srcType = TYP_ULONG. SSE2 conversion instruction
+ // will interpret ULONG value as LONG. Hence we need to adjust the
+ // result if sign-bit of srcType is set.
+ if (srcType == TYP_ULONG)
+ {
+        // The instruction sequence below is less accurate than what clang
+        // and gcc generate. However, we keep the current sequence for backward compatibility.
+        // If we change the instructions below, FloatingPointUtils::convertUInt64ToDouble
+        // should also be updated for a consistent conversion result.
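+        // Note: 0x43f0000000000000 is the IEEE-754 double encoding of 2^64
+        // (biased exponent 0x43F = 1023 + 64, zero mantissa). cvtsi2sd read the
+        // ULONG bit pattern as a signed LONG, so a value with its top bit set was
+        // converted as (value - 2^64); adding 2^64 back restores the intended result.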
+ assert(dstType == TYP_DOUBLE);
+ assert(!op1->isContained());
+
+ // Set the flags without modifying op1.
+ // test op1Reg, op1Reg
+ inst_RV_RV(INS_test, op1->gtRegNum, op1->gtRegNum, srcType);
+
+ // No need to adjust result if op1 >= 0 i.e. positive
+ // Jge label
+ BasicBlock* label = genCreateTempLabel();
+ inst_JMP(EJ_jge, label);
+
+ // Adjust the result
+ // result = result + 0x43f00000 00000000
+ // addsd resultReg, 0x43f00000 00000000
+ GenTreePtr* cns = &u8ToDblBitmask;
+ if (*cns == nullptr)
+ {
+ double d;
+ static_assert_no_msg(sizeof(double) == sizeof(__int64));
+ *((__int64*)&d) = 0x43f0000000000000LL;
+
+ *cns = genMakeConst(&d, dstType, treeNode, true);
+ }
+ inst_RV_TT(INS_addsd, treeNode->gtRegNum, *cns);
+
+ genDefineTempLabel(label);
+ }
+
+ genProduceReg(treeNode);
+}
+
+//------------------------------------------------------------------------
+// genFloatToIntCast: Generate code to cast float/double to int/long
+//
+// Arguments:
+// treeNode - The GT_CAST node
+//
+// Return Value:
+// None.
+//
+// Assumptions:
+// Cast is a non-overflow conversion.
+// The treeNode must have an assigned register.
+// SrcType=float/double and DstType= int32/uint32/int64/uint64
+//
+// TODO-XArch-CQ: (Low-pri) - generate in-line code when DstType = uint64
+//
+void CodeGen::genFloatToIntCast(GenTreePtr treeNode)
+{
+ // we don't expect to see overflow detecting float/double --> int type conversions here
+ // as they should have been converted into helper calls by front-end.
+ assert(treeNode->OperGet() == GT_CAST);
+ assert(!treeNode->gtOverflow());
+
+ regNumber targetReg = treeNode->gtRegNum;
+ assert(genIsValidIntReg(targetReg));
+
+ GenTreePtr op1 = treeNode->gtOp.gtOp1;
+#ifdef DEBUG
+ if (!op1->isContained())
+ {
+ assert(genIsValidFloatReg(op1->gtRegNum));
+ }
+#endif
+
+ var_types dstType = treeNode->CastToType();
+ var_types srcType = op1->TypeGet();
+ assert(varTypeIsFloating(srcType) && !varTypeIsFloating(dstType));
+
+ // We should never be seeing dstType whose size is neither sizeof(TYP_INT) nor sizeof(TYP_LONG).
+ // For conversions to byte/sbyte/int16/uint16 from float/double, we would expect the
+ // front-end or lowering phase to have generated two levels of cast. The first one is
+ // for float or double to int32/uint32 and the second one for narrowing int32/uint32 to
+ // the required smaller int type.
+ emitAttr dstSize = EA_ATTR(genTypeSize(dstType));
+ noway_assert((dstSize == EA_ATTR(genTypeSize(TYP_INT))) || (dstSize == EA_ATTR(genTypeSize(TYP_LONG))));
+
+ // We shouldn't be seeing uint64 here as it should have been converted
+ // into a helper call by either front-end or lowering phase.
+ noway_assert(!varTypeIsUnsigned(dstType) || (dstSize != EA_ATTR(genTypeSize(TYP_LONG))));
+
+    // If the dstType is TYP_UINT, we have only 32 bits in which to encode the
+    // converted value, and any bit from the 33rd upward can serve as the sign bit.
+    // To achieve this we pretend we are converting it to a long.
+ if (varTypeIsUnsigned(dstType) && (dstSize == EA_ATTR(genTypeSize(TYP_INT))))
+ {
+ dstType = TYP_LONG;
+ }
+
+ // Note that we need to specify dstType here so that it will determine
+ // the size of destination integer register and also the rex.w prefix.
+ genConsumeOperands(treeNode->AsOp());
+ instruction ins = ins_FloatConv(TYP_INT, srcType);
+ getEmitter()->emitInsBinary(ins, emitTypeSize(dstType), treeNode, op1);
+ genProduceReg(treeNode);
+}
+
+//------------------------------------------------------------------------
+// genCkfinite: Generate code for ckfinite opcode.
+//
+// Arguments:
+// treeNode - The GT_CKFINITE node
+//
+// Return Value:
+// None.
+//
+// Assumptions:
+// GT_CKFINITE node has reserved an internal register.
+//
+// TODO-XArch-CQ - mark the operand as contained if known to be in
+// memory (e.g. field or an array element).
+//
+void CodeGen::genCkfinite(GenTreePtr treeNode)
+{
+ assert(treeNode->OperGet() == GT_CKFINITE);
+
+ GenTreePtr op1 = treeNode->gtOp.gtOp1;
+ var_types targetType = treeNode->TypeGet();
+ int expMask = (targetType == TYP_FLOAT) ? 0x7F800000 : 0x7FF00000; // Bit mask to extract exponent.
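+    // 0x7F800000 covers the 8 exponent bits of a float (bits 30-23); 0x7FF00000
+    // covers the 11 exponent bits held in the high 32 bits of a double (bits 30-20
+    // of that word). An all-ones exponent means the value is an infinity or a NaN.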
+ regNumber targetReg = treeNode->gtRegNum;
+
+ // Extract exponent into a register.
+ assert(treeNode->gtRsvdRegs != RBM_NONE);
+ assert(genCountBits(treeNode->gtRsvdRegs) == 1);
+ regNumber tmpReg = genRegNumFromMask(treeNode->gtRsvdRegs);
+
+ genConsumeReg(op1);
+
+#ifdef _TARGET_64BIT_
+
+    // Copy the floating-point value to an integer register. If we copied a double to a long, then
+ // right-shift the value so the high 32 bits of the floating-point value sit in the low 32
+ // bits of the integer register.
+ instruction ins = ins_CopyFloatToInt(targetType, (targetType == TYP_FLOAT) ? TYP_INT : TYP_LONG);
+ inst_RV_RV(ins, op1->gtRegNum, tmpReg, targetType);
+ if (targetType == TYP_DOUBLE)
+ {
+ // right shift by 32 bits to get to exponent.
+ inst_RV_SH(INS_shr, EA_8BYTE, tmpReg, 32);
+ }
+
+ // Mask exponent with all 1's and check if the exponent is all 1's
+ inst_RV_IV(INS_and, tmpReg, expMask, EA_4BYTE);
+ inst_RV_IV(INS_cmp, tmpReg, expMask, EA_4BYTE);
+
+ // If exponent is all 1's, throw ArithmeticException
+ genJumpToThrowHlpBlk(EJ_je, SCK_ARITH_EXCPN);
+
+ // if it is a finite value copy it to targetReg
+ if (targetReg != op1->gtRegNum)
+ {
+ inst_RV_RV(ins_Copy(targetType), targetReg, op1->gtRegNum, targetType);
+ }
+
+#else // !_TARGET_64BIT_
+
+ // If the target type is TYP_DOUBLE, we want to extract the high 32 bits into the register.
+ // There is no easy way to do this. To not require an extra register, we'll use shuffles
+    // to move the high 32 bits into the low 32 bits, then shuffle it back, since we
+ // need to produce the value into the target register.
+ //
+ // For TYP_DOUBLE, we'll generate (for targetReg != op1->gtRegNum):
+ // movaps targetReg, op1->gtRegNum
+ // shufps targetReg, targetReg, 0xB1 // WZYX => ZWXY
+ // mov_xmm2i tmpReg, targetReg // tmpReg <= Y
+ // and tmpReg, <mask>
+ // cmp tmpReg, <mask>
+ // je <throw block>
+ // movaps targetReg, op1->gtRegNum // copy the value again, instead of un-shuffling it
+ //
+ // For TYP_DOUBLE with (targetReg == op1->gtRegNum):
+ // shufps targetReg, targetReg, 0xB1 // WZYX => ZWXY
+ // mov_xmm2i tmpReg, targetReg // tmpReg <= Y
+ // and tmpReg, <mask>
+ // cmp tmpReg, <mask>
+ // je <throw block>
+ // shufps targetReg, targetReg, 0xB1 // ZWXY => WZYX
+ //
+ // For TYP_FLOAT, it's the same as _TARGET_64BIT_:
+ // mov_xmm2i tmpReg, targetReg // tmpReg <= low 32 bits
+ // and tmpReg, <mask>
+ // cmp tmpReg, <mask>
+ // je <throw block>
+ // movaps targetReg, op1->gtRegNum // only if targetReg != op1->gtRegNum
+
+ regNumber copyToTmpSrcReg; // The register we'll copy to the integer temp.
+
+ if (targetType == TYP_DOUBLE)
+ {
+ if (targetReg != op1->gtRegNum)
+ {
+ inst_RV_RV(ins_Copy(targetType), targetReg, op1->gtRegNum, targetType);
+ }
+ inst_RV_RV_IV(INS_shufps, EA_16BYTE, targetReg, targetReg, 0xb1);
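+        // The shuffle immediate 0xB1 (binary 10 11 00 01) selects lanes 1,0,3,2,
+        // i.e. it swaps each adjacent pair of 32-bit lanes, leaving the double's
+        // high 32 bits in lane 0 where the mov_xmm2i below can read them.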
+ copyToTmpSrcReg = targetReg;
+ }
+ else
+ {
+ copyToTmpSrcReg = op1->gtRegNum;
+ }
+
+ // Copy only the low 32 bits. This will be the high order 32 bits of the floating-point
+ // value, no matter the floating-point type.
+ inst_RV_RV(ins_CopyFloatToInt(TYP_FLOAT, TYP_INT), copyToTmpSrcReg, tmpReg, TYP_FLOAT);
+
+ // Mask exponent with all 1's and check if the exponent is all 1's
+ inst_RV_IV(INS_and, tmpReg, expMask, EA_4BYTE);
+ inst_RV_IV(INS_cmp, tmpReg, expMask, EA_4BYTE);
+
+ // If exponent is all 1's, throw ArithmeticException
+ genJumpToThrowHlpBlk(EJ_je, SCK_ARITH_EXCPN);
+
+ if (targetReg != op1->gtRegNum)
+ {
+ // In both the TYP_FLOAT and TYP_DOUBLE case, the op1 register is untouched,
+ // so copy it to the targetReg. This is faster and smaller for TYP_DOUBLE
+ // than re-shuffling the targetReg.
+ inst_RV_RV(ins_Copy(targetType), targetReg, op1->gtRegNum, targetType);
+ }
+ else if (targetType == TYP_DOUBLE)
+ {
+ // We need to re-shuffle the targetReg to get the correct result.
+ inst_RV_RV_IV(INS_shufps, EA_16BYTE, targetReg, targetReg, 0xb1);
+ }
+
+#endif // !_TARGET_64BIT_
+
+ genProduceReg(treeNode);
+}
+
+#ifdef _TARGET_AMD64_
+int CodeGenInterface::genSPtoFPdelta()
+{
+ int delta;
+
+#ifdef PLATFORM_UNIX
+
+ // We require frame chaining on Unix to support native tool unwinding (such as
+ // unwinding by the native debugger). We have a CLR-only extension to the
+ // unwind codes (UWOP_SET_FPREG_LARGE) to support SP->FP offsets larger than 240.
+ // If Unix ever supports EnC, the RSP == RBP assumption will have to be reevaluated.
+ delta = genTotalFrameSize();
+
+#else // !PLATFORM_UNIX
+
+ // As per Amd64 ABI, RBP offset from initial RSP can be between 0 and 240 if
+ // RBP needs to be reported in unwind codes. This case would arise for methods
+ // with localloc.
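+    // (The 240 limit comes from the Windows x64 UNWIND_INFO format: the frame
+    // pointer offset is a 4-bit field scaled by 16, so at most 15 * 16 = 240.)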
+ if (compiler->compLocallocUsed)
+ {
+ // We cannot base delta computation on compLclFrameSize since it changes from
+ // tentative to final frame layout and hence there is a possibility of
+ // under-estimating offset of vars from FP, which in turn results in under-
+ // estimating instruction size.
+ //
+ // To be predictive and so as never to under-estimate offset of vars from FP
+ // we will always position FP at min(240, outgoing arg area size).
+ delta = Min(240, (int)compiler->lvaOutgoingArgSpaceSize);
+ }
+ else if (compiler->opts.compDbgEnC)
+ {
+ // vm assumption on EnC methods is that rsp and rbp are equal
+ delta = 0;
+ }
+ else
+ {
+ delta = genTotalFrameSize();
+ }
+
+#endif // !PLATFORM_UNIX
+
+ return delta;
+}
+
+//---------------------------------------------------------------------
+// genTotalFrameSize - return the total size of the stack frame, including local size,
+// callee-saved register size, etc. For AMD64, this does not include the caller-pushed
+// return address.
+//
+// Return value:
+// Total frame size
+//
+
+int CodeGenInterface::genTotalFrameSize()
+{
+ assert(!IsUninitialized(compiler->compCalleeRegsPushed));
+
+ int totalFrameSize = compiler->compCalleeRegsPushed * REGSIZE_BYTES + compiler->compLclFrameSize;
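+    // For example, with 3 callee-saved registers pushed and a compLclFrameSize of
+    // 0x40, totalFrameSize = 3 * 8 + 0x40 = 0x58 bytes (values illustrative only).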
+
+ assert(totalFrameSize >= 0);
+ return totalFrameSize;
+}
+
+//---------------------------------------------------------------------
+// genCallerSPtoFPdelta - return the offset from Caller-SP to the frame pointer.
+// This number is going to be negative, since the Caller-SP is at a higher
+// address than the frame pointer.
+//
+// There must be a frame pointer to call this function!
+//
+// We can't compute this directly from the Caller-SP, since the frame pointer
+// is based on a maximum delta from Initial-SP, so first we find SP, then
+// compute the FP offset.
+
+int CodeGenInterface::genCallerSPtoFPdelta()
+{
+ assert(isFramePointerUsed());
+ int callerSPtoFPdelta;
+
+ callerSPtoFPdelta = genCallerSPtoInitialSPdelta() + genSPtoFPdelta();
+
+ assert(callerSPtoFPdelta <= 0);
+ return callerSPtoFPdelta;
+}
+
+//---------------------------------------------------------------------
+// genCallerSPtoInitialSPdelta - return the offset from Caller-SP to Initial SP.
+//
+// This number will be negative.
+
+int CodeGenInterface::genCallerSPtoInitialSPdelta()
+{
+ int callerSPtoSPdelta = 0;
+
+ callerSPtoSPdelta -= genTotalFrameSize();
+ callerSPtoSPdelta -= REGSIZE_BYTES; // caller-pushed return address
+
+ // compCalleeRegsPushed does not account for the frame pointer
+ // TODO-Cleanup: shouldn't this be part of genTotalFrameSize?
+ if (isFramePointerUsed())
+ {
+ callerSPtoSPdelta -= REGSIZE_BYTES;
+ }
+
+ assert(callerSPtoSPdelta <= 0);
+ return callerSPtoSPdelta;
+}
+#endif // _TARGET_AMD64_
+
+//-----------------------------------------------------------------------------------------
+// genSSE2BitwiseOp - generate SSE2 code for the given oper as "Operand BitWiseOp BitMask"
+//
+// Arguments:
+// treeNode - tree node
+//
+// Return value:
+// None
+//
+// Assumptions:
+// i) tree oper is one of GT_NEG or GT_INTRINSIC Abs()
+// ii) tree type is floating point type.
+// iii) caller of this routine needs to call genProduceReg()
+void CodeGen::genSSE2BitwiseOp(GenTreePtr treeNode)
+{
+ regNumber targetReg = treeNode->gtRegNum;
+ var_types targetType = treeNode->TypeGet();
+ assert(varTypeIsFloating(targetType));
+
+ float f;
+ double d;
+ GenTreePtr* bitMask = nullptr;
+ instruction ins = INS_invalid;
+ void* cnsAddr = nullptr;
+ bool dblAlign = false;
+
+ switch (treeNode->OperGet())
+ {
+ case GT_NEG:
+ // Neg(x) = flip the sign bit.
+ // Neg(f) = f ^ 0x80000000
+ // Neg(d) = d ^ 0x8000000000000000
+ ins = genGetInsForOper(GT_XOR, targetType);
+ if (targetType == TYP_FLOAT)
+ {
+ bitMask = &negBitmaskFlt;
+
+ static_assert_no_msg(sizeof(float) == sizeof(int));
+ *((int*)&f) = 0x80000000;
+ cnsAddr = &f;
+ }
+ else
+ {
+ bitMask = &negBitmaskDbl;
+
+ static_assert_no_msg(sizeof(double) == sizeof(__int64));
+ *((__int64*)&d) = 0x8000000000000000LL;
+ cnsAddr = &d;
+ dblAlign = true;
+ }
+ break;
+
+ case GT_INTRINSIC:
+ assert(treeNode->gtIntrinsic.gtIntrinsicId == CORINFO_INTRINSIC_Abs);
+
+ // Abs(x) = set sign-bit to zero
+ // Abs(f) = f & 0x7fffffff
+ // Abs(d) = d & 0x7fffffffffffffff
+ ins = genGetInsForOper(GT_AND, targetType);
+ if (targetType == TYP_FLOAT)
+ {
+ bitMask = &absBitmaskFlt;
+
+ static_assert_no_msg(sizeof(float) == sizeof(int));
+ *((int*)&f) = 0x7fffffff;
+ cnsAddr = &f;
+ }
+ else
+ {
+ bitMask = &absBitmaskDbl;
+
+ static_assert_no_msg(sizeof(double) == sizeof(__int64));
+ *((__int64*)&d) = 0x7fffffffffffffffLL;
+ cnsAddr = &d;
+ dblAlign = true;
+ }
+ break;
+
+ default:
+ assert(!"genSSE2: unsupported oper");
+ unreached();
+ break;
+ }
+
+ if (*bitMask == nullptr)
+ {
+ assert(cnsAddr != nullptr);
+ *bitMask = genMakeConst(cnsAddr, targetType, treeNode, dblAlign);
+ }
+
+ // We need an additional register for bitmask.
+ // Make sure we have one allocated.
+ assert(treeNode->gtRsvdRegs != RBM_NONE);
+ assert(genCountBits(treeNode->gtRsvdRegs) == 1);
+ regNumber tmpReg = genRegNumFromMask(treeNode->gtRsvdRegs);
+
+ // Move operand into targetReg only if the reg reserved for
+ // internal purpose is not the same as targetReg.
+ GenTreePtr op1 = treeNode->gtOp.gtOp1;
+ assert(!op1->isContained());
+ regNumber operandReg = genConsumeReg(op1);
+ if (tmpReg != targetReg)
+ {
+ if (operandReg != targetReg)
+ {
+ inst_RV_RV(ins_Copy(targetType), targetReg, operandReg, targetType);
+ }
+
+ operandReg = tmpReg;
+ }
+
+ inst_RV_TT(ins_Load(targetType, false), tmpReg, *bitMask);
+ assert(ins != INS_invalid);
+ inst_RV_RV(ins, targetReg, operandReg, targetType);
+}
+
+//---------------------------------------------------------------------
+// genIntrinsic - generate code for a given intrinsic
+//
+// Arguments
+// treeNode - the GT_INTRINSIC node
+//
+// Return value:
+// None
+//
+void CodeGen::genIntrinsic(GenTreePtr treeNode)
+{
+ // Right now only Sqrt/Abs are treated as math intrinsics.
+ switch (treeNode->gtIntrinsic.gtIntrinsicId)
+ {
+ case CORINFO_INTRINSIC_Sqrt:
+ noway_assert(treeNode->TypeGet() == TYP_DOUBLE);
+ genConsumeOperands(treeNode->AsOp());
+ getEmitter()->emitInsBinary(ins_FloatSqrt(treeNode->TypeGet()), emitTypeSize(treeNode), treeNode,
+ treeNode->gtOp.gtOp1);
+ break;
+
+ case CORINFO_INTRINSIC_Abs:
+ genSSE2BitwiseOp(treeNode);
+ break;
+
+ default:
+ assert(!"genIntrinsic: Unsupported intrinsic");
+ unreached();
+ }
+
+ genProduceReg(treeNode);
+}
+
+//-------------------------------------------------------------------------- //
+// getBaseVarForPutArgStk - returns the baseVarNum for passing a stack arg.
+//
+// Arguments
+// treeNode - the GT_PUTARG_STK node
+//
+// Return value:
+// The number of the base variable.
+//
+// Note:
+// If tail call the outgoing args are placed in the caller's incoming arg stack space.
+// Otherwise, they go in the outgoing arg area on the current frame.
+//
+// On Windows the caller always creates slots (homing space) in its frame for the
+// first 4 arguments of a callee (register passed args). So, the baseVarNum is always 0.
+// For System V systems there is no such calling convention requirement, and the code needs to find
+// the first stack passed argument from the caller. This is done by iterating over
+// all the lvParam variables and finding the first one whose lvArgReg equals REG_STK.
+//
+unsigned CodeGen::getBaseVarForPutArgStk(GenTreePtr treeNode)
+{
+ assert(treeNode->OperGet() == GT_PUTARG_STK);
+
+ unsigned baseVarNum;
+
+#if FEATURE_FASTTAILCALL
+ bool putInIncomingArgArea = treeNode->AsPutArgStk()->putInIncomingArgArea;
+#else
+ const bool putInIncomingArgArea = false;
+#endif
+
+    // Whether to set up the stack arg in the incoming or the outgoing arg area?
+    // Fast tail calls implemented as epilog+jmp: the stack arg is set up in the incoming arg area.
+    // All other calls: the stack arg is set up in the outgoing arg area.
+ if (putInIncomingArgArea)
+ {
+ // See the note in the function header re: finding the first stack passed argument.
+ baseVarNum = getFirstArgWithStackSlot();
+ assert(baseVarNum != BAD_VAR_NUM);
+
+#ifdef DEBUG
+ // This must be a fast tail call.
+ assert(treeNode->AsPutArgStk()->gtCall->AsCall()->IsFastTailCall());
+
+        // Since it is a fast tail call, the existence of the first incoming arg is guaranteed
+        // because a fast tail call requires that the caller's incoming arg area is >= the outgoing
+        // arg area required for the tail call.
+ LclVarDsc* varDsc = &(compiler->lvaTable[baseVarNum]);
+ assert(varDsc != nullptr);
+
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ assert(!varDsc->lvIsRegArg && varDsc->lvArgReg == REG_STK);
+#else // !FEATURE_UNIX_AMD64_STRUCT_PASSING
+ // On Windows this assert is always true. The first argument will always be in REG_ARG_0 or REG_FLTARG_0.
+ assert(varDsc->lvIsRegArg && (varDsc->lvArgReg == REG_ARG_0 || varDsc->lvArgReg == REG_FLTARG_0));
+#endif // !FEATURE_UNIX_AMD64_STRUCT_PASSING
+#endif // !DEBUG
+ }
+ else
+ {
+#if FEATURE_FIXED_OUT_ARGS
+ baseVarNum = compiler->lvaOutgoingArgSpaceVar;
+#else // !FEATURE_FIXED_OUT_ARGS
+ NYI_X86("Stack args for x86/RyuJIT");
+ baseVarNum = BAD_VAR_NUM;
+#endif // !FEATURE_FIXED_OUT_ARGS
+ }
+
+ return baseVarNum;
+}
+
+//--------------------------------------------------------------------- //
+// genPutStructArgStk - generate code for passing an arg on the stack.
+//
+// Arguments
+// treeNode - the GT_PUTARG_STK node
+// targetType - the type of the treeNode
+//
+// Return value:
+// None
+//
+void CodeGen::genPutArgStk(GenTreePtr treeNode)
+{
+ var_types targetType = treeNode->TypeGet();
+#ifdef _TARGET_X86_
+ noway_assert(targetType != TYP_STRUCT);
+
+ // The following logic is applicable for x86 arch.
+ assert(!varTypeIsFloating(targetType) || (targetType == treeNode->gtGetOp1()->TypeGet()));
+
+ GenTreePtr data = treeNode->gtOp.gtOp1;
+
+ // On a 32-bit target, all of the long arguments have been decomposed into
+ // a separate putarg_stk for each of the upper and lower halves.
+ noway_assert(targetType != TYP_LONG);
+
+ int argSize = genTypeSize(genActualType(targetType));
+ genStackLevel += argSize;
+
+ // TODO-Cleanup: Handle this in emitInsMov() in emitXArch.cpp?
+ if (data->isContainedIntOrIImmed())
+ {
+ if (data->IsIconHandle())
+ {
+ inst_IV_handle(INS_push, data->gtIntCon.gtIconVal);
+ }
+ else
+ {
+ inst_IV(INS_push, data->gtIntCon.gtIconVal);
+ }
+ }
+ else if (data->isContained())
+ {
+ NYI_X86("Contained putarg_stk of non-constant");
+ }
+ else
+ {
+ genConsumeReg(data);
+ if (varTypeIsIntegralOrI(targetType))
+ {
+ inst_RV(INS_push, data->gtRegNum, targetType);
+ }
+ else
+ {
+ // Decrement SP.
+ inst_RV_IV(INS_sub, REG_SPBASE, argSize, emitActualTypeSize(TYP_I_IMPL));
+ getEmitter()->emitIns_AR_R(ins_Store(targetType), emitTypeSize(targetType), data->gtRegNum, REG_SPBASE, 0);
+ }
+ }
+#else // !_TARGET_X86_
+ {
+ unsigned baseVarNum = getBaseVarForPutArgStk(treeNode);
+
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+
+ if (varTypeIsStruct(targetType))
+ {
+ genPutStructArgStk(treeNode, baseVarNum);
+ return;
+ }
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+
+ noway_assert(targetType != TYP_STRUCT);
+ assert(!varTypeIsFloating(targetType) || (targetType == treeNode->gtGetOp1()->TypeGet()));
+
+ // Get argument offset on stack.
+ // Here we cross check that argument offset hasn't changed from lowering to codegen since
+ // we are storing arg slot number in GT_PUTARG_STK node in lowering phase.
+ int argOffset = treeNode->AsPutArgStk()->getArgOffset();
+
+#ifdef DEBUG
+ fgArgTabEntryPtr curArgTabEntry = compiler->gtArgEntryByNode(treeNode->AsPutArgStk()->gtCall, treeNode);
+ assert(curArgTabEntry);
+ assert(argOffset == (int)curArgTabEntry->slotNum * TARGET_POINTER_SIZE);
+#endif
+
+ GenTreePtr data = treeNode->gtGetOp1();
+
+ if (data->isContained())
+ {
+ getEmitter()->emitIns_S_I(ins_Store(targetType), emitTypeSize(targetType), baseVarNum, argOffset,
+ (int)data->AsIntConCommon()->IconValue());
+ }
+ else
+ {
+ genConsumeReg(data);
+ getEmitter()->emitIns_S_R(ins_Store(targetType), emitTypeSize(targetType), data->gtRegNum, baseVarNum,
+ argOffset);
+ }
+ }
+#endif // !_TARGET_X86_
+}
+
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+
+//---------------------------------------------------------------------
+// genPutStructArgStk - generate code for copying a struct arg on the stack by value.
+// In case there are references to heap object in the struct,
+// it generates the gcinfo as well.
+//
+// Arguments
+// treeNode - the GT_PUTARG_STK node
+// baseVarNum - the variable number relative to which to put the argument on the stack.
+// For tail calls this is the baseVarNum = 0.
+// For non tail calls this is the outgoingArgSpace.
+//
+// Return value:
+// None
+//
+void CodeGen::genPutStructArgStk(GenTreePtr treeNode, unsigned baseVarNum)
+{
+ assert(treeNode->OperGet() == GT_PUTARG_STK);
+ assert(baseVarNum != BAD_VAR_NUM);
+
+ var_types targetType = treeNode->TypeGet();
+
+ if (varTypeIsSIMD(targetType))
+ {
+ regNumber srcReg = genConsumeReg(treeNode->gtGetOp1());
+ assert((srcReg != REG_NA) && (genIsValidFloatReg(srcReg)));
+ getEmitter()->emitIns_S_R(ins_Store(targetType), emitTypeSize(targetType), srcReg, baseVarNum,
+ treeNode->AsPutArgStk()->getArgOffset());
+ return;
+ }
+
+ assert(targetType == TYP_STRUCT);
+
+ GenTreePutArgStk* putArgStk = treeNode->AsPutArgStk();
+ if (putArgStk->gtNumberReferenceSlots == 0)
+ {
+ switch (putArgStk->gtPutArgStkKind)
+ {
+ case GenTreePutArgStk::PutArgStkKindRepInstr:
+ genStructPutArgRepMovs(putArgStk, baseVarNum);
+ break;
+ case GenTreePutArgStk::PutArgStkKindUnroll:
+ genStructPutArgUnroll(putArgStk, baseVarNum);
+ break;
+ default:
+ unreached();
+ }
+ }
+ else
+ {
+        // No need to disable GC the way COPYOBJ does. Here the refs are always copied with atomic operations.
+
+ // Consume these registers.
+ // They may now contain gc pointers (depending on their type; gcMarkRegPtrVal will "do the right thing").
+ genConsumePutStructArgStk(putArgStk, REG_RDI, REG_RSI, REG_NA, baseVarNum);
+ GenTreePtr dstAddr = putArgStk;
+ GenTreePtr src = putArgStk->gtOp.gtOp1;
+ assert(src->OperGet() == GT_OBJ);
+ GenTreePtr srcAddr = src->gtGetOp1();
+
+ unsigned slots = putArgStk->gtNumSlots;
+
+        // We are always copying to the stack, so we don't need to use the write barrier.
+ BYTE* gcPtrs = putArgStk->gtGcPtrs;
+ unsigned gcPtrCount = putArgStk->gtNumberReferenceSlots;
+
+ unsigned i = 0;
+ unsigned copiedSlots = 0;
+ while (i < slots)
+ {
+ switch (gcPtrs[i])
+ {
+ case TYPE_GC_NONE:
+ // Let's see if we can use rep movsq instead of a sequence of movsq instructions
+ // to save cycles and code size.
+ {
+ unsigned nonGcSlotCount = 0;
+
+ do
+ {
+ nonGcSlotCount++;
+ i++;
+ } while (i < slots && gcPtrs[i] == TYPE_GC_NONE);
+
+ // If we have a very small contiguous non-gc region, it's better just to
+ // emit a sequence of movsq instructions
+ if (nonGcSlotCount < CPOBJ_NONGC_SLOTS_LIMIT)
+ {
+ copiedSlots += nonGcSlotCount;
+ while (nonGcSlotCount > 0)
+ {
+ instGen(INS_movsq);
+ nonGcSlotCount--;
+ }
+ }
+ else
+ {
+ getEmitter()->emitIns_R_I(INS_mov, EA_4BYTE, REG_RCX, nonGcSlotCount);
+ copiedSlots += nonGcSlotCount;
+ instGen(INS_r_movsq);
+ }
+ }
+ break;
+
+ case TYPE_GC_REF: // Is an object ref
+ case TYPE_GC_BYREF: // Is an interior pointer - promote it but don't scan it
+ {
+ // We have a GC (byref or ref) pointer
+ // TODO-Amd64-Unix: Here a better solution (for code size and CQ) would be to use movsq instruction,
+ // but the logic for emitting a GC info record is not available (it is internal for the emitter
+ // only.) See emitGCVarLiveUpd function. If we could call it separately, we could do
+ // instGen(INS_movsq); and emission of gc info.
+
+ var_types memType;
+ if (gcPtrs[i] == TYPE_GC_REF)
+ {
+ memType = TYP_REF;
+ }
+ else
+ {
+ assert(gcPtrs[i] == TYPE_GC_BYREF);
+ memType = TYP_BYREF;
+ }
+
+ getEmitter()->emitIns_R_AR(ins_Load(memType), emitTypeSize(memType), REG_RCX, REG_RSI, 0);
+ getEmitter()->emitIns_S_R(ins_Store(memType), emitTypeSize(memType), REG_RCX, baseVarNum,
+ ((copiedSlots + putArgStk->gtSlotNum) * TARGET_POINTER_SIZE));
+
+ // Source for the copy operation.
+ // If a LocalAddr, use EA_PTRSIZE - copy from stack.
+ // If not a LocalAddr, use EA_BYREF - the source location is not on the stack.
+ getEmitter()->emitIns_R_I(INS_add, ((src->OperIsLocalAddr()) ? EA_PTRSIZE : EA_BYREF), REG_RSI,
+ TARGET_POINTER_SIZE);
+
+ // Always copying to the stack - outgoing arg area
+ // (or the outgoing arg area of the caller for a tail call) - use EA_PTRSIZE.
+ getEmitter()->emitIns_R_I(INS_add, EA_PTRSIZE, REG_RDI, TARGET_POINTER_SIZE);
+ copiedSlots++;
+ gcPtrCount--;
+ i++;
+ }
+ break;
+
+ default:
+ unreached();
+ break;
+ }
+ }
+
+ assert(gcPtrCount == 0);
+ }
+}
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+
+/*****************************************************************************
+ *
+ * Create and record GC Info for the function.
+ */
+#ifdef _TARGET_AMD64_
+void
+#else // !_TARGET_AMD64_
+void*
+#endif // !_TARGET_AMD64_
+CodeGen::genCreateAndStoreGCInfo(unsigned codeSize, unsigned prologSize, unsigned epilogSize DEBUGARG(void* codePtr))
+{
+#ifdef JIT32_GCENCODER
+ return genCreateAndStoreGCInfoJIT32(codeSize, prologSize, epilogSize DEBUGARG(codePtr));
+#else // !JIT32_GCENCODER
+ genCreateAndStoreGCInfoX64(codeSize, prologSize DEBUGARG(codePtr));
+#endif // !JIT32_GCENCODER
+}
+
+#ifdef JIT32_GCENCODER
+void* CodeGen::genCreateAndStoreGCInfoJIT32(unsigned codeSize,
+ unsigned prologSize,
+ unsigned epilogSize DEBUGARG(void* codePtr))
+{
+ BYTE headerBuf[64];
+ InfoHdr header;
+
+ int s_cached;
+#ifdef DEBUG
+ size_t headerSize =
+#endif
+ compiler->compInfoBlkSize =
+ gcInfo.gcInfoBlockHdrSave(headerBuf, 0, codeSize, prologSize, epilogSize, &header, &s_cached);
+
+ size_t argTabOffset = 0;
+ size_t ptrMapSize = gcInfo.gcPtrTableSize(header, codeSize, &argTabOffset);
+
+#if DISPLAY_SIZES
+
+ if (genInterruptible)
+ {
+ gcHeaderISize += compiler->compInfoBlkSize;
+ gcPtrMapISize += ptrMapSize;
+ }
+ else
+ {
+ gcHeaderNSize += compiler->compInfoBlkSize;
+ gcPtrMapNSize += ptrMapSize;
+ }
+
+#endif // DISPLAY_SIZES
+
+ compiler->compInfoBlkSize += ptrMapSize;
+
+ /* Allocate the info block for the method */
+
+ compiler->compInfoBlkAddr = (BYTE*)compiler->info.compCompHnd->allocGCInfo(compiler->compInfoBlkSize);
+
+#if 0 // VERBOSE_SIZES
+ // TODO-X86-Cleanup: 'dataSize', below, is not defined
+
+// if (compiler->compInfoBlkSize > codeSize && compiler->compInfoBlkSize > 100)
+ {
+ printf("[%7u VM, %7u+%7u/%7u x86 %03u/%03u%%] %s.%s\n",
+ compiler->info.compILCodeSize,
+ compiler->compInfoBlkSize,
+ codeSize + dataSize,
+ codeSize + dataSize - prologSize - epilogSize,
+ 100 * (codeSize + dataSize) / compiler->info.compILCodeSize,
+ 100 * (codeSize + dataSize + compiler->compInfoBlkSize) / compiler->info.compILCodeSize,
+ compiler->info.compClassName,
+ compiler->info.compMethodName);
+}
+
+#endif
+
+ /* Fill in the info block and return it to the caller */
+
+ void* infoPtr = compiler->compInfoBlkAddr;
+
+ /* Create the method info block: header followed by GC tracking tables */
+
+ compiler->compInfoBlkAddr +=
+ gcInfo.gcInfoBlockHdrSave(compiler->compInfoBlkAddr, -1, codeSize, prologSize, epilogSize, &header, &s_cached);
+
+ assert(compiler->compInfoBlkAddr == (BYTE*)infoPtr + headerSize);
+ compiler->compInfoBlkAddr = gcInfo.gcPtrTableSave(compiler->compInfoBlkAddr, header, codeSize, &argTabOffset);
+ assert(compiler->compInfoBlkAddr == (BYTE*)infoPtr + headerSize + ptrMapSize);
+
+#ifdef DEBUG
+
+ if (0)
+ {
+ BYTE* temp = (BYTE*)infoPtr;
+ unsigned size = compiler->compInfoBlkAddr - temp;
+ BYTE* ptab = temp + headerSize;
+
+ noway_assert(size == headerSize + ptrMapSize);
+
+ printf("Method info block - header [%u bytes]:", headerSize);
+
+ for (unsigned i = 0; i < size; i++)
+ {
+ if (temp == ptab)
+ {
+ printf("\nMethod info block - ptrtab [%u bytes]:", ptrMapSize);
+ printf("\n %04X: %*c", i & ~0xF, 3 * (i & 0xF), ' ');
+ }
+ else
+ {
+ if (!(i % 16))
+ printf("\n %04X: ", i);
+ }
+
+ printf("%02X ", *temp++);
+ }
+
+ printf("\n");
+ }
+
+#endif // DEBUG
+
+#if DUMP_GC_TABLES
+
+ if (compiler->opts.dspGCtbls)
+ {
+ const BYTE* base = (BYTE*)infoPtr;
+ unsigned size;
+ unsigned methodSize;
+ InfoHdr dumpHeader;
+
+ printf("GC Info for method %s\n", compiler->info.compFullName);
+ printf("GC info size = %3u\n", compiler->compInfoBlkSize);
+
+ size = gcInfo.gcInfoBlockHdrDump(base, &dumpHeader, &methodSize);
+ // printf("size of header encoding is %3u\n", size);
+ printf("\n");
+
+ if (compiler->opts.dspGCtbls)
+ {
+ base += size;
+ size = gcInfo.gcDumpPtrTable(base, dumpHeader, methodSize);
+ // printf("size of pointer table is %3u\n", size);
+ printf("\n");
+ noway_assert(compiler->compInfoBlkAddr == (base + size));
+ }
+ }
+
+#ifdef DEBUG
+ if (jitOpts.testMask & 128)
+ {
+ for (unsigned offs = 0; offs < codeSize; offs++)
+ {
+ gcInfo.gcFindPtrsInFrame(infoPtr, codePtr, offs);
+ }
+ }
+#endif // DEBUG
+#endif // DUMP_GC_TABLES
+
+ /* Make sure we ended up generating the expected number of bytes */
+
+ noway_assert(compiler->compInfoBlkAddr == (BYTE*)infoPtr + compiler->compInfoBlkSize);
+
+ return infoPtr;
+}
+
+#else // !JIT32_GCENCODER
+void CodeGen::genCreateAndStoreGCInfoX64(unsigned codeSize, unsigned prologSize DEBUGARG(void* codePtr))
+{
+ IAllocator* allowZeroAlloc = new (compiler, CMK_GC) AllowZeroAllocator(compiler->getAllocatorGC());
+ GcInfoEncoder* gcInfoEncoder = new (compiler, CMK_GC)
+ GcInfoEncoder(compiler->info.compCompHnd, compiler->info.compMethodInfo, allowZeroAlloc, NOMEM);
+ assert(gcInfoEncoder);
+
+ // Follow the code pattern of the x86 gc info encoder (genCreateAndStoreGCInfoJIT32).
+ gcInfo.gcInfoBlockHdrSave(gcInfoEncoder, codeSize, prologSize);
+
+ // First we figure out the encoder ID's for the stack slots and registers.
+ gcInfo.gcMakeRegPtrTable(gcInfoEncoder, codeSize, prologSize, GCInfo::MAKE_REG_PTR_MODE_ASSIGN_SLOTS);
+ // Now we've requested all the slots we'll need; "finalize" these (make more compact data structures for them).
+ gcInfoEncoder->FinalizeSlotIds();
+ // Now we can actually use those slot ID's to declare live ranges.
+ gcInfo.gcMakeRegPtrTable(gcInfoEncoder, codeSize, prologSize, GCInfo::MAKE_REG_PTR_MODE_DO_WORK);
+
+#if defined(DEBUGGING_SUPPORT)
+ if (compiler->opts.compDbgEnC)
+ {
+ // what we have to preserve is called the "frame header" (see comments in VM\eetwain.cpp)
+ // which is:
+ // -return address
+ // -saved off RBP
+ // -saved 'this' pointer and bool for synchronized methods
+
+ // 4 slots for RBP + return address + RSI + RDI
+ int preservedAreaSize = 4 * REGSIZE_BYTES;
+
+ if (compiler->info.compFlags & CORINFO_FLG_SYNCH)
+ {
+ if (!(compiler->info.compFlags & CORINFO_FLG_STATIC))
+ {
+ preservedAreaSize += REGSIZE_BYTES;
+ }
+
+ // bool in synchronized methods that tracks whether the lock has been taken (takes 4 bytes on stack)
+ preservedAreaSize += 4;
+ }
+
+ // Used to signal both that the method is compiled for EnC, and also the size of the block at the top of the
+ // frame
+ gcInfoEncoder->SetSizeOfEditAndContinuePreservedArea(preservedAreaSize);
+ }
+#endif
+
+ gcInfoEncoder->Build();
+
+ // GC Encoder automatically puts the GC info in the right spot using ICorJitInfo::allocGCInfo(size_t)
+ // let's save the values anyway for debugging purposes
+ compiler->compInfoBlkAddr = gcInfoEncoder->Emit();
+ compiler->compInfoBlkSize = 0; // not exposed by the GCEncoder interface
+}
+#endif // !JIT32_GCENCODER
+
+/*****************************************************************************
+ * Emit a call to a helper function.
+ *
+ */
+
+void CodeGen::genEmitHelperCall(unsigned helper, int argSize, emitAttr retSize, regNumber callTargetReg)
+{
+ void* addr = nullptr;
+ void* pAddr = nullptr;
+
+ emitter::EmitCallType callType = emitter::EC_FUNC_TOKEN;
+ addr = compiler->compGetHelperFtn((CorInfoHelpFunc)helper, &pAddr);
+ regNumber callTarget = REG_NA;
+ regMaskTP killMask = compiler->compHelperCallKillSet((CorInfoHelpFunc)helper);
+
+ if (!addr)
+ {
+ assert(pAddr != nullptr);
+
+ // Absolute indirect call addr
+        // Note: The order of checks is important. Always check for pc-relative first and
+        // zero-relative next, because the former encoding is 1 byte smaller than the latter.
+ if (genCodeIndirAddrCanBeEncodedAsPCRelOffset((size_t)pAddr) ||
+ genCodeIndirAddrCanBeEncodedAsZeroRelOffset((size_t)pAddr))
+ {
+ // generate call whose target is specified by 32-bit offset relative to PC or zero.
+ callType = emitter::EC_FUNC_TOKEN_INDIR;
+ addr = pAddr;
+ }
+ else
+ {
+#ifdef _TARGET_AMD64_
+ // If this indirect address cannot be encoded as 32-bit offset relative to PC or Zero,
+ // load it into REG_HELPER_CALL_TARGET and use register indirect addressing mode to
+ // make the call.
+ // mov reg, addr
+ // call [reg]
+
+ if (callTargetReg == REG_NA)
+ {
+ // If a callTargetReg has not been explicitly provided, we will use REG_DEFAULT_HELPER_CALL_TARGET, but
+ // this is only a valid assumption if the helper call is known to kill REG_DEFAULT_HELPER_CALL_TARGET.
+ callTargetReg = REG_DEFAULT_HELPER_CALL_TARGET;
+ regMaskTP callTargetMask = genRegMask(callTargetReg);
+ noway_assert((callTargetMask & killMask) == callTargetMask);
+ }
+ else
+ {
+ // The call target must not overwrite any live variable, though it may not be in the
+ // kill set for the call.
+ regMaskTP callTargetMask = genRegMask(callTargetReg);
+ noway_assert((callTargetMask & regSet.rsMaskVars) == RBM_NONE);
+ }
+#endif
+
+ callTarget = callTargetReg;
+ CodeGen::genSetRegToIcon(callTarget, (ssize_t)pAddr, TYP_I_IMPL);
+ callType = emitter::EC_INDIR_ARD;
+ }
+ }
+
+ getEmitter()->emitIns_Call(callType, compiler->eeFindHelper(helper), INDEBUG_LDISASM_COMMA(nullptr) addr, argSize,
+ retSize FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(EA_UNKNOWN), gcInfo.gcVarPtrSetCur,
+ gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur,
+ BAD_IL_OFFSET, // IL offset
+ callTarget, // ireg
+ REG_NA, 0, 0, // xreg, xmul, disp
+ false, // isJump
+ emitter::emitNoGChelper(helper));
+
+ regTracker.rsTrashRegSet(killMask);
+ regTracker.rsTrashRegsForGCInterruptability();
+}
+
+#if !defined(_TARGET_64BIT_)
+//-----------------------------------------------------------------------------
+//
+// Code Generation for Long integers
+//
+//-----------------------------------------------------------------------------
+
+//------------------------------------------------------------------------
+// genStoreLongLclVar: Generate code to store a non-enregistered long lclVar
+//
+// Arguments:
+// treeNode - A TYP_LONG lclVar node.
+//
+// Return Value:
+// None.
+//
+// Assumptions:
+// 'treeNode' must be a TYP_LONG lclVar node for a lclVar that has NOT been promoted.
+// Its operand must be a GT_LONG node.
+//
+void CodeGen::genStoreLongLclVar(GenTree* treeNode)
+{
+ emitter* emit = getEmitter();
+
+ GenTreeLclVarCommon* lclNode = treeNode->AsLclVarCommon();
+ unsigned lclNum = lclNode->gtLclNum;
+ LclVarDsc* varDsc = &(compiler->lvaTable[lclNum]);
+ assert(varDsc->TypeGet() == TYP_LONG);
+ assert(!varDsc->lvPromoted);
+ GenTreePtr op1 = treeNode->gtOp.gtOp1;
+ noway_assert(op1->OperGet() == GT_LONG);
+ genConsumeRegs(op1);
+
+ // Definitions of register candidates will have been lowered to 2 int lclVars.
+ assert(!treeNode->InReg());
+
+ GenTreePtr loVal = op1->gtGetOp1();
+ GenTreePtr hiVal = op1->gtGetOp2();
+ // NYI: Contained immediates.
+ NYI_IF((loVal->gtRegNum == REG_NA) || (hiVal->gtRegNum == REG_NA), "Store of long lclVar with contained immediate");
+ emit->emitIns_R_S(ins_Store(TYP_INT), EA_4BYTE, loVal->gtRegNum, lclNum, 0);
+ emit->emitIns_R_S(ins_Store(TYP_INT), EA_4BYTE, hiVal->gtRegNum, lclNum, genTypeSize(TYP_INT));
+}
+#endif // !defined(_TARGET_64BIT_)
+
+/*****************************************************************************
+* Unit testing of the XArch emitter: generate a bunch of instructions into the prolog
+* (it's as good a place as any), then use COMPlus_JitLateDisasm=* to see if the late
+* disassembler thinks the instructions are the same as we do.
+*/
+
+// Uncomment "#define ALL_XARCH_EMITTER_UNIT_TESTS" to run all the unit tests here.
+// After adding a unit test, and verifying it works, put it under this #ifdef, so we don't see it run every time.
+//#define ALL_XARCH_EMITTER_UNIT_TESTS
+
+#if defined(DEBUG) && defined(LATE_DISASM) && defined(_TARGET_AMD64_)
+void CodeGen::genAmd64EmitterUnitTests()
+{
+ if (!verbose)
+ {
+ return;
+ }
+
+ if (!compiler->opts.altJit)
+ {
+ // No point doing this in a "real" JIT.
+ return;
+ }
+
+ // Mark the "fake" instructions in the output.
+ printf("*************** In genAmd64EmitterUnitTests()\n");
+
+ // We use this:
+ // genDefineTempLabel(genCreateTempLabel());
+ // to create artificial labels to help separate groups of tests.
+
+ //
+ // Loads
+ //
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef ALL_XARCH_EMITTER_UNIT_TESTS
+#ifdef FEATURE_AVX_SUPPORT
+ genDefineTempLabel(genCreateTempLabel());
+
+ // vhaddpd ymm0,ymm1,ymm2
+ getEmitter()->emitIns_R_R_R(INS_haddpd, EA_32BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
+ // vaddss xmm0,xmm1,xmm2
+ getEmitter()->emitIns_R_R_R(INS_addss, EA_4BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
+ // vaddsd xmm0,xmm1,xmm2
+ getEmitter()->emitIns_R_R_R(INS_addsd, EA_8BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
+ // vaddps xmm0,xmm1,xmm2
+ getEmitter()->emitIns_R_R_R(INS_addps, EA_16BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
+ // vaddps ymm0,ymm1,ymm2
+ getEmitter()->emitIns_R_R_R(INS_addps, EA_32BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
+ // vaddpd xmm0,xmm1,xmm2
+ getEmitter()->emitIns_R_R_R(INS_addpd, EA_16BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
+ // vaddpd ymm0,ymm1,ymm2
+ getEmitter()->emitIns_R_R_R(INS_addpd, EA_32BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
+ // vsubss xmm0,xmm1,xmm2
+ getEmitter()->emitIns_R_R_R(INS_subss, EA_4BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
+ // vsubsd xmm0,xmm1,xmm2
+ getEmitter()->emitIns_R_R_R(INS_subsd, EA_8BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
+    // vsubps xmm0,xmm1,xmm2
+ getEmitter()->emitIns_R_R_R(INS_subps, EA_16BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
+ // vsubps ymm0,ymm1,ymm2
+ getEmitter()->emitIns_R_R_R(INS_subps, EA_32BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
+ // vsubpd xmm0,xmm1,xmm2
+ getEmitter()->emitIns_R_R_R(INS_subpd, EA_16BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
+ // vsubpd ymm0,ymm1,ymm2
+ getEmitter()->emitIns_R_R_R(INS_subpd, EA_32BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
+ // vmulss xmm0,xmm1,xmm2
+ getEmitter()->emitIns_R_R_R(INS_mulss, EA_4BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
+ // vmulsd xmm0,xmm1,xmm2
+ getEmitter()->emitIns_R_R_R(INS_mulsd, EA_8BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
+ // vmulps xmm0,xmm1,xmm2
+ getEmitter()->emitIns_R_R_R(INS_mulps, EA_16BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
+ // vmulpd xmm0,xmm1,xmm2
+ getEmitter()->emitIns_R_R_R(INS_mulpd, EA_16BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
+ // vmulps ymm0,ymm1,ymm2
+ getEmitter()->emitIns_R_R_R(INS_mulps, EA_32BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
+ // vmulpd ymm0,ymm1,ymm2
+ getEmitter()->emitIns_R_R_R(INS_mulpd, EA_32BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
+ // vandps xmm0,xmm1,xmm2
+ getEmitter()->emitIns_R_R_R(INS_andps, EA_16BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
+ // vandpd xmm0,xmm1,xmm2
+ getEmitter()->emitIns_R_R_R(INS_andpd, EA_16BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
+ // vandps ymm0,ymm1,ymm2
+ getEmitter()->emitIns_R_R_R(INS_andps, EA_32BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
+ // vandpd ymm0,ymm1,ymm2
+ getEmitter()->emitIns_R_R_R(INS_andpd, EA_32BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
+ // vorps xmm0,xmm1,xmm2
+ getEmitter()->emitIns_R_R_R(INS_orps, EA_16BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
+ // vorpd xmm0,xmm1,xmm2
+ getEmitter()->emitIns_R_R_R(INS_orpd, EA_16BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
+ // vorps ymm0,ymm1,ymm2
+ getEmitter()->emitIns_R_R_R(INS_orps, EA_32BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
+ // vorpd ymm0,ymm1,ymm2
+ getEmitter()->emitIns_R_R_R(INS_orpd, EA_32BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
+ // vdivss xmm0,xmm1,xmm2
+ getEmitter()->emitIns_R_R_R(INS_divss, EA_4BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
+ // vdivsd xmm0,xmm1,xmm2
+ getEmitter()->emitIns_R_R_R(INS_divsd, EA_8BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
+ // vdivss xmm0,xmm1,xmm2
+ getEmitter()->emitIns_R_R_R(INS_divss, EA_4BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
+ // vdivsd xmm0,xmm1,xmm2
+ getEmitter()->emitIns_R_R_R(INS_divsd, EA_8BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
+
+    // vcvtss2sd xmm0,xmm1,xmm2
+ getEmitter()->emitIns_R_R_R(INS_cvtss2sd, EA_4BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
+    // vcvtsd2ss xmm0,xmm1,xmm2
+ getEmitter()->emitIns_R_R_R(INS_cvtsd2ss, EA_8BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
+#endif // FEATURE_AVX_SUPPORT
+#endif // ALL_XARCH_EMITTER_UNIT_TESTS
+ printf("*************** End of genAmd64EmitterUnitTests()\n");
+}
+
+#endif // defined(DEBUG) && defined(LATE_DISASM) && defined(_TARGET_AMD64_)
+
+/*****************************************************************************/
+#ifdef DEBUGGING_SUPPORT
+/*****************************************************************************
+ * genSetScopeInfo
+ *
+ * Called by the main genSetScopeInfo() for every scope-info piece to be recorded.
+ */
+
+void CodeGen::genSetScopeInfo(unsigned which,
+ UNATIVE_OFFSET startOffs,
+ UNATIVE_OFFSET length,
+ unsigned varNum,
+ unsigned LVnum,
+ bool avail,
+ Compiler::siVarLoc& varLoc)
+{
+ /* We need to do some mapping while reporting back these variables */
+
+ unsigned ilVarNum = compiler->compMap2ILvarNum(varNum);
+ noway_assert((int)ilVarNum != ICorDebugInfo::UNKNOWN_ILNUM);
+
+ VarName name = nullptr;
+
+#ifdef DEBUG
+
+ for (unsigned scopeNum = 0; scopeNum < compiler->info.compVarScopesCount; scopeNum++)
+ {
+ if (LVnum == compiler->info.compVarScopes[scopeNum].vsdLVnum)
+ {
+ name = compiler->info.compVarScopes[scopeNum].vsdName;
+ }
+ }
+
+    // Hang on to this info.
+
+ TrnslLocalVarInfo& tlvi = genTrnslLocalVarInfo[which];
+
+ tlvi.tlviVarNum = ilVarNum;
+ tlvi.tlviLVnum = LVnum;
+ tlvi.tlviName = name;
+ tlvi.tlviStartPC = startOffs;
+ tlvi.tlviLength = length;
+ tlvi.tlviAvailable = avail;
+ tlvi.tlviVarLoc = varLoc;
+
+#endif // DEBUG
+
+ compiler->eeSetLVinfo(which, startOffs, length, ilVarNum, LVnum, name, avail, varLoc);
+}
+#endif // DEBUGGING_SUPPORT
+
+#endif // _TARGET_AMD64_
+
+#endif // !LEGACY_BACKEND
diff --git a/src/jit/compiler.h b/src/jit/compiler.h
index 97a98291c2..455b78ff10 100644
--- a/src/jit/compiler.h
+++ b/src/jit/compiler.h
@@ -1878,7 +1878,7 @@ public:
GenTreePtr gtNewOneConNode(var_types type);
GenTreeBlk* gtNewBlkOpNode(
- genTreeOps oper, GenTreePtr dst, GenTreePtr srcOrFillVal, GenTreePtr sizeOrClsTok, bool volatil);
+ genTreeOps oper, GenTreePtr dst, GenTreePtr srcOrFillVal, GenTreePtr sizeOrClsTok, bool isVolatile);
GenTree* gtNewBlkOpNode(GenTreePtr dst, GenTreePtr srcOrFillVal, unsigned size, bool isVolatile, bool isCopyBlock);
@@ -1891,7 +1891,7 @@ public:
GenTree* gtNewStructVal(CORINFO_CLASS_HANDLE structHnd, GenTreePtr addr);
GenTree* gtNewBlockVal(GenTreePtr addr, unsigned size);
- GenTree* gtNewCpObjNode(GenTreePtr dst, GenTreePtr src, CORINFO_CLASS_HANDLE structHnd, bool volatil);
+ GenTree* gtNewCpObjNode(GenTreePtr dst, GenTreePtr src, CORINFO_CLASS_HANDLE structHnd, bool isVolatile);
GenTreeArgList* gtNewListNode(GenTreePtr op1, GenTreeArgList* op2);
diff --git a/src/jit/gentree.cpp b/src/jit/gentree.cpp
index 25958bcf74..d60e9c59b0 100644
--- a/src/jit/gentree.cpp
+++ b/src/jit/gentree.cpp
@@ -7165,10 +7165,6 @@ GenTree* Compiler::gtNewBlkOpNode(
if (isCopyBlock)
{
srcOrFillVal->gtFlags |= GTF_DONT_CSE;
- if (srcOrFillVal->OperIsIndir() && (srcOrFillVal->gtGetOp1()->gtOper == GT_ADDR))
- {
- srcOrFillVal = srcOrFillVal->gtGetOp1()->gtGetOp1();
- }
}
GenTree* result = gtNewAssignNode(dst, srcOrFillVal);
diff --git a/src/jit/gentree.h b/src/jit/gentree.h
index 6f74a3678e..8120aa8209 100644
--- a/src/jit/gentree.h
+++ b/src/jit/gentree.h
@@ -1453,11 +1453,6 @@ public:
case GT_LIST:
case GT_INTRINSIC:
case GT_LEA:
- case GT_STOREIND:
- case GT_BLK:
- case GT_OBJ:
- case GT_STORE_BLK:
- case GT_STORE_OBJ:
#ifdef FEATURE_SIMD
case GT_SIMD:
#endif // !FEATURE_SIMD
@@ -4009,10 +4004,6 @@ struct GenTreeObj : public GenTreeBlk
// Let's assert it just to be safe.
noway_assert(roundUp(gtBlkSize, REGSIZE_BYTES) == gtBlkSize);
}
- else
- {
- // ChangeOper(GT_BLK);
- }
}
void CopyGCInfo(GenTreeObj* srcObj)
@@ -5059,7 +5050,7 @@ inline var_types& GenTree::CastToType()
// Returns true iff the object being copied contains one or more GC pointers.
//
// Notes:
-// Of the block ops only GT_COPYOBJ is allowed to have GC pointers.
+// Of the block nodes, only GT_OBJ and GT_STORE_OBJ are allowed to have GC pointers.
//
inline bool GenTreeBlk::HasGCPtr()
{
diff --git a/src/jit/gtlist.h b/src/jit/gtlist.h
index bc8e65aced..a03bcfe4b0 100644
--- a/src/jit/gtlist.h
+++ b/src/jit/gtlist.h
@@ -75,9 +75,9 @@ GTNODE(STOREIND , "storeIndir" ,0,GTK_BINOP|GTK_NOVALUE) // store ind
// TODO-Cleanup: GT_ARR_BOUNDS_CHECK should be made a GTK_BINOP now that it has only two child nodes
GTNODE(ARR_BOUNDS_CHECK , "arrBndsChk" ,0,GTK_SPECIAL|GTK_NOVALUE) // array bounds check
-GTNODE(OBJ , "obj" ,0,GTK_BINOP|GTK_EXOP) // Object that MAY have gc pointers, and thus includes the relevant gc layout info.
+GTNODE(OBJ , "obj" ,0,GTK_UNOP|GTK_EXOP) // Object that MAY have gc pointers, and thus includes the relevant gc layout info.
GTNODE(STORE_OBJ , "storeObj" ,0,GTK_BINOP|GTK_EXOP|GTK_NOVALUE) // Object that MAY have gc pointers, and thus includes the relevant gc layout info.
-GTNODE(BLK , "blk" ,0,GTK_BINOP) // Block/object with no gc pointers, and with a known size (e.g. a struct with no gc fields)
+GTNODE(BLK , "blk" ,0,GTK_UNOP) // Block/object with no gc pointers, and with a known size (e.g. a struct with no gc fields)
GTNODE(STORE_BLK , "storeBlk" ,0,GTK_BINOP|GTK_NOVALUE) // Block/object with no gc pointers, and with a known size (e.g. a struct with no gc fields)
GTNODE(DYN_BLK , "DynBlk" ,0,GTK_SPECIAL) // Dynamically sized block object
GTNODE(STORE_DYN_BLK , "storeDynBlk" ,0,GTK_SPECIAL|GTK_NOVALUE) // Dynamically sized block object
diff --git a/src/jit/optimizer.cpp b/src/jit/optimizer.cpp
index 98ab2bc7a2..0fbdb27770 100644
--- a/src/jit/optimizer.cpp
+++ b/src/jit/optimizer.cpp
@@ -6943,7 +6943,6 @@ void Compiler::optComputeLoopSideEffectsOfBlock(BasicBlock* blk)
if (!tree->DefinesLocal(this, &lclVarTree, &isEntire))
{
// For now, assume arbitrary side effects on the heap...
- // JBTODO [ddetlefs, 11/2012] Why not be complete, and get this case right?
heapHavoc = true;
}
}
diff --git a/src/jit/rationalize.cpp b/src/jit/rationalize.cpp
index d17a0d912f..da4d6a6c51 100644
--- a/src/jit/rationalize.cpp
+++ b/src/jit/rationalize.cpp
@@ -157,7 +157,7 @@ void Compiler::fgFixupArgTabEntryPtr(GenTreePtr parentCall, GenTreePtr oldArg, G
}
}
-// Rewrite a non-TYP_STRUCT indirection as GT_IND(GT_LEA(obj.op1)), or as a simple
+// Rewrite a SIMD indirection as GT_IND(GT_LEA(obj.op1)), or as a simple
// lclVar if possible.
//
// Arguments: